From 47c35e6dbe8348425dd48dd156679bc487671cc6 Mon Sep 17 00:00:00 2001
From: Jonathan Wilkes <jon.w.wilkes@gmail.com>
Date: Wed, 3 Feb 2016 22:51:45 -0500
Subject: [PATCH] first draft of a search engine (powered by elasticlunr).
 We're just using the release file for elasticlunr here. It might make more
 sense to do it as a gitsubmodule.

---
 pd/nw/dialog_search.html |  205 +++
 pd/nw/elasticlunr.js     | 2095 ++++++++++++++++++++++++++++++++++++++
 pd/nw/index.js           |    4 +-
 pd/nw/pd_canvas.js       |    4 +-
 pd/nw/pd_menus.js        |    4 +-
 pd/nw/pdgui.js           |   13 +
 6 files changed, 2317 insertions(+), 8 deletions(-)
 create mode 100644 pd/nw/dialog_search.html
 create mode 100644 pd/nw/elasticlunr.js

diff --git a/pd/nw/dialog_search.html b/pd/nw/dialog_search.html
new file mode 100644
index 000000000..8820053bb
--- /dev/null
+++ b/pd/nw/dialog_search.html
@@ -0,0 +1,205 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Pd Search Engine</title>
+    <script type="text/javascript" src="./elasticlunr.js"></script>
+    <script>
+"use strict";
+var pdgui = require("./pdgui.js");
+var fs = require("fs");
+var path = require("path");
+var l = pdgui.get_local_string;
+
+// Full-text index over the Pd help patches, one document per ".pd" file.
+var index = elasticlunr();
+index.addField("title");
+index.addField("keywords");
+index.addField("description");
+index.addField("path");
+index.addField("body");
+index.setRef("id");
+var doc_id = 0;
+// Number of read_file() callbacks that have fired in the current build.
+var files_done = 0;
+
+// Stop-gap translator: localize each element that has a data-i18n
+// attribute. A "[title]" prefix selects the tooltip instead of the text.
+function translate_form() {
+    var i;
+    var elements = document.querySelectorAll("[data-i18n]");
+    for (i = 0; i < elements.length; i++) {
+        var data = elements[i].dataset.i18n;
+        if (data.slice(0,7) === "[title]") {
+            elements[i].title = l(data.slice(7));
+        } else {
+            elements[i].textContent = l(data);
+        }
+    }
+}
+
+// Add one help patch to the index. `filename` is the path of the patch
+// and `data` is its text content.
+function add_doc_to_index(filename, data) {
+    var title = path.basename(filename, ".pd"),
+        // replace() with a string pattern only changes the first match;
+        // a global regex flattens every line break (CRLF included).
+        big_line = data.replace(/\r?\n/g, " "),
+        keywords,
+        desc;
+    // We use [\s\S] to match across multiple lines...
+    keywords = big_line
+        .match(/#X text [0-9]+ [0-9]+ KEYWORDS ([\s\S]*?);/);
+    desc = big_line
+        .match(/#X text [0-9]+ [0-9]+ DESCRIPTION ([\s\S]*?);/);
+    keywords = keywords && keywords.length > 1 ? keywords[1].trim() : null;
+    desc = desc && desc.length > 1 ? desc[1].trim() : null;
+    if (title.slice(-5) === "-help") {
+        title = title.slice(0, -5);
+    }
+    index.addDoc({
+        "id": doc_id++,
+        "title": title,
+        "keywords": keywords,
+        "description": desc,
+        "body": big_line,
+        "path": filename
+    });
+}
+
+// Clear the "Building Index..." status and enable the search box.
+function finish_indexing() {
+    document.getElementById("results").textContent = "";
+    document.getElementById("search_text").disabled = false;
+}
+
+// Read one help patch asynchronously and add it to the index. `len` is
+// the number of files being read in this build. `i` is the position of
+// this file; it is kept for call compatibility and is not used.
+function read_file(filename, len, i) {
+    fs.readFile(filename, { encoding: "utf8", flag: "r" },
+        function(err, data) {
+            if (!err) {
+                add_doc_to_index(filename, data);
+            } else {
+                pdgui.post("err: " + err);
+            }
+            // Reads may complete in any order, so count completions
+            // rather than waiting for the file with the last position.
+            files_done++;
+            if (files_done === len) {
+                pdgui.post("Probably about finished...");
+                finish_indexing();
+            }
+        });
+}
+
+// Index every ".pd" file in the reference documentation directory.
+function build_index() {
+    var doc_path = path.join(pdgui.get_gui_dir(), "doc", "5.reference");
+    pdgui.post("doc path is " + doc_path);
+    fs.readdir(doc_path, function(err, files) {
+        var i, len, pd_files;
+        // `files` is undefined when readdir fails, so check err first.
+        if (err) {
+            pdgui.post("err: " + err);
+            finish_indexing();
+            return;
+        }
+        // Count only the files that are actually read; otherwise a
+        // trailing non-".pd" entry would leave the search box disabled.
+        pd_files = files.filter(function(name) {
+            return name.slice(-3) === ".pd";
+        });
+        len = pd_files.length;
+        pdgui.post("len is " + len);
+        files_done = 0;
+        if (len === 0) {
+            finish_indexing();
+            return;
+        }
+        for (i = 0; i < len; i++) {
+            read_file(path.join(doc_path, pd_files[i]), len, i);
+        }
+    });
+}
+
+// Dialog set-up hook (presumably invoked by pdgui when the window is
+// created; confirm against pdgui.js). `id` and `attrs` are unused here.
+function register_window_id(id, attrs) {
+    translate_form();
+    document.getElementById("results").textContent = "Building Index...";
+    document.getElementById("search_text").disabled = true;
+    build_index();
+}
+
+// Create the link for one search result. The path reaches the handler
+// through a closure instead of a "javascript:" URL, so quotes and
+// backslashes in the path cannot break or inject into generated code.
+function make_result_link(doc) {
+    var a = document.createElement("a");
+    a.href = "#";
+    a.textContent = doc.title;
+    a.addEventListener("click", function(evt) {
+        evt.preventDefault();
+        pdgui.doc_open(path.dirname(doc.path), path.basename(doc.path));
+    });
+    return a;
+}
+
+// Run the query typed in the search box and render the matches.
+function doc_search() {
+    var text_elem = document.getElementById("search_text"),
+        results_elem = document.getElementById("results"),
+        search_text = text_elem.value,
+        results,
+        doc,
+        i,
+        header,
+        div;
+    results_elem.innerHTML = "";
+    text_elem.blur();
+    results = index.search(search_text);
+    for (i = 0; i < results.length; i++) {
+        doc = index.documentStore.getDoc(results[i].ref);
+        div = document.createElement("div");
+        header = document.createElement("h3");
+        header.appendChild(make_result_link(doc));
+        div.appendChild(header);
+        // description is null when the patch has no DESCRIPTION comment,
+        // and createTextNode(null) would show the literal text "null".
+        div.appendChild(document.createTextNode(doc.description || ""));
+        results_elem.appendChild(div);
+    }
+    if (results.length === 0) {
+        results_elem.textContent = "No Results Found.";
+    }
+}
+
+// Any key pressed outside the search box moves focus to the search box.
+window.onload = function() {
+    document.body.addEventListener("keydown", function(evt) {
+        var input_elem = document.getElementById("search_text");
+        if (evt.target !== input_elem) {
+            pdgui.post("somewhere outside the input");
+            input_elem.focus();
+        } else {
+            // If we want to trigger a search on each keystroke we can do it
+            // here.
+            pdgui.post("key inside the input");
+        }
+    });
+};
+    </script>
+  </head>
+  <body>
+    <h1>Search for Pd Objects</h1>
+    <form id="search_form" action="javascript:doc_search();">
+      <input type="search"
+             name="search_text"
+             id="search_text"
+             placeholder="Search Pd Docs">
+    </form>
+    <div id="results">
+    </div>
+  </body>
+</html>
diff --git a/pd/nw/elasticlunr.js b/pd/nw/elasticlunr.js
new file mode 100644
index 000000000..eb782f11d
--- /dev/null
+++ b/pd/nw/elasticlunr.js
@@ -0,0 +1,2095 @@
+/**
+ * elasticlunr - http://weixsong.github.io
+ * Lightweight full-text search engine in Javascript for browser search and offline search. - 0.8.5
+ *
+ * Copyright (C) 2016 Oliver Nightingale
+ * Copyright (C) 2016 Wei Song
+ * MIT Licensed
+ * @license
+ */
+
+(function(){
+
+/*!
+ * elasticlunr.js
+ * Copyright (C) 2016 Oliver Nightingale
+ * Copyright (C) 2016 Wei Song
+ */
+
+/**
+ * Convenience function for instantiating a new elasticlunr index and configuring it
+ * with the default pipeline functions and the passed config function.
+ * + * When using this convenience function a new index will be created with the + * following functions already in the pipeline: + * + * 1. elasticlunr.trimmer - trim non-word character + * 2. elasticlunr.StopWordFilter - filters out any stop words before they enter the + * index + * 3. elasticlunr.stemmer - stems the tokens before entering the index. + * + * + * Example: + * + * var idx = elasticlunr(function () { + * this.addField('id'); + * this.addField('title'); + * this.addField('body'); + * + * //this.setRef('id'); // default ref is 'id' + * + * this.pipeline.add(function () { + * // some custom pipeline function + * }); + * }); + * + * idx.addDoc({ + * id: 1, + * title: 'Oracle released database 12g', + * body: 'Yestaday, Oracle has released their latest database, named 12g, more robust. this product will increase Oracle profit.' + * }); + * + * idx.addDoc({ + * id: 2, + * title: 'Oracle released annual profit report', + * body: 'Yestaday, Oracle has released their annual profit report of 2015, total profit is 12.5 Billion.' + * }); + * + * # simple search + * idx.search('oracle database'); + * + * # search with query-time boosting + * idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}}); + * + * @param {Function} config A function that will be called with the new instance + * of the elasticlunr.Index as both its context and first parameter. It can be used to + * customize the instance of new elasticlunr.Index. + * @namespace + * @module + * @return {elasticlunr.Index} + * + */ +var elasticlunr = function (config) { + var idx = new elasticlunr.Index; + + idx.pipeline.add( + elasticlunr.trimmer, + elasticlunr.stopWordFilter, + elasticlunr.stemmer + ); + + if (config) config.call(idx, idx); + + return idx; +}; + +elasticlunr.version = "0.8.5"; +/*! 
+ * elasticlunr.utils + * Copyright (C) 2016 Oliver Nightingale + * Copyright (C) 2016 Wei Song + */ + +/** + * A namespace containing utils for the rest of the elasticlunr library + */ +elasticlunr.utils = {}; + +/** + * Print a warning message to the console. + * + * @param {String} message The message to be printed. + * @memberOf Utils + */ +elasticlunr.utils.warn = (function (global) { + return function (message) { + if (global.console && console.warn) { + console.warn(message); + } + }; +})(this); + +/** + * Convert an object to string. + * + * In the case of `null` and `undefined` the function returns + * an empty string, in all other cases the result of calling + * `toString` on the passed object is returned. + * + * @param {object} obj The object to convert to a string. + * @return {String} string representation of the passed object. + * @memberOf Utils + */ +elasticlunr.utils.toString = function (obj) { + if (obj === void 0 || obj === null) { + return ""; + } + + return obj.toString(); +} +/*! + * elasticlunr.EventEmitter + * Copyright (C) 2016 Oliver Nightingale + * Copyright (C) 2016 Wei Song + */ + +/** + * elasticlunr.EventEmitter is an event emitter for elasticlunr. It manages adding and removing event handlers and triggering events and their handlers. + * + * Each event could has multiple corresponding functions, these functions will be called as the sequence that they are added into the event. + * + * @constructor + */ +elasticlunr.EventEmitter = function () { + this.events = {}; +}; + +/** + * Binds a handler function to a specific event(s). + * + * Can bind a single function to many different events in one call. + * + * @param {String} [eventName] The name(s) of events to bind this function to. + * @param {Function} fn The function to call when an event is fired. 
+ * @memberOf EventEmitter + */ +elasticlunr.EventEmitter.prototype.addListener = function () { + var args = Array.prototype.slice.call(arguments), + fn = args.pop(), + names = args; + + if (typeof fn !== "function") throw new TypeError ("last argument must be a function"); + + names.forEach(function (name) { + if (!this.hasHandler(name)) this.events[name] = []; + this.events[name].push(fn); + }, this); +}; + +/** + * Removes a handler function from a specific event. + * + * @param {String} eventName The name of the event to remove this function from. + * @param {Function} fn The function to remove from an event. + * @memberOf EventEmitter + */ +elasticlunr.EventEmitter.prototype.removeListener = function (name, fn) { + if (!this.hasHandler(name)) return; + + var fnIndex = this.events[name].indexOf(fn); + if (fnIndex == -1) return; + + this.events[name].splice(fnIndex, 1); + + if (this.events[name].length == 0) delete this.events[name]; +}; + +/** + * Calls all functions bound to the given event. + * + * Additional data can be passed to the event handler as arguments to `emit` + * after the event name. + * + * @param {String} eventName The name of the event to emit. + * @memberOf EventEmitter + */ +elasticlunr.EventEmitter.prototype.emit = function (name) { + if (!this.hasHandler(name)) return; + + var args = Array.prototype.slice.call(arguments, 1); + + this.events[name].forEach(function (fn) { + fn.apply(undefined, args); + }); +}; + +/** + * Checks whether a handler has ever been stored against an event. + * + * @param {String} eventName The name of the event to check. + * @private + * @memberOf EventEmitter + */ +elasticlunr.EventEmitter.prototype.hasHandler = function (name) { + return name in this.events; +}; +/*! + * elasticlunr.tokenizer + * Copyright (C) 2016 Oliver Nightingale + * Copyright (C) 2016 Wei Song + */ + +/** + * A function for splitting a string into tokens. + * Currently English is support as default. 
+ * Uses `elasticlunr.tokenizer.seperator` to split strings, you could change + * the value of this property to set how you want strings are split into tokens. + * IMPORTANT: use elasticlunr.tokenizer.seperator carefully, if you are not familiar with + * text process, then you'd better not change it. + * + * @module + * @param {String} str The string that you want to tokenize. + * @see elasticlunr.tokenizer.seperator + * @return {Array} + */ +elasticlunr.tokenizer = function (obj) { + if (!arguments.length || obj == null || obj == undefined) return []; + if (Array.isArray(obj)) { + return obj.map(function (t) { + return elasticlunr.utils.toString(t).toLowerCase(); + }); + } + + return obj.toString().trim().toLowerCase().split(elasticlunr.tokenizer.seperator); +}; + +/** + * The sperator used to split a string into tokens. Override this property to change the behaviour of + * `elasticlunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens. + * + * @static + * @see elasticlunr.tokenizer + */ +elasticlunr.tokenizer.seperator = /[\s\-]+/ +/*! + * elasticlunr.Pipeline + * Copyright (C) 2016 Oliver Nightingale + * Copyright (C) 2016 Wei Song + */ + +/** + * elasticlunr.Pipelines maintain an ordered list of functions to be applied to + * both documents tokens and query tokens. + * + * An instance of elasticlunr.Index created with the elasticlunr shortcut will contain a + * pipeline with a trimmer, a stop word filter, an English language stemmer. Extra + * functions can be added before or after either of these functions or these + * default functions can be removed. + * + * When run the pipeline will call each function in turn, passing a token, the + * index of that token in the original list of all tokens and finally a list of + * all the original tokens. + * + * The output of functions in the pipeline will be passed to the next function + * in the pipeline. 
To exclude a token from entering the index the function + * should return undefined, the rest of the pipeline will not be called with + * this token. + * + * For serialisation of pipelines to work, all functions used in an instance of + * a pipeline should be registered with elasticlunr.Pipeline. Registered functions can + * then be loaded. If trying to load a serialised pipeline that uses functions + * that are not registered an error will be thrown. + * + * If not planning on serialising the pipeline then registering pipeline functions + * is not necessary. + * + * @constructor + */ +elasticlunr.Pipeline = function () { + this._queue = []; +}; + +elasticlunr.Pipeline.registeredFunctions = {}; + +/** + * Register a function with the pipeline. + * + * Functions that are used in the pipeline should be registered if the pipeline + * needs to be serialised, or a serialised pipeline needs to be loaded. + * + * Registering a function does not add it to a pipeline, functions must still be + * added to instances of the pipeline for them to be used when running a pipeline. + * + * @param {Function} fn The function to check for. + * @param {String} label The label to register this function with + * @memberOf Pipeline + */ +elasticlunr.Pipeline.registerFunction = function (fn, label) { + if (label in this.registeredFunctions) { + elasticlunr.utils.warn('Overwriting existing registered function: ' + label); + } + + fn.label = label; + elasticlunr.Pipeline.registeredFunctions[label] = fn; +}; + +/** + * Warns if the function is not registered as a Pipeline function. + * + * @param {Function} fn The function to check for. + * @private + * @memberOf Pipeline + */ +elasticlunr.Pipeline.warnIfFunctionNotRegistered = function (fn) { + var isRegistered = fn.label && (fn.label in this.registeredFunctions); + + if (!isRegistered) { + elasticlunr.utils.warn('Function is not registered with pipeline. 
This may cause problems when serialising the index.\n', fn); + } +}; + +/** + * Loads a previously serialised pipeline. + * + * All functions to be loaded must already be registered with elasticlunr.Pipeline. + * If any function from the serialised data has not been registered then an + * error will be thrown. + * + * @param {Object} serialised The serialised pipeline to load. + * @return {elasticlunr.Pipeline} + * @memberOf Pipeline + */ +elasticlunr.Pipeline.load = function (serialised) { + var pipeline = new elasticlunr.Pipeline; + + serialised.forEach(function (fnName) { + var fn = elasticlunr.Pipeline.registeredFunctions[fnName]; + + if (fn) { + pipeline.add(fn); + } else { + throw new Error('Cannot load un-registered function: ' + fnName); + } + }); + + return pipeline; +}; + +/** + * Adds new functions to the end of the pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {Function} functions Any number of functions to add to the pipeline. + * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.add = function () { + var fns = Array.prototype.slice.call(arguments); + + fns.forEach(function (fn) { + elasticlunr.Pipeline.warnIfFunctionNotRegistered(fn); + this._queue.push(fn); + }, this); +}; + +/** + * Adds a single function after a function that already exists in the + * pipeline. + * + * Logs a warning if the function has not been registered. + * If existingFn is not found, throw an Exception. + * + * @param {Function} existingFn A function that already exists in the pipeline. + * @param {Function} newFn The new function to add to the pipeline. 
+ * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.after = function (existingFn, newFn) { + elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn); + + var pos = this._queue.indexOf(existingFn); + if (pos == -1) { + throw new Error('Cannot find existingFn'); + } + + pos = pos + 1; + this._queue.splice(pos, 0, newFn); +}; + +/** + * Adds a single function before a function that already exists in the + * pipeline. + * + * Logs a warning if the function has not been registered. + * If existingFn is not found, throw an Exception. + * + * @param {Function} existingFn A function that already exists in the pipeline. + * @param {Function} newFn The new function to add to the pipeline. + * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.before = function (existingFn, newFn) { + elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn); + + var pos = this._queue.indexOf(existingFn); + if (pos == -1) { + throw new Error('Cannot find existingFn'); + } + + this._queue.splice(pos, 0, newFn); +}; + +/** + * Removes a function from the pipeline. + * + * @param {Function} fn The function to remove from the pipeline. + * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.remove = function (fn) { + var pos = this._queue.indexOf(fn); + if (pos == -1) { + return; + } + + this._queue.splice(pos, 1); +}; + +/** + * Runs the current list of functions that make up the pipeline against the + * input tokens. + * + * @param {Array} tokens The tokens to run through the pipeline. 
+ * @return {Array} + * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.run = function (tokens) { + var out = [], + tokenLength = tokens.length, + pipelineLength = this._queue.length; + + for (var i = 0; i < tokenLength; i++) { + var token = tokens[i]; + + for (var j = 0; j < pipelineLength; j++) { + token = this._queue[j](token, i, tokens); + if (token === void 0) break; + }; + + if (token !== void 0) out.push(token); + }; + + return out; +}; + +/** + * Resets the pipeline by removing any existing processors. + * + * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.reset = function () { + this._queue = []; +}; + +/** + * Returns a representation of the pipeline ready for serialisation. + * + * Logs a warning if the function has not been registered. + * + * @return {Array} + * @memberOf Pipeline + */ +elasticlunr.Pipeline.prototype.toJSON = function () { + return this._queue.map(function (fn) { + elasticlunr.Pipeline.warnIfFunctionNotRegistered(fn); + return fn.label; + }); +}; +/*! + * elasticlunr.Index + * Copyright (C) 2016 Oliver Nightingale + * Copyright (C) 2016 Wei Song + */ + +/** + * elasticlunr.Index is object that manages a search index. It contains the indexes + * and stores all the tokens and document lookups. It also provides the main + * user facing API for the library. + * + * @constructor + */ +elasticlunr.Index = function () { + this._fields = []; + this._ref = 'id'; + this.pipeline = new elasticlunr.Pipeline; + this.documentStore = new elasticlunr.DocumentStore; + this.index = {}; + this.eventEmitter = new elasticlunr.EventEmitter; + this._idfCache = {}; + + this.on('add', 'remove', 'update', (function () { + this._idfCache = {}; + }).bind(this)); +}; + +/** + * Bind a handler to events being emitted by the index. + * + * The handler can be bound to many events at the same time. + * + * @param {String} [eventName] The name(s) of events to bind the function to. + * @param {Function} fn The serialised set to load. 
+ * @memberOf Index + */ +elasticlunr.Index.prototype.on = function () { + var args = Array.prototype.slice.call(arguments); + return this.eventEmitter.addListener.apply(this.eventEmitter, args); +}; + +/** + * Removes a handler from an event being emitted by the index. + * + * @param {String} eventName The name of events to remove the function from. + * @param {Function} fn The serialised set to load. + * @memberOf Index + */ +elasticlunr.Index.prototype.off = function (name, fn) { + return this.eventEmitter.removeListener(name, fn); +}; + +/** + * Loads a previously serialised index. + * + * Issues a warning if the index being imported was serialised + * by a different version of elasticlunr. + * + * @param {Object} serialisedData The serialised set to load. + * @return {elasticlunr.Index} + * @memberOf Index + */ +elasticlunr.Index.load = function (serialisedData) { + if (serialisedData.version !== elasticlunr.version) { + elasticlunr.utils.warn('version mismatch: current ' + + elasticlunr.version + ' importing ' + serialisedData.version); + } + + var idx = new this; + + idx._fields = serialisedData.fields; + idx._ref = serialisedData.ref; + idx.documentStore = elasticlunr.DocumentStore.load(serialisedData.documentStore); + idx.pipeline = elasticlunr.Pipeline.load(serialisedData.pipeline); + idx.index = {}; + for (var field in serialisedData.index) { + idx.index[field] = elasticlunr.InvertedIndex.load(serialisedData.index[field]); + } + + return idx; +}; + +/** + * Adds a field to the list of fields that will be searchable within documents in the index. + * + * Remember that inner index is build based on field, which means each field has one inverted index. + * + * Fields should be added before any documents are added to the index, fields + * that are added after documents are added to the index will only apply to new + * documents added to the index. 
+ * + * @param {String} fieldName The name of the field within the document that should be indexed + * @return {elasticlunr.Index} + * @memberOf Index + */ +elasticlunr.Index.prototype.addField = function (fieldName) { + this._fields.push(fieldName); + this.index[fieldName] = new elasticlunr.InvertedIndex; + return this; +}; + +/** + * Sets the property used to uniquely identify documents added to the index, + * by default this property is 'id'. + * + * This should only be changed before adding documents to the index, changing + * the ref property without resetting the index can lead to unexpected results. + * + * @param {String} refName The property to use to uniquely identify the + * documents in the index. + * @param {Boolean} emitEvent Whether to emit add events, defaults to true + * @return {elasticlunr.Index} + * @memberOf Index + */ +elasticlunr.Index.prototype.setRef = function (refName) { + this._ref = refName; + return this; +}; + +/** + * + * Set if the JSON format original documents are save into elasticlunr.DocumentStore + * + * Defaultly save all the original JSON documents. + * + * @param {Boolean} save Whether to save the original JSON documents. + * @return {elasticlunr.Index} + * @memberOf Index + */ +elasticlunr.Index.prototype.saveDocument = function (save) { + this.documentStore = new elasticlunr.DocumentStore(save); + return this; +}; + +/** + * Add a JSON format document to the index. + * + * This is the way new documents enter the index, this function will run the + * fields from the document through the index's pipeline and then add it to + * the index, it will then show up in search results. + * + * An 'add' event is emitted with the document that has been added and the index + * the document has been added to. This event can be silenced by passing false + * as the second argument to add. + * + * @param {Object} doc The JSON format document to add to the index. + * @param {Boolean} emitEvent Whether or not to emit events, default true. 
+ * @memberOf Index
+ */
+elasticlunr.Index.prototype.addDoc = function (doc, emitEvent) {
+  if (!doc) return;
+  var emitEvent = emitEvent === undefined ? true : emitEvent;
+
+  var docRef = doc[this._ref];
+
+  this.documentStore.addDoc(docRef, doc);
+  this._fields.forEach(function (field) {
+    var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field]));
+    this.documentStore.addFieldLength(docRef, field, fieldTokens.length);
+
+    var tokenCount = Object.create(null); // no prototype: a token such as 'constructor' must not match an inherited key
+    fieldTokens.forEach(function (token) {
+      if (token in tokenCount) tokenCount[token] += 1;
+      else tokenCount[token] = 1;
+    }, this);
+
+    for (var token in tokenCount) {
+      var termFrequency = tokenCount[token];
+      termFrequency = Math.sqrt(termFrequency); // dampen repeated terms: tf is the square root of the raw count
+      this.index[field].addToken(token, { ref: docRef, tf: termFrequency });
+    }
+  }, this);
+
+  if (emitEvent) this.eventEmitter.emit('add', doc, this);
+};
+
+/**
+ * Removes a document from the index by doc ref.
+ *
+ * To make sure documents no longer show up in search results they can be
+ * removed from the index using this method.
+ *
+ * A 'remove' event is emitted with the document that has been removed and the index
+ * the document has been removed from. This event can be silenced by passing false
+ * as the second argument to remove.
+ *
+ * If user setting DocumentStore not storing the documents, then remove doc by docRef is not allowed.
+ *
+ * @param {String|Integer} docRef The document ref to remove from the index.
+ * @param {Boolean} emitEvent Whether to emit remove events, defaults to true
+ * @memberOf Index
+ */
+elasticlunr.Index.prototype.removeDocByRef = function (docRef, emitEvent) {
+  if (docRef === void 0 || docRef === null) return; // only a missing ref is ignored; 0 is a valid ref
+  if (this.documentStore.isDocStored() == false) {
+    elasticlunr.utils.warn('remove doc by ref is not allowed, because currectly not storing documents in DocumentStore');
+    return;
+  }
+
+  if (!this.documentStore.hasDoc(docRef)) return;
+  var doc = this.documentStore.getDoc(docRef);
+  this.removeDoc(doc, emitEvent); // forward emitEvent so that passing false silences 'remove' as documented
+};
+
+/**
+ * Removes a document from the index.
+ * This remove operation could work even the original doc is not store in the DocumentStore.
+ *
+ * To make sure documents no longer show up in search results they can be
+ * removed from the index using this method.
+ *
+ * A 'remove' event is emitted with the document that has been removed and the index
+ * the document has been removed from. This event can be silenced by passing false
+ * as the second argument to remove.
+ *
+ *
+ * @param {Object} doc The document ref to remove from the index.
+ * @param {Boolean} emitEvent Whether to emit remove events, defaults to true
+ * @memberOf Index
+ */
+elasticlunr.Index.prototype.removeDoc = function (doc, emitEvent) {
+  if (!doc) return;
+
+  var emitEvent = emitEvent === undefined ? true : emitEvent;
+
+  var docRef = doc[this._ref];
+  if (!this.documentStore.hasDoc(docRef)) return;
+
+  this.documentStore.removeDoc(docRef);
+
+  this._fields.forEach(function (field) {
+    var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field]));
+    fieldTokens.forEach(function (token) {
+      this.index[field].removeToken(token, docRef);
+    }, this);
+  }, this);
+
+  if (emitEvent) this.eventEmitter.emit('remove', doc, this);
+};
+
+/**
+ * Updates a document in the index.
+ *
+ * When a document contained within the index gets updated, fields changed,
+ * added or removed, to make sure it correctly matched against search queries,
+ * it should be updated in the index.
+ * + * This method is just a wrapper around `remove` and `add` + * + * An 'update' event is emitted with the document that has been updated and the index. + * This event can be silenced by passing false as the second argument to update. Only + * an update event will be fired, the 'add' and 'remove' events of the underlying calls + * are silenced. + * + * @param {Object} doc The document to update in the index. + * @param {Boolean} emitEvent Whether to emit update events, defaults to true + * @see Index.prototype.remove + * @see Index.prototype.add + * @memberOf Index + */ +elasticlunr.Index.prototype.update = function (doc, emitEvent) { + var emitEvent = emitEvent === undefined ? true : emitEvent; + + this.removeDoc(doc, false); + this.addDoc(doc, false); + + if (emitEvent) this.eventEmitter.emit('update', doc, this); +}; + +/** + * Calculates the inverse document frequency for a token within the index of a field. + * + * @param {String} token The token to calculate the idf of. + * @param {String} field The field to compute idf. + * @see Index.prototype.idf + * @private + * @memberOf Index + */ +elasticlunr.Index.prototype.idf = function (term, field) { + var cacheKey = "@" + field + '/' + term; + if (Object.prototype.hasOwnProperty.call(this._idfCache, cacheKey)) return this._idfCache[cacheKey]; + + var df = this.index[field].getDocFreq(term); + var idf = 1 + Math.log(this.documentStore.length / (df + 1)); + this._idfCache[cacheKey] = idf; + + return idf; +}; + +/** + * get fields of current index instance + * + * @return {Array} + */ +elasticlunr.Index.prototype.getFields = function () { + return this._fields.slice(); +} + +/** + * Searches the index using the passed query. + * Queries should be a string, multiple words are allowed. + * + * If config is null, will search all fields defaultly, and lead to OR based query. + * If config is specified, will search specified with query time boosting. 
+ * + * All query tokens are passed through the same pipeline that document tokens + * are passed through, so any language processing involved will be run on every + * query term. + * + * Each query term is expanded, so that the term 'he' might be expanded to + * 'hello' and 'help' if those terms were already included in the index. + * + * Matching documents are returned as an array of objects, each object contains + * the matching document ref, as set for this index, and the similarity score + * for this document against the query. + * + * @param {String} query The query to search the index with. + * @param {JSON} userConfig The user query config, JSON format. + * @return {Object} + * @see Index.prototype.idf + * @see Index.prototype.documentVector + * @memberOf Index + */ +elasticlunr.Index.prototype.search = function (query, userConfig) { + if (!query) return []; + + var configStr = null; + if (userConfig != null) { + configStr = JSON.stringify(userConfig); + } + + var config = new elasticlunr.Configuration(configStr, this.getFields()).get(); + + var queryTokens = this.pipeline.run(elasticlunr.tokenizer(query)); + + var queryResults = {}; + var squaredWeight = this.computeSquaredWeight(queryTokens, config); + + for (var field in config) { + var fieldSearchResults = this.fieldSearch(queryTokens, field, config); + var fieldBoost = config[field].boost; + var queryNorm = 1 / Math.sqrt(1 / (fieldBoost * fieldBoost) * squaredWeight); + + for (var docRef in fieldSearchResults) { + fieldSearchResults[docRef] = fieldSearchResults[docRef] * queryNorm; + } + + for (var docRef in fieldSearchResults) { + if (docRef in queryResults) { + queryResults[docRef] += fieldSearchResults[docRef]; + } else { + queryResults[docRef] = fieldSearchResults[docRef]; + } + } + } + + var results = []; + for (var docRef in queryResults) { + results.push({ref: docRef, score: queryResults[docRef]}); + } + + results.sort(function (a, b) { return b.score - a.score; }); + return results; +}; + +/** 
+ * search queryTokens in specified field.
+ *
+ * @param {Array} queryTokens The query tokens to query in this field.
+ * @param {String} fieldName Field to query in.
+ * @param {elasticlunr.Configuration} config The user query config, JSON format.
+ * @return {Object}
+ */
+elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) {
+  var booleanType = config[fieldName].bool;
+  var expand = config[fieldName].expand;
+  var scores = {};
+  var docTokens = {};
+
+  queryTokens.forEach(function (token) {
+    var tokens = [token];
+    if (expand == true) {
+      tokens = this.index[fieldName].expandToken(token);
+    }
+
+    tokens.forEach(function (key) {
+      var docs = this.index[fieldName].getDocs(key);
+      var idf = this.idf(key, fieldName);
+      // only record appeared token for retrieved documents for the
+      // original token, not for expanded token.
+      // because for doing coordNorm for a retrieved document, coordNorm only care how many
+      // query token appear in that document.
+      // so expanded token should not be added into docTokens, if added, this will pollute the
+      // coordNorm
+      // recorded once per token, outside the doc loop: fieldSearchStats already visits every doc in docs
+      if (key == token) this.fieldSearchStats(docTokens, key, docs);
+      for (var docRef in docs) {
+        var tf = this.index[fieldName].getTermFrequency(key, docRef);
+        var fieldLength = this.documentStore.getFieldLength(docRef, fieldName);
+        var norm = 1;
+        if (fieldLength != 0) {
+          norm = 1 / Math.sqrt(fieldLength);
+        }
+
+        var penality = 1;
+        if (key != token) {
+          // currently I'm not sure if this penality is enough,
+          // need to do verification
+          penality = (1 - (key.length - token.length) / key.length) * 0.15;
+        }
+
+        var score = tf * idf * norm * penality;
+
+        if (docRef in scores) {
+          scores[docRef] += score;
+        } else {
+          scores[docRef] = score;
+        }
+      }
+    }, this);
+  }, this);
+
+  if (booleanType == 'AND') {
+    scores = this.intersect(scores, docTokens, queryTokens.length);
+  }
+
+  scores = this.coordNorm(scores, docTokens, queryTokens.length);
+
+  return scores;
+};
+
+/**
+ * Record the occuring query token of retrieved doc specified by doc field.
+ * Only for inner user.
+ *
+ * @param {Object} docTokens a data structure stores which token appears in the retrieved doc.
+ * @param {String} token query token
+ * @param {Object} docs the retrieved documents of the query token
+ *
+ */
+elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs) {
+  for (var doc in docs) {
+    if (doc in docTokens) {
+      docTokens[doc].push(token);
+    } else {
+      docTokens[doc] = [token];
+    }
+  }
+};
+
+/**
+ * compute squared weight of query tokens.
+ *
+ * @param {Array} queryTokens query tokens.
+ * @param {elasticlunr.Configuration} config The user query config, JSON format.
+ * @return {Float}
+ */
+elasticlunr.Index.prototype.computeSquaredWeight = function (queryTokens, config) {
+  var weight = 0.0;
+  queryTokens.forEach(function (token) {
+    var fieldWeight = 0.0;
+    for (var field in config) {
+      var fieldBoost = config[field].boost;
+      var idf = this.idf(token, field);
+      fieldWeight += idf * idf * fieldBoost * fieldBoost;
+    }
+    weight += fieldWeight;
+  }, this);
+
+  return weight;
+};
+
+/**
+ * find documents contain all the query tokens.
+ * only for inner use.
/**
 * Keep only the documents that contain all the query tokens.
 * Only for internal use.
 *
 * @param {Object} scores Map of docRef to score.
 * @param {Object} docTokens Map of docRef to the query tokens recorded for that doc.
 * @param {Integer} n query token number
 * @return {Object} A new map holding the docs whose recorded token count equals n.
 */
elasticlunr.Index.prototype.intersect = function (scores, docTokens, n) {
  var kept = {};

  Object.keys(scores).forEach(function (doc) {
    if ((doc in docTokens) && docTokens[doc].length == n) {
      kept[doc] = scores[doc];
    }
  });

  return kept;
};

/**
 * Coord-normalise the score of each doc: the score is multiplied by the
 * fraction of query tokens recorded for that doc, so a doc containing more
 * query tokens ends up with a larger score than one containing fewer.
 *
 * Only for internal use. The scores object is updated in place.
 *
 * @param {Object} scores Map of docRef to score.
 * @param {Object} docTokens Map of docRef to the query tokens recorded for that doc.
 * @param {Integer} n query token number
 * @return {Object} The same scores object.
 */
elasticlunr.Index.prototype.coordNorm = function (scores, docTokens, n) {
  Object.keys(scores).forEach(function (doc) {
    if (doc in docTokens) {
      var matched = docTokens[doc].length;
      scores[doc] = scores[doc] * matched / n;
    }
  });

  return scores;
};

/**
 * Returns a representation of the index ready for serialisation.
 *
 * @return {Object}
 * @memberOf Index
 */
elasticlunr.Index.prototype.toJSON = function () {
  var self = this;
  var serialisedFields = this._fields.reduce(function (acc, field) {
    acc[field] = self.index[field].toJSON();
    return acc;
  }, {});

  return {
    version: elasticlunr.version,
    fields: this._fields,
    ref: this._ref,
    documentStore: this.documentStore.toJSON(),
    index: serialisedFields,
    pipeline: this.pipeline.toJSON()
  };
};
/**
 * Applies a plugin to the current index.
 *
 * A plugin is a function that encapsulates custom behaviour to be applied
 * to the index. It is called with the index both as its context (`this`)
 * and as its first argument; any additional arguments passed to `use` are
 * forwarded after it.
 *
 * Example:
 *
 *     var myPlugin = function (idx, arg1, arg2) {
 *       // `this` is the index to be extended
 *       // apply any extensions etc here.
 *     }
 *
 *     var idx = elasticlunr(function () {
 *       this.use(myPlugin, 'arg1', 'arg2')
 *     })
 *
 * @param {Function} plugin The plugin to apply.
 * @memberOf Index
 */
elasticlunr.Index.prototype.use = function (plugin) {
  var extraArgs = Array.prototype.slice.call(arguments, 1);
  plugin.apply(this, [this].concat(extraArgs));
};
/*!
 * elasticlunr.DocumentStore
 * Copyright (C) 2016 Wei Song
 */

/**
 * elasticlunr.DocumentStore is a simple key-value store for the documents
 * held in an index.
 *
 * The original JSON documents can be stored so that search snippets can be
 * built from them. Storing is on by default; pass `false` to keep the index
 * smaller, at the cost of not being able to retrieve the original documents.
 *
 * @param {Boolean} save If the original JSON document should be stored.
 * @constructor
 * @module
 */
elasticlunr.DocumentStore = function (save) {
  // null/undefined means "not configured": default to storing documents.
  this._save = (save === null || save === undefined) ? true : save;

  this.docs = {};
  this.docInfo = {};
  this.length = 0;
};
/**
 * Loads a previously serialised document store.
 *
 * @param {Object} serialisedData The serialised document store to load.
 * @return {elasticlunr.DocumentStore}
 */
elasticlunr.DocumentStore.load = function (serialisedData) {
  var store = new this;

  store.length = serialisedData.length;
  store.docs = serialisedData.docs;
  store.docInfo = serialisedData.docInfo;
  store._save = serialisedData.save;

  return store;
};

/**
 * Check if the current instance stores the original doc.
 *
 * @return {Boolean}
 */
elasticlunr.DocumentStore.prototype.isDocStored = function () {
  return this._save;
};

/**
 * Stores the given doc in the document store against the given id.
 * If docRef already exists, the doc is replaced.
 *
 * When the store was configured not to save documents, the key is still
 * registered (with a null value) so that hasDoc and length keep working.
 *
 * @param {Integer|String} docRef The key used to store the JSON format doc.
 * @param {Object} doc The JSON format doc.
 */
elasticlunr.DocumentStore.prototype.addDoc = function (docRef, doc) {
  if (!this.hasDoc(docRef)) this.length++;

  if (this._save === true) {
    this.docs[docRef] = doc;
  } else {
    this.docs[docRef] = null;
  }
};

/**
 * Retrieves the JSON doc from the document store for a given key.
 *
 * If docRef is not found, return null.
 * If the store was configured not to save documents, return null.
 *
 * @param {Integer|String} docRef The key to lookup and retrieve from the document store.
 * @return {Object}
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.getDoc = function (docRef) {
  if (this.hasDoc(docRef) === false) return null;
  return this.docs[docRef];
};

/**
 * Checks whether the document store contains a key (docRef).
 *
 * Only own keys count. A plain `in` test would also report names inherited
 * from Object.prototype (e.g. "constructor", "toString") as stored
 * documents, which would corrupt `length` and make getDoc return a built-in
 * function.
 *
 * @param {Integer|String} docRef The id to look up in the document store.
 * @return {Boolean}
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.hasDoc = function (docRef) {
  return Object.prototype.hasOwnProperty.call(this.docs, docRef);
};
/**
 * Removes the value for a key in the document store, together with the
 * field-length information recorded for it. Unknown keys are ignored.
 *
 * @param {Integer|String} docRef The id to remove from the document store.
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.removeDoc = function (docRef) {
  if (this.hasDoc(docRef)) {
    delete this.docs[docRef];
    delete this.docInfo[docRef];
    this.length--;
  }
};

/**
 * Add the field length of a document's field tokens from pipeline results.
 * The field length is kept separately from the document itself, so field
 * length normalization works even when the original JSON document is not
 * stored.
 *
 * Nothing is recorded for a null/undefined docRef or for a document that is
 * not in the store.
 *
 * @param {Integer|String} docRef document's id or reference
 * @param {String} fieldName field name
 * @param {Integer} length field length
 */
elasticlunr.DocumentStore.prototype.addFieldLength = function (docRef, fieldName, length) {
  var missingRef = (docRef === null || docRef === undefined);
  if (missingRef || this.hasDoc(docRef) == false) return;

  var info = this.docInfo[docRef];
  if (!info) {
    info = {};
    this.docInfo[docRef] = info;
  }
  info[fieldName] = length;
};
/**
 * Update the field length of a document's field tokens from pipeline results.
 * The field length of a document is used to do field length normalization
 * even without the original JSON document stored.
 *
 * @param {Integer|String} docRef document's id or reference
 * @param {String} fieldName field name
 * @param {Integer} length field length
 */
elasticlunr.DocumentStore.prototype.updateFieldLength = function (docRef, fieldName, length) {
  if (docRef === null || docRef === undefined) return;
  if (this.hasDoc(docRef) == false) return;

  this.addFieldLength(docRef, fieldName, length);
};

/**
 * Get the field length of a document by docRef.
 *
 * Returns 0 when the docRef is null/undefined, when the document is not in
 * the store, or when no length has been recorded for the field.
 *
 * @param {Integer|String} docRef document id or reference
 * @param {String} fieldName field name
 * @return {Integer} field length
 */
elasticlunr.DocumentStore.prototype.getFieldLength = function (docRef, fieldName) {
  if (docRef === null || docRef === undefined) return 0;

  if (!(docRef in this.docs)) return 0;

  // A document can be in the store without any field length recorded yet
  // (addDoc does not create a docInfo entry). Testing `fieldName in
  // undefined` would throw a TypeError, so treat that case as length 0.
  var info = this.docInfo[docRef];
  if (!info || !(fieldName in info)) return 0;
  return info[fieldName];
};

/**
 * Returns a JSON representation of the document store used for serialisation.
 *
 * @return {Object} JSON format
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.toJSON = function () {
  return {
    docs: this.docs,
    docInfo: this.docInfo,
    length: this.length,
    save: this._save
  };
};
/*!
 * elasticlunr.stemmer
 * Copyright (C) 2016 Oliver Nightingale
 * Copyright (C) 2016 Wei Song
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * elasticlunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * The stemmer is built once by an immediately-invoked function so that all
 * suffix tables and regular expressions are created a single time and then
 * shared by every call. It is registered with the pipeline under the label
 * 'stemmer'.
 *
 * @module
 * @param {String} str The string to stem
 * @return {String}
 * @see elasticlunr.Pipeline
 */
elasticlunr.stemmer = (function(){
  // Step 2 suffix -> replacement table.
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    // Step 3 suffix -> replacement table.
    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                   // vowel in stem

  // Measure / vowel tests compiled from the pattern strings above.
  var re_mgr0 = new RegExp(mgr0);
  var re_mgr1 = new RegExp(mgr1);
  var re_meq1 = new RegExp(meq1);
  var re_s_v = new RegExp(s_v);

  // Suffix patterns, named after the step that uses them.
  var re_1a = /^(.+?)(ss|i)es$/;
  var re2_1a = /^(.+?)([^s])s$/;
  var re_1b = /^(.+?)eed$/;
  var re2_1b = /^(.+?)(ed|ing)$/;
  var re_1b_2 = /.$/;
  var re2_1b_2 = /(at|bl|iz)$/;
  var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
  var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var re_1c = /^(.+?[^aeiou])y$/;
  var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;

  var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;

  var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var re2_4 = /^(.+?)(s|t)(ion)$/;

  var re_5 = /^(.+?)e$/;
  var re_5_1 = /ll$/;
  var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var porterStemmer = function porterStemmer(w) {
    var stem,
      suffix,
      firstch,
      re,
      re2,
      re3,
      re4;

    // Words shorter than three characters are returned unchanged.
    if (w.length < 3) { return w; }

    // Upper-case a leading "y" so the lower-case vowel class above does not
    // match it; it is lower-cased again just before returning.
    firstch = w.substr(0,1);
    if (firstch == "y") {
      w = firstch.toUpperCase() + w.substr(1);
    }

    // Step 1a
    re = re_1a
    re2 = re2_1a;

    if (re.test(w)) { w = w.replace(re,"$1$2"); }
    else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }

    // Step 1b
    re = re_1b;
    re2 = re2_1b;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = re_mgr0;
      if (re.test(fp[1])) {
        // "...eed" with m>0 in the stem: drop the final character.
        re = re_1b_2;
        w = w.replace(re,"");
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = re_s_v;
      if (re2.test(stem)) {
        // The stem contains a vowel, so "ed"/"ing" is removed; the stem is
        // then repaired by appending "e" or dropping its last character.
        w = stem;
        re2 = re2_1b_2;
        re3 = re3_1b_2;
        re4 = re4_1b_2;
        if (re2.test(w)) {  w = w + "e"; }
        else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
        else if (re4.test(w)) { w = w + "e"; }
      }
    }

    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    re = re_1c;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      w = stem + "i";
    }

    // Step 2
    re = re_2;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    re = re_3;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4
    re = re_4;
    re2 = re2_4;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      if (re.test(stem)) {
        w = stem;
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      // For "...sion"/"...tion" the s/t stays part of the stem.
      stem = fp[1] + fp[2];
      re2 = re_mgr1;
      if (re2.test(stem)) {
        w = stem;
      }
    }

    // Step 5
    re = re_5;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      re2 = re_meq1;
      re3 = re3_5;
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
        w = stem;
      }
    }

    // A word ending in "ll" that also matches the m>1 pattern loses its
    // final character.
    re = re_5_1;
    re2 = re_mgr1;
    if (re.test(w) && re2.test(w)) {
      re = re_1b_2;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y

    if (firstch == "y") {
      w = firstch.toLowerCase() + w.substr(1);
    }

    return w;
  };

  return porterStemmer;
})();

elasticlunr.Pipeline.registerFunction(elasticlunr.stemmer, 'stemmer');
/*!
 * elasticlunr.stopWordFilter
 * Copyright (C) 2016 Oliver Nightingale
 * Copyright (C) 2016 Wei Song
 */
/**
 * elasticlunr.stopWordFilter is an English language stop word list filter;
 * any word contained in the list will not be passed through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass
 * the filter then undefined is returned. The lookup is a dictionary access
 * on elasticlunr.stopWordFilter.stopWords.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @return {String}
 * @see elasticlunr.Pipeline
 */
elasticlunr.stopWordFilter = function (token) {
  if (!token) return;
  if (elasticlunr.stopWordFilter.stopWords[token] === true) return;
  return token;
};

/**
 * Remove the predefined stop words.
 * Call this before addStopWords to use only customized stop words.
 *
 * @return {null}
 */
elasticlunr.clearStopWords = function () {
  elasticlunr.stopWordFilter.stopWords = {};
};

/**
 * Add customized stop words to the active stop word table.
 * Anything that is not an array is ignored.
 *
 * @params {Array} words customized stop words
 * @return {null}
 */
elasticlunr.addStopWords = function (words) {
  if (!Array.isArray(words)) return;

  words.forEach(function (word) {
    elasticlunr.stopWordFilter.stopWords[word] = true;
  });
};

// Default English stop words, stored as a word -> true lookup table.
// The empty string is included as the first entry.
elasticlunr.defaultStopWords = (function () {
  var words = [''].concat((
    'a able about across after all almost also am among an and any are as ' +
    'at be because been but by can cannot could dear did do does either ' +
    'else ever every for from get got had has have he her hers him his how ' +
    'however i if in into is it its just least let like likely may me ' +
    'might most must my neither no nor not of off often on only or other ' +
    'our own rather said say says she should since so some than that the ' +
    'their them then there these they this tis to too twas us wants was we ' +
    'were what when where which while who whom why will with would yet ' +
    'you your'
  ).split(' '));

  var table = {};
  words.forEach(function (word) {
    table[word] = true;
  });
  return table;
})();

elasticlunr.stopWordFilter.stopWords = elasticlunr.defaultStopWords;

elasticlunr.Pipeline.registerFunction(elasticlunr.stopWordFilter, 'stopWordFilter');
/*!
 * elasticlunr.trimmer
 * Copyright (C) 2016 Oliver Nightingale
 * Copyright (C) 2016 Oliver Nightingale
 */

/**
 * elasticlunr.trimmer is a pipeline function for trimming non-word
 * characters from the beginning and end of tokens before they
 * enter the index.
 *
 * This implementation may not work correctly for non-latin
 * characters and should either be removed or adapted for use
 * with languages with non-latin characters.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @return {String}
 * @throws {Error} when token is null or undefined
 * @see elasticlunr.Pipeline
 */
elasticlunr.trimmer = function (token) {
  if (token == null) {
    throw new Error('token should not be undefined');
  }

  var withoutLeading = token.replace(/^\W+/, '');
  return withoutLeading.replace(/\W+$/, '');
};

elasticlunr.Pipeline.registerFunction(elasticlunr.trimmer, 'trimmer');
/*!
 * elasticlunr.InvertedIndex
 * Copyright (C) 2016 Wei Song
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * elasticlunr.InvertedIndex stores the inverted index of token to document
 * ref as a character trie: every node has a `docs` map and a `df` counter,
 * plus one child node per following character.
 *
 * @constructor
 */
elasticlunr.InvertedIndex = function () {
  this.length = 0;
  this.root = { docs: {}, df: 0 };
};

/**
 * Loads a previously serialised inverted index.
 *
 * @param {Object} serialisedData The serialised inverted index to load.
 * @return {elasticlunr.InvertedIndex}
 */
elasticlunr.InvertedIndex.load = function (serialisedData) {
  var loaded = new this;

  loaded.root = serialisedData.root;
  loaded.length = serialisedData.length;

  return loaded;
};

/**
 * Adds a {token: tokenInfo} pair to the inverted index.
 * If the token is already recorded for that document, its tokenInfo is
 * replaced.
 *
 * By default this function starts at the root of the current inverted index,
 * however it can start at any node of the inverted index if required.
 *
 * @param {String} token
 * @param {Object} tokenInfo format: { ref: 1, tf: 2}
 * @param {Object} root An optional node at which to start looking for the
 * correct place to enter the doc, by default the root of this elasticlunr.InvertedIndex
 * is used.
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.addToken = function (token, tokenInfo, root) {
  var node = root || this.root;

  // Walk (and create on demand) one trie node per character of the token.
  for (var i = 0; i < token.length; i++) {
    var ch = token[i];
    if (!(ch in node)) node[ch] = {docs: {}, df: 0};
    node = node[ch];
  }

  var docRef = tokenInfo.ref;
  var isNewDoc = !node.docs[docRef];

  node.docs[docRef] = {tf: tokenInfo.tf};

  // Counters only grow the first time a document is recorded for the token.
  if (isNewDoc) {
    node.df += 1;
    this.length += 1;
  }
};
/**
 * Checks whether a node exists in this elasticlunr.InvertedIndex for every
 * character of the given token.
 *
 * @param {String} token The token to check
 * @return {Boolean}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.hasToken = function (token) {
  return this.getNode(token) !== null;
};

/**
 * Retrieve a node from the inverted index for a given token.
 * If the token is empty or not found in this InvertedIndex, return null.
 *
 * @param {String} token The token to get the node for.
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getNode = function (token) {
  if (!token) return null;

  var current = this.root;
  var pos = 0;

  while (pos < token.length) {
    var child = current[token[pos]];
    if (!child) return null;
    current = child;
    pos += 1;
  }

  return current;
};

/**
 * Retrieve the documents for a given token.
 * If the token is not found, return {}.
 *
 * @param {String} token The token to get the documents for.
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getDocs = function (token) {
  var node = this.getNode(token);
  return (node == null) ? {} : node.docs;
};

/**
 * Retrieve the term frequency of the given token in the given docRef.
 * If the token or docRef is not found, return 0.
 *
 * @param {String} token The token to get the term frequency for.
 * @param {String|Integer} docRef
 * @return {Integer}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getTermFrequency = function (token, docRef) {
  var node = this.getNode(token);

  if (node == null || !(docRef in node.docs)) {
    return 0;
  }

  return node.docs[docRef].tf;
};
/**
 * Retrieve the document frequency of the given token.
 * If the token is not found, return 0.
 *
 * @param {String} token The token to get the document frequency for.
 * @return {Integer}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getDocFreq = function (token) {
  var node = this.getNode(token);

  if (node == null) {
    return 0;
  }

  return node.df;
};

/**
 * Remove the document identified by ref from the token in the inverted index.
 *
 * @param {String} token The token to remove the document from.
 * @param {String} ref The ref of the document to remove from this token.
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.removeToken = function (token, ref) {
  if (!token) return;
  var node = this.getNode(token);

  if (node == null) return;

  if (ref in node.docs) {
    delete node.docs[ref];
    node.df -= 1;
    // addToken increments `length` for every new (token, doc) pair, so the
    // removal has to give it back; otherwise the serialised length drifts.
    this.length -= 1;
  }
};

/**
 * Find all the indexed tokens that start with the passed token (the token
 * itself included when it is indexed).
 * If the token is empty or not found, return an empty Array.
 *
 * @param {String} token The token to expand.
 * @param {Array} memo Accumulator used by the recursion; callers normally omit it.
 * @param {Object} root Trie node for `token`, used by the recursion; callers normally omit it.
 * @return {Array}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.expandToken = function (token, memo, root) {
  if (token == null || token == '') return [];
  var memo = memo || [];

  if (root == void 0) {
    root = this.getNode(token);
    if (root == null) return memo;
  }

  // Only nodes that still have documents count as indexed tokens.
  if (root.df > 0) memo.push(token);

  for (var key in root) {
    // 'docs' and 'df' are node bookkeeping, every other key is a child node.
    if (key === 'docs') continue;
    if (key === 'df') continue;
    this.expandToken(token + key, memo, root[key]);
  }

  return memo;
};

/**
 * Returns a representation of the inverted index ready for serialisation.
 *
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.toJSON = function () {
  return {
    root: this.root,
    length: this.length
  };
};

/*!
 * elasticlunr.Configuration
 * Copyright (C) 2016 Wei Song
 */

/**
 * elasticlunr.Configuration is used to analyze the user search configuration.
 *
 * With elasticlunr.Configuration the user can set query-time boosting and
 * the boolean model for each field.
 *
 * Currently the configuration supports:
 * 1. query-time boosting, the user can set how to boost each field.
 * 2. boolean model choosing, the user can choose which boolean model to use for each field.
 * 3. token expansion, the user can set token expand to true to improve recall. Default is false.
 *
 * Query-time boosting must be configured per field; the "boolean" model can
 * be configured both per field and globally, as in the following examples.
 * A field configuration for "boolean" overrides the global configuration.
 * Token expansion can be configured both per field and globally. A local
 * field configuration overrides the global configuration.
 *
 * configuration example:
 * {
 *   fields:{
 *     title: {boost: 2},
 *     body: {boost: 1}
 *   },
 *   bool: "OR"
 * }
 *
 * "bool" field configuration overriding the global configuration:
 * {
 *   fields:{
 *     title: {boost: 2, bool: "AND"},
 *     body: {boost: 1}
 *   },
 *   bool: "OR"
 * }
 *
 * "expand" example:
 * {
 *   fields:{
 *     title: {boost: 2, bool: "AND"},
 *     body: {boost: 1}
 *   },
 *   bool: "OR",
 *   expand: true
 * }
 *
 * "expand" example per field:
 * {
 *   fields:{
 *     title: {boost: 2, bool: "AND", expand: true},
 *     body: {boost: 1}
 *   },
 *   bool: "OR"
 * }
 *
 * then, the user can search with a configuration to do query-time boosting.
 * idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}});
 *
 *
 * @constructor
 *
 * @param {String} config user configuration, as a JSON string; a falsy value selects the defaults
 * @param {Array} fields fields of index instance
 * @throws {Error} when fields is null or undefined
 * @module
 */
elasticlunr.Configuration = function (config, fields) {
  var config = config || '';

  if (fields == undefined || fields == null) {
    throw new Error('fields should not be null');
  }

  this.config = {};

  // No user configuration at all is the normal default path, not a parse
  // failure: build the defaults directly instead of running JSON.parse('')
  // and logging a misleading warning on every plain search.
  if (config === '') {
    this.buildDefaultConfig(fields);
    return;
  }

  var userConfig;
  try {
    userConfig = JSON.parse(config);
    this.buildUserConfig(userConfig, fields);
  } catch (error) {
    elasticlunr.utils.warn('user configuration parse failed, will use default configuration');
    this.buildDefaultConfig(fields);
  }
};

/**
 * Build the default search configuration: every field gets boost 1,
 * the "OR" boolean model and no token expansion.
 *
 * @param {Array} fields fields of index instance
 */
elasticlunr.Configuration.prototype.buildDefaultConfig = function (fields) {
  this.reset();
  fields.forEach(function (field) {
    this.config[field] = {
      boost: 1,
      bool: "OR",
      expand: false
    };
  }, this);
};
+ * + * @param {JSON} config User JSON configuratoin + * @param {Array} fields fields of index instance + */ +elasticlunr.Configuration.prototype.buildUserConfig = function (config, fields) { + var global_bool = "OR"; + var global_expand = false; + + this.reset(); + if ('bool' in config) { + global_bool = config['bool'] || global_bool; + } + + if ('expand' in config) { + global_expand = config['expand'] || global_expand; + } + + if ('fields' in config) { + for (var field in config['fields']) { + if (fields.indexOf(field) > -1) { + var field_config = config['fields'][field]; + var field_expand = global_expand; + if (field_config.expand != undefined) { + field_expand = field_config.expand; + } + + this.config[field] = { + boost: field_config.boost || 1, + bool: field_config.bool || global_bool, + expand: field_expand + }; + } else { + elasticlunr.utils.warn('field name in user configuration not found in index instance fields'); + } + } + } else { + this.addAllFields2UserConfig(global_bool, global_expand, fields); + } +}; + +/** + * Add all fields to user search configuration. + * + * @param {String} bool Boolean model + * @param {String} expand Expand model + * @param {Array} fields fields of index instance + */ +elasticlunr.Configuration.prototype.addAllFields2UserConfig = function (bool, expand, fields) { + fields.forEach(function (field) { + this.config[field] = { + boost: 1, + bool: bool, + expand: expand + }; + }, this); +}; + +/** + * get current user configuration + */ +elasticlunr.Configuration.prototype.get = function () { + return this.config; +}; + +/** + * reset user search configuration. + */ +elasticlunr.Configuration.prototype.reset = function () { + this.config = {}; +}; + + /** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ + ;(function (root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. 
+ define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like enviroments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + root.elasticlunr = factory() + } + }(this, function () { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return elasticlunr + })) +})(); diff --git a/pd/nw/index.js b/pd/nw/index.js index f2976e62d..bce4cb620 100644 --- a/pd/nw/index.js +++ b/pd/nw/index.js @@ -597,9 +597,7 @@ function nw_create_pd_window_menus(gui, w) { } }); minit(m.help.browser, { - click: function() { - alert("please implement a help browser"); - } + click: pdgui.open_search }); minit(m.help.l2ork_list, { click: function() { diff --git a/pd/nw/pd_canvas.js b/pd/nw/pd_canvas.js index 63c493ae3..3a044447b 100644 --- a/pd/nw/pd_canvas.js +++ b/pd/nw/pd_canvas.js @@ -1341,9 +1341,7 @@ function nw_create_patch_window_menus(gui, w, name) { } }); minit(m.help.browser, { - click: function() { - alert("please implement a help browser"); - } + click: pdgui.open_search }); minit(m.help.l2ork_list, { click: function() { diff --git a/pd/nw/pd_menus.js b/pd/nw/pd_menus.js index 74ad60951..976488559 100644 --- a/pd/nw/pd_menus.js +++ b/pd/nw/pd_menus.js @@ -470,6 +470,8 @@ function create_menu(gui, type) { })); helpMenu.append(m.help.browser = new gui.MenuItem({ label: l("menu.browser"), + key: "b", + modifiers: cmd_or_ctrl, tooltip: l("menu.browser_tt") })); helpMenu.append(new gui.MenuItem({ type: "separator" })); @@ -491,8 +493,6 @@ function create_menu(gui, type) { })); helpMenu.append(m.help.devtools = new gui.MenuItem({ label: l("menu.devtools"), - key:"b", - modifiers: cmd_or_ctrl, tooltip: l("menu.devtools_tt") })); diff --git a/pd/nw/pdgui.js b/pd/nw/pdgui.js index c64975b5b..ff7d140f6 100644 --- 
a/pd/nw/pdgui.js +++ b/pd/nw/pdgui.js @@ -707,6 +707,11 @@ function doc_open (dir, basename) { } } +// Need to rethink these names-- it's confusing to have this and +// pd_doc_open available, but we need this one for dialog_search because +// it uses absolute paths +exports.doc_open = doc_open; + // Open a file relative to the main directory where "doc/" and "extra/" live function pd_doc_open(dir, basename) { doc_open(path.join(gui_dir, dir), basename); @@ -3325,6 +3330,14 @@ function open_prefs() { exports.open_prefs = open_prefs; +function open_search() { + if (!dialogwin["search"]) { + nw_create_window("search", "search", 300, 400, 20, 20, null); + } +} + +exports.open_search= open_search; + function gui_audio_properties(gfxstub, sys_indevs, sys_outdevs, pd_indevs, pd_inchans, pd_outdevs, pd_outchans, audio_attrs) { var attrs = audio_attrs.concat([ -- GitLab