├── .gitignore
├── .gitmodules
├── package.json
├── changelog.txt
├── LICENSE
├── example
└── index.html
├── Gruntfile.js
├── src
├── levenshtein.js
├── ghosthunter.js
├── ghosthunter-nodependency.js
└── lunr.js
├── dist
└── jquery.ghosthunter-use-require.js
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/lunr"]
2 | path = lib/lunr
3 | url = https://github.com/olivernn/lunr.js.git
4 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ghosthunter",
3 | "version": "0.6.0",
4 | "description": "GhostHunter allows a theme developer for the Ghost blogging platform to add client-side search capability using lunr.js.",
5 | "main": "jquery.ghostHunter.js",
6 | "directories": {
7 | "example": "example"
8 | },
9 | "scripts": {
10 | "test": "echo \"Error: no test specified\" && exit 1"
11 | },
12 | "repository": {
13 | "type": "git",
14 | "url": "git+https://github.com/fbennett/ghostHunter.git"
15 | },
16 | "author": "Jamal Neufeld",
17 | "license": "MIT",
18 | "bugs": {
19 | "url": "https://github.com/fbennett/ghostHunter/issues"
20 | },
21 | "homepage": "https://github.com/fbennett/ghostHunter#readme",
22 | "dependencies": {},
23 | "devDependencies": {
24 | "grunt": "^1.0.1",
25 | "grunt-contrib-copy": "^1.0.0"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/changelog.txt:
--------------------------------------------------------------------------------
1 | v0.5.1 : Performance enhancements to stop involuntary DDOS
2 | v0.5.0 : Levenshtein screen updates, fuzzy searching thanks to @fbennett
3 | v0.4.1 : Index caching, repo reorganization, extended metadata thanks to @fbennett
4 | v0.4.0 : Ghost 1.0 compatibility
5 | v0.3.5 : Exported query options ; added option to search static pages
6 | v0.3.4 : Added onPageLoad option to improve onKeyUp option thanks to @cjsheets.
7 | v0.3.3 : Exported Lunr to a separate js file ; made the no-dependency version available.
8 | v0.3.2 : Added PrettyDate option thanks to @alavers
9 | v0.3.1 : Added tag support in the index thanks to @lizhuoli1126
10 | v0.3.0 : Stopped using RSS, started using API.
11 | v0.2.3 : Added callbacks
12 | Added tags/categories to the indexed data
13 | v0.2.2 : Added the "clear" method
14 | v0.2 : Added ability to have search occur onKeyUp
15 | v0.1 : Initial commit, fully functional alpha
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2014 Jamal Neufeld (jamal@i11u.me)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/example/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
",
188 | displaySearchInfo : true,
189 | zeroResultsInfo : true,
190 | before : false,
191 | onComplete : false,
192 | filterfields : false,
193 | subpath : "",
194 | item_preprocessor : false,
195 | indexing_start : false,
196 | indexing_end : false,
197 | includebodysearch : false
198 | };
// Render a date as "D Month YYYY" (for example "15 March 2020").
// `date` is handed straight to the Date constructor; the day, month and
// year are read with the local-time getters.
var prettyDate = function(date) {
    var MONTH_LABELS = [
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    ];
    var parsed = new Date(date);
    var dayOfMonth = parsed.getDate();
    var monthLabel = MONTH_LABELS[parsed.getMonth()];
    var fullYear = parsed.getFullYear();
    return dayOfMonth + ' ' + monthLabel + ' ' + fullYear;
};
204 |
// Turn a subpath such as "/blog/sub" into a flat key fragment ("blog-sub"):
// one leading slash is dropped and every remaining slash becomes a dash.
var getSubpathKey = function(str) {
    var withoutLeadingSlash = str.replace(/^\//, "");
    return withoutLeadingSlash.split("/").join("-");
};
208 |
209 | var lastTimeoutID = null;
210 |
// We add a prefix to new IDs and remove it after a set of
// updates is complete, just in case a browser freaks over
// duplicate IDs in the DOM.
//
// Walks every element with class "gh-search-item" and strips a leading
// "new-" from its id attribute. Elements that carry no id attribute at
// all are skipped, so one malformed result node cannot abort the pass
// with a TypeError.
var settleIDs = function() {
    $('.gh-search-item').each(function(){
        var currentId = this.getAttribute('id');
        if (currentId === null) {
            // Nothing to settle on a node without an id.
            return;
        }
        this.setAttribute('id', currentId.replace(/^new-/, ""));
    });
};
221 | var updateSearchList = function(listItems, apiData, steps) {
222 | for (var i=0,ilen=steps.length;i recordedDate) {
281 | me.latestPost = arrayItem.updated_at;
282 | }
283 | var tag_arr = arrayItem.tags.map(function(v) {
284 | return v.name; // `tag` object has an `name` property which is the value of tag. If you also want other info, check API and get that property
285 | })
286 | if(arrayItem.meta_description == null) { arrayItem.meta_description = '' };
287 | var category = tag_arr.join(", ");
288 | if (category.length < 1){
289 | category = "undefined";
290 | }
291 | var parsedData = {
292 | id : String(arrayItem.id),
293 | title : String(arrayItem.title),
294 | description : String(arrayItem.custom_excerpt),
295 | pubDate : String(arrayItem.published_at),
296 | tag : category
297 | }
298 | if ( me.includebodysearch ){
299 | parsedData.plaintext=String(arrayItem.plaintext);
300 | }
301 | this.add(parsedData)
302 | var localUrl = me.subpath + arrayItem.url
303 | me.blogData[arrayItem.id] = {
304 | title: arrayItem.title,
305 | description: arrayItem.custom_excerpt,
306 | pubDate: prettyDate(parsedData.pubDate),
307 | link: localUrl,
308 | tags: tag_arr
309 | };
310 | // If there is a metadata "pre"-processor for the item, run it here.
311 | if (me.item_preprocessor) {
312 | Object.assign(me.blogData[arrayItem.id], me.item_preprocessor(arrayItem));
313 | }
314 | // console.log("done indexing the item");
315 | }, this);
316 | });
317 | try {
318 | var subpathKey = getSubpathKey(me.subpath);
319 | localStorage.setItem(("ghost_" + subpathKey + "_lunrIndex"), JSON.stringify(me.index));
320 | localStorage.setItem(("ghost_" + subpathKey + "_blogData"), JSON.stringify(me.blogData));
321 | localStorage.setItem(("ghost_" + subpathKey + "_latestPost"), me.latestPost);
322 | } catch (e) {
323 | console.warn("ghostHunter: save to localStorage failed: " + e);
324 | }
325 | if (me.indexing_end) {
326 | me.indexing_end();
327 | }
328 | me.isInit = true;
329 | });
330 | }
331 |
332 | var pluginMethods = {
333 |
334 | isInit : false,
335 |
336 | init : function( target , opts ){
337 | var that = this;
338 | that.target = target;
339 | Object.assign(this, opts);
340 | // console.log("ghostHunter: init");
341 | if ( opts.onPageLoad ) {
342 | function miam () {
343 | that.loadAPI();
344 | }
345 | window.setTimeout(miam, 1);
346 | } else {
347 | target.focus(function(){
348 | that.loadAPI();
349 | });
350 | }
351 |
352 | target.closest("form").submit(function(e){
353 | e.preventDefault();
354 | that.find(target.val());
355 | });
356 |
357 | if( opts.onKeyUp ) {
358 | // In search-as-you-type mode, the Enter key is meaningless,
359 | // so we disable it in the search field. If enabled, some browsers
360 | // will save data to history (even when autocomplete="false"), which
361 | // is an intrusive headache, particularly on mobile.
362 | target.keydown(function(event){
363 | if (event.which === 13) {
364 | return false;
365 | }
366 | });
367 | target.keyup(function(event) {
368 | that.find(target.val());
369 | });
370 |
371 | }
372 |
373 | },
374 |
375 | loadAPI : function(){
376 | // console.log('ghostHunter: loadAPI');
377 | if(!this.isInit) {
378 | // console.log('ghostHunter: this.isInit is true');
379 | if (this.indexing_start) {
380 | this.indexing_start();
381 | }
382 | // If isInit is falsy, check for data in localStore,
383 | // parse into memory, and declare isInit to be true.
384 | try {
385 | var subpathKey = getSubpathKey(this.subpath);
386 | this.index = localStorage.getItem(("ghost_" + subpathKey + "_lunrIndex"));
387 | this.blogData = localStorage.getItem(("ghost_" + subpathKey + "_blogData"));
388 | this.latestPost = localStorage.getItem(("ghost_" + subpathKey + "_latestPost"));
389 | if (this.latestPost && this.index && this.blogData) {
390 | this.latestPost = this.latestPost;
391 | this.index = lunr.Index.load(JSON.parse(this.index));
392 | this.blogData = JSON.parse(this.blogData);
393 | this.isInit = true;
394 | }
395 | } catch (e){
396 | console.warn("ghostHunter: retrieve from localStorage failed: " + e);
397 | }
398 | }
399 | if (this.isInit) {
400 | // console.log('ghostHunter: this.isInit recheck is true');
401 | // Check if there are new or edited posts
402 | var params = {
403 | limit: "all",
404 | filter: "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'",
405 | fields: "id"
406 | };
407 |
408 | var url = "/ghost/api/v2/content/posts/?key=" + ghosthunter_key + "&limit=all&fields=id" + "&filter=" + "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'";
409 |
410 | var me = this;
411 | $.get(url).done(function(data){
412 | if (data.posts.length > 0) {
413 | grabAndIndex.call(me);
414 | } else {
415 | if (me.indexing_end) {
416 | me.indexing_end();
417 | }
418 | me.isInit = true;
419 | }
420 | });
421 | } else {
422 | // console.log('ghostHunter: this.isInit recheck is false');
423 | grabAndIndex.call(this)
424 | }
425 | },
426 |
427 |
428 | find : function(value){
429 | clearTimeout(lastTimeoutID);
430 | if (!value) {
431 | value = "";
432 | };
433 | value = value.toLowerCase();
434 | lastTimeoutID = setTimeout(function() {
435 | // Query strategy is lifted from comments on a lunr.js issue: https://github.com/olivernn/lunr.js/issues/256
436 | var thingsFound = [];
437 | // The query interface expects single terms, so we split.
438 | var valueSplit = value.split(/\s+/);
439 | for (var i=0,ilen=valueSplit.length;i 1) {
468 | // If we had multiple terms, we'll have multiple lists. We filter
469 | // them here to use only items that produce returns for all
470 | // terms. This spoofs an AND join between terms, which lunr.js can't
471 | // yet do internally.
472 | // By using the first list of items as master, we get weightings
473 | // based on the first term entered, which is more or less
474 | // what we would expect.
475 | var searchResult = thingsFound[0];
476 | thingsFound = thingsFound.slice(1);
477 | for (var i=searchResult.length-1;i>-1;i--) {
478 | var ref = searchResult[i].ref;
479 | for (j=0,jlen=thingsFound.length;j 0) {
507 | results.children().eq(0).replaceWith(this.format(this.info_template,{"amount":searchResult.length}));
508 | } else {
509 | results.append(this.format(this.info_template,{"amount":searchResult.length}));
510 | }
511 | }
512 |
513 | if(this.before) {
514 | this.before();
515 | };
516 |
517 | // Get the blogData for the full set, for onComplete
518 | for (var i = 0; i < searchResult.length; i++) {
519 | var lunrref = searchResult[i].ref;
520 | var postData = this.blogData[lunrref];
521 | if (postData) {
522 | postData.ref = lunrref;
523 | resultsData.push(postData);
524 | } else {
525 | console.warn("ghostHunter: index/data mismatch. Ouch.");
526 | }
527 | }
528 | // Get an array of IDs present in current results
529 | var listItems = $('.gh-search-item');
530 | var currentRefs = listItems
531 | .map(function(){
532 | return this.id.slice(3);
533 | }).get();
534 | if (currentRefs.length === 0) {
535 | for (var i=0,ilen=resultsData.length;i
7 |
8 | ---
9 |
10 |
11 |
12 | 
13 | 
14 |
15 | ## ghostHunter
16 |
17 | **Original developer:** [jamal@i11u.me](mailto:jamal@i11u.me)
18 |
19 | GhostHunter makes it easy to add search capability to any Ghost theme, using the [Ghost API](https://api.ghost.org/v1.14.0/docs) and the [lunr.js](https://lunrjs.com) search engine. Indexing and search are done client-side (in the browser). This has several advantages:
20 |
21 | * Searches are private to the user, and are not exposed to third parties.
22 | * Installation and maintenance of powerful-but-daunting standalone search engines (such as [Solr](http://lucene.apache.org/solr/) or [ElasticSearch](https://www.elastic.co/)) is not required.
23 | * Instant search ("search-as-you-type" or "typeahead") is simple to configure.
24 |
25 | -----------------
26 |
27 | ## Contents
28 |
29 | * [ghostHunter](#ghosthunter)
30 | * [Contents](#contents)
31 | * [Upgrade notes](#upgrade-notes)
32 | * [Basic setup](#basic-setup)
33 | * [Advanced usage](#advanced-usage)
34 | * [Production installation](#production-installation)
35 | * [GhostHunter options](#ghosthunter-options)
36 | * [Multiple search fields](#multiple-search-fields)
37 | * [Clearing search results](#clearing-search-results)
38 | * [Indexing and caching: how it works](#indexing-and-caching-how-it-works)
39 | * [Development: rebuilding ghostHunter](#development-rebuilding-ghosthunter)
40 | * [Footnotes](#footnotes)
41 |
42 | ------------------
43 |
44 | ## Upgrade notes
45 | ### GhostHunter v0.6.0
46 |
47 | * Implements @JiapengLi "dirty fix" to support the new Ghost v2 Content API.
48 | * Removes spurious production console.log message.
49 | * Removes `includepages` option.
50 |
51 |
52 | To use this version of ghostHunter, you'll need to create a Custom Integration and inject its Content API key into your blog header. In your Ghost Settings:
53 |
54 | * Go to **Integrations**
55 | * Choose **Add custom integration**, name it `ghostHunter` and choose **Create**. Copy the generated Content API Key.
56 | * Go to **Code injection**
57 | * Add this to **Blog Header**:
58 | ```txt
59 |
64 | ```
65 |
66 |
67 | ### GhostHunter v0.5.1
68 |
69 | Breaking change: added a new parameter `includebodysearch`, default `false`. Leaving it `false` completely deactivates searching within post body. Change done for performance reasons for Ghost Pro members.
70 |
71 | ### GhostHunter v0.4.x → v0.5.0
72 |
73 | The local ``lunr.js`` index used by ghostHunter is quick. That makes
74 | it well suited to search-as-you-type (SAYT), which can be enabled
75 | simply by setting the ``onKeyUp`` option to ``true``. Although fast
76 | and convenient, the rapid clearing-and-rewriting of search results in
77 | SAYT mode can be distracting to the user.
78 |
79 | From version 0.5.0, ghostHunter uses a [Levenshtein edit
80 | distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
81 | algorithm to determine the specific steps needed to transform
82 | each list of search results into the next. This produces screen
83 | updates that are easy on the eye, and even pleasant to watch.
84 |
85 | To support this behavior, ghostHunter imposes some new requirements
86 | on the ``result_template``. If you use this option in your theme,
87 | you must edit the template to satisfy the following requirements
88 | before upgrading:
89 |
90 | * The template *must* be wrapped in a single outer node (e.g. ``a`` or ``div``);
91 | * The outer node *must* have a unique ``id`` attribute. You can set this by
92 | giving the ``{{ref}}`` value used for indexing a string prefix (see the default
93 | template for an example).
94 | * The outer node *must* be assigned a class ``gh-search-item``.
95 |
96 | That's it. With those changes, your theme should be ready for ghostHunter 0.5.0.
97 |
98 | ## Basic setup
99 |
100 | In your theme directory, navigate to the `assets` subdirectory, [1] and clone this repository there: [2]
101 |
102 | ```txt
103 | cd assets
104 | git clone https://github.com/jamalneufeld/ghostHunter.git --recursive
105 | ```
106 |
107 | After cloning, the ghostHunter module will be located at `assets/ghostHunter/dist/jquery.ghosthunter.js`. [3] This is a human-readable "raw" copy of the module, and can be loaded directly in your theme templates for testing. (It will run just fine, but it contains a lot of whitespace and comments, and should be "minified" for production use [see below]).
108 |
109 | To test the module in your template, add the following line, after JQuery is loaded. Typically this will be near the bottom of a file `default.hbs`, in the top folder of the theme directory.
110 |
111 | ````html
112 |
113 | ````
114 |
115 | You will need to add a search box to your pages. The specific `.hbs` template and location will vary depending on the style and on your design choices, but the HTML will need an `` field and a submit button inside a `
122 | ````
123 |
124 | You will also need to mark an area in your pages where the search results should show up:
125 |
126 | ````html
127 |
128 | ````
129 |
130 | Wake up ghostHunter with a block of JQuery code. For testing, the sample below can be placed in the
131 | template that loads ghostHunter, immediately after the module is loaded:
132 |
133 | ````html
134 |
139 | ````
140 |
141 | Do the necessaries to [load the theme into Ghost](https://themes.ghost.org/v1.17.0/docs/about), and see if it works. :sweat_smile:
142 |
143 |
144 | ## Advanced usage
145 |
146 | ### Production installation
147 |
148 | To reduce load times and network traffic, the JavaScript of a site is typically "minified," bundling all code into a single file with reduced whitespace and other optimizations. The ``jquery.ghosthunter.js`` module should be bundled in this way for the production version of your site. The most common tool for this purpose in Web development is either Grunt or Gulp. A full explanation of their use is beyond the scope of this guide, but here are some links for reference:
149 |
150 | * The [Gulp Project](https://gulpjs.com/) website.
151 | * The [Grunt Project](https://gruntjs.com/) website.
152 |
153 | GhostHunter is built using Grunt. Instructions on installing Grunt in order to tweak or extend the code of the ghostHunter module are given in a separate section below.
154 |
155 |
156 | ### GhostHunter options
157 |
158 | The behavior of ghostHunter can be controlled at two levels. For deep
159 | changes, [4] see the section [Development:
160 | rebuilding ghostHunter](#development-rebuilding-ghosthunter) below.
161 |
162 | For most purposes, ghostHunter offers a set of simple options that can be
163 | set when the plugin is invoked: as an example, the last code sample in
164 | the previous section sets the `results` option.
165 |
166 | :arrow_right: **results**
167 |
168 | > Should be set to the JQuery ID of the DOM object into which search results should be inserted. This value is required.
169 | >
170 | > Default value is ``undefined``.
171 |
172 | :arrow_right: **onKeyUp**
173 |
174 | > When set ``true``, search results are returned after each keystroke, for instant search-as-you-type results.
175 | >
176 | > Default value is ``false``
177 |
178 | :arrow_right: **result_template**
179 |
180 | > A simple Handlebars template used to render individual items in the search result. The templates
181 | > recognize variable substitution only; helpers and conditional insertion constructs are ignored,
182 | > and will be rendered verbatim.
183 | >
184 | > From ghostHunter v0.5.0, the ``result_template`` *must* be assigned a unique ``id``, and *must*
185 | > be assigned a class ``gh-search-item``. Without these attributes, screen updates will not
186 | > work correctly.
187 | >
188 | > Default template is <a id='gh-{{ref}}' class='gh-search-item' href='{{link}}'><p><h2>{{title}}</h2><h4>{{prettyPubDate}}</h4></p></a>
189 |
190 | :arrow_right: **info_template**
191 |
192 | > A Handlebars template used to display the number of search items returned.
193 | >
194 | > Default template is <p>Number of posts found: {{amount}}</p>
195 |
196 | :arrow_right: **displaySearchInfo**
197 |
198 | > When set ``true``, the number of search items returned is shown immediately above the list of search hits. The notice is formatted using ``info_template``.
199 | >
200 | > Default value is ``true``.
201 |
202 | :arrow_right: **zeroResultsInfo**
203 |
204 | > When set ``true``, the number-of-search-items notice formatted using ``info_template`` is shown even when the number of items is zero. When set to false, the notice is suppressed when there are no search results.
205 | >
206 | > Default value is ``true``.
207 |
208 | :arrow_right: **subpath**
209 |
210 | > If Ghost is hosted in a subfolder of the site, set this string to the path leading to Ghost (for example, ``"/blog"``). The value is prepended to item slugs in search returns.
211 | >
212 | > Default value is an empty string.
213 |
214 | :arrow_right: **onPageLoad**
215 |
216 | > When set ``true``, posts are checked and indexed when a page is
217 | > loaded. In early versions of ghostHunter, the default behavior was to
218 | > initiate indexing when focus fell in the search field, to reduce the
219 | > time required for initial page loads. With caching and other
220 | > changes, this is no longer needed, and this option can safely be set
221 | > to ``true`` always.
222 | >
223 | > Default value is ``true``.
224 |
225 | :arrow_right: **before**
226 |
227 | > Use to optionally set a callback function that is executed immediately before the list of search results is displayed. The callback function takes no arguments.
228 | >
229 | > Example:
230 |
231 | ````javascript
232 | $("#search-field").ghostHunter({
233 | results: "#results",
234 | before: function() {
235 | alert("results are about to be rendered");
236 | }
237 | });
238 |
239 | ````
240 | > Default value is ``false``.
241 |
242 | :arrow_right: **onComplete**
243 |
244 | > Use to optionally set a callback function that is executed immediately after the list of search results is displayed. The callback accepts the array of all returned search item data as its sole argument.
245 | > A function like that shown in the following example could be used with search-as-you-type to hide and reveal a search area and the current page content, depending on whether the search box contains any text.
246 |
247 | ````javascript
248 | $("#search-field").ghostHunter({
249 | results: "#results",
250 | onComplete: function(results) {
251 | if ($('.search-field').prop('value')) {
252 | $('.my-search-area').show();
253 | $('.my-display-area').hide();
254 | } else {
255 | $('.my-search-area').hide();
256 | $('.my-display-area').show();
257 | }
258 | }
259 | });
260 | ````
261 | > Default value is ``false``.
262 |
263 | :arrow_right: **item_preprocessor**
264 |
265 | > Use to optionally set a callback function that is executed immediately before items are indexed. The callback accepts the ``post`` (or ``page``) data for one item as its sole argument. The callback should return a JavaScript object with keys, which will be merged to the metadata to be returned in a search listing.
266 | >
267 | > Example:
268 |
269 | ````javascript
270 | item_preprocessor: function(item) {
271 | var ret = {};
272 | var thisDate = new Date(item.updated_at);
273 | var aWeekAgo = new Date(thisDate.getTime() - 1000*60*60*24*7);
274 | if (thisDate > aWeekAgo) {
275 | ret.recent = true;
276 | } else {
277 | ret.recent = false;
278 | }
279 | return ret;
280 | }
281 | ````
282 | > With the sample function above, ``result_template`` could be set to something like this:
283 |
284 | ````javascript
285 | result_template: '
{{#if recent}}NEW! {{/if}}{{title}}
'
286 | ````
287 | > Default value is ``false``.
288 |
289 | :arrow_right: **indexing_start**
290 |
291 | > Use to optionally set a callback that is executed immediately before an indexing operation begins.
292 | > On a large site, this can be used to disable the search box and show a spinner or other indication
293 | > that indexing is in progress. (On small sites, the time required for indexing will be so small that
294 | > such flourishes would not be noticed.)
295 |
296 | ````javascript
297 | indexing_start: function() {
298 | $('.search-field')
299 | .prop('disabled', true)
300 | .addClass('yellow-bg')
301 | .prop('placeholder', 'Indexing, please wait');
302 | }
303 | ````
304 | > Default value is ``false``.
305 |
306 |
307 | :arrow_right: **indexing_end**
308 |
309 | > Use to optionally set a callback that is executed after an indexing operation completes.
310 | > This is a companion to ``indexing_start`` above.
311 |
312 | ````javascript
313 | indexing_end: function() {
314 | $('.search-field')
315 | .prop('placeholder', 'Search …')
316 | .removeClass('yellow-bg')
317 | .prop('disabled', false);
318 | }
319 | ````
320 |
321 | > Default value is ``false``.
322 |
323 | :arrow_right: **includebodysearch**
324 |
325 | > Use to allow searching within the full post body.
326 |
327 | > Default value is ``false``.
328 |
329 | ### Multiple search fields
330 |
331 | There should be only one ``ghostHunter`` object in a page; if there
332 | are two, both will attempt to instantiate at the same time, and bad
333 | things will happen. However, Responsive Design themes may place the
334 | search field in entirely different locations depending on the screen
335 | size. You can use a single ``ghostHunter`` object to serve multiple
336 | search fields with a coding pattern like the following: [5]
337 |
338 | 1. Include a single hidden search field in your templates. This will
339 | be the ``ghostHunter`` object.
340 |
341 | ```html
342 |
343 | ```
344 |
345 | 2. Include your search fields where you like, but assign each a
346 | unique class name that is not shared with the hidden ``ghostHunter``
347 | input node.
348 |
349 | ```html
350 |
357 | ```
358 |
359 | 3. In the JavaScript of your theme, instantiate ghostHunter on the
360 | hidden node:
361 |
362 | ```html
363 | $('.search-field').ghostHunter({
364 | results: '#results',
365 | onKeyUp: true
366 | });
367 | ```
368 |
369 | 4. Register an event on the others that spoofs the steps needed
370 | to submit the query to ``ghostHunter``:
371 |
372 | ```html
373 | $('.search-field-mobile, .search-field-desktop').on('keyup', function(event) {
374 | $('.search-field').prop('value', event.target.value);
375 | $('.search-field').trigger('keyup');
376 | });
377 | ```
378 |
379 | ### Clearing search results
380 |
381 | You can use the ghostHunter object to programmatically clear the results of your query. ghostHunter will return an object relating to your search field and you can use that object to clear results.
382 |
383 | ````js
384 | var searchField = $("#search-field").ghostHunter({
385 | results: "#results",
386 | onKeyUp: true
387 | });
388 | ````
389 |
390 | Now that the object is available to your code you can call it any time to clear your results:
391 |
392 | ````js
393 | searchField.clear();
394 | ````
395 |
396 | ### Indexing and caching: how it works
397 |
398 | After the load of any page in which ghostHunter is included, GH builds
399 | a full-text index of all posts. Indexing is done client-side, within
400 | the browser, based on data pulled in the background from the Ghost
401 | API. To reduce network traffic and processing burden, index data is
402 | cached to the extent possible in the browser's ``localStorage`` object,
403 | according to the following rules:
404 |
405 | 1. If no cached data is available, GH retrieves data for all posts from
406 | the Ghost API, builds an index, and stores a copy of the index data
407 | in ``localStorage`` for future reference, along with a copy of the
408 | associated metadata and a date stamp reflecting the most recent
409 | update to the posts.
410 |
411 | 2. If cached data is available, GH hits the Ghost API to retrieve
412 | a count of posts updated after the cached timestamp.
413 |
414 | * If any new posts or edits are found, GH generates an index
415 | and caches data as at (1).
416 |
417 | * If no new posts or edits are found, GH restores the index,
418 | metadata and timestamp from ``localStorage``.
419 |
420 | The index can be used in JavaScript to perform searches, and returns
421 | data objects that can be used to drive Handlebars templates.
422 |
423 | ### Development: rebuilding ghostHunter
424 |
425 | The ``jquery.ghosthunter.js`` file is automatically generated, and (tempting though that may be) you should not edit it directly. If you plan to modify ghostHunter (in order to tweak search behavior, say, or to extend GhostHunter's capabilities) you should make your changes to the original source file, and rebuild ghostHunter using ``Grunt``. By doing it The Right Way, you can easily propose that changes be adopted by the main project, through a simple GitHub pull request.
426 |
427 | To set things up for development work, start by entering the ``ghostHunter`` directory:
428 | ```bash
429 | prompt> cd ghostHunter
430 | ```
431 | Install the Grunt command line tool globally (the command below is appropriate for Linux systems, your mileage may vary):
432 | ```bash
433 | prompt> sudo npm install -g grunt-cli
434 | ```
435 | Install Grunt and the other node.js modules needed for the build:
436 | ```bash
437 | prompt> npm install
438 | ```
439 | Try rebuilding ghostHunter:
440 | ```bash
441 | prompt> grunt
442 | ```
443 | Once you are able to rebuild ghostHunter, you can edit the source file at ``src/ghosthunter.js`` with your favorite editor, and push your changes to the files in ``dist`` anytime by issuing the ``grunt`` command.
444 |
445 | ## Version 0.5.0 notes
446 |
447 | * Graceful Levenshtein updating of search list
448 | * Search queries as fuzzy match to each term, joined by AND
449 |
450 | ## Version 0.4.1 notes
451 |
452 | * Include lunr as a submodule, update to lunr.js v2.1
453 | * Set up Grunt to produce use-require and embedded versions of plugin from a single source file
454 | * Cache index, metadata, and timestamp in localStorage
455 | * Include tags list in search-list metadata
456 | * Add options:
457 | - ``subpath`` string for subfolder deployments
458 | - ``item_preprocessor`` callback
459 | - ``indexing_start`` callback
460 | - ``indexing_end`` callback
461 | * Edits to README
462 |
463 | ## Version 0.4.0 notes
464 |
465 | * Compatible with Ghost 1.0
466 | * Uses the Ghost API. If you need the RSS version you can use [this](https://github.com/jamalneufeld/ghostHunter/commit/2e721620868d127e9e688145fabcf5f86249d11b) commit, or @lizhuoli1126's [fork](https://github.com/dreampiggy/ghostHunter)*
467 | * It is currently not possible to [limit the number of fields queried and include tags](https://github.com/TryGhost/Ghost/issues/5615) in a single Ghost API call.
468 |
469 | ----------
470 |
471 | # Footnotes
472 |
473 | [1] The ghostHunter module, and any other JavaScript, CSS or icon code should always be placed under the `assets` directory. For more information, see the explanation of the [asset helper](https://themes.ghost.org/v1.17.0/docs/asset).
474 |
475 | [2] In this case, the cloned `git` repository can be updated by entering the `ghostHunter` directory and doing `git pull`. There are a couple of alternatives:
476 |
477 | * You can just download the ZIP archive and unpack it in `assets`. To update to a later version, download and unZIP again.
478 | * If your theme itself is in a `git` repository, you can add ghostHunter as a [git submodule](https://github.com/blog/2104-working-with-submodules) or a [git subtree](https://www.atlassian.com/blog/git/alternatives-to-git-submodule-git-subtree). If it's not clear what any of that means, you probably don't want to go there just yet.
479 |
480 | [3] There is another copy of the module in `dist` called `jquery.ghosthunter-use-require.js`. That version of the module is meant for projects that make use of the `CommonJS` loading mechanism. If you are not using `CommonJS`, you can ignore this version of the module.
481 |
482 | [4] Features requiring deeper control would include fuzzy searches by [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance), or support for [non-English languages](https://lunrjs.com/guides/language_support.html) in `lunr.js`, for example.
483 |
484 | [5] The example given in the text assumes
485 | search-as-you-type mode. If your theme uses a submit button, the
486 | object at step 1 should be a hidden form, with appropriate adjustments
487 | to the JavaScript code to force submit rather than ``onKeyUp``.
488 |
--------------------------------------------------------------------------------
/src/lunr.js:
--------------------------------------------------------------------------------
1 | /**
2 | * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.1.5
3 | * Copyright (C) 2017 Oliver Nightingale
4 | * @license MIT
5 | */
6 |
7 | ;(function(){
8 |
/**
 * Shortcut for configuring and building a lunr Index in one call.
 *
 * A new lunr.Builder is created. Its indexing pipeline receives the
 * trimmer, the stop word filter and the stemmer (in that order) and its
 * search pipeline receives the stemmer.
 *
 * The builder is then handed to the configuration function, both as
 * `this` and as its first argument, so that fields, the document ref and
 * any other builder parameters can be customised.
 *
 * All documents _must_ be added inside the configuration function: the
 * index is built as soon as it returns.
 *
 * @example
 * var idx = lunr(function () {
 *   this.field('title')
 *   this.field('body')
 *   this.ref('id')
 *
 *   documents.forEach(function (doc) {
 *     this.add(doc)
 *   }, this)
 * })
 *
 * @see {@link lunr.Builder}
 * @see {@link lunr.Pipeline}
 * @see {@link lunr.trimmer}
 * @see {@link lunr.stopWordFilter}
 * @see {@link lunr.stemmer}
 * @namespace {function} lunr
 */
var lunr = function (config) {
  var indexBuilder = new lunr.Builder()
  var indexingSteps = [lunr.trimmer, lunr.stopWordFilter, lunr.stemmer]

  // Same as pipeline.add(trimmer, stopWordFilter, stemmer).
  indexBuilder.pipeline.add.apply(indexBuilder.pipeline, indexingSteps)
  indexBuilder.searchPipeline.add(lunr.stemmer)

  config.call(indexBuilder, indexBuilder)

  return indexBuilder.build()
}

lunr.version = "2.1.5"
58 | /*!
59 | * lunr.utils
60 | * Copyright (C) 2017 Oliver Nightingale
61 | */
62 |
/**
 * A namespace containing utils for the rest of the lunr library
 */
lunr.utils = {}

/**
 * Print a warning message to the console.
 *
 * The console is looked up with `typeof` rather than through a captured
 * global object: in strict-mode scripts, ES modules and many bundles the
 * enclosing `this` is undefined, and reading `.console` from it would turn
 * every warning into a TypeError. When no usable `console.warn` exists the
 * call is a silent no-op.
 *
 * @param {String} message The message to be printed.
 * @memberOf Utils
 */
lunr.utils.warn = function (message) {
  /* eslint-disable no-console */
  if (typeof console !== 'undefined' && typeof console.warn === 'function') {
    console.warn(message)
  }
  /* eslint-enable no-console */
}

/**
 * Convert an object to a string.
 *
 * `null` and `undefined` become the empty string; for every other value
 * the result of calling its `toString` method is returned.
 *
 * @param {Any} obj The object to convert to a string.
 * @return {String} string representation of the passed object.
 * @memberOf Utils
 */
lunr.utils.asString = function (obj) {
  if (obj === void 0 || obj === null) {
    return ""
  }

  return obj.toString()
}
/**
 * Pairs a document ref with a field name.
 *
 * @constructor
 * @param {string} docRef - Reference of the document.
 * @param {string} fieldName - Name of the field within that document.
 * @param {string} [stringValue] - Pre-computed string form, when known.
 */
lunr.FieldRef = function (docRef, fieldName, stringValue) {
  this.docRef = docRef
  this.fieldName = fieldName
  this._stringValue = stringValue
}

// Separator placed between the field name and the document ref.
lunr.FieldRef.joiner = "/"

/**
 * Parses a string of the form `fieldName<joiner>docRef`.
 *
 * Only the first joiner splits the string, so the document ref itself may
 * contain the joiner. Throws the string "malformed field ref string" when
 * no joiner is present.
 *
 * @param {string} s - The serialised field ref.
 * @returns {lunr.FieldRef}
 */
lunr.FieldRef.fromString = function (s) {
  var joinerAt = s.indexOf(lunr.FieldRef.joiner)

  if (joinerAt === -1) throw "malformed field ref string"

  var fieldName = s.slice(0, joinerAt)
  var docRef = s.slice(joinerAt + 1)

  return new lunr.FieldRef(docRef, fieldName, s)
}

/**
 * Returns `fieldName<joiner>docRef`, computing it on first use and
 * caching it on the instance.
 *
 * @returns {string}
 */
lunr.FieldRef.prototype.toString = function () {
  if (this._stringValue == null) {
    this._stringValue = [this.fieldName, this.docRef].join(lunr.FieldRef.joiner)
  }

  return this._stringValue
}
/**
 * Calculates the inverse document frequency for a posting. Shared between
 * the builder and the index.
 *
 * The number of documents containing the term is the total number of keys
 * across every field entry of the posting; the `_index` entry is skipped
 * because it holds the term index rather than a field.
 *
 * @private
 * @param {object} posting - The posting for a given term
 * @param {number} documentCount - The total number of documents.
 * @returns {number} log(1 + |(N - n + 0.5) / (n + 0.5)|), where N is
 * documentCount and n the number of documents containing the term.
 */
lunr.idf = function (posting, documentCount) {
  var matchingDocs = 0

  for (var key in posting) {
    if (key != '_index') {
      matchingDocs += Object.keys(posting[key]).length
    }
  }

  var ratio = (documentCount - matchingDocs + 0.5) / (matchingDocs + 0.5)

  return Math.log(1 + Math.abs(ratio))
}
150 |
/**
 * Wraps the string form of a token, together with its metadata, while it
 * travels through the text processing pipeline.
 *
 * @constructor
 * @param {string} [str=''] - The string token being wrapped.
 * @param {object} [metadata={}] - Metadata associated with this token.
 */
lunr.Token = function (str, metadata) {
  this.str = str ? str : ""
  this.metadata = metadata ? metadata : {}
}

/**
 * Returns the wrapped token string.
 *
 * @returns {string}
 */
lunr.Token.prototype.toString = function () {
  return this.str
}

/**
 * Signature of the function used when updating, or optionally when
 * cloning, a token.
 *
 * @callback lunr.Token~updateFunction
 * @param {string} str - The string representation of the token.
 * @param {Object} metadata - All metadata associated with this token.
 */

/**
 * Replaces the wrapped string with the result of applying `fn` to it.
 *
 * @example
 * token.update(function (str, metadata) {
 *   return str.toUpperCase()
 * })
 *
 * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.
 * @returns {lunr.Token} This token, to allow chaining.
 */
lunr.Token.prototype.update = function (fn) {
  var updated = fn(this.str, this.metadata)
  this.str = updated
  return this
}

/**
 * Creates a new token from this one, optionally transforming the string
 * with `fn`. The metadata object is passed to the new token as-is, so the
 * clone and the original refer to the same metadata object.
 *
 * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.
 * @returns {lunr.Token}
 */
lunr.Token.prototype.clone = function (fn) {
  var clonedStr = fn ? fn(this.str, this.metadata) : this.str
  return new lunr.Token(clonedStr, this.metadata)
}
209 | /*!
210 | * lunr.tokenizer
211 | * Copyright (C) 2017 Oliver Nightingale
212 | */
213 |
/**
 * Splits a value into lower-cased tokens ready to be inserted into the
 * search index.
 *
 * - `null` / `undefined` produce no tokens.
 * - Arrays produce one token per element: each element is converted with
 *   `lunr.utils.asString`, lower-cased and wrapped in a lunr.Token with no
 *   metadata.
 * - Anything else is converted with `toString`, trimmed, lower-cased and
 *   split wherever a single character matches `lunr.tokenizer.separator`.
 *   Each resulting token records `position` ([start, length] within the
 *   trimmed string) and `index` (its ordinal among the tokens produced).
 *
 * @static
 * @param {?(string|object|object[])} obj - The object to convert into tokens
 * @returns {lunr.Token[]}
 */
lunr.tokenizer = function (obj) {
  if (obj == null) {
    return []
  }

  if (Array.isArray(obj)) {
    return obj.map(function (item) {
      var itemText = lunr.utils.asString(item)
      return new lunr.Token(itemText.toLowerCase())
    })
  }

  var text = obj.toString().trim().toLowerCase()
  var textLength = text.length
  var found = []
  var tokenStart = 0

  // The cursor runs one past the last character so the final token is
  // flushed by the same code path as tokens ended by a separator.
  for (var cursor = 0; cursor <= textLength; cursor++) {
    var separatorHit = text.charAt(cursor).match(lunr.tokenizer.separator)

    if (!separatorHit && cursor != textLength) continue

    var tokenLength = cursor - tokenStart

    if (tokenLength > 0) {
      var metadata = {
        position: [tokenStart, tokenLength],
        index: found.length
      }

      found.push(new lunr.Token(text.slice(tokenStart, cursor), metadata))
    }

    tokenStart = cursor + 1
  }

  return found
}

/**
 * The separator used to split a string into tokens. Override this property
 * to change how `lunr.tokenizer` splits strings. By default this splits on
 * whitespace and hyphens.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
273 | /*!
274 | * lunr.Pipeline
275 | * Copyright (C) 2017 Oliver Nightingale
276 | */
277 |
/**
 * lunr.Pipelines maintain an ordered list of functions to be applied to all
 * tokens in documents entering the search index and queries being run
 * against the index.
 *
 * An instance of lunr.Index created with the lunr shortcut will contain a
 * pipeline with a stop word filter and an English language stemmer. Extra
 * functions can be added before or after either of these functions or these
 * default functions can be removed.
 *
 * When run the pipeline will call each function in turn, passing a token, the
 * index of that token in the list of tokens handed to that function and
 * finally that list itself.
 *
 * The output of functions in the pipeline will be passed to the next function
 * in the pipeline. To exclude a token from entering the index the function
 * should return null or undefined, the rest of the pipeline will not be
 * called with this token.
 *
 * For serialisation of pipelines to work, all functions used in an instance of
 * a pipeline should be registered with lunr.Pipeline. Registered functions can
 * then be loaded. If trying to load a serialised pipeline that uses functions
 * that are not registered an error will be thrown.
 *
 * If not planning on serialising the pipeline then registering pipeline functions
 * is not necessary.
 *
 * @constructor
 */
lunr.Pipeline = function () {
  this._stack = []
}

// Registry of label -> function. It has no prototype, so the `in` checks
// below only ever see labels that were actually registered.
lunr.Pipeline.registeredFunctions = Object.create(null)

/**
 * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
 * string as well as all known metadata. A pipeline function can mutate the token string
 * or mutate (or add) metadata for a given token.
 *
 * A pipeline function can indicate that the passed token should be discarded by returning
 * null (or undefined, or the empty string). This token will not be passed to any
 * downstream pipeline functions and will not be added to the index.
 *
 * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
 * to any downstream pipeline functions and all returned tokens will be added to the index.
 *
 * Any number of pipeline functions may be chained together using a lunr.Pipeline.
 *
 * @interface lunr.PipelineFunction
 * @param {lunr.Token} token - A token from the document being processed.
 * @param {number} i - The index of this token in the complete list of tokens for this document/field.
 * @param {lunr.Token[]} tokens - All tokens for this document/field.
 * @returns {(?lunr.Token|lunr.Token[])}
 */

/**
 * Register a function with the pipeline.
 *
 * Functions that are used in the pipeline should be registered if the pipeline
 * needs to be serialised, or a serialised pipeline needs to be loaded.
 *
 * Registering a function does not add it to a pipeline, functions must still be
 * added to instances of the pipeline for them to be used when running a pipeline.
 *
 * The label is also stored on the function itself as `fn.label`. A warning
 * is logged when the label is already in use; the new function replaces
 * the old one.
 *
 * @param {lunr.PipelineFunction} fn - The function to register.
 * @param {String} label - The label to register this function with
 */
lunr.Pipeline.registerFunction = function (fn, label) {
  if (label in this.registeredFunctions) {
    lunr.utils.warn('Overwriting existing registered function: ' + label)
  }

  fn.label = label
  lunr.Pipeline.registeredFunctions[fn.label] = fn
}

/**
 * Warns if the function is not registered as a Pipeline function.
 *
 * @param {lunr.PipelineFunction} fn - The function to check for.
 * @private
 */
lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  var isRegistered = fn.label && (fn.label in this.registeredFunctions)

  if (!isRegistered) {
    lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  }
}

/**
 * Loads a previously serialised pipeline.
 *
 * All functions to be loaded must already be registered with lunr.Pipeline.
 * If any function from the serialised data has not been registered then an
 * error will be thrown.
 *
 * @param {Object} serialised - The serialised pipeline to load.
 * @returns {lunr.Pipeline}
 * @throws {Error} When a label has no registered function.
 */
lunr.Pipeline.load = function (serialised) {
  var pipeline = new lunr.Pipeline

  serialised.forEach(function (fnName) {
    var fn = lunr.Pipeline.registeredFunctions[fnName]

    if (fn) {
      pipeline.add(fn)
    } else {
      throw new Error('Cannot load unregistered function: ' + fnName)
    }
  })

  return pipeline
}

/**
 * Adds new functions to the end of the pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
 */
lunr.Pipeline.prototype.add = function () {
  var fns = Array.prototype.slice.call(arguments)

  fns.forEach(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    this._stack.push(fn)
  }, this)
}

/**
 * Adds a single function after a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} When existingFn is not in the pipeline.
 */
lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  pos = pos + 1
  this._stack.splice(pos, 0, newFn)
}

/**
 * Adds a single function before a function that already exists in the
 * pipeline.
 *
 * Logs a warning if the function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} When existingFn is not in the pipeline.
 */
lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var pos = this._stack.indexOf(existingFn)
  if (pos == -1) {
    throw new Error('Cannot find existingFn')
  }

  this._stack.splice(pos, 0, newFn)
}

/**
 * Removes a function from the pipeline. Does nothing when the function is
 * not part of the pipeline.
 *
 * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
 */
lunr.Pipeline.prototype.remove = function (fn) {
  var pos = this._stack.indexOf(fn)
  if (pos == -1) {
    return
  }

  this._stack.splice(pos, 1)
}

/**
 * Runs the current list of functions that make up the pipeline against the
 * passed tokens.
 *
 * Each function sees the full output of the previous one. A result of
 * null, undefined or the empty string drops the token; an array result
 * contributes each of its elements; any other result is kept as a single
 * token.
 *
 * @param {Array} tokens The tokens to run through the pipeline.
 * @returns {Array}
 */
lunr.Pipeline.prototype.run = function (tokens) {
  var stackLength = this._stack.length

  for (var i = 0; i < stackLength; i++) {
    var fn = this._stack[i],
        memo = []

    for (var j = 0; j < tokens.length; j++) {
      var result = fn(tokens[j], j, tokens)

      // null must be dropped here as well: the PipelineFunction contract
      // documents it as the way to discard a token, and letting it through
      // would hand null to the next function in the stack.
      if (result === null || result === void 0 || result === '') continue

      if (Array.isArray(result)) {
        for (var k = 0; k < result.length; k++) {
          memo.push(result[k])
        }
      } else {
        memo.push(result)
      }
    }

    tokens = memo
  }

  return tokens
}

/**
 * Convenience method for passing a string through a pipeline and getting
 * strings out. This method takes care of wrapping the passed string in a
 * token and mapping the resulting tokens back to strings.
 *
 * @param {string} str - The string to pass through the pipeline.
 * @returns {string[]}
 */
lunr.Pipeline.prototype.runString = function (str) {
  var token = new lunr.Token (str)

  return this.run([token]).map(function (t) {
    return t.toString()
  })
}

/**
 * Resets the pipeline by removing any existing processors.
 *
 */
lunr.Pipeline.prototype.reset = function () {
  this._stack = []
}

/**
 * Returns a representation of the pipeline ready for serialisation: the
 * list of labels of the functions in the pipeline, in order.
 *
 * Logs a warning if the function has not been registered.
 *
 * @returns {Array}
 */
lunr.Pipeline.prototype.toJSON = function () {
  return this._stack.map(function (fn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(fn)

    return fn.label
  })
}
529 | /*!
530 | * lunr.Vector
531 | * Copyright (C) 2017 Oliver Nightingale
532 | */
533 |
/**
 * A vector is used to construct the vector space of documents and queries. These
 * vectors support operations to determine the similarity between two documents or
 * a document and a query.
 *
 * Normally no parameters are required for initializing a vector, but in the case of
 * loading a previously dumped vector the raw elements can be provided to the constructor.
 *
 * For performance reasons vectors are implemented with a flat array, where an element's
 * index is immediately followed by its value. E.g. [index, value, index, value]. This
 * allows the underlying array to be as sparse as possible and still offer decent
 * performance when being used for vector calculations.
 *
 * @constructor
 * @param {Number[]} [elements] - The flat list of element index and element value pairs.
 */
lunr.Vector = function (elements) {
  // Cached magnitude; 0 means "not computed yet".
  this._magnitude = 0
  this.elements = elements || []
}


/**
 * Calculates the position within the flat elements array at which a given
 * index is, or should be inserted, using a binary search over the
 * [index, value] pairs.
 *
 * This is used internally by insert and upsert. If the index is already
 * present its position is returned, as if the value for that index were to
 * be updated, but it is the caller's responsibility to check whether there
 * is a duplicate at that position.
 *
 * @param {Number} index - The index whose position should be located.
 * @returns {Number}
 */
lunr.Vector.prototype.positionForIndex = function (index) {
  // For an empty vector the tuple can be inserted at the beginning
  if (this.elements.length == 0) {
    return 0
  }

  // start, end and pivotPoint count [index, value] pairs, hence the * 2
  // whenever the flat array is addressed.
  var start = 0,
      end = this.elements.length / 2,
      sliceLength = end - start,
      pivotPoint = Math.floor(sliceLength / 2),
      pivotIndex = this.elements[pivotPoint * 2]

  while (sliceLength > 1) {
    if (pivotIndex < index) {
      start = pivotPoint
    }

    if (pivotIndex > index) {
      end = pivotPoint
    }

    if (pivotIndex == index) {
      break
    }

    sliceLength = end - start
    pivotPoint = start + Math.floor(sliceLength / 2)
    pivotIndex = this.elements[pivotPoint * 2]
  }

  if (pivotIndex == index) {
    return pivotPoint * 2
  }

  if (pivotIndex > index) {
    return pivotPoint * 2
  }

  if (pivotIndex < index) {
    return (pivotPoint + 1) * 2
  }
}

/**
 * Inserts an element at an index within the vector.
 *
 * Does not allow duplicates, will throw the string "duplicate index" if
 * there is already an entry for this index.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 */
lunr.Vector.prototype.insert = function (insertIdx, val) {
  this.upsert(insertIdx, val, function () {
    throw "duplicate index"
  })
}

/**
 * Inserts or updates an existing index within the vector.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 * @param {function} fn - A function that is called for updates, the existing value and the
 * requested value are passed as arguments
 */
lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
  // Any change invalidates the cached magnitude.
  this._magnitude = 0
  var position = this.positionForIndex(insertIdx)

  if (this.elements[position] == insertIdx) {
    this.elements[position + 1] = fn(this.elements[position + 1], val)
  } else {
    this.elements.splice(position, 0, insertIdx, val)
  }
}

/**
 * Calculates the magnitude of this vector. A non-zero result is cached
 * until the next upsert.
 *
 * @returns {Number}
 */
lunr.Vector.prototype.magnitude = function () {
  if (this._magnitude) return this._magnitude

  var sumOfSquares = 0,
      elementsLength = this.elements.length

  // Values live at the odd positions of the flat array.
  for (var i = 1; i < elementsLength; i += 2) {
    var val = this.elements[i]
    sumOfSquares += val * val
  }

  return this._magnitude = Math.sqrt(sumOfSquares)
}

/**
 * Calculates the dot product of this vector and another vector by walking
 * both element lists in step and multiplying the values of indexes present
 * in both.
 *
 * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
 * @returns {Number}
 */
lunr.Vector.prototype.dot = function (otherVector) {
  var dotProduct = 0,
      a = this.elements, b = otherVector.elements,
      aLen = a.length, bLen = b.length,
      aVal = 0, bVal = 0,
      i = 0, j = 0

  while (i < aLen && j < bLen) {
    aVal = a[i], bVal = b[j]
    if (aVal < bVal) {
      i += 2
    } else if (aVal > bVal) {
      j += 2
    } else if (aVal == bVal) {
      dotProduct += a[i + 1] * b[j + 1]
      i += 2
      j += 2
    }
  }

  return dotProduct
}

/**
 * Calculates the cosine similarity between this vector and another
 * vector.
 *
 * When either vector has zero magnitude (for example an empty vector) the
 * similarity is defined as 0 rather than letting the division produce NaN.
 *
 * @param {lunr.Vector} otherVector - The other vector to calculate the
 * similarity with.
 * @returns {Number}
 */
lunr.Vector.prototype.similarity = function (otherVector) {
  var magnitudeProduct = this.magnitude() * otherVector.magnitude()

  if (magnitudeProduct === 0) {
    return 0
  }

  return this.dot(otherVector) / magnitudeProduct
}

/**
 * Converts the vector to an array of the values within the vector, in
 * stored order and without their indexes.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toArray = function () {
  var output = new Array (this.elements.length / 2)

  for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {
    output[j] = this.elements[i]
  }

  return output
}

/**
 * A JSON serializable representation of the vector: the flat
 * [index, value, ...] elements array itself (not a copy).
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toJSON = function () {
  return this.elements
}
726 | /* eslint-disable */
727 | /*!
728 | * lunr.stemmer
729 | * Copyright (C) 2017 Oliver Nightingale
730 | * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
731 | */
732 |
/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * The stemming function is applied to the token through `token.update`,
 * so the token's string is replaced in place and the same token is
 * returned.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - The token whose string should be stemmed
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
lunr.stemmer = (function(){
  // Suffix -> replacement table applied in step 2.
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    // Suffix -> replacement table applied in step 3.
    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    // Regular expression fragments the measure patterns are assembled from.
    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                   // vowel in stem

  // Every pattern is compiled once here, when the stemmer is created, and
  // reused for every word.
  var re_mgr0 = new RegExp(mgr0);
  var re_mgr1 = new RegExp(mgr1);
  var re_meq1 = new RegExp(meq1);
  var re_s_v = new RegExp(s_v);

  var re_1a = /^(.+?)(ss|i)es$/;
  var re2_1a = /^(.+?)([^s])s$/;
  var re_1b = /^(.+?)eed$/;
  var re2_1b = /^(.+?)(ed|ing)$/;
  var re_1b_2 = /.$/;
  var re2_1b_2 = /(at|bl|iz)$/;
  var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
  var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var re_1c = /^(.+?[^aeiou])y$/;
  var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;

  var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;

  var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var re2_4 = /^(.+?)(s|t)(ion)$/;

  var re_5 = /^(.+?)e$/;
  var re_5_1 = /ll$/;
  var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  // Stems a single word (a plain string) and returns the stemmed string.
  // The steps run strictly in order; each works on the output of the
  // previous one.
  var porterStemmer = function porterStemmer(w) {
    var stem,
      suffix,
      firstch,
      re,
      re2,
      re3,
      re4;

    // Words shorter than three characters are returned unchanged.
    if (w.length < 3) { return w; }

    // An initial "y" is upper-cased for the duration of the algorithm: the
    // vowel class `v` only lists lower-case letters, so "Y" matches as a
    // consonant. It is lower-cased again before returning.
    firstch = w.substr(0,1);
    if (firstch == "y") {
      w = firstch.toUpperCase() + w.substr(1);
    }

    // Step 1a: drop "es" after "ss" or "i", otherwise drop a final "s" that
    // does not follow another "s".
    re = re_1a
    re2 = re2_1a;

    if (re.test(w)) { w = w.replace(re,"$1$2"); }
    else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }

    // Step 1b: "eed" loses its last letter when the stem has m>0; "ed" and
    // "ing" are removed when the stem contains a vowel, followed by a
    // clean-up of the remaining stem.
    re = re_1b;
    re2 = re2_1b;
    if (re.test(w)) {
      var fp = re.exec(w);
      re = re_mgr0;
      if (re.test(fp[1])) {
        re = re_1b_2;
        w = w.replace(re,"");
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1];
      re2 = re_s_v;
      if (re2.test(stem)) {
        w = stem;
        re2 = re2_1b_2;
        re3 = re3_1b_2;
        re4 = re4_1b_2;
        // Clean-up: restore "e" after at/bl/iz, collapse a doubled
        // consonant (other than l, s, z), or restore "e" on a short
        // consonant-vowel-consonant stem.
        if (re2.test(w)) { w = w + "e"; }
        else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
        else if (re4.test(w)) { w = w + "e"; }
      }
    }

    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    re = re_1c;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      w = stem + "i";
    }

    // Step 2: replace a step2list suffix when the stem has m>0.
    re = re_2;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3: replace a step3list suffix when the stem has m>0.
    re = re_3;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      suffix = fp[2];
      re = re_mgr0;
      if (re.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4: remove one of the listed suffixes, or "ion" after "s"/"t",
    // when the remaining stem has m>1.
    re = re_4;
    re2 = re2_4;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      if (re.test(stem)) {
        w = stem;
      }
    } else if (re2.test(w)) {
      var fp = re2.exec(w);
      stem = fp[1] + fp[2];
      re2 = re_mgr1;
      if (re2.test(stem)) {
        w = stem;
      }
    }

    // Step 5: drop a final "e" when the stem has m>1, or m=1 and does not
    // end consonant-vowel-consonant; then reduce a final "ll" to "l" when
    // the word has m>1.
    re = re_5;
    if (re.test(w)) {
      var fp = re.exec(w);
      stem = fp[1];
      re = re_mgr1;
      re2 = re_meq1;
      re3 = re3_5;
      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
        w = stem;
      }
    }

    re = re_5_1;
    re2 = re_mgr1;
    if (re.test(w) && re2.test(w)) {
      re = re_1b_2;
      w = w.replace(re,"");
    }

    // and turn initial Y back to y

    if (firstch == "y") {
      w = firstch.toLowerCase() + w.substr(1);
    }

    return w;
  };

  // The pipeline function itself: stems the token's string in place.
  return function (token) {
    return token.update(porterStemmer);
  }
})();

// Registered under the label 'stemmer' so pipelines that use it can be
// serialised and loaded again.
lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
948 | /*!
949 | * lunr.stopWordFilter
950 | * Copyright (C) 2017 Oliver Nightingale
951 | */
952 |
/**
 * lunr.generateStopWordFilter builds a stopWordFilter function from the
 * provided list of stop words.
 *
 * The built in lunr.stopWordFilter is built using this generator, and it
 * can be used to generate custom stopWordFilters for applications or non
 * English languages.
 *
 * The returned filter hands back the token it was given unless the token's
 * string form is one of the stop words, in which case (or when no token is
 * passed) it returns undefined.
 *
 * @param {Array} stopWords - The list of words the filter should reject.
 * @returns {lunr.PipelineFunction}
 * @see lunr.Pipeline
 * @see lunr.stopWordFilter
 */
lunr.generateStopWordFilter = function (stopWords) {
  var lookup = {}

  stopWords.forEach(function (word) {
    lookup[word] = word
  })

  return function (token) {
    if (!token) return

    // Comparing against the stored word (instead of testing for presence)
    // keeps inherited object properties from counting as stop words.
    if (lookup[token.toString()] === token.toString()) return

    return token
  }
}
975 |
/**
 * lunr.stopWordFilter is an English language stop word list filter, any words
 * contained in the list will not be passed through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass the
 * filter then undefined will be returned.
 *
 * The filter is produced by lunr.generateStopWordFilter from the word list
 * below.
 *
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - A token to check for being a stop word.
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
lunr.stopWordFilter = lunr.generateStopWordFilter([
  'a',
  'able',
  'about',
  'across',
  'after',
  'all',
  'almost',
  'also',
  'am',
  'among',
  'an',
  'and',
  'any',
  'are',
  'as',
  'at',
  'be',
  'because',
  'been',
  'but',
  'by',
  'can',
  'cannot',
  'could',
  'dear',
  'did',
  'do',
  'does',
  'either',
  'else',
  'ever',
  'every',
  'for',
  'from',
  'get',
  'got',
  'had',
  'has',
  'have',
  'he',
  'her',
  'hers',
  'him',
  'his',
  'how',
  'however',
  'i',
  'if',
  'in',
  'into',
  'is',
  'it',
  'its',
  'just',
  'least',
  'let',
  'like',
  'likely',
  'may',
  'me',
  'might',
  'most',
  'must',
  'my',
  'neither',
  'no',
  'nor',
  'not',
  'of',
  'off',
  'often',
  'on',
  'only',
  'or',
  'other',
  'our',
  'own',
  'rather',
  'said',
  'say',
  'says',
  'she',
  'should',
  'since',
  'so',
  'some',
  'than',
  'that',
  'the',
  'their',
  'them',
  'then',
  'there',
  'these',
  'they',
  'this',
  'tis',
  'to',
  'too',
  'twas',
  'us',
  'wants',
  'was',
  'we',
  'were',
  'what',
  'when',
  'where',
  'which',
  'while',
  'who',
  'whom',
  'why',
  'will',
  'with',
  'would',
  'yet',
  'you',
  'your'
])

// Registered under the label 'stopWordFilter' so pipelines that use it can
// be serialised and loaded again.
lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
1111 | /*!
1112 | * lunr.trimmer
1113 | * Copyright (C) 2017 Oliver Nightingale
1114 | */
1115 |
/**
 * lunr.trimmer is a pipeline function that strips any run of non word
 * characters from the start and from the end of a token before the
 * token enters the index.
 *
 * The stripping relies on the regular expression class \W, so it may
 * not work correctly for non latin characters; remove or adapt it when
 * indexing languages that use them.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token The token to pass through the filter
 * @returns {lunr.Token} Whatever token.update returns for the trimmed text.
 * @see lunr.Pipeline
 */
lunr.trimmer = function (token) {
  var stripEdges = function (text) {
    var withoutLeading = text.replace(/^\W+/, '')
    return withoutLeading.replace(/\W+$/, '')
  }

  return token.update(stripEdges)
}

// Register under a label so the trimmer can be referred to by name.
lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')
1138 | /*!
1139 | * lunr.TokenSet
1140 | * Copyright (C) 2017 Oliver Nightingale
1141 | */
1142 |
/**
 * A token set stores the unique list of all tokens within an index.
 * Token sets also represent an incoming query; the query token set and
 * the index token set are intersected to find which tokens to look up
 * in the inverted index.
 *
 * A token set may hold many tokens (the index token set) or a single
 * token (a simple query token set).
 *
 * Token sets additionally provide wildcard matching. Leading, contained
 * and trailing wildcards are supported, and edit distance matching is
 * built on top of them.
 *
 * Token sets are implemented as a minimal finite state automata in which
 * both common prefixes and common suffixes are shared between tokens,
 * reducing the space needed to store the set.
 *
 * Each instance starts out non-final, with no outgoing edges, and takes
 * the next identifier from lunr.TokenSet._nextId.
 *
 * @constructor
 */
lunr.TokenSet = function () {
  this.final = false
  this.edges = {}
  this.id = lunr.TokenSet._nextId++
}

/**
 * The auto incrementing identifier that will be handed to the next
 * token set created.
 *
 * TokenSets require a unique identifier to be correctly minimised.
 *
 * @private
 */
lunr.TokenSet._nextId = 1
1180 |
/**
 * Builds a TokenSet containing every word of the given sorted array.
 *
 * Words are fed one by one, in array order, to a lunr.TokenSet.Builder
 * and the builder's root node is returned once it has finished.
 *
 * @param {String[]} arr - A sorted array of strings to create the set from.
 * @returns {lunr.TokenSet}
 * @throws Will throw an error if the input array is not sorted.
 */
lunr.TokenSet.fromArray = function (arr) {
  var builder = new lunr.TokenSet.Builder(),
      total = arr.length,
      position = 0

  while (position < total) {
    builder.insert(arr[position])
    position += 1
  }

  builder.finish()
  return builder.root
}
1198 |
/**
 * Creates a token set from a query clause.
 *
 * A clause that carries an editDistance property (whatever its value)
 * produces a fuzzy token set; any other clause produces a plain,
 * possibly wildcarded, token set.
 *
 * @private
 * @param {Object} clause - A single clause from lunr.Query.
 * @param {string} clause.term - The query clause term.
 * @param {number} [clause.editDistance] - The optional edit distance for the term.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromClause = function (clause) {
  if (!('editDistance' in clause)) {
    return lunr.TokenSet.fromString(clause.term)
  }

  return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance)
}
1215 |
/**
 * Creates a token set representing a single string with a specified
 * edit distance.
 *
 * Insertions, deletions, substitutions and transpositions are each
 * treated as an edit distance of 1.
 *
 * Increasing the allowed edit distance will have a dramatic impact
 * on the performance of both creating and intersecting these TokenSets.
 * It is advised to keep the edit distance less than 3.
 *
 * The set is built iteratively from a stack of frames. Each frame pairs
 * a node with the part of the string still to be consumed from that node
 * and the number of edits that may still be spent.
 *
 * @param {string} str - The string to create the token set from.
 * @param {number} editDistance - The allowed edit distance to match.
 * @returns {lunr.TokenSet} The root node of the generated token set.
 */
lunr.TokenSet.fromFuzzyString = function (str, editDistance) {
  var root = new lunr.TokenSet

  // Follows the edge labelled `label` out of `node`, creating the target
  // node first when no such edge exists yet.
  var follow = function (node, label) {
    if (!(label in node.edges)) {
      node.edges[label] = new lunr.TokenSet
    }

    return node.edges[label]
  }

  var stack = [{
    node: root,
    editsRemaining: editDistance,
    str: str
  }]

  while (stack.length) {
    var frame = stack.pop(),
        remaining = frame.str,
        canEdit = frame.editsRemaining > 0

    // no edit
    // consume the next character as it is, at no cost
    if (remaining.length > 0) {
      var noEditNode = follow(frame.node, remaining.charAt(0))

      if (remaining.length == 1) {
        noEditNode.final = true
      } else {
        stack.push({
          node: noEditNode,
          editsRemaining: frame.editsRemaining,
          str: remaining.slice(1)
        })
      }
    }

    // deletion
    // can only do a deletion if we have enough edits remaining
    // and if there are characters left to delete in the string;
    // the first character is skipped and the second one is consumed
    if (canEdit && remaining.length > 1) {
      var deletionNode = follow(frame.node, remaining.charAt(1))

      if (remaining.length <= 2) {
        deletionNode.final = true
      } else {
        stack.push({
          node: deletionNode,
          editsRemaining: frame.editsRemaining - 1,
          str: remaining.slice(2)
        })
      }
    }

    // deletion
    // just removing the last character from the str
    if (canEdit && remaining.length == 1) {
      frame.node.final = true
    }

    // substitution
    // can only do a substitution if we have enough edits remaining
    // and if there are characters left to substitute
    if (canEdit && remaining.length >= 1) {
      var substitutionNode = follow(frame.node, "*")

      if (remaining.length == 1) {
        substitutionNode.final = true
      } else {
        stack.push({
          node: substitutionNode,
          editsRemaining: frame.editsRemaining - 1,
          str: remaining.slice(1)
        })
      }
    }

    // insertion
    // can only do insertion if there are edits remaining; the wildcard
    // edge stands for the inserted character and the string is kept whole
    if (canEdit) {
      var insertionNode = follow(frame.node, "*")

      if (remaining.length == 0) {
        insertionNode.final = true
      } else {
        stack.push({
          node: insertionNode,
          editsRemaining: frame.editsRemaining - 1,
          str: remaining
        })
      }
    }

    // transposition
    // can only do a transposition if there are edits remaining
    // and there are enough characters to transpose; the second character
    // is consumed now and the first one is put back in front of the rest.
    // At least two characters are present here, so a frame always follows.
    if (canEdit && remaining.length > 1) {
      var transposeNode = follow(frame.node, remaining.charAt(1))

      stack.push({
        node: transposeNode,
        editsRemaining: frame.editsRemaining - 1,
        str: remaining.charAt(0) + remaining.slice(2)
      })
    }
  }

  return root
}
1369 |
/**
 * Creates a TokenSet from a string.
 *
 * The string may contain one or more wildcard characters (*)
 * that will allow wildcard matching when intersecting with
 * another TokenSet.
 *
 * @param {string} str - The string to create a TokenSet from.
 * @returns {lunr.TokenSet} The root node of the chain built for str.
 */
lunr.TokenSet.fromString = function (str) {
  var root = new lunr.TokenSet,
      current = root,
      sawWildcard = false,
      lastIndex = str.length - 1

  /*
   * Walk the string one character at a time, appending a node per
   * ordinary character. The node reached by the last character is
   * the only one marked final.
   */
  for (var position = 0; position <= lastIndex; position++) {
    var symbol = str[position],
        isLast = position == lastIndex

    if (symbol == "*") {
      // A wildcard becomes a self referencing edge so that any number
      // of any characters keeps matching at the current node.
      sawWildcard = true
      current.edges[symbol] = current
      current.final = isLast
      continue
    }

    var successor = new lunr.TokenSet
    successor.final = isLast

    current.edges[symbol] = successor
    current = successor

    // TODO: is this needed anymore?
    // Once a wildcard has been seen every later node also gets a
    // wildcard edge leading back to the root.
    if (sawWildcard) {
      current.edges["*"] = root
    }
  }

  return root
}
1418 |
/**
 * Lists every string contained within this TokenSet.
 *
 * The set is walked with an explicit stack; a string is emitted each
 * time a final node is reached, the string being the edge labels
 * followed from this node to get there.
 *
 * @returns {string[]}
 */
lunr.TokenSet.prototype.toArray = function () {
  var collected = [],
      pending = [{
        prefix: "",
        node: this
      }]

  while (pending.length) {
    var current = pending.pop(),
        outgoing = current.node.edges,
        labels = Object.keys(outgoing)

    if (current.node.final) {
      collected.push(current.prefix)
    }

    for (var i = 0, count = labels.length; i < count; i++) {
      var label = labels[i]

      pending.push({
        prefix: current.prefix.concat(label),
        node: outgoing[label]
      })
    }
  }

  return collected
}
1454 |
/**
 * Generates a string representation of a TokenSet.
 *
 * The representation lets TokenSets be used as keys in objects, which
 * the construction and minimisation of a TokenSet depends on. It is
 * not designed to be a human friendly description of the set.
 *
 * The string is a finality flag ('1' or '0') followed, for every
 * outgoing edge in sorted label order, by the label and the id of the
 * node it leads to. A value cached in _str is returned as is.
 *
 * @returns {string}
 */
lunr.TokenSet.prototype.toString = function () {
  if (this._str) {
    return this._str
  }

  // NOTE: the labels are read with Object.keys rather than a for-in
  // loop, which was reported to de-optimise this function (at least
  // in V8).
  var edges = this.edges,
      parts = Object.keys(edges).sort().map(function (label) {
        return label + edges[label].id
      })

  return (this.final ? '1' : '0') + parts.join('')
}
1491 |
1492 | /**
1493 | * Returns a new TokenSet that is the intersection of
1494 | * this TokenSet and the passed TokenSet.
1495 | *
1496 | * This intersection will take into account any wildcards
1497 | * contained within the TokenSet.
1498 | *
1499 | * @param {lunr.TokenSet} b - An other TokenSet to intersect with.
1500 | * @returns {lunr.TokenSet}
1501 | */
1502 | lunr.TokenSet.prototype.intersect = function (b) {
1503 | var output = new lunr.TokenSet,
1504 | frame = undefined
1505 |
1506 | var stack = [{
1507 | qNode: b,
1508 | output: output,
1509 | node: this
1510 | }]
1511 |
1512 | while (stack.length) {
1513 | frame = stack.pop()
1514 |
1515 | // NOTE: As with the #toString method, we are using
1516 | // Object.keys and a for loop instead of a for-in loop
1517 | // as both of these objects enter 'hash' mode, causing
1518 | // the function to be de-optimised in V8
1519 | var qEdges = Object.keys(frame.qNode.edges),
1520 | qLen = qEdges.length,
1521 | nEdges = Object.keys(frame.node.edges),
1522 | nLen = nEdges.length
1523 |
1524 | for (var q = 0; q < qLen; q++) {
1525 | var qEdge = qEdges[q]
1526 |
1527 | for (var n = 0; n < nLen; n++) {
1528 | var nEdge = nEdges[n]
1529 |
1530 | if (nEdge == qEdge || qEdge == '*') {
1531 | var node = frame.node.edges[nEdge],
1532 | qNode = frame.qNode.edges[qEdge],
1533 | final = node.final && qNode.final,
1534 | next = undefined
1535 |
1536 | if (nEdge in frame.output.edges) {
1537 | // an edge already exists for this character
1538 | // no need to create a new node, just set the finality
1539 | // bit unless this node is already final
1540 | next = frame.output.edges[nEdge]
1541 | next.final = next.final || final
1542 |
1543 | } else {
1544 | // no edge exists yet, must create one
1545 | // set the finality bit and insert it
1546 | // into the output
1547 | next = new lunr.TokenSet
1548 | next.final = final
1549 | frame.output.edges[nEdge] = next
1550 | }
1551 |
1552 | stack.push({
1553 | qNode: qNode,
1554 | output: next,
1555 | node: node
1556 | })
1557 | }
1558 | }
1559 | }
1560 | }
1561 |
1562 | return output
1563 | }
/**
 * Incrementally builds a minimised TokenSet from words inserted in
 * sorted order.
 *
 * A new builder has an empty previous word, a fresh root TokenSet, no
 * nodes awaiting minimisation and no minimised nodes recorded.
 *
 * @constructor
 */
lunr.TokenSet.Builder = function () {
  // the word handed to the most recent insert call
  this.previousWord = ""
  // entry node of the set being built
  this.root = new lunr.TokenSet()
  // parent/char/child records for nodes not yet minimised
  this.uncheckedNodes = []
  // already minimised nodes keyed by their string representation
  this.minimizedNodes = {}
}
1570 |
/**
 * Adds a word to the set being built.
 *
 * Nodes created for the previous word beyond the prefix it shares with
 * the new word are minimised first; a chain of new nodes is then
 * appended for the characters that follow the shared prefix.
 *
 * @param {string} word - The word to add; must not sort before the previously inserted word.
 * @throws {Error} If the word sorts before the previously inserted word.
 */
lunr.TokenSet.Builder.prototype.insert = function (word) {
  if (word < this.previousWord) {
    throw new Error ("Out of order word insertion")
  }

  // length of the prefix shared with the previously inserted word
  var previous = this.previousWord,
      commonPrefix = 0

  while (commonPrefix < word.length &&
         commonPrefix < previous.length &&
         word[commonPrefix] == previous[commonPrefix]) {
    commonPrefix++
  }

  this.minimize(commonPrefix)

  // continue from the deepest node still unchecked, or from the root
  var pending = this.uncheckedNodes,
      node = pending.length == 0 ? this.root : pending[pending.length - 1].child

  for (var position = commonPrefix; position < word.length; position++) {
    var char = word[position],
        nextNode = new lunr.TokenSet

    node.edges[char] = nextNode

    pending.push({
      parent: node,
      char: char,
      child: nextNode
    })

    node = nextNode
  }

  node.final = true
  this.previousWord = word
}
1610 |
/**
 * Completes the build by minimising every node that is still unchecked.
 * Call it once after the last word has been inserted.
 */
lunr.TokenSet.Builder.prototype.finish = function () {
  var keepNone = 0

  this.minimize(keepNone)
}
1614 |
/**
 * Minimises unchecked nodes, deepest first, until only the first
 * `downTo` of them remain.
 *
 * A node whose string representation matches an already minimised node
 * is replaced, on its parent's edge, by that node; otherwise the node
 * itself is recorded as the minimised representative.
 *
 * @param {number} downTo - How many unchecked nodes to leave in place.
 */
lunr.TokenSet.Builder.prototype.minimize = function (downTo) {
  var pending = this.uncheckedNodes

  while (pending.length - 1 >= downTo) {
    var entry = pending[pending.length - 1],
        signature = entry.child.toString()

    if (signature in this.minimizedNodes) {
      entry.parent.edges[entry.char] = this.minimizedNodes[signature]
    } else {
      // Cache the key for this node since
      // we know it can't change anymore
      entry.child._str = signature

      this.minimizedNodes[signature] = entry.child
    }

    pending.pop()
  }
}
1633 | /*!
1634 | * lunr.Index
1635 | * Copyright (C) 2017 Oliver Nightingale
1636 | */
1637 |
/**
 * An index contains the built index of all documents and provides a query interface
 * to the index.
 *
 * Instances of lunr.Index are not usually created with this constructor directly;
 * use lunr.Builder to construct new indexes, or lunr.Index.load to restore
 * previously built and serialized indexes.
 *
 * @constructor
 * @param {Object} attrs - The attributes of the built search index.
 * @param {Object} attrs.invertedIndex - An index of term/field to document reference.
 * @param {Object} attrs.fieldVectors - Field vectors keyed by field reference.
 * @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
 * @param {string[]} attrs.fields - The names of indexed document fields.
 * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
 */
lunr.Index = function (attrs) {
  // every listed attribute is copied onto the index under the same name
  var copied = ['invertedIndex', 'fieldVectors', 'tokenSet', 'fields', 'pipeline']

  for (var i = 0; i < copied.length; i++) {
    this[copied[i]] = attrs[copied[i]]
  }
}
1661 |
1662 | /**
1663 | * A result contains details of a document matching a search query.
1664 | * @typedef {Object} lunr.Index~Result
1665 | * @property {string} ref - The reference of the document this result represents.
1666 | * @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
1667 | * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
1668 | */
1669 |
1670 | /**
1671 | * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
1672 | * query language which itself is parsed into an instance of lunr.Query.
1673 | *
1674 | * For programmatically building queries it is advised to directly use lunr.Query, the query language
1675 | * is best used for human entered text rather than program generated text.
1676 | *
1677 | * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
1678 | * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'
1679 | * or 'world', though those that contain both will rank higher in the results.
1680 | *
1681 | * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
1682 | * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
1683 | * wildcards will increase the number of documents that will be found but can also have a negative
1684 | * impact on query performance, especially with wildcards at the beginning of a term.
1685 | *
1686 | * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
1687 | * hello in the title field will match this query. Using a field not present in the index will lead
1688 | * to an error being thrown.
1689 | *
1690 | * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
1691 | * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
1692 | * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
1693 | * Avoid large values for edit distance to improve query performance.
1694 | *
1695 | * To escape special characters the backslash character '\' can be used, this allows searches to include
1696 | * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
1697 | * of attempting to apply an edit distance of 2 to the search term "foo".
1698 | *
1699 | * @typedef {string} lunr.Index~QueryString
1700 | * @example <caption>Simple single term query</caption>
1701 | * hello
1702 | * @example <caption>Multiple term query</caption>
1703 | * hello world
1704 | * @example <caption>term scoped to a field</caption>
1705 | * title:hello
1706 | * @example <caption>term with a boost of 10</caption>
1707 | * hello^10
1708 | * @example <caption>term with an edit distance of 2</caption>
1709 | * hello~2
1710 | */
1711 |
/**
 * Performs a search against the index using lunr query syntax.
 *
 * Results will be returned sorted by their score, the most relevant results
 * will be returned first.
 *
 * The query string is parsed into the query object supplied by
 * lunr.Index#query; use that method directly for more programmatic querying.
 *
 * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
 * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
 * @returns {lunr.Index~Result[]}
 */
lunr.Index.prototype.search = function (queryString) {
  var buildFromString = function (query) {
    new lunr.QueryParser(queryString, query).parse()
  }

  return this.query(buildFromString)
}
1730 |
1731 | /**
1732 | * A query builder callback provides a query object to be used to express
1733 | * the query to perform on the index.
1734 | *
1735 | * @callback lunr.Index~queryBuilder
1736 | * @param {lunr.Query} query - The query object to build up.
1737 | * @this lunr.Query
1738 | */
1739 |
/**
 * Performs a query against the index using the yielded lunr.Query object.
 *
 * If performing programmatic queries against the index, this method is preferred
 * over lunr.Index#search so as to avoid the additional query parsing overhead.
 *
 * A query object is yielded to the supplied function which should be used to
 * express the query to be run against the index.
 *
 * Note that although this function takes a callback parameter it is _not_ an
 * asynchronous operation, the callback is just yielded a query object to be
 * customized.
 *
 * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
 * @returns {lunr.Index~Result[]} One result per matching document, highest score first.
 */
lunr.Index.prototype.query = function (fn) {
  // for each query clause
  // * process terms
  // * expand terms from token set
  // * find matching documents and metadata
  // * get document vectors
  // * score documents

  var query = new lunr.Query(this.fields),
      matchingFields = Object.create(null),
      queryVectors = Object.create(null),
      termFieldCache = Object.create(null),
      sumScores = function (a, b) { return a + b }

  fn.call(query, query)

  for (var c = 0; c < query.clauses.length; c++) {
    /*
     * Clause terms go through the search pipeline unless the clause has
     * the pipeline disabled, in which case the term is used untouched.
     * The pipeline returns an array of processed terms and may expand
     * the term it is given, so a single query term can lead to several
     * index lookups.
     */
    var clause = query.clauses[c],
        terms = clause.usePipeline ? this.pipeline.runString(clause.term) : [clause.term]

    for (var t = 0; t < terms.length; t++) {
      /*
       * Every term returned from the pipeline has to carry the same
       * clause settings, e.g. the same boost and or edit distance.
       * A copy of the clause is made with a JSON round trip and the
       * copy's term is replaced by the processed term.
       */
      clause = JSON.parse(JSON.stringify(clause))
      clause.term = terms[t]

      /*
       * The clause is turned into a token set which is intersected with
       * the token set of the index, giving the list of terms to look up
       * in the inverted index.
       */
      var termTokenSet = lunr.TokenSet.fromClause(clause),
          expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()

      for (var e = 0; e < expandedTerms.length; e++) {
        /*
         * The posting and its termIndex are needed to build the query
         * vector. Expanded terms without a posting are skipped.
         */
        var expandedTerm = expandedTerms[e],
            posting = this.invertedIndex[expandedTerm]

        if (!posting) continue

        var termIndex = posting._index

        for (var f = 0; f < clause.fields.length; f++) {
          /*
           * For each field this query term is scoped by (by default
           * all fields are in scope) collect the refs of all documents
           * having this term in that field, taken from the posting
           * found above.
           */
          var field = clause.fields[f],
              fieldPosting = posting[field],
              matchingDocumentRefs = Object.keys(fieldPosting),
              termField = expandedTerm + "/" + field

          /*
           * Field level boosts are supported by keeping one query
           * vector per field, created the first time the field is seen.
           */
          if (queryVectors[field] === undefined) {
            queryVectors[field] = new lunr.Vector
          }

          /*
           * The vector is populated at termIndex with a unit value
           * multiplied by the clause boost. upsert is used because the
           * vector may already hold an entry for this term, in which
           * case the two scores are added together.
           */
          queryVectors[field].upsert(termIndex, 1 * clause.boost, sumScores)

          /*
           * The matching documents and metadata of a term, field combo
           * are only collected the first time the combo is seen.
           */
          if (termFieldCache[termField]) continue

          for (var d = 0; d < matchingDocumentRefs.length; d++) {
            /*
             * The metadata of this term/field/document triple is
             * gathered into a lunr.MatchData instance, ready to be
             * returned in the query results.
             */
            var matchingDocumentRef = matchingDocumentRefs[d],
                matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field),
                metadata = fieldPosting[matchingDocumentRef],
                fieldMatch = matchingFields[matchingFieldRef]

            if (fieldMatch === undefined) {
              matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata)
            } else {
              fieldMatch.add(expandedTerm, field, metadata)
            }
          }

          termFieldCache[termField] = true
        }
      }
    }
  }

  var matchingFieldRefs = Object.keys(matchingFields),
      results = [],
      matches = Object.create(null)

  for (var r = 0; r < matchingFieldRefs.length; r++) {
    /*
     * So far document fields have been matched, but documents have to
     * be returned. Match data and scores of the fields belonging to
     * the same document are therefore combined.
     *
     * A score is computed per field from the query vectors built above
     * and the field scores of a document are added up.
     */
    var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[r]),
        docRef = fieldRef.docRef,
        fieldVector = this.fieldVectors[fieldRef],
        score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
        docMatch = matches[docRef]

    if (docMatch === undefined) {
      docMatch = {
        ref: docRef,
        score: score,
        matchData: matchingFields[fieldRef]
      }
      matches[docRef] = docMatch
      results.push(docMatch)
    } else {
      docMatch.score += score
      docMatch.matchData.combine(matchingFields[fieldRef])
    }
  }

  /*
   * Sort the results objects by score, highest first.
   */
  return results.sort(function (a, b) {
    return b.score - a.score
  })
}
1929 |
/**
 * Prepares the index for JSON serialization.
 *
 * The inverted index is emitted as [term, posting] pairs sorted by term
 * and the field vectors as [ref, serialized vector] pairs, together with
 * the lunr version, the field names and the serialized pipeline.
 *
 * The schema for this JSON blob will be described in a
 * separate JSON schema file.
 *
 * @returns {Object}
 */
lunr.Index.prototype.toJSON = function () {
  var terms = Object.keys(this.invertedIndex).sort(),
      invertedIndex = []

  for (var t = 0; t < terms.length; t++) {
    invertedIndex.push([terms[t], this.invertedIndex[terms[t]]])
  }

  var refs = Object.keys(this.fieldVectors),
      fieldVectors = []

  for (var r = 0; r < refs.length; r++) {
    fieldVectors.push([refs[r], this.fieldVectors[refs[r]].toJSON()])
  }

  return {
    version: lunr.version,
    fields: this.fields,
    fieldVectors: fieldVectors,
    invertedIndex: invertedIndex,
    pipeline: this.pipeline.toJSON()
  }
}
1958 |
/**
 * Loads a previously serialized lunr.Index
 *
 * Field vectors and the inverted index are rebuilt from their serialized
 * pairs, the token set is rebuilt by inserting the serialized terms in
 * the order they are stored, and the pipeline is restored with
 * lunr.Pipeline.load. A warning is issued when the serialized version
 * differs from the running lunr version; loading continues regardless.
 *
 * @param {Object} serializedIndex - A previously serialized lunr.Index
 * @returns {lunr.Index}
 */
lunr.Index.load = function (serializedIndex) {
  var fieldVectors = {},
      invertedIndex = {},
      serializedVectors = serializedIndex.fieldVectors,
      serializedInvertedIndex = serializedIndex.invertedIndex,
      tokenSetBuilder = new lunr.TokenSet.Builder(),
      pipeline = lunr.Pipeline.load(serializedIndex.pipeline)

  if (serializedIndex.version != lunr.version) {
    lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'")
  }

  // each entry is a [ref, elements] pair
  for (var v = 0; v < serializedVectors.length; v++) {
    var vectorEntry = serializedVectors[v]

    fieldVectors[vectorEntry[0]] = new lunr.Vector(vectorEntry[1])
  }

  // each entry is a [term, posting] pair
  for (var p = 0; p < serializedInvertedIndex.length; p++) {
    var postingEntry = serializedInvertedIndex[p]

    tokenSetBuilder.insert(postingEntry[0])
    invertedIndex[postingEntry[0]] = postingEntry[1]
  }

  tokenSetBuilder.finish()

  return new lunr.Index({
    fields: serializedIndex.fields,
    fieldVectors: fieldVectors,
    invertedIndex: invertedIndex,
    tokenSet: tokenSetBuilder.root,
    pipeline: pipeline
  })
}
2006 | /*!
2007 | * lunr.Builder
2008 | * Copyright (C) 2017 Oliver Nightingale
2009 | */
2010 |
/**
 * lunr.Builder performs indexing on a set of documents and
 * returns instances of lunr.Index ready for querying.
 *
 * The whole configuration of an index goes through the builder: the
 * fields to index, the document reference, the text processing
 * pipeline and the document scoring parameters are all set on the
 * builder before indexing.
 *
 * @constructor
 * @property {string} _ref - Internal reference to the document reference field, "id" by default.
 * @property {string[]} _fields - Internal reference to the document fields to index.
 * @property {object} invertedIndex - The inverted index maps terms to document fields.
 * @property {object} fieldTermFrequencies - Term frequency bookkeeping, initially empty (presumably keyed per field reference; the populating code is outside this constructor).
 * @property {object} fieldLengths - Length bookkeeping, initially empty (presumably keyed per field reference; the populating code is outside this constructor).
 * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing.
 * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing.
 * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index.
 * @property {number} documentCount - Keeps track of the total number of documents indexed.
 * @property {number} _b - A parameter to control field length normalization, setting this to 0 disables normalization, 1 fully normalizes field lengths, the default value is 0.75.
 * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2.
 * @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space.
 * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index.
 */
lunr.Builder = function () {
  // document schema
  this._ref = "id"
  this._fields = []

  // index state; the inverted index has no prototype
  this.invertedIndex = Object.create(null)
  this.fieldTermFrequencies = {}
  this.fieldLengths = {}

  // text processing
  this.tokenizer = lunr.tokenizer
  this.pipeline = new lunr.Pipeline()
  this.searchPipeline = new lunr.Pipeline()

  // counters and scoring parameters
  this.documentCount = 0
  this._b = 0.75
  this._k1 = 1.2
  this.termIndex = 0
  this.metadataWhitelist = []
}
2050 |
/**
 * Chooses the document field whose value identifies each document.
 *
 * Every document must carry this field. Its value is expected to be a
 * string; other values are coerced to a string. When this method is never
 * called the builder uses 'id'.
 *
 * Set the ref before adding any document: changing it part-way through
 * indexing can lead to inconsistent results.
 *
 * @param {string} ref - The name of the reference field in the document.
 */
lunr.Builder.prototype.ref = function (ref) {
  this._ref = ref
}
2066 |
/**
 * Registers one more document field to be indexed.
 *
 * Documents are expected to have the field; a null value does not cause an
 * error but makes the document less likely to be found by a search.
 *
 * Register every field before adding documents: documents that were
 * already indexed are not re-processed when a field is added later.
 *
 * @param {string} field - The name of a field to index in all documents.
 */
lunr.Builder.prototype.field = function (field) {
  var registeredFields = this._fields
  registeredFields.push(field)
}
2080 |
/**
 * Sets the field length normalisation parameter used when scoring.
 *
 * 0 switches normalisation off entirely, 1 applies it in full; the builder
 * starts at 0.75. Values below 0 are stored as 0 and values above 1 are
 * stored as 1; anything else is stored unchanged.
 *
 * @param {number} number - The value to set for this tuning parameter.
 */
lunr.Builder.prototype.b = function (number) {
  this._b = number < 0 ? 0 : (number > 1 ? 1 : number)
}
2098 |
/**
 * Sets the term frequency saturation parameter used when scoring.
 *
 * The builder starts at 1.2. Larger values make saturation set in more
 * slowly as term frequency rises, smaller values make it set in sooner.
 * The value is stored as given, without validation.
 *
 * @param {number} number - The value to set for this tuning parameter.
 */
lunr.Builder.prototype.k1 = function (number) {
  this._k1 = number
}
2109 |
/**
 * Indexes a single document.
 *
 * The builder should be fully configured first (ref and all fields set).
 * The document must have the ref field ('id' unless changed) and should
 * have every registered field, although null or undefined field values
 * are tolerated.
 *
 * For each registered field the value is tokenized and run through the
 * indexing pipeline; the resulting terms update the per-field term
 * frequencies, the field length and the inverted index.
 *
 * @param {object} doc - The document to add to the index.
 */
lunr.Builder.prototype.add = function (doc) {
  var docRef = doc[this._ref]

  this.documentCount += 1

  for (var fieldIdx = 0; fieldIdx < this._fields.length; fieldIdx++) {
    var fieldName = this._fields[fieldIdx],
        fieldRef = null,
        fieldTerms = Object.create(null),
        terms = this.pipeline.run(this.tokenizer(doc[fieldName]))

    fieldRef = new lunr.FieldRef (docRef, fieldName)

    this.fieldTermFrequencies[fieldRef] = fieldTerms

    // the length of a field is the number of terms it produced
    this.fieldLengths[fieldRef] = 0
    this.fieldLengths[fieldRef] += terms.length

    for (var termIdx = 0; termIdx < terms.length; termIdx++) {
      var term = terms[termIdx]

      // count this occurrence of the term within the field
      fieldTerms[term] = (fieldTerms[term] == undefined ? 0 : fieldTerms[term]) + 1

      // first sighting of the term anywhere: allocate its posting, give it
      // the next vector position and an empty bucket per registered field
      var posting = this.invertedIndex[term]

      if (posting == undefined) {
        posting = Object.create(null)
        posting["_index"] = this.termIndex
        this.termIndex += 1

        for (var k = 0; k < this._fields.length; k++) {
          posting[this._fields[k]] = Object.create(null)
        }

        this.invertedIndex[term] = posting
      }

      // make sure there is an entry for this term/fieldName/docRef
      var fieldPosting = posting[fieldName],
          docPosting = fieldPosting[docRef]

      if (docPosting == undefined) {
        docPosting = Object.create(null)
        fieldPosting[docRef] = docPosting
      }

      // record every whitelisted piece of token metadata, one array
      // entry per occurrence of the term
      for (var m = 0; m < this.metadataWhitelist.length; m++) {
        var metadataKey = this.metadataWhitelist[m],
            metadataValue = term.metadata[metadataKey]

        if (docPosting[metadataKey] == undefined) {
          docPosting[metadataKey] = []
        }

        docPosting[metadataKey].push(metadataValue)
      }
    }
  }
}
2186 |
/**
 * Calculates, for every field name, the average length (in terms) of that
 * field across the indexed documents and stores the result, keyed by field
 * name, in this.averageFieldLength.
 *
 * A registered field for which nothing has been indexed ends up as NaN.
 *
 * @private
 */
lunr.Builder.prototype.calculateAverageFieldLengths = function () {

  // Prototype-less maps: field names are caller supplied, and with a plain
  // object a field called e.g. "constructor" or "toString" would read a
  // truthy inherited value here and corrupt the running totals.
  var fieldRefs = Object.keys(this.fieldLengths),
      numberOfFields = fieldRefs.length,
      accumulator = Object.create(null),
      documentsWithField = Object.create(null)

  for (var i = 0; i < numberOfFields; i++) {
    var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),
        field = fieldRef.fieldName

    documentsWithField[field] || (documentsWithField[field] = 0)
    documentsWithField[field] += 1

    accumulator[field] || (accumulator[field] = 0)
    accumulator[field] += this.fieldLengths[fieldRef]
  }

  // turn the per-field totals into per-field averages
  for (var i = 0; i < this._fields.length; i++) {
    var field = this._fields[i]
    accumulator[field] = accumulator[field] / documentsWithField[field]
  }

  this.averageFieldLength = accumulator
}
2217 |
/**
 * Builds a vector space model of every indexed field using lunr.Vector.
 *
 * One vector is created per field ref. Each term of the field contributes
 * one element, positioned at the term's _index from the inverted index and
 * weighted by
 *
 *   idf * ((k1 + 1) * tf) / (k1 * (1 - b + b * (fieldLength / averageFieldLength)) + tf)
 *
 * The result is stored, keyed by field ref, in this.fieldVectors.
 * calculateAverageFieldLengths must have run first because
 * this.averageFieldLength is read here.
 *
 * @private
 */
lunr.Builder.prototype.createFieldVectors = function () {
  var fieldVectors = {},
      fieldRefs = Object.keys(this.fieldTermFrequencies),
      fieldRefsLength = fieldRefs.length,
      termIdfCache = Object.create(null) // idf depends only on the term, so compute it once

  for (var i = 0; i < fieldRefsLength; i++) {
    var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),
        field = fieldRef.fieldName,
        fieldLength = this.fieldLengths[fieldRef],
        fieldVector = new lunr.Vector,
        termFrequencies = this.fieldTermFrequencies[fieldRef],
        terms = Object.keys(termFrequencies),
        termsLength = terms.length

    for (var j = 0; j < termsLength; j++) {
      var term = terms[j],
          tf = termFrequencies[term],
          termIndex = this.invertedIndex[term]._index,
          idf, score, scoreWithPrecision

      if (termIdfCache[term] === undefined) {
        idf = lunr.idf(this.invertedIndex[term], this.documentCount)
        termIdfCache[term] = idf
      } else {
        idf = termIdfCache[term]
      }

      score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (fieldLength / this.averageFieldLength[field])) + tf)
      scoreWithPrecision = Math.round(score * 1000) / 1000
      // Rounds to three decimal places, e.g. 1.23456789 becomes 1.235.
      // Reducing the precision so that the vectors take up less
      // space when serialised. Doing it now so that they behave
      // the same before and after serialisation.

      fieldVector.insert(termIndex, scoreWithPrecision)
    }

    fieldVectors[fieldRef] = fieldVector
  }

  this.fieldVectors = fieldVectors
}
2268 |
/**
 * Builds a lunr.TokenSet from every term in the inverted index, passing
 * the terms in sorted order, and stores it in this.tokenSet.
 *
 * @private
 */
lunr.Builder.prototype.createTokenSet = function () {
  var sortedTerms = Object.keys(this.invertedIndex)
  sortedTerms.sort()

  this.tokenSet = lunr.TokenSet.fromArray(sortedTerms)
}
2279 |
/**
 * Finishes indexing and produces the searchable lunr.Index.
 *
 * Computes the average field lengths, the field vectors and the token
 * set, in that order, then hands them to a new index together with the
 * inverted index, the field list and the search pipeline. Call it only
 * after every document has been added.
 *
 * @returns {lunr.Index}
 */
lunr.Builder.prototype.build = function () {
  this.calculateAverageFieldLengths()
  this.createFieldVectors()
  this.createTokenSet()

  var indexAttrs = {
    invertedIndex: this.invertedIndex,
    fieldVectors: this.fieldVectors,
    tokenSet: this.tokenSet,
    fields: this._fields,
    pipeline: this.searchPipeline
  }

  return new lunr.Index(indexAttrs)
}
2301 |
/**
 * Applies a plugin to the index builder.
 *
 * A plugin is simply a function that customises or extends the builder.
 * It is invoked with the builder both as its `this` value and as its
 * first argument; any further arguments given to `use` are passed along
 * after the builder.
 *
 * @param {Function} fn - The plugin to apply.
 */
lunr.Builder.prototype.use = function (fn) {
  var extraArgs = Array.prototype.slice.call(arguments, 1),
      pluginArgs = [this].concat(extraArgs)

  fn.apply(this, pluginArgs)
}
/**
 * Holds the metadata collected about one matching document.
 * Every lunr.Index~Result carries a single lunr.MatchData instance.
 *
 * @constructor
 * @param {string} term - The term this match data is associated with
 * @param {string} field - The field in which the term was found
 * @param {object} metadata - The metadata recorded about this term in this field
 * @property {object} metadata - A cloned collection of metadata associated with this document, laid out as metadata[term][field][key].
 * @see {@link lunr.Index~Result}
 */
lunr.MatchData = function (term, field, metadata) {
  // Copy each metadata array (Array#slice) so the arrays held in the
  // inverted index are never the ones stored here.
  var metadataCopy = Object.create(null)

  Object.keys(metadata).forEach(function (key) {
    metadataCopy[key] = metadata[key].slice()
  })

  var byField = Object.create(null)
  byField[field] = metadataCopy

  this.metadata = Object.create(null)
  this.metadata[term] = byField
}
2351 |
/**
 * Merges the metadata of another lunr.MatchData into this one.
 *
 * One lunr.MatchData is created per matching term, but a
 * lunr.Index~Result needs only one, so the instances are folded together.
 * For a term/field/key this instance does not have yet the other
 * instance's array is adopted as is; otherwise the two arrays are
 * concatenated into a new array.
 *
 * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one.
 * @see {@link lunr.Index~Result}
 */
lunr.MatchData.prototype.combine = function (otherMatchData) {
  var incoming = otherMatchData.metadata,
      own = this.metadata

  Object.keys(incoming).forEach(function (term) {
    var incomingFields = incoming[term],
        fieldNames = Object.keys(incomingFields)

    if (own[term] == undefined) {
      own[term] = Object.create(null)
    }

    var ownFields = own[term]

    fieldNames.forEach(function (field) {
      var incomingMeta = incomingFields[field],
          metaKeys = Object.keys(incomingMeta)

      if (ownFields[field] == undefined) {
        ownFields[field] = Object.create(null)
      }

      var ownMeta = ownFields[field]

      metaKeys.forEach(function (key) {
        ownMeta[key] = ownMeta[key] == undefined
          ? incomingMeta[key]
          : ownMeta[key].concat(incomingMeta[key])
      })
    })
  })
}
2393 |
/**
 * Records metadata for a term/field pair on this match data.
 *
 * When the term or the field is new, the given metadata object is stored
 * directly (it is not cloned). When both already exist, each key is merged:
 * existing arrays are concatenated with the new values, new keys are set.
 *
 * @param {string} term - The term this match data is associated with
 * @param {string} field - The field in which the term was found
 * @param {object} metadata - The metadata recorded about this term in this field
 */
lunr.MatchData.prototype.add = function (term, field, metadata) {
  var all = this.metadata,
      termEntry = term in all ? all[term] : (all[term] = Object.create(null))

  // covers both a brand new term and a new field of a known term
  if (!(field in termEntry)) {
    termEntry[field] = metadata
    return
  }

  var existing = termEntry[field]

  Object.keys(metadata).forEach(function (key) {
    existing[key] = key in existing
      ? existing[key].concat(metadata[key])
      : metadata[key]
  })
}
/**
 * A lunr.Query is the programmatic way of describing a search to run
 * against a {@link lunr.Index}.
 *
 * Prefer obtaining one through {@link lunr.Index#query}, which supplies
 * the index's fields, over calling this constructor directly.
 *
 * @constructor
 * @param {string[]} allFields - Every field available in the index; used as the default for clauses that name no fields.
 * @property {lunr.Query~Clause[]} clauses - An array of query clauses.
 * @property {string[]} allFields - An array of all available fields in a lunr.Index.
 */
lunr.Query = function (allFields) {
  this.allFields = allFields
  this.clauses = []
}
2440 |
/**
 * Constants for indicating what kind of automatic wildcard insertion will be used when constructing a query clause.
 *
 * This allows wildcards to be added to the beginning and end of a term without having to manually do any string
 * concatenation.
 *
 * The wildcard constants can be bitwise combined to select both leading and trailing wildcards.
 *
 * @constant
 * @default
 * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour
 * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists
 * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists
 * @see lunr.Query~Clause
 * @see lunr.Query#clause
 * @see lunr.Query#term
 * @example
 * query.term('foo', {
 * wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING
 * })
 */
// Deliberately a String object rather than a primitive: the flag constants
// below are attached to it as properties, while the value itself still
// compares loosely equal (== / !=) to the "*" character. Comparisons against
// it must therefore stay non-strict.
lunr.Query.wildcard = new String ("*")
lunr.Query.wildcard.NONE = 0
lunr.Query.wildcard.LEADING = 1
lunr.Query.wildcard.TRAILING = 2
2468 |
2469 | /**
2470 | * A single clause in a {@link lunr.Query} contains a term and details on how to
2471 | * match that term against a {@link lunr.Index}.
2472 | *
2473 | * @typedef {Object} lunr.Query~Clause
2474 | * @property {string[]} fields - The fields in an index this clause should be matched against.
2475 | * @property {number} [boost=1] - Any boost that should be applied when matching this clause.
2476 | * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be.
2477 | * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline.
2478 | * @property {number} [wildcard=0] - Whether the term should have wildcards appended or prepended.
2479 | */
2480 |
/**
 * Appends a {@link lunr.Query~Clause} to this query.
 *
 * The clause object is completed in place before it is stored: missing
 * `fields` default to all fields of the index, `boost` to 1, `usePipeline`
 * to true and `wildcard` to lunr.Query.wildcard.NONE. If the wildcard
 * flags request a leading or trailing wildcard and the term does not
 * already start or end with one, "*" is added to the term.
 *
 * @param {lunr.Query~Clause} clause - The clause to add to this query.
 * @see lunr.Query~Clause
 * @returns {lunr.Query}
 */
lunr.Query.prototype.clause = function (clause) {
  var defaults = [
    ['fields', this.allFields],
    ['boost', 1],
    ['usePipeline', true],
    ['wildcard', lunr.Query.wildcard.NONE]
  ]

  for (var i = 0; i < defaults.length; i++) {
    var property = defaults[i][0]

    if (!(property in clause)) {
      clause[property] = defaults[i][1]
    }
  }

  // lunr.Query.wildcard is a String object, hence the loose comparisons.
  var wantsLeading = clause.wildcard & lunr.Query.wildcard.LEADING

  if (wantsLeading && (clause.term.charAt(0) != lunr.Query.wildcard)) {
    clause.term = "*" + clause.term
  }

  var wantsTrailing = clause.wildcard & lunr.Query.wildcard.TRAILING

  if (wantsTrailing && (clause.term.slice(-1) != lunr.Query.wildcard)) {
    clause.term = "" + clause.term + "*"
  }

  this.clauses.push(clause)

  return this
}
2520 |
/**
 * Adds a term to the current query. Under the covers this creates a
 * {@link lunr.Query~Clause} and appends it to the list of clauses that make
 * up this query.
 *
 * The options object is copied, never modified, so the same options object
 * can safely be reused for several terms.
 *
 * @param {string} term - The term to add to the query.
 * @param {Object} [options] - Any additional properties to add to the query clause.
 * @returns {lunr.Query}
 * @see lunr.Query#clause
 * @see lunr.Query~Clause
 * @example
 * // adding a single term to a query
 * query.term("foo")
 * @example
 * // adding a single term to a query and specifying search fields,
 * // term boost and automatic trailing wildcard
 * query.term("foo", {
 *   fields: ["title"],
 *   boost: 10,
 *   wildcard: lunr.Query.wildcard.TRAILING
 * })
 */
lunr.Query.prototype.term = function (term, options) {
  var clause = {}

  // Copy instead of adopting `options`: adopting it would make every call
  // that reuses the same options object share one clause object, so the
  // last term would overwrite the earlier ones.
  if (options) {
    for (var key in options) {
      clause[key] = options[key]
    }
  }

  clause.term = term

  this.clause(clause)

  return this
}
/**
 * Error thrown when a query string cannot be parsed.
 *
 * @constructor
 * @param {string} message - Description of what went wrong.
 * @param {number} start - Offset in the query string where the offending lexeme starts.
 * @param {number} end - Offset in the query string where the offending lexeme ends.
 */
lunr.QueryParseError = function (message, start, end) {
  this.name = "QueryParseError"
  this.message = message
  this.start = start
  this.end = end

  // Give every instance its own stack trace; without this, instances have
  // no stack describing where the parse error was raised.
  if (typeof Error.captureStackTrace === "function") {
    Error.captureStackTrace(this, lunr.QueryParseError)
  } else {
    this.stack = (new Error(message)).stack
  }
}

// Inherit from Error without instantiating one: an Error instance used as
// the prototype can carry a stack captured at load time, which every
// QueryParseError would then inherit.
lunr.QueryParseError.prototype = Object.create(Error.prototype)
lunr.QueryParseError.prototype.constructor = lunr.QueryParseError
/**
 * Splits a query string into lexemes.
 *
 * @constructor
 * @param {string} str - The query string to lex.
 * @property {object[]} lexemes - The lexemes emitted so far.
 * @property {string} str - The query string being lexed.
 * @property {number} length - Length of the query string.
 * @property {number} pos - Offset of the next character to read.
 * @property {number} start - Offset where the lexeme currently being read begins.
 * @property {number[]} escapeCharPositions - Offsets of escape characters inside the current lexeme; they are left out of the emitted text.
 */
lunr.QueryLexer = function (str) {
  this.str = str
  this.length = str.length
  this.start = 0
  this.pos = 0
  this.lexemes = []
  this.escapeCharPositions = []
}
2563 |
/**
 * Runs the lexer to completion. Starting with lunr.QueryLexer.lexText,
 * each state function is called with the lexer and returns the next state;
 * lexing stops when a state returns a falsy value.
 */
lunr.QueryLexer.prototype.run = function () {
  for (var state = lunr.QueryLexer.lexText; state; state = state(this)) {
    // all the work happens in the state functions
  }
}
2571 |
/**
 * Returns the text between `start` and `pos` with the recorded escape
 * characters removed, and clears the list of recorded escape positions.
 *
 * @returns {string}
 */
lunr.QueryLexer.prototype.sliceString = function () {
  var source = this.str,
      pieces = [],
      from = this.start

  // cut the text at every escape character, skipping the character itself
  this.escapeCharPositions.forEach(function (escapeAt) {
    pieces.push(source.slice(from, escapeAt))
    from = escapeAt + 1
  })

  pieces.push(source.slice(from, this.pos))
  this.escapeCharPositions.length = 0

  return pieces.join('')
}
2588 |
/**
 * Appends a lexeme of the given type covering `start`..`pos` to the list
 * of lexemes, then begins the next lexeme at `pos`.
 *
 * @param {string} type - One of the lunr.QueryLexer lexeme type constants.
 */
lunr.QueryLexer.prototype.emit = function (type) {
  var lexeme = {
    type: type,
    str: this.sliceString(),
    start: this.start,
    end: this.pos
  }

  this.lexemes.push(lexeme)
  this.start = this.pos
}
2599 |
/**
 * Handles an escape character that has just been read: remembers its
 * offset so sliceString can drop it, and steps over the character that
 * follows so it is taken literally.
 */
lunr.QueryLexer.prototype.escapeCharacter = function () {
  var escapeAt = this.pos - 1

  this.escapeCharPositions.push(escapeAt)
  this.pos += 1
}
2604 |
/**
 * Reads the next character and advances past it.
 *
 * @returns {string} The character, or lunr.QueryLexer.EOS when the input is exhausted.
 */
lunr.QueryLexer.prototype.next = function () {
  if (this.pos >= this.length) {
    return lunr.QueryLexer.EOS
  }

  return this.str.charAt(this.pos++)
}
2614 |
/**
 * @returns {number} How many characters have been read since the current lexeme started.
 */
lunr.QueryLexer.prototype.width = function () {
  var consumed = this.pos - this.start
  return consumed
}
2618 |
/**
 * Discards the pending text. If nothing is pending (start equals pos) the
 * next character is skipped first; either way the next lexeme starts at
 * the resulting position.
 */
lunr.QueryLexer.prototype.ignore = function () {
  var nothingPending = this.start == this.pos

  if (nothingPending) {
    this.pos += 1
  }

  this.start = this.pos
}
2626 |
/**
 * Steps the read position back by one character.
 */
lunr.QueryLexer.prototype.backup = function () {
  this.pos = this.pos - 1
}
2630 |
/**
 * Consumes a run of decimal digits ('0'-'9'). The first non-digit
 * character is put back, unless the end of the input was reached.
 */
lunr.QueryLexer.prototype.acceptDigitRun = function () {
  var current

  while (true) {
    current = this.next()

    var code = current.charCodeAt(0),
        isDigit = code > 47 && code < 58 // char codes of '0'..'9'

    if (!isDigit) {
      break
    }
  }

  if (current != lunr.QueryLexer.EOS) {
    this.backup()
  }
}
2643 |
/**
 * @returns {boolean} Whether there are characters left to read.
 */
lunr.QueryLexer.prototype.more = function () {
  var hasRemaining = this.pos < this.length
  return hasRemaining
}
2647 |
// Lexeme type tags passed to lunr.QueryLexer.prototype.emit and inspected
// by lunr.QueryParser. EOS doubles as the sentinel returned by
// lunr.QueryLexer.prototype.next once the input is exhausted.
lunr.QueryLexer.EOS = 'EOS'
lunr.QueryLexer.FIELD = 'FIELD'
lunr.QueryLexer.TERM = 'TERM'
lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'
lunr.QueryLexer.BOOST = 'BOOST'
2653 |
/**
 * Lexer state entered right after a ":" has been read. The text before the
 * ":" is emitted as a FIELD lexeme and the ":" itself is skipped.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state, lunr.QueryLexer.lexText.
 */
lunr.QueryLexer.lexField = function (lexer) {
  var nextState = lunr.QueryLexer.lexText

  lexer.backup() // un-read the ":" so it is not part of the field name
  lexer.emit(lunr.QueryLexer.FIELD)
  lexer.ignore() // step over the ":"

  return nextState
}
2660 |
/**
 * Lexer state entered right after a term separator has been read. Any text
 * before the separator is emitted as a TERM lexeme and the separator is
 * discarded.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function|undefined} lunr.QueryLexer.lexText while input remains, otherwise undefined.
 */
lunr.QueryLexer.lexTerm = function (lexer) {
  // a width of 1 means only the separator itself has been read
  var hasTermText = lexer.width() > 1

  if (hasTermText) {
    lexer.backup()
    lexer.emit(lunr.QueryLexer.TERM)
  }

  lexer.ignore()

  return lexer.more() ? lunr.QueryLexer.lexText : undefined
}
2673 |
/**
 * Lexer state entered with the lexer positioned on a "~". Skips the "~",
 * reads the digits that follow and emits them as an EDIT_DISTANCE lexeme.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state, lunr.QueryLexer.lexText.
 */
lunr.QueryLexer.lexEditDistance = function (lexer) {
  var nextState = lunr.QueryLexer.lexText

  lexer.ignore()
  lexer.acceptDigitRun()
  lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)

  return nextState
}
2680 |
/**
 * Lexer state entered with the lexer positioned on a "^". Skips the "^",
 * reads the digits that follow and emits them as a BOOST lexeme.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state, lunr.QueryLexer.lexText.
 */
lunr.QueryLexer.lexBoost = function (lexer) {
  var nextState = lunr.QueryLexer.lexText

  lexer.ignore()
  lexer.acceptDigitRun()
  lexer.emit(lunr.QueryLexer.BOOST)

  return nextState
}
2687 |
/**
 * Final lexer state, entered when the input is exhausted. Emits whatever
 * text is still pending as a TERM lexeme. Returns nothing, which ends the
 * lexer run.
 *
 * @param {lunr.QueryLexer} lexer
 */
lunr.QueryLexer.lexEOS = function (lexer) {
  var pending = lexer.width()

  if (pending > 0) {
    lexer.emit(lunr.QueryLexer.TERM)
  }
}
2693 |
// The separator used to split a search string into terms. It is the same
// separator the tokenizer uses when splitting document fields; the two
// must match, otherwise some tokens within a document cannot be searched
// for.
//
// The user can change the separator on the tokenizer, so it _might_ clash
// with one of the special characters already used within the search
// string, e.g. ":".
//
// This means that it is possible to change the separator in
// such a way that makes some words unsearchable using a search
// string.
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator
2706 |
/**
 * Default lexer state. Reads characters until one of them decides which
 * state comes next:
 *
 * - end of input: lexEOS
 * - "\": the following character is escaped, reading continues
 * - ":": lexField
 * - "~" or "^": pending text is emitted as a TERM, then lexEditDistance
 *   or lexBoost respectively
 * - a term separator: lexTerm
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state.
 */
lunr.QueryLexer.lexText = function (lexer) {
  for (;;) {
    var ch = lexer.next()

    if (ch == lunr.QueryLexer.EOS) {
      return lunr.QueryLexer.lexEOS
    }

    // Escape character is '\'
    if (ch == "\\") {
      lexer.escapeCharacter()
      continue
    }

    if (ch == ":") {
      return lunr.QueryLexer.lexField
    }

    if (ch == "~" || ch == "^") {
      // leave the modifier character for the next state to skip
      lexer.backup()

      if (lexer.width() > 0) {
        lexer.emit(lunr.QueryLexer.TERM)
      }

      return ch == "~" ? lunr.QueryLexer.lexEditDistance : lunr.QueryLexer.lexBoost
    }

    if (ch.match(lunr.QueryLexer.termSeparator)) {
      return lunr.QueryLexer.lexTerm
    }
  }
}
2746 |
/**
 * Parses a query string into clauses on a lunr.Query.
 *
 * @constructor
 * @param {string} str - The query string to parse.
 * @param {lunr.Query} query - The query that receives the parsed clauses.
 * @property {lunr.QueryLexer} lexer - Lexer over the query string.
 * @property {lunr.Query} query - The query being populated.
 * @property {object} currentClause - The clause currently being assembled.
 * @property {number} lexemeIdx - Index of the next lexeme to look at.
 */
lunr.QueryParser = function (str, query) {
  this.query = query
  this.lexer = new lunr.QueryLexer (str)
  this.lexemeIdx = 0
  this.currentClause = {}
}
2753 |
/**
 * Lexes the query string, then runs the parser states, starting with
 * lunr.QueryParser.parseFieldOrTerm, until a state returns a falsy value.
 *
 * @returns {lunr.Query} The query given to the constructor, with the parsed clauses added.
 */
lunr.QueryParser.prototype.parse = function () {
  this.lexer.run()
  this.lexemes = this.lexer.lexemes

  for (var state = lunr.QueryParser.parseFieldOrTerm; state; state = state(this)) {
    // all the work happens in the state functions
  }

  return this.query
}
2766 |
/**
 * @returns {object|undefined} The next lexeme without consuming it, or undefined when none are left.
 */
lunr.QueryParser.prototype.peekLexeme = function () {
  var upcoming = this.lexemes[this.lexemeIdx]
  return upcoming
}
2770 |
/**
 * Returns the next lexeme and moves on to the one after it. The index is
 * advanced even when no lexemes are left.
 *
 * @returns {object|undefined} The consumed lexeme, or undefined when none are left.
 */
lunr.QueryParser.prototype.consumeLexeme = function () {
  var consumed = this.peekLexeme()

  this.lexemeIdx = this.lexemeIdx + 1

  return consumed
}
2776 |
/**
 * Adds the clause assembled so far to the query and starts a fresh,
 * empty clause.
 */
lunr.QueryParser.prototype.nextClause = function () {
  this.query.clause(this.currentClause)
  this.currentClause = {}
}
2782 |
/**
 * Initial parser state: decides, without consuming anything, whether the
 * next lexeme starts a field-scoped clause or a plain term.
 *
 * @param {lunr.QueryParser} parser
 * @returns {Function|undefined} parseField, parseTerm, or undefined when there are no lexemes.
 * @throws {lunr.QueryParseError} When the next lexeme is neither a field nor a term.
 */
lunr.QueryParser.parseFieldOrTerm = function (parser) {
  var lexeme = parser.peekLexeme()

  if (lexeme == undefined) {
    return
  }

  var type = lexeme.type

  if (type === lunr.QueryLexer.FIELD) {
    return lunr.QueryParser.parseField
  }

  if (type === lunr.QueryLexer.TERM) {
    return lunr.QueryParser.parseTerm
  }

  var errorMessage = "expected either a field or a term, found " + lexeme.type

  if (lexeme.str.length >= 1) {
    errorMessage += " with value '" + lexeme.str + "'"
  }

  throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
}
2805 |
/**
 * Parser state for a FIELD lexeme: restricts the current clause to that
 * field and requires a term to follow.
 *
 * @param {lunr.QueryParser} parser
 * @returns {Function|undefined} parseTerm, or undefined when there is no lexeme to consume.
 * @throws {lunr.QueryParseError} When the field is not one of the query's fields, or when the field is not followed by a term.
 */
lunr.QueryParser.parseField = function (parser) {
  var lexeme = parser.consumeLexeme(),
      errorMessage

  if (lexeme == undefined) {
    return
  }

  var knownFields = parser.query.allFields

  if (knownFields.indexOf(lexeme.str) == -1) {
    var possibleFields = knownFields.map(function (f) { return "'" + f + "'" }).join(', ')

    errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields
    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  }

  parser.currentClause.fields = [lexeme.str]

  var nextLexeme = parser.peekLexeme()

  if (nextLexeme == undefined) {
    errorMessage = "expecting term, found nothing"
    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  }

  if (nextLexeme.type === lunr.QueryLexer.TERM) {
    return lunr.QueryParser.parseTerm
  }

  errorMessage = "expecting term, found '" + nextLexeme.type + "'"
  throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
2837 |
/**
 * Parser state for a TERM lexeme: stores the lower-cased term on the
 * current clause and disables the search pipeline for terms containing a
 * "*" wildcard. The clause is completed when the input ends or when a new
 * term or field follows; an edit distance or boost that follows is applied
 * to the same clause.
 *
 * @param {lunr.QueryParser} parser
 * @returns {Function|undefined} The next parser state, or undefined when parsing is finished.
 * @throws {lunr.QueryParseError} When the following lexeme has an unexpected type.
 */
lunr.QueryParser.parseTerm = function (parser) {
  var lexeme = parser.consumeLexeme()

  if (lexeme == undefined) {
    return
  }

  parser.currentClause.term = lexeme.str.toLowerCase()

  if (lexeme.str.indexOf("*") != -1) {
    parser.currentClause.usePipeline = false
  }

  var nextLexeme = parser.peekLexeme()

  if (nextLexeme == undefined) {
    parser.nextClause()
    return
  }

  var nextType = nextLexeme.type

  if (nextType === lunr.QueryLexer.TERM) {
    parser.nextClause()
    return lunr.QueryParser.parseTerm
  }

  if (nextType === lunr.QueryLexer.FIELD) {
    parser.nextClause()
    return lunr.QueryParser.parseField
  }

  if (nextType === lunr.QueryLexer.EDIT_DISTANCE) {
    return lunr.QueryParser.parseEditDistance
  }

  if (nextType === lunr.QueryLexer.BOOST) {
    return lunr.QueryParser.parseBoost
  }

  var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
  throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
2874 |
/**
 * Parser state for an EDIT_DISTANCE lexeme: parses it as a base-10 integer
 * and stores it on the current clause. The clause is completed when the
 * input ends or when a new term or field follows.
 *
 * @param {lunr.QueryParser} parser
 * @returns {Function|undefined} The next parser state, or undefined when parsing is finished.
 * @throws {lunr.QueryParseError} When the edit distance is not numeric, or the following lexeme has an unexpected type.
 */
lunr.QueryParser.parseEditDistance = function (parser) {
  var lexeme = parser.consumeLexeme(),
      errorMessage

  if (lexeme == undefined) {
    return
  }

  var editDistance = parseInt(lexeme.str, 10)

  if (isNaN(editDistance)) {
    errorMessage = "edit distance must be numeric"
    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  }

  parser.currentClause.editDistance = editDistance

  var nextLexeme = parser.peekLexeme()

  if (nextLexeme == undefined) {
    parser.nextClause()
    return
  }

  var nextType = nextLexeme.type

  if (nextType === lunr.QueryLexer.TERM) {
    parser.nextClause()
    return lunr.QueryParser.parseTerm
  }

  if (nextType === lunr.QueryLexer.FIELD) {
    parser.nextClause()
    return lunr.QueryParser.parseField
  }

  if (nextType === lunr.QueryLexer.EDIT_DISTANCE) {
    return lunr.QueryParser.parseEditDistance
  }

  if (nextType === lunr.QueryLexer.BOOST) {
    return lunr.QueryParser.parseBoost
  }

  errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
  throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
2914 |
/**
 * Parser state for a BOOST lexeme: parses it as a base-10 integer and
 * stores it on the current clause. The clause is completed when the input
 * ends or when a new term or field follows.
 *
 * @param {lunr.QueryParser} parser
 * @returns {Function|undefined} The next parser state, or undefined when parsing is finished.
 * @throws {lunr.QueryParseError} When the boost is not numeric, or the following lexeme has an unexpected type.
 */
lunr.QueryParser.parseBoost = function (parser) {
  var lexeme = parser.consumeLexeme(),
      errorMessage

  if (lexeme == undefined) {
    return
  }

  var boost = parseInt(lexeme.str, 10)

  if (isNaN(boost)) {
    errorMessage = "boost must be numeric"
    throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  }

  parser.currentClause.boost = boost

  var nextLexeme = parser.peekLexeme()

  if (nextLexeme == undefined) {
    parser.nextClause()
    return
  }

  var nextType = nextLexeme.type

  if (nextType === lunr.QueryLexer.TERM) {
    parser.nextClause()
    return lunr.QueryParser.parseTerm
  }

  if (nextType === lunr.QueryLexer.FIELD) {
    parser.nextClause()
    return lunr.QueryParser.parseField
  }

  if (nextType === lunr.QueryLexer.EDIT_DISTANCE) {
    return lunr.QueryParser.parseEditDistance
  }

  if (nextType === lunr.QueryLexer.BOOST) {
    return lunr.QueryParser.parseBoost
  }

  errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
  throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
}
2954 |
/**
 * Export the module via AMD, CommonJS or as a browser global.
 * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
 *
 * The first branch that applies wins: an AMD loader (a `define` function
 * with an `amd` property), then a CommonJS-like environment (an `exports`
 * object), and finally a `lunr` property on the object passed in as `root`.
 */
;(function (root, factory) {
  if (typeof define === 'function' && define.amd) {
    // AMD. Register as an anonymous module.
    define(factory)
  } else if (typeof exports === 'object') {
    /**
     * Node. Does not work with strict CommonJS, but
     * only CommonJS-like environments that support module.exports,
     * like Node.
     */
    module.exports = factory()
  } else {
    // Browser globals (root is window)
    root.lunr = factory()
  }
}(this, function () {
  /**
   * Just return a value to define the module export.
   * Here the exported value is the lunr namespace itself.
   */
  return lunr
}))
2982 | })();
2983 |
--------------------------------------------------------------------------------