├── .gitignore ├── .gitmodules ├── package.json ├── changelog.txt ├── LICENSE ├── example └── index.html ├── Gruntfile.js ├── src ├── levenshtein.js ├── ghosthunter.js ├── ghosthunter-nodependency.js └── lunr.js ├── dist └── jquery.ghosthunter-use-require.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/lunr"] 2 | path = lib/lunr 3 | url = https://github.com/olivernn/lunr.js.git 4 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ghosthunter", 3 | "version": "0.6.0", 4 | "description": "GhostHunter allows a theme developer for the Ghost blogging platform to add client-side search capability using lunr.js.", 5 | "main": "jquery.ghostHunter.js", 6 | "directories": { 7 | "example": "example" 8 | }, 9 | "scripts": { 10 | "test": "echo \"Error: no test specified\" && exit 1" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/fbennett/ghostHunter.git" 15 | }, 16 | "author": "Jamal Neufeld", 17 | "license": "MIT", 18 | "bugs": { 19 | "url": "https://github.com/fbennett/ghostHunter/issues" 20 | }, 21 | "homepage": "https://github.com/fbennett/ghostHunter#readme", 22 | "dependencies": {}, 23 | "devDependencies": { 24 | "grunt": "^1.0.1", 25 | "grunt-contrib-copy": "^1.0.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /changelog.txt: -------------------------------------------------------------------------------- 1 | v0.5.1 : Performance enhancements to stop involuntary DDOS 2 | v0.5.0 : Levenshtein screen updates, fuzzy searching thanks to @fbennett 3 | v0.4.1 : 
Index caching, repo reorganization, extended metadata thanks to @fbennett 4 | v0.4.0 : Ghost 1.0 compatibility 5 | v0.3.5 : Exported query options ; added option to search static pages 6 | v0.3.4 : Added onPageLoad option to improve onKeyUp option thanks to @cjsheets. 7 | v0.3.3 : Exported Lunr to a separate js file ; made the no-dependency version available. 8 | v0.3.2 : Added PrettyDate option thanks to @alavers 9 | v0.3.1 : Added tag support in the index thanks to @lizhuoli1126 10 | v0.3.0 : Stopped using RSS, started using API. 11 | v0.2.3 : Added callbacks 12 | Added tags/categories to the indexed data 13 | v0.2.2 : Added the "clear" method 14 | v0.2 : Added ability to have search occur onKeyUp 15 | v0.1 : Initial commit, fully functional alpha 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2014 Jamal Neufeld (jamal@i11u.me) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /example/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ghostHunter 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 |
22 | 23 |
24 | 25 |
26 | 27 | 28 | 29 | 30 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | module.exports = function(grunt) { 2 | 3 | // Project configuration. 4 | grunt.initConfig({ 5 | pkg: grunt.file.readJSON('package.json'), 6 | copy: { 7 | ghosthunter_embedded_lunr: { 8 | src: "src/<%= pkg.name %>.js", 9 | dest: "dist/jquery.ghosthunter.js", 10 | options: { 11 | process: function(content, path) { 12 | var lunr = grunt.file.read('./src/lunr.js'); 13 | content = content.replace(/\/\*\s+lunr\s+\*\//i, lunr); 14 | var levenshtein = grunt.file.read('./src/levenshtein.js'); 15 | content = content.replace(/\/\*\s+levenshtein\s+\*\//i, levenshtein); 16 | return grunt.template.process(content) 17 | } 18 | } 19 | }, 20 | ghosthunter_required_lunr: { 21 | src: "src/<%= pkg.name %>.js", 22 | dest: "dist/jquery.ghosthunter-use-require.js", 23 | options: { 24 | process: function(content, path) { 25 | content = content.replace(/\/\*\s+lunr\s+\*\//i, 'var lunr = require("lunr")'); 26 | var levenshtein = grunt.file.read('./src/levenshtein.js'); 27 | content = content.replace(/\/\*\s+levenshtein\s+\*\//i, levenshtein); 28 | return grunt.template.process(content) 29 | } 30 | } 31 | } 32 | } 33 | }); 34 | 35 | grunt.loadNpmTasks('grunt-contrib-copy'); 36 | 37 | // Default task(s). 
38 | grunt.registerTask('default', ['copy']); 39 | }; 40 | -------------------------------------------------------------------------------- /src/levenshtein.js: -------------------------------------------------------------------------------- 1 | // Adapted from https://github.com/pseudonym117/Levenshtein 2 | (function(root, factory){ 3 | if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) { 4 | define(function(){ 5 | return factory(root); 6 | }); 7 | } else if (typeof module == 'object' && module && module.exports) { 8 | module.exports = factory(root); 9 | } else { 10 | root.Levenshtein = factory(root); 11 | } 12 | }(this, function(root){ 13 | 14 | function forEach( array, fn ) { var i, length 15 | i = -1 16 | length = array.length 17 | while ( ++i < length ) 18 | fn( array[ i ], i, array ) 19 | } 20 | 21 | function map( array, fn ) { var result 22 | result = Array( array.length ) 23 | forEach( array, function ( val, i, array ) { 24 | result.push( fn( val, i, array ) ) 25 | }) 26 | return result 27 | } 28 | 29 | function reduce( array, fn, accumulator ) { 30 | forEach( array, function( val, i, array ) { 31 | accumulator = fn( val, i, array ) 32 | }) 33 | return accumulator 34 | } 35 | 36 | // For string mode 37 | function getChar(str, idx) { 38 | return str.charAt(idx); 39 | } 40 | 41 | // For array mode 42 | function getArrayMember(arr, idx) { 43 | return arr[idx]; 44 | } 45 | 46 | // Levenshtein distance 47 | function Levenshtein( str_m, str_n ) { 48 | var previous, current, matrix, getElem 49 | // Set to string or array mode 50 | if (typeof str_m === "string" && typeof str_n === "string") { 51 | getElem = getChar; 52 | } else if (typeof str_m === "object" && typeof str_n === "object") { 53 | getElem = getArrayMember; 54 | } else { 55 | throw "Levensthtein: input must be two strings or two arrays" 56 | } 57 | // Constructor 58 | matrix = this._matrix = [] 59 | 60 | // Sanity checks 61 | if ( str_m == str_n ) 62 | return this.distance = 
0 63 | else if ( str_m == '' ) 64 | return this.distance = str_n.length 65 | else if ( str_n == '' ) 66 | return this.distance = str_m.length 67 | else { 68 | // General case: build the distance matrix one row per element of str_n 69 | previous = [ 0 ] 70 | forEach( str_m, function( v, i ) { i++, previous[ i ] = i } ) 71 | 72 | matrix[0] = previous 73 | forEach( str_n, function( n_val, n_idx ) { 74 | current = [ ++n_idx ] 75 | forEach( str_m, function( m_val, m_idx ) { 76 | m_idx++ 77 | if ( getElem(str_m, m_idx - 1) == getElem(str_n, n_idx - 1) ) 78 | current[ m_idx ] = previous[ m_idx - 1 ] 79 | else 80 | current[ m_idx ] = Math.min 81 | ( previous[ m_idx ] + 1 // Deletion 82 | , current[ m_idx - 1 ] + 1 // Insertion 83 | , previous[ m_idx - 1 ] + 1 // Substitution 84 | ) 85 | }) 86 | previous = current 87 | matrix[ matrix.length ] = previous 88 | }) 89 | 90 | return this.distance = current[ current.length - 1 ] 91 | } 92 | } 93 | 94 | Levenshtein.prototype.toString = Levenshtein.prototype.inspect = function inspect ( no_print ) { var matrix, max, buff, sep, rows 95 | matrix = this.getMatrix() 96 | max = reduce( matrix,function( m, o ) { 97 | return Math.max( m, reduce( o, Math.max, 0 ) ) 98 | }, 0 ) 99 | buff = Array( ( max + '' ).length ).join( ' ' ) 100 | 101 | sep = [] 102 | while ( sep.length < (matrix[0] && matrix[0].length || 0) ) 103 | sep[ sep.length ] = Array( buff.length + 1 ).join( '-' ) 104 | sep = sep.join( '-+' ) + '-' 105 | 106 | rows = map( matrix, function( row ) { var cells 107 | cells = map( row, function( cell ) { 108 | return ( buff + cell ).slice( - buff.length ) 109 | }) 110 | return cells.join( ' |' ) + ' ' 111 | }) 112 | 113 | return rows.join( "\n" + sep + "\n" ) 114 | } 115 | 116 | // steps to get from string 1 to string 2 117 | Levenshtein.prototype.getSteps = function() { 118 | var steps, matrix, x, y, u, l, d, min 119 | steps = [] 120 | matrix = this.getMatrix() 121 | x = matrix.length - 1 122 | y = matrix[0].length - 1 123 | while(x !== 0 || y !== 0) { 124 | u = y > 0 ? 
matrix[x][y-1] : Number.MAX_VALUE 125 | l = x > 0 ? matrix[x-1][y] : Number.MAX_VALUE 126 | d = y > 0 && x > 0 ? matrix[x-1][y-1] : Number.MAX_VALUE 127 | min = Math.min(u, l, d) 128 | if(min === d) { 129 | if(d < matrix[x][y]) { 130 | steps.push(['substitute', y, x]) 131 | }// else steps.push(['no-op', y, x]) 132 | x-- 133 | y-- 134 | } else if(min === l) { 135 | steps.push(['insert', y, x]) 136 | x-- 137 | } else { 138 | steps.push(['delete', y, x]) 139 | y-- 140 | } 141 | } 142 | return steps 143 | } 144 | 145 | Levenshtein.prototype.getMatrix = function () { 146 | return this._matrix.slice() 147 | } 148 | 149 | Levenshtein.prototype.valueOf = function() { 150 | return this.distance 151 | } 152 | 153 | return Levenshtein 154 | 155 | })); 156 | -------------------------------------------------------------------------------- /src/ghosthunter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * ghostHunter - 0.6.0 3 | * Copyright (C) 2014 Jamal Neufeld (jamal@i11u.me) 4 | * MIT Licensed 5 | * @license 6 | */ 7 | (function( $ ) { 8 | 9 | /* LUNR */ 10 | 11 | /* LEVENSHTEIN */ 12 | 13 | //This is the main plugin definition 14 | $.fn.ghostHunter = function( options ) { 15 | 16 | //Here we use jQuery's extend to set default values if they weren't set by the user 17 | var opts = $.extend( {}, $.fn.ghostHunter.defaults, options ); 18 | if( opts.results ) 19 | { 20 | pluginMethods.init( this , opts ); 21 | return pluginMethods; 22 | } 23 | }; 24 | // If the Ghost instance is in a subpath of the site, set subpath 25 | // as the path to the site with a leading slash and no trailing slash 26 | // (i.e. "/path/to/instance"). 27 | $.fn.ghostHunter.defaults = { 28 | resultsData : false, 29 | onPageLoad : false, 30 | onKeyUp : false, 31 | result_template : "

{{title}}

{{pubDate}}

", 32 | info_template : "

Number of posts found: {{amount}}

", 33 | displaySearchInfo : true, 34 | zeroResultsInfo : true, 35 | before : false, 36 | onComplete : false, 37 | filterfields : false, 38 | subpath : "", 39 | item_preprocessor : false, 40 | indexing_start : false, 41 | indexing_end : false, 42 | includebodysearch : false 43 | }; 44 | var prettyDate = function(date) { 45 | var d = new Date(date); 46 | var monthNames = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]; 47 | return d.getDate() + ' ' + monthNames[d.getMonth()] + ' ' + d.getFullYear(); 48 | }; 49 | 50 | var getSubpathKey = function(str) { 51 | return str.replace(/^\//, "").replace(/\//g, "-") 52 | }; 53 | 54 | var lastTimeoutID = null; 55 | 56 | // We add a prefix to new IDs and remove it after a set of 57 | // updates is complete, just in case a browser freaks over 58 | // duplicate IDs in the DOM. 59 | var settleIDs = function() { 60 | $('.gh-search-item').each(function(){ 61 | var oldAttr = this.getAttribute('id'); 62 | var newAttr = oldAttr.replace(/^new-/, ""); 63 | this.setAttribute('id', newAttr); 64 | }); 65 | }; 66 | var updateSearchList = function(listItems, apiData, steps) { 67 | for (var i=0,ilen=steps.length;i recordedDate) { 126 | me.latestPost = arrayItem.updated_at; 127 | } 128 | var tag_arr = arrayItem.tags.map(function(v) { 129 | return v.name; // `tag` object has an `name` property which is the value of tag. 
If you also want other info, check API and get that property 130 | }) 131 | if(arrayItem.meta_description == null) { arrayItem.meta_description = '' }; 132 | var category = tag_arr.join(", "); 133 | if (category.length < 1){ 134 | category = "undefined"; 135 | } 136 | var parsedData = { 137 | id : String(arrayItem.id), 138 | title : String(arrayItem.title), 139 | description : String(arrayItem.custom_excerpt), 140 | pubDate : String(arrayItem.published_at), 141 | tag : category 142 | } 143 | if ( me.includebodysearch ){ 144 | parsedData.plaintext=String(arrayItem.plaintext); 145 | } 146 | this.add(parsedData) 147 | var localUrl = me.subpath + arrayItem.url 148 | me.blogData[arrayItem.id] = { 149 | title: arrayItem.title, 150 | description: arrayItem.custom_excerpt, 151 | pubDate: prettyDate(parsedData.pubDate), 152 | link: localUrl, 153 | tags: tag_arr 154 | }; 155 | // If there is a metadata "pre"-processor for the item, run it here. 156 | if (me.item_preprocessor) { 157 | Object.assign(me.blogData[arrayItem.id], me.item_preprocessor(arrayItem)); 158 | } 159 | // console.log("done indexing the item"); 160 | }, this); 161 | }); 162 | try { 163 | var subpathKey = getSubpathKey(me.subpath); 164 | localStorage.setItem(("ghost_" + subpathKey + "_lunrIndex"), JSON.stringify(me.index)); 165 | localStorage.setItem(("ghost_" + subpathKey + "_blogData"), JSON.stringify(me.blogData)); 166 | localStorage.setItem(("ghost_" + subpathKey + "_latestPost"), me.latestPost); 167 | } catch (e) { 168 | console.warn("ghostHunter: save to localStorage failed: " + e); 169 | } 170 | if (me.indexing_end) { 171 | me.indexing_end(); 172 | } 173 | me.isInit = true; 174 | }); 175 | } 176 | 177 | var pluginMethods = { 178 | 179 | isInit : false, 180 | 181 | init : function( target , opts ){ 182 | var that = this; 183 | that.target = target; 184 | Object.assign(this, opts); 185 | // console.log("ghostHunter: init"); 186 | if ( opts.onPageLoad ) { 187 | function miam () { 188 | that.loadAPI(); 
189 | } 190 | window.setTimeout(miam, 1); 191 | } else { 192 | target.focus(function(){ 193 | that.loadAPI(); 194 | }); 195 | } 196 | 197 | target.closest("form").submit(function(e){ 198 | e.preventDefault(); 199 | that.find(target.val()); 200 | }); 201 | 202 | if( opts.onKeyUp ) { 203 | // In search-as-you-type mode, the Enter key is meaningless, 204 | // so we disable it in the search field. If enabled, some browsers 205 | // will save data to history (even when autocomplete="false"), which 206 | // is an intrusive headache, particularly on mobile. 207 | target.keydown(function(event){ 208 | if (event.which === 13) { 209 | return false; 210 | } 211 | }); 212 | target.keyup(function(event) { 213 | that.find(target.val()); 214 | }); 215 | 216 | } 217 | 218 | }, 219 | 220 | loadAPI : function(){ 221 | // console.log('ghostHunter: loadAPI'); 222 | if(!this.isInit) { 223 | // console.log('ghostHunter: this.isInit is true'); 224 | if (this.indexing_start) { 225 | this.indexing_start(); 226 | } 227 | // If isInit is falsy, check for data in localStore, 228 | // parse into memory, and declare isInit to be true. 
229 | try { 230 | var subpathKey = getSubpathKey(this.subpath); 231 | this.index = localStorage.getItem(("ghost_" + subpathKey + "_lunrIndex")); 232 | this.blogData = localStorage.getItem(("ghost_" + subpathKey + "_blogData")); 233 | this.latestPost = localStorage.getItem(("ghost_" + subpathKey + "_latestPost")); 234 | if (this.latestPost && this.index && this.blogData) { 235 | this.latestPost = this.latestPost; 236 | this.index = lunr.Index.load(JSON.parse(this.index)); 237 | this.blogData = JSON.parse(this.blogData); 238 | this.isInit = true; 239 | } 240 | } catch (e){ 241 | console.warn("ghostHunter: retrieve from localStorage failed: " + e); 242 | } 243 | } 244 | if (this.isInit) { 245 | // console.log('ghostHunter: this.isInit recheck is true'); 246 | // Check if there are new or edited posts 247 | var params = { 248 | limit: "all", 249 | filter: "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'", 250 | fields: "id" 251 | }; 252 | 253 | var url = "/ghost/api/v2/content/posts/?key=" + ghosthunter_key + "&limit=all&fields=id" + "&filter=" + "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'"; 254 | 255 | var me = this; 256 | $.get(url).done(function(data){ 257 | if (data.posts.length > 0) { 258 | grabAndIndex.call(me); 259 | } else { 260 | if (me.indexing_end) { 261 | me.indexing_end(); 262 | } 263 | me.isInit = true; 264 | } 265 | }); 266 | } else { 267 | // console.log('ghostHunter: this.isInit recheck is false'); 268 | grabAndIndex.call(this) 269 | } 270 | }, 271 | 272 | 273 | find : function(value){ 274 | clearTimeout(lastTimeoutID); 275 | if (!value) { 276 | value = ""; 277 | }; 278 | value = value.toLowerCase(); 279 | lastTimeoutID = setTimeout(function() { 280 | // Query strategy is lifted from comments on a lunr.js issue: https://github.com/olivernn/lunr.js/issues/256 281 | var thingsFound = []; 282 | // The query interface expects single terms, so we split. 
283 | var valueSplit = value.split(/\s+/); 284 | for (var i=0,ilen=valueSplit.length;i 1) { 313 | // If we had multiple terms, we'll have multiple lists. We filter 314 | // them here to use only items that produce returns for all 315 | // terms. This spoofs an AND join between terms, which lunr.js can't 316 | // yet do internally. 317 | // By using the first list of items as master, we get weightings 318 | // based on the first term entered, which is more or less 319 | // what we would expect. 320 | var searchResult = thingsFound[0]; 321 | thingsFound = thingsFound.slice(1); 322 | for (var i=searchResult.length-1;i>-1;i--) { 323 | var ref = searchResult[i].ref; 324 | for (j=0,jlen=thingsFound.length;j 0) { 352 | results.children().eq(0).replaceWith(this.format(this.info_template,{"amount":searchResult.length})); 353 | } else { 354 | results.append(this.format(this.info_template,{"amount":searchResult.length})); 355 | } 356 | } 357 | 358 | if(this.before) { 359 | this.before(); 360 | }; 361 | 362 | // Get the blogData for the full set, for onComplete 363 | for (var i = 0; i < searchResult.length; i++) { 364 | var lunrref = searchResult[i].ref; 365 | var postData = this.blogData[lunrref]; 366 | if (postData) { 367 | postData.ref = lunrref; 368 | resultsData.push(postData); 369 | } else { 370 | console.warn("ghostHunter: index/data mismatch. Ouch."); 371 | } 372 | } 373 | // Get an array of IDs present in current results 374 | var listItems = $('.gh-search-item'); 375 | var currentRefs = listItems 376 | .map(function(){ 377 | return this.id.slice(3); 378 | }).get(); 379 | if (currentRefs.length === 0) { 380 | for (var i=0,ilen=resultsData.length;i

{{title}}

{{prettyPubDate}}

", 33 | info_template : "

Number of posts found: {{amount}}

", 34 | displaySearchInfo : true, 35 | zeroResultsInfo : true, 36 | before : false, 37 | onComplete : false, 38 | filterfields : false, 39 | subpath : "", 40 | item_preprocessor : false, 41 | indexing_start : false, 42 | indexing_end : false, 43 | includebodysearch : false 44 | }; 45 | var prettyDate = function(date) { 46 | var d = new Date(date); 47 | var monthNames = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]; 48 | return d.getDate() + ' ' + monthNames[d.getMonth()] + ' ' + d.getFullYear(); 49 | }; 50 | 51 | var getSubpathKey = function(str) { 52 | return str.replace(/^\//, "").replace(/\//g, "-") 53 | }; 54 | 55 | var lastTimeoutID = null; 56 | 57 | // We add a prefix to new IDs and remove it after a set of 58 | // updates is complete, just in case a browser freaks over 59 | // duplicate IDs in the DOM. 60 | var settleIDs = function() { 61 | $('.gh-search-item').each(function(){ 62 | var oldAttr = this.getAttribute('id'); 63 | var newAttr = oldAttr.replace(/^new-/, ""); 64 | this.setAttribute('id', newAttr); 65 | }); 66 | }; 67 | 68 | var updateSearchList = function(listItems, apiData, steps) { 69 | for (var i=0,ilen=steps.length;i recordedDate) { 127 | me.latestPost = arrayItem.updated_at; 128 | } 129 | var tag_arr = arrayItem.tags.map(function(v) { 130 | return v.name; // `tag` object has an `name` property which is the value of tag. 
If you also want other info, check API and get that property 131 | }) 132 | if(arrayItem.meta_description == null) { arrayItem.meta_description = '' }; 133 | var category = tag_arr.join(", "); 134 | if (category.length < 1){ 135 | category = "undefined"; 136 | } 137 | var parsedData = { 138 | id : String(arrayItem.id), 139 | title : String(arrayItem.title), 140 | description : String(arrayItem.custom_excerpt), 141 | pubDate : String(arrayItem.published_at), 142 | tag : category 143 | } 144 | if ( me.includebodysearch ){ 145 | parsedData.plaintext=String(arrayItem.plaintext); 146 | } 147 | this.add(parsedData) 148 | var localUrl = me.subpath + arrayItem.url 149 | me.blogData[arrayItem.id] = { 150 | title: arrayItem.title, 151 | description: arrayItem.custom_excerpt, 152 | pubDate: prettyDate(parsedData.pubDate), 153 | link: localUrl, 154 | tags: tag_arr 155 | }; 156 | // If there is a metadata "pre"-processor for the item, run it here. 157 | if (me.item_preprocessor) { 158 | Object.assign(me.blogData[arrayItem.id], me.item_preprocessor(arrayItem)); 159 | } 160 | // console.log("done indexing the item"); 161 | }, this); 162 | }); 163 | try { 164 | var subpathKey = getSubpathKey(me.subpath); 165 | localStorage.setItem(("ghost_" + subpathKey + "_lunrIndex"), JSON.stringify(me.index)); 166 | localStorage.setItem(("ghost_" + subpathKey + "_blogData"), JSON.stringify(me.blogData)); 167 | localStorage.setItem(("ghost_" + subpathKey + "_latestPost"), me.latestPost); 168 | } catch (e) { 169 | console.warn("ghostHunter: save to localStorage failed: " + e); 170 | } 171 | if (me.indexing_end) { 172 | me.indexing_end(); 173 | } 174 | me.isInit = true; 175 | }); 176 | } 177 | 178 | var pluginMethods = { 179 | 180 | isInit : false, 181 | 182 | init : function( target , opts ){ 183 | var that = this; 184 | that.target = target; 185 | Object.assign(this, opts); 186 | console.log("ghostHunter: init"); 187 | if ( opts.onPageLoad ) { 188 | function miam () { 189 | that.loadAPI(); 190 | 
} 191 | window.setTimeout(miam, 1); 192 | } else { 193 | target.focus(function(){ 194 | that.loadAPI(); 195 | }); 196 | } 197 | 198 | target.closest("form").submit(function(e){ 199 | e.preventDefault(); 200 | that.find(target.val()); 201 | }); 202 | 203 | if( opts.onKeyUp ) { 204 | // In search-as-you-type mode, the Enter key is meaningless, 205 | // so we disable it in the search field. If enabled, some browsers 206 | // will save data to history (even when autocomplete="false"), which 207 | // is an intrusive headache, particularly on mobile. 208 | target.keydown(function(event){ 209 | if (event.which === 13) { 210 | return false; 211 | } 212 | }); 213 | target.keyup(function(event) { 214 | that.find(target.val()); 215 | }); 216 | 217 | } 218 | 219 | }, 220 | 221 | loadAPI : function(){ 222 | // console.log('ghostHunter: loadAPI'); 223 | if(!this.isInit) { 224 | // console.log('ghostHunter: this.isInit is true'); 225 | if (this.indexing_start) { 226 | this.indexing_start(); 227 | } 228 | // If isInit is falsy, check for data in localStore, 229 | // parse into memory, and declare isInit to be true. 
230 | try { 231 | var subpathKey = getSubpathKey(this.subpath); 232 | this.index = localStorage.getItem(("ghost_" + subpathKey + "_lunrIndex")); 233 | this.blogData = localStorage.getItem(("ghost_" + subpathKey + "_blogData")); 234 | this.latestPost = localStorage.getItem(("ghost_" + subpathKey + "_latestPost")); 235 | if (this.latestPost && this.index && this.blogData) { 236 | this.latestPost = this.latestPost; 237 | this.index = lunr.Index.load(JSON.parse(this.index)); 238 | this.blogData = JSON.parse(this.blogData); 239 | this.isInit = true; 240 | } 241 | } catch (e){ 242 | console.warn("ghostHunter: retrieve from localStorage failed: " + e); 243 | } 244 | } 245 | if (this.isInit) { 246 | // console.log('ghostHunter: this.isInit recheck is true'); 247 | // Check if there are new or edited posts 248 | var params = { 249 | limit: "all", 250 | filter: "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'", 251 | fields: "id" 252 | }; 253 | 254 | var url = "/ghost/api/v2/content/posts/?key=" + 255 | ghosthunter_key + "&limit=all&fields=id" + "&filter=" + 256 | "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") "\'"; 257 | 258 | var me = this; 259 | $.get(url).done(function(data){ 260 | if (data.posts.length > 0) { 261 | grabAndIndex.call(me); 262 | } else { 263 | if (me.indexing_end) { 264 | me.indexing_end(); 265 | } 266 | me.isInit = true; 267 | } 268 | }); 269 | } else { 270 | // console.log('ghostHunter: this.isInit recheck is false'); 271 | grabAndIndex.call(this) 272 | } 273 | }, 274 | 275 | 276 | find : function(value){ 277 | clearTimeout(lastTimeoutID); 278 | if (!value) { 279 | value = ""; 280 | }; 281 | value = value.toLowerCase(); 282 | lastTimeoutID = setTimeout(function() { 283 | // Query strategy is lifted from comments on a lunr.js issue: https://github.com/olivernn/lunr.js/issues/256 284 | var thingsFound = []; 285 | // The query interface expects single terms, so we split. 
286 | var valueSplit = value.split(/\s+/); 287 | for (var i=0,ilen=valueSplit.length;i 1) { 316 | // If we had multiple terms, we'll have multiple lists. We filter 317 | // them here to use only items that produce returns for all 318 | // terms. This spoofs an AND join between terms, which lunr.js can't 319 | // yet do internally. 320 | // By using the first list of items as master, we get weightings 321 | // based on the first term entered, which is more or less 322 | // what we would expect. 323 | var searchResult = thingsFound[0]; 324 | thingsFound = thingsFound.slice(1); 325 | for (var i=searchResult.length-1;i>-1;i--) { 326 | var ref = searchResult[i].ref; 327 | for (j=0,jlen=thingsFound.length;j 0) { 355 | results.children().eq(0).replaceWith(this.format(this.info_template,{"amount":searchResult.length})); 356 | } else { 357 | results.append(this.format(this.info_template,{"amount":searchResult.length})); 358 | } 359 | } 360 | 361 | if(this.before) { 362 | this.before(); 363 | }; 364 | 365 | // Get the blogData for the full set, for onComplete 366 | for (var i = 0; i < searchResult.length; i++) { 367 | var lunrref = searchResult[i].ref; 368 | var postData = this.blogData[lunrref]; 369 | if (postData) { 370 | postData.ref = lunrref; 371 | resultsData.push(postData); 372 | } else { 373 | console.warn("ghostHunter: index/data mismatch. Ouch."); 374 | } 375 | } 376 | // Get an array of IDs present in current results 377 | var listItems = $('.gh-search-item'); 378 | var currentRefs = listItems 379 | .map(function(){ 380 | return this.id.slice(3); 381 | }).get(); 382 | if (currentRefs.length === 0) { 383 | for (var i=0,ilen=resultsData.length;i 0 ? matrix[x][y-1] : Number.MAX_VALUE 135 | l = x > 0 ? matrix[x-1][y] : Number.MAX_VALUE 136 | d = y > 0 && x > 0 ? 
matrix[x-1][y-1] : Number.MAX_VALUE 137 | min = Math.min(u, l, d) 138 | if(min === d) { 139 | if(d < matrix[x][y]) { 140 | steps.push(['substitute', y, x]) 141 | }// else steps.push(['no-op', y, x]) 142 | x-- 143 | y-- 144 | } else if(min === l) { 145 | steps.push(['insert', y, x]) 146 | x-- 147 | } else { 148 | steps.push(['delete', y, x]) 149 | y-- 150 | } 151 | } 152 | return steps 153 | } 154 | 155 | Levenshtein.prototype.getMatrix = function () { 156 | return this._matrix.slice() 157 | } 158 | 159 | Levenshtein.prototype.valueOf = function() { 160 | return this.distance 161 | } 162 | 163 | return Levenshtein 164 | 165 | })); 166 | 167 | 168 | //This is the main plugin definition 169 | $.fn.ghostHunter = function( options ) { 170 | 171 | //Here we use jQuery's extend to set default values if they weren't set by the user 172 | var opts = $.extend( {}, $.fn.ghostHunter.defaults, options ); 173 | if( opts.results ) 174 | { 175 | pluginMethods.init( this , opts ); 176 | return pluginMethods; 177 | } 178 | }; 179 | // If the Ghost instance is in a subpath of the site, set subpath 180 | // as the path to the site with a leading slash and no trailing slash 181 | // (i.e. "/path/to/instance"). 182 | $.fn.ghostHunter.defaults = { 183 | resultsData : false, 184 | onPageLoad : false, 185 | onKeyUp : false, 186 | result_template : "

{{title}}

{{prettyPubDate}}

", 187 | info_template : "

Number of posts found: {{amount}}

", 188 | displaySearchInfo : true, 189 | zeroResultsInfo : true, 190 | before : false, 191 | onComplete : false, 192 | filterfields : false, 193 | subpath : "", 194 | item_preprocessor : false, 195 | indexing_start : false, 196 | indexing_end : false, 197 | includebodysearch : false 198 | }; 199 | var prettyDate = function(date) { 200 | var d = new Date(date); 201 | var monthNames = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]; 202 | return d.getDate() + ' ' + monthNames[d.getMonth()] + ' ' + d.getFullYear(); 203 | }; 204 | 205 | var getSubpathKey = function(str) { 206 | return str.replace(/^\//, "").replace(/\//g, "-") 207 | }; 208 | 209 | var lastTimeoutID = null; 210 | 211 | // We add a prefix to new IDs and remove it after a set of 212 | // updates is complete, just in case a browser freaks over 213 | // duplicate IDs in the DOM. 214 | var settleIDs = function() { 215 | $('.gh-search-item').each(function(){ 216 | var oldAttr = this.getAttribute('id'); 217 | var newAttr = oldAttr.replace(/^new-/, ""); 218 | this.setAttribute('id', newAttr); 219 | }); 220 | }; 221 | var updateSearchList = function(listItems, apiData, steps) { 222 | for (var i=0,ilen=steps.length;i recordedDate) { 281 | me.latestPost = arrayItem.updated_at; 282 | } 283 | var tag_arr = arrayItem.tags.map(function(v) { 284 | return v.name; // `tag` object has an `name` property which is the value of tag. 
If you also want other info, check API and get that property 285 | }) 286 | if(arrayItem.meta_description == null) { arrayItem.meta_description = '' }; 287 | var category = tag_arr.join(", "); 288 | if (category.length < 1){ 289 | category = "undefined"; 290 | } 291 | var parsedData = { 292 | id : String(arrayItem.id), 293 | title : String(arrayItem.title), 294 | description : String(arrayItem.custom_excerpt), 295 | pubDate : String(arrayItem.published_at), 296 | tag : category 297 | } 298 | if ( me.includebodysearch ){ 299 | parsedData.plaintext=String(arrayItem.plaintext); 300 | } 301 | this.add(parsedData) 302 | var localUrl = me.subpath + arrayItem.url 303 | me.blogData[arrayItem.id] = { 304 | title: arrayItem.title, 305 | description: arrayItem.custom_excerpt, 306 | pubDate: prettyDate(parsedData.pubDate), 307 | link: localUrl, 308 | tags: tag_arr 309 | }; 310 | // If there is a metadata "pre"-processor for the item, run it here. 311 | if (me.item_preprocessor) { 312 | Object.assign(me.blogData[arrayItem.id], me.item_preprocessor(arrayItem)); 313 | } 314 | // console.log("done indexing the item"); 315 | }, this); 316 | }); 317 | try { 318 | var subpathKey = getSubpathKey(me.subpath); 319 | localStorage.setItem(("ghost_" + subpathKey + "_lunrIndex"), JSON.stringify(me.index)); 320 | localStorage.setItem(("ghost_" + subpathKey + "_blogData"), JSON.stringify(me.blogData)); 321 | localStorage.setItem(("ghost_" + subpathKey + "_latestPost"), me.latestPost); 322 | } catch (e) { 323 | console.warn("ghostHunter: save to localStorage failed: " + e); 324 | } 325 | if (me.indexing_end) { 326 | me.indexing_end(); 327 | } 328 | me.isInit = true; 329 | }); 330 | } 331 | 332 | var pluginMethods = { 333 | 334 | isInit : false, 335 | 336 | init : function( target , opts ){ 337 | var that = this; 338 | that.target = target; 339 | Object.assign(this, opts); 340 | // console.log("ghostHunter: init"); 341 | if ( opts.onPageLoad ) { 342 | function miam () { 343 | that.loadAPI(); 
344 | } 345 | window.setTimeout(miam, 1); 346 | } else { 347 | target.focus(function(){ 348 | that.loadAPI(); 349 | }); 350 | } 351 | 352 | target.closest("form").submit(function(e){ 353 | e.preventDefault(); 354 | that.find(target.val()); 355 | }); 356 | 357 | if( opts.onKeyUp ) { 358 | // In search-as-you-type mode, the Enter key is meaningless, 359 | // so we disable it in the search field. If enabled, some browsers 360 | // will save data to history (even when autocomplete="false"), which 361 | // is an intrusive headache, particularly on mobile. 362 | target.keydown(function(event){ 363 | if (event.which === 13) { 364 | return false; 365 | } 366 | }); 367 | target.keyup(function(event) { 368 | that.find(target.val()); 369 | }); 370 | 371 | } 372 | 373 | }, 374 | 375 | loadAPI : function(){ 376 | // console.log('ghostHunter: loadAPI'); 377 | if(!this.isInit) { 378 | // console.log('ghostHunter: this.isInit is true'); 379 | if (this.indexing_start) { 380 | this.indexing_start(); 381 | } 382 | // If isInit is falsy, check for data in localStore, 383 | // parse into memory, and declare isInit to be true. 
384 | try { 385 | var subpathKey = getSubpathKey(this.subpath); 386 | this.index = localStorage.getItem(("ghost_" + subpathKey + "_lunrIndex")); 387 | this.blogData = localStorage.getItem(("ghost_" + subpathKey + "_blogData")); 388 | this.latestPost = localStorage.getItem(("ghost_" + subpathKey + "_latestPost")); 389 | if (this.latestPost && this.index && this.blogData) { 390 | this.latestPost = this.latestPost; 391 | this.index = lunr.Index.load(JSON.parse(this.index)); 392 | this.blogData = JSON.parse(this.blogData); 393 | this.isInit = true; 394 | } 395 | } catch (e){ 396 | console.warn("ghostHunter: retrieve from localStorage failed: " + e); 397 | } 398 | } 399 | if (this.isInit) { 400 | // console.log('ghostHunter: this.isInit recheck is true'); 401 | // Check if there are new or edited posts 402 | var params = { 403 | limit: "all", 404 | filter: "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'", 405 | fields: "id" 406 | }; 407 | 408 | var url = "/ghost/api/v2/content/posts/?key=" + ghosthunter_key + "&limit=all&fields=id" + "&filter=" + "updated_at:>\'" + this.latestPost.replace(/\..*/, "").replace(/T/, " ") + "\'"; 409 | 410 | var me = this; 411 | $.get(url).done(function(data){ 412 | if (data.posts.length > 0) { 413 | grabAndIndex.call(me); 414 | } else { 415 | if (me.indexing_end) { 416 | me.indexing_end(); 417 | } 418 | me.isInit = true; 419 | } 420 | }); 421 | } else { 422 | // console.log('ghostHunter: this.isInit recheck is false'); 423 | grabAndIndex.call(this) 424 | } 425 | }, 426 | 427 | 428 | find : function(value){ 429 | clearTimeout(lastTimeoutID); 430 | if (!value) { 431 | value = ""; 432 | }; 433 | value = value.toLowerCase(); 434 | lastTimeoutID = setTimeout(function() { 435 | // Query strategy is lifted from comments on a lunr.js issue: https://github.com/olivernn/lunr.js/issues/256 436 | var thingsFound = []; 437 | // The query interface expects single terms, so we split. 
438 | var valueSplit = value.split(/\s+/); 439 | for (var i=0,ilen=valueSplit.length;i 1) { 468 | // If we had multiple terms, we'll have multiple lists. We filter 469 | // them here to use only items that produce returns for all 470 | // terms. This spoofs an AND join between terms, which lunr.js can't 471 | // yet do internally. 472 | // By using the first list of items as master, we get weightings 473 | // based on the first term entered, which is more or less 474 | // what we would expect. 475 | var searchResult = thingsFound[0]; 476 | thingsFound = thingsFound.slice(1); 477 | for (var i=searchResult.length-1;i>-1;i--) { 478 | var ref = searchResult[i].ref; 479 | for (j=0,jlen=thingsFound.length;j 0) { 507 | results.children().eq(0).replaceWith(this.format(this.info_template,{"amount":searchResult.length})); 508 | } else { 509 | results.append(this.format(this.info_template,{"amount":searchResult.length})); 510 | } 511 | } 512 | 513 | if(this.before) { 514 | this.before(); 515 | }; 516 | 517 | // Get the blogData for the full set, for onComplete 518 | for (var i = 0; i < searchResult.length; i++) { 519 | var lunrref = searchResult[i].ref; 520 | var postData = this.blogData[lunrref]; 521 | if (postData) { 522 | postData.ref = lunrref; 523 | resultsData.push(postData); 524 | } else { 525 | console.warn("ghostHunter: index/data mismatch. Ouch."); 526 | } 527 | } 528 | // Get an array of IDs present in current results 529 | var listItems = $('.gh-search-item'); 530 | var currentRefs = listItems 531 | .map(function(){ 532 | return this.id.slice(3); 533 | }).get(); 534 | if (currentRefs.length === 0) { 535 | for (var i=0,ilen=resultsData.length;i

7 | 8 | --- 9 | 10 |


11 | 12 | ![Version](https://img.shields.io/badge/Version-0.6.0-blue.svg) 13 | ![MinGhostVersion](https://img.shields.io/badge/Min%20Ghost%20v.-%3E%3D%202.10-red.svg) 14 | 15 | ## ghostHunter 16 | 17 | **Original developer:** [jamal@i11u.me](mailto:jamal@i11u.me) 18 | 19 | GhostHunter makes it easy to add search capability to any Ghost theme, using the [Ghost API](https://api.ghost.org/v1.14.0/docs) and the [lunr.js](https://lunrjs.com) search engine. Indexing and search are done client-side (in the browser). This has several advantages: 20 | 21 | * Searches are private to the user, and are not exposed to third parties. 22 | * Installation and maintenance of powerful-but-daunting standalone search engines (such as [Solr](http://lucene.apache.org/solr/) or [ElasticSearch](https://www.elastic.co/)) is not required. 23 | * Instant search ("search-as-you-type" or "typeahead") is simple to configure. 24 | 25 | ----------------- 26 | 27 | ## Contents 28 | 29 | * [ghostHunter](#ghosthunter) 30 | * [Contents](#contents) 31 | * [Upgrade notes](#upgrade-notes) 32 | * [Basic setup](#basic-setup) 33 | * [Advanced usage](#advanced-usage) 34 | * [Production installation](#production-installation) 35 | * [GhostHunter options](#ghosthunter-options) 36 | * [Multiple search fields](#multiple-search-fields) 37 | * [Clearing search results](#clearing-search-results) 38 | * [Indexing and caching: how it works](#indexing-and-caching-how-it-works) 39 | * [Development: rebuilding ghostHunter](#development-rebuilding-ghosthunter) 40 | * [Footnotes](#footnotes) 41 | 42 | ------------------ 43 | 44 | ## Upgrade notes 45 | ### GhostHunter v0.6.0 46 | 47 | * Implements @JiapengLi "dirty fix" to support the new Ghost v2 Content API. 48 | * Removes spurious production console.log message. 49 | * Removes `includepages` option. 50 | 51 | 52 | To use this version of ghostHunter, you'll need to create a Custom Integration and inject its Content API key into your blog header. 
In your Ghost Settings: 53 | 54 | * Go to **Integrations** 55 | * Choose **Add custom integration**, name it `ghostHunter` and choose **Create**. Copy the generated Content API Key. 56 | * Go to **Code injection** 57 | * Add this to **Blog Header**: 58 | ```txt 59 | 64 | ``` 65 | 66 | 67 | ### GhostHunter v0.5.1 68 | 69 | Breaking change: added a new parameter `includebodysearch`, default `false`. Leaving it `false` completely deactivates searching within post body. Change done for performance reasons for Ghost Pro members. 70 | 71 | ### GhostHunter v0.4.x → v0.5.0 72 | 73 | The local ``lunr.js`` index used by ghostHunter is quick. That makes 74 | it well suited to search-as-you-type (SAYT), which can be enabled 75 | simply by setting the ``onKeyUp`` option to ``true``. Although fast 76 | and convenient, the rapid clearing-and-rewriting of search results in 77 | SAYT mode can be distracting to the user. 78 | 79 | From version 0.5.0, ghostHunter uses a [Levenshtein edit 80 | distance](https://en.wikipedia.org/wiki/Levenshtein_distance) 81 | algorithm to determine the specific steps needed to transform 82 | each list of search results into the next. This produces screen 83 | updates that are easy on the eye, and even pleasant to watch. 84 | 85 | To support this behavior, ghostHunter imposes some new requirements 86 | on the ``result_template``. If you use this option in your theme, 87 | you must edit the template to satisfy the following requirements 88 | before upgrading: 89 | 90 | * The template *must* be wrapped in a single outer node (e.g. ``a`` or ``div``); 91 | * The outer node *must* have a unique ``id`` attribute. You can set this by giving 92 | the ``{{ref}}`` value used for indexing a string prefix (see the default 93 | template for an example). 94 | * The outer node *must* be assigned a class ``gh-search-item``. 95 | 96 | That's it. With those changes, your theme should be ready for ghostHunter 0.5.0.
97 | 98 | ## Basic setup 99 | 100 | In your theme directory, navigate to the `assets` subdirectory, [1] and clone this repository there: [2] 101 | 102 | ```txt 103 | cd assets 104 | git clone https://github.com/jamalneufeld/ghostHunter.git --recursive 105 | ``` 106 | 107 | After cloning, the ghostHunter module will be located at `assets/ghostHunter/dist/jquery.ghosthunter.js`. [3] This is a human-readable "raw" copy of the module, and can be loaded directly in your theme templates for testing. (It will run just fine, but it contains a lot of whitespace and comments, and should be "minified" for production use [see below]). 108 | 109 | To test the module in your template, add the following line, after JQuery is loaded. Typically this will be near the bottom of a file `default.hbs`, in the top folder of the theme directory. 110 | 111 | ````html 112 | 113 | ```` 114 | 115 | You will need to add a search box to your pages. The specific `.hbs` template and location will vary depending on the style and on your design choices, but the HTML will need an `` field and a submit button inside a `
` element. A block like this should do the trick: 116 | 117 | ````html 118 | 119 | 120 | 121 |
122 | ```` 123 | 124 | You will also need to mark an area in your pages where the search results should show up: 125 | 126 | ````html 127 |
128 | ```` 129 | 130 | Wake up ghostHunter with a block of JQuery code. For testing, the sample below can be placed in the 131 | template that loads ghostHunter, immediately after the module is loaded: 132 | 133 | ````html 134 | 139 | ```` 140 | 141 | Do the necessaries to [load the theme into Ghost](https://themes.ghost.org/v1.17.0/docs/about), and see if it works. :sweat_smile: 142 | 143 | 144 | ## Advanced usage 145 | 146 | ### Production installation 147 | 148 | To reduce load times and network traffic, the JavaScript of a site is typically "minified," bundling all code into a single file with reduced whitespace and other optimizations. The ``jquery.ghosthunter.js`` module should be bundled in this way for the production version of your site. The most common tools for this purpose in Web development are Grunt and Gulp. A full explanation of their use is beyond the scope of this guide, but here are some links for reference: 149 | 150 | * The [Gulp Project](https://gulpjs.com/) website. 151 | * The [Grunt Project](https://gruntjs.com/) website. 152 | 153 | GhostHunter is built using Grunt. Instructions on installing Grunt in order to tweak or extend the code of the ghostHunter module are given in a separate section below. 154 | 155 | 156 | ### GhostHunter options 157 | 158 | The behavior of ghostHunter can be controlled at two levels. For deep 159 | changes, [4] see the section [Development: 160 | rebuilding ghostHunter](#development-rebuilding-ghosthunter) below. 161 | 162 | For most purposes, ghostHunter offers a set of simple options that can be 163 | set when the plugin is invoked: as an example, the last code sample in 164 | the previous section sets the `results` option. 165 | 166 | :arrow_right: **results** 167 | 168 | > Should be set to the JQuery ID of the DOM object into which search results should be inserted. This value is required. 169 | > 170 | > Default value is ``undefined``.
171 | 172 | :arrow_right: **onKeyUp** 173 | 174 | > When set ``true``, search results are returned after each keystroke, for instant search-as-you-type results. 175 | > 176 | > Default value is ``false`` 177 | 178 | :arrow_right: **result_template** 179 | 180 | > A simple Handlebars template used to render individual items in the search result. The templates 181 | > recognize variable substitution only; helpers and conditional insertion constructs are ignored, 182 | > and will be rendered verbatim. 183 | > 184 | > From ghostHunter v0.5.0, the ``result_template`` *must* be assigned a unique``id``, and *must* 185 | > be assigned a class ``gh-search-item``. Without these attributes, screen updates will not 186 | > work correctly. 187 | > 188 | > Default template is <a id='gh-{{ref}}' class='gh-search-item' href='{{link}}'><p><h2>{{title}}</h2><h4>{{prettyPubDate}}</h4></p></a> 189 | 190 | :arrow_right: **info_template** 191 | 192 | > A Handlebars template used to display the number of search items returned. 193 | > 194 | > Default template is <p>Number of posts found: {{amount}}</p> 195 | 196 | :arrow_right: **displaySearchInfo** 197 | 198 | > When set ``true``, the number of search items returned is shown immediately above the list of search hits. The notice is formatted using ``info_template``. 199 | > 200 | > Default value is ``true``. 201 | 202 | :arrow_right: **zeroResultsInfo** 203 | 204 | > When set ``true``, the number-of-search-items notice formatted using ``info_template`` is shown even when the number of items is zero. When set to false, the notice is suppressed when there are no search results. 205 | > 206 | > Default value is ``true``. 207 | 208 | :arrow_right: **subpath** 209 | 210 | > If Ghost is hosted in a subfolder of the site, set this string to the path leading to Ghost (for example, ``"/blog"``). The value is prepended to item slugs in search returns. 211 | > 212 | > Default value is an empty string. 
213 | 214 | :arrow_right: **onPageLoad** 215 | 216 | > When set ``true``, posts are checked and indexed when a page is 217 | > loaded. In early versions of ghostHunter, the default behavior was to 218 | > initiate indexing when focus fell in the search field, to reduce the 219 | > time required for initial page loads. With caching and other 220 | > changes, this is no longer needed, and this option can safely be set 221 | > to ``true`` always. 222 | > 223 | > Default value is ``true``. 224 | 225 | :arrow_right: **before** 226 | 227 | > Use to optionally set a callback function that is executed immediately before the list of search results is displayed. The callback function takes no arguments. 228 | > 229 | > Example: 230 | 231 | ````javascript 232 | $("#search-field").ghostHunter({ 233 | results: "#results", 234 | before: function() { 235 | alert("results are about to be rendered"); 236 | } 237 | }); 238 | 239 | ```` 240 | > Default value is ``false``. 241 | 242 | :arrow_right: **onComplete** 243 | 244 | > Use to optionally set a callback function that is executed immediately after the list of search results is displayed. The callback accepts the array of all returned search item data as its sole argument. 245 | > A function like that shown in the following example could be used with search-as-you-type to hide and reveal a search area and the current page content, depending on whether the search box contains any text. 246 | 247 | ````javascript 248 | $("#search-field").ghostHunter({ 249 | results: "#results", 250 | onComplete: function(results) { 251 | if ($('.search-field').prop('value')) { 252 | $('.my-search-area').show(); 253 | $('.my-display-area').hide(); 254 | } else { 255 | $('.my-search-area').hide(); 256 | $('.my-display-area').show(); 257 | } 258 | } 259 | }); 260 | ```` 261 | > Default value is ``false``.
262 | 263 | :arrow_right: **item_preprocessor** 264 | 265 | > Use to optionally set a callback function that is executed immediately before items are indexed. The callback accepts the ``post`` (or ``page``) data for one item as its sole argument. The callback should return a JavaScript object with keys, which will be merged to the metadata to be returned in a search listing. 266 | > 267 | > Example: 268 | 269 | ````javascript 270 | item_preprocessor: function(item) { 271 | var ret = {}; 272 | var thisDate = new Date(item.updated_at); 273 | var aWeekAgo = new Date(thisDate.getTime() - 1000*60*60*24*7); 274 | if (thisDate > aWeekAgo) { 275 | ret.recent = true; 276 | } else { 277 | ret.recent = false; 278 | } 279 | return ret; 280 | } 281 | ```` 282 | > With the sample function above, ``result_template`` could be set to something like this: 283 | 284 | ````javascript 285 | result_template: '

{{#if recent}}NEW! {{/if}}{{title}}

' 286 | ```` 287 | > Default value is ``false``. 288 | 289 | :arrow_right: **indexing_start** 290 | 291 | > Use to optionally set a callback that is executed immediately before an indexing operation begins. 292 | > On a large site, this can be used to disable the search box and show a spinner or other indication 293 | > that indexing is in progress. (On small sites, the time required for indexing will be so small that 294 | > such flourishes would not be noticed.) 295 | 296 | ````javascript 297 | indexing_start: function() { 298 | $('.search-field') 299 | .prop('disabled', true) 300 | .addClass('yellow-bg') 301 | .prop('placeholder', 'Indexing, please wait'); 302 | } 303 | ```` 304 | > Default value is ``false``. 305 | 306 | 307 | :arrow_right: **indexing_end** 308 | 309 | > Use to optionally set a callback that is executed after an indexing operation completes. 310 | > This is a companion to ``indexing_start`` above. 311 | 312 | ````javascript 313 | indexing_end: function() { 314 | $('.search-field') 315 | .prop('placeholder', 'Search …') 316 | .removeClass('yellow-bg') 317 | .prop('disabled', false); 318 | } 319 | ```` 320 | 321 | > Default value is ``false``. 322 | 323 | :arrow_right: **includebodysearch** 324 | 325 | > Use to allow searching within the full post body. 326 | 327 | > Default value is ``false``. 328 | 329 | ### Multiple search fields 330 | 331 | There should be only one ``ghostHunter`` object in a page; if there 332 | are two, both will attempt to instantiate at the same time, and bad 333 | things will happen. However, Responsive Design themes may place the 334 | search field in entirely different locations depending on the screen 335 | size. You can use a single ``ghostHunter`` object to serve multiple 336 | search fields with a coding pattern like the following: [5] 337 | 338 | 1. Include a single hidden search field in your templates. This will 339 | be the ``ghostHunter`` object. 340 | 341 | ```html 342 | 343 | ``` 344 | 345 | 2.
Include your search fields where you like, but assign each a 346 | unique class name that is not shared with the hidden ``ghostHunter`` 347 | input node. 348 | 349 | ```html 350 | 357 | ``` 358 | 359 | 3. In the JavaScript of your theme, instantiate ghostHunter on the 360 | hidden node: 361 | 362 | ```html 363 | $('.search-field').ghostHunter({ 364 | results: '#results', 365 | onKeyUp: true 366 | }); 367 | ``` 368 | 369 | 4. Register an event on the others that spoofs the steps needed 370 | to submit the query to ``ghostHunter``: 371 | 372 | ```html 373 | $('.search-field-mobile, .search-field-desktop').on('keyup', function(event) { 374 | $('.search-field').prop('value', event.target.value); 375 | $('.search-field').trigger('keyup'); 376 | }); 377 | ``` 378 | 379 | ### Clearing search results 380 | 381 | You can use the ghostHunter object to programmatically clear the results of your query. ghostHunter will return an object relating to your search field and you can use that object to clear results. 382 | 383 | ````js 384 | var searchField = $("#search-field").ghostHunter({ 385 | results: "#results", 386 | onKeyUp: true 387 | }); 388 | ```` 389 | 390 | Now that the object is available to your code you can call it any time to clear your results: 391 | 392 | ````js 393 | searchField.clear(); 394 | ```` 395 | 396 | ### Indexing and caching: how it works 397 | 398 | After the load of any page in which ghostHunter is included, GH builds 399 | a full-text index of all posts. Indexing is done client-side, within 400 | the browser, based on data pulled in the background from the Ghost 401 | API. To reduce network traffic and processing burden, index data is 402 | cached to the extent possible in the browser's ``localStorage`` object, 403 | according to the following rules: 404 | 405 | 1.
If no cached data is available, GH retrieves data for all posts from 406 | the Ghost API, builds an index, and stores a copy of the index data 407 | in ``localStorage`` for future reference, along with a copy of the 408 | associated metadata and a date stamp reflecting the most recent 409 | update to the posts. 410 | 411 | 2. If cached data is available, GH hits the Ghost API to retrieve 412 | the IDs of any posts updated after the cached timestamp. 413 | 414 | * If any new posts or edits are found, GH generates an index 415 | and caches data as in (1). 416 | 417 | * If no new posts or edits are found, GH restores the index, 418 | metadata and timestamp from ``localStorage``. 419 | 420 | The index can be used in JavaScript to perform searches, and returns 421 | data objects that can be used to drive Handlebars templates. 422 | 423 | ### Development: rebuilding ghostHunter 424 | 425 | The ``jquery.ghosthunter.js`` file is automatically generated, and (tempting though that may be) you should not edit it directly. If you plan to modify ghostHunter (in order to tweak search behavior, say, or to extend GhostHunter's capabilities) you should make your changes to the original source file, and rebuild ghostHunter using ``Grunt``. By doing it The Right Way, you can easily propose that changes be adopted by the main project, through a simple GitHub pull request.
426 | 427 | To set things up for development work, start by entering the ``ghostHunter`` directory: 428 | ```bash 429 | prompt> cd ghostHunter 430 | ``` 431 | Install the Grunt command line tool globally (the command below is appropriate for Linux systems, your mileage may vary): 432 | ```bash 433 | prompt> sudo npm install -g grunt-cli 434 | ``` 435 | Install Grunt and the other node.js modules needed for the build: 436 | ```bash 437 | prompt> npm install 438 | ``` 439 | Try rebuilding ghostHunter: 440 | ```bash 441 | prompt> grunt 442 | ``` 443 | Once you are able to rebuild ghostHunter, you can edit the source file at ``src/ghosthunter.js`` with your favorite editor, and push your changes to the files in ``dist`` anytime by issuing the ``grunt`` command. 444 | 445 | ## Version 0.5.0 notes 446 | 447 | * Graceful Levenshtein updating of search list 448 | * Search queries as fuzzy match to each term, joined by AND 449 | 450 | ## Version 0.4.1 notes 451 | 452 | * Include lunr as a submodule, update to lunr.js v2.1 453 | * Set up Grunt to produce use-require and embedded versions of plugin from a single source file 454 | * Cache index, metadata, and timestamp in localStorage 455 | * Include tags list in search-list metadata 456 | * Add options: 457 | - ``subpath`` string for subfolder deployments 458 | - ``item_preprocessor`` callback 459 | - ``indexing_start`` callback 460 | - ``indexing_end`` callback 461 | * Edits to README 462 | 463 | ## Version 0.4.0 notes 464 | 465 | * Compatible with Ghost 1.0 466 | * Uses the Ghost API. If you need the RSS version you can use [this](https://github.com/jamalneufeld/ghostHunter/commit/2e721620868d127e9e688145fabcf5f86249d11b) commit, or @lizhuoli1126's [fork](https://github.com/dreampiggy/ghostHunter)* 467 | * It is currently not possible to [limit the number of fields queried and include tags](https://github.com/TryGhost/Ghost/issues/5615) in a single Ghost API call.
468 | 469 | ---------- 470 | 471 | # Footnotes 472 | 473 | [1] The ghostHunter module, and any other JavaScript, CSS or icon code should always be placed under the `assets` directory. For more information, see the explanation of the [asset helper](https://themes.ghost.org/v1.17.0/docs/asset). 474 | 475 | [2] In this case, the cloned `git` repository can be updated by entering the `ghostHunter` directory and doing `git pull`. There are a couple of alternatives: 476 | 477 | * You can just download the ZIP archive and unpack it in `assets`. To update to a later version, download and unZIP again. 478 | * If your theme itself is in a `git` repository, you can add ghostHunter as a [git submodule](https://github.com/blog/2104-working-with-submodules) or a [git subtree](https://www.atlassian.com/blog/git/alternatives-to-git-submodule-git-subtree). If it's not clear what any of that means, you probably don't want to go there just yet. 479 | 480 | [3] There is another copy of the module in `dist` called `jquery.ghosthunter-use-require.js`. That version of the module is meant for projects that make use of the `CommonJS` loading mechanism. If you are not using `CommonJS`, you can ignore this version of the module. 481 | 482 | [4] Features requiring deeper control would include fuzzy searches by [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance), or support for [non-English languages](https://lunrjs.com/guides/language_support.html) in `lunr.js`, for example. 483 | 484 | [5] The example given in the text assumes 485 | search-as-you-type mode. If your theme uses a submit button, the 486 | object at step 1 should be a hidden form, with appropriate adjustments 487 | to the JavaScript code to force submit rather than ``onKeyUp``.
488 | -------------------------------------------------------------------------------- /src/lunr.js: -------------------------------------------------------------------------------- 1 | /** 2 | * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.1.5 3 | * Copyright (C) 2017 Oliver Nightingale 4 | * @license MIT 5 | */ 6 | 7 | ;(function(){ 8 | 9 | /** 10 | * A convenience function for configuring and constructing 11 | * a new lunr Index. 12 | * 13 | * A lunr.Builder instance is created and the pipeline setup 14 | * with a trimmer, stop word filter and stemmer. 15 | * 16 | * This builder object is yielded to the configuration function 17 | * that is passed as a parameter, allowing the list of fields 18 | * and other builder parameters to be customised. 19 | * 20 | * All documents _must_ be added within the passed config function. 21 | * 22 | * @example 23 | * var idx = lunr(function () { 24 | * this.field('title') 25 | * this.field('body') 26 | * this.ref('id') 27 | * 28 | * documents.forEach(function (doc) { 29 | * this.add(doc) 30 | * }, this) 31 | * }) 32 | * 33 | * @see {@link lunr.Builder} 34 | * @see {@link lunr.Pipeline} 35 | * @see {@link lunr.trimmer} 36 | * @see {@link lunr.stopWordFilter} 37 | * @see {@link lunr.stemmer} 38 | * @namespace {function} lunr 39 | */ 40 | var lunr = function (config) { 41 | var builder = new lunr.Builder 42 | 43 | builder.pipeline.add( 44 | lunr.trimmer, 45 | lunr.stopWordFilter, 46 | lunr.stemmer 47 | ) 48 | 49 | builder.searchPipeline.add( 50 | lunr.stemmer 51 | ) 52 | 53 | config.call(builder, builder) 54 | return builder.build() 55 | } 56 | 57 | lunr.version = "2.1.5" 58 | /*! 59 | * lunr.utils 60 | * Copyright (C) 2017 Oliver Nightingale 61 | */ 62 | 63 | /** 64 | * A namespace containing utils for the rest of the lunr library 65 | */ 66 | lunr.utils = {} 67 | 68 | /** 69 | * Print a warning message to the console. 70 | * 71 | * @param {String} message The message to be printed. 
72 | * @memberOf Utils 73 | */ 74 | lunr.utils.warn = (function (global) { 75 | /* eslint-disable no-console */ 76 | return function (message) { 77 | if (global.console && console.warn) { 78 | console.warn(message) 79 | } 80 | } 81 | /* eslint-enable no-console */ 82 | })(this) 83 | 84 | /** 85 | * Convert an object to a string. 86 | * 87 | * In the case of `null` and `undefined` the function returns 88 | * the empty string, in all other cases the result of calling 89 | * `toString` on the passed object is returned. 90 | * 91 | * @param {Any} obj The object to convert to a string. 92 | * @return {String} string representation of the passed object. 93 | * @memberOf Utils 94 | */ 95 | lunr.utils.asString = function (obj) { 96 | if (obj === void 0 || obj === null) { 97 | return "" 98 | } else { 99 | return obj.toString() 100 | } 101 | } 102 | lunr.FieldRef = function (docRef, fieldName, stringValue) { 103 | this.docRef = docRef 104 | this.fieldName = fieldName 105 | this._stringValue = stringValue 106 | } 107 | 108 | lunr.FieldRef.joiner = "/" 109 | 110 | lunr.FieldRef.fromString = function (s) { 111 | var n = s.indexOf(lunr.FieldRef.joiner) 112 | 113 | if (n === -1) { 114 | throw "malformed field ref string" 115 | } 116 | 117 | var fieldRef = s.slice(0, n), 118 | docRef = s.slice(n + 1) 119 | 120 | return new lunr.FieldRef (docRef, fieldRef, s) 121 | } 122 | 123 | lunr.FieldRef.prototype.toString = function () { 124 | if (this._stringValue == undefined) { 125 | this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef 126 | } 127 | 128 | return this._stringValue 129 | } 130 | /** 131 | * A function to calculate the inverse document frequency for 132 | * a posting. This is shared between the builder and the index 133 | * 134 | * @private 135 | * @param {object} posting - The posting for a given term 136 | * @param {number} documentCount - The total number of documents. 
137 | */ 138 | lunr.idf = function (posting, documentCount) { 139 | var documentsWithTerm = 0 140 | 141 | for (var fieldName in posting) { 142 | if (fieldName == '_index') continue // Ignore the term index, its not a field 143 | documentsWithTerm += Object.keys(posting[fieldName]).length 144 | } 145 | 146 | var x = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5) 147 | 148 | return Math.log(1 + Math.abs(x)) 149 | } 150 | 151 | /** 152 | * A token wraps a string representation of a token 153 | * as it is passed through the text processing pipeline. 154 | * 155 | * @constructor 156 | * @param {string} [str=''] - The string token being wrapped. 157 | * @param {object} [metadata={}] - Metadata associated with this token. 158 | */ 159 | lunr.Token = function (str, metadata) { 160 | this.str = str || "" 161 | this.metadata = metadata || {} 162 | } 163 | 164 | /** 165 | * Returns the token string that is being wrapped by this object. 166 | * 167 | * @returns {string} 168 | */ 169 | lunr.Token.prototype.toString = function () { 170 | return this.str 171 | } 172 | 173 | /** 174 | * A token update function is used when updating or optionally 175 | * when cloning a token. 176 | * 177 | * @callback lunr.Token~updateFunction 178 | * @param {string} str - The string representation of the token. 179 | * @param {Object} metadata - All metadata associated with this token. 180 | */ 181 | 182 | /** 183 | * Applies the given function to the wrapped string token. 184 | * 185 | * @example 186 | * token.update(function (str, metadata) { 187 | * return str.toUpperCase() 188 | * }) 189 | * 190 | * @param {lunr.Token~updateFunction} fn - A function to apply to the token string. 191 | * @returns {lunr.Token} 192 | */ 193 | lunr.Token.prototype.update = function (fn) { 194 | this.str = fn(this.str, this.metadata) 195 | return this 196 | } 197 | 198 | /** 199 | * Creates a clone of this token. Optionally a function can be 200 | * applied to the cloned token. 
201 | * 202 | * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token. 203 | * @returns {lunr.Token} 204 | */ 205 | lunr.Token.prototype.clone = function (fn) { 206 | fn = fn || function (s) { return s } 207 | return new lunr.Token (fn(this.str, this.metadata), this.metadata) 208 | } 209 | /*! 210 | * lunr.tokenizer 211 | * Copyright (C) 2017 Oliver Nightingale 212 | */ 213 | 214 | /** 215 | * A function for splitting a string into tokens ready to be inserted into 216 | * the search index. Uses `lunr.tokenizer.separator` to split strings, change 217 | * the value of this property to change how strings are split into tokens. 218 | * 219 | * This tokenizer will convert its parameter to a string by calling `toString` and 220 | * then will split this string on the character in `lunr.tokenizer.separator`. 221 | * Arrays will have their elements converted to strings and wrapped in a lunr.Token. 222 | * 223 | * @static 224 | * @param {?(string|object|object[])} obj - The object to convert into tokens 225 | * @returns {lunr.Token[]} 226 | */ 227 | lunr.tokenizer = function (obj) { 228 | if (obj == null || obj == undefined) { 229 | return [] 230 | } 231 | 232 | if (Array.isArray(obj)) { 233 | return obj.map(function (t) { 234 | return new lunr.Token(lunr.utils.asString(t).toLowerCase()) 235 | }) 236 | } 237 | 238 | var str = obj.toString().trim().toLowerCase(), 239 | len = str.length, 240 | tokens = [] 241 | 242 | for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) { 243 | var char = str.charAt(sliceEnd), 244 | sliceLength = sliceEnd - sliceStart 245 | 246 | if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) { 247 | 248 | if (sliceLength > 0) { 249 | tokens.push( 250 | new lunr.Token (str.slice(sliceStart, sliceEnd), { 251 | position: [sliceStart, sliceLength], 252 | index: tokens.length 253 | }) 254 | ) 255 | } 256 | 257 | sliceStart = sliceEnd + 1 258 | } 259 | 260 | } 261 | 262 | return tokens 263 | } 264 
/**
 * The pattern `lunr.tokenizer` uses to decide where one token ends and the
 * next begins when it is given a string. Assign a different pattern to this
 * property to change how strings are split. The default treats runs of
 * whitespace and hyphens as separators.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
/*!
 * lunr.Pipeline
 * Copyright (C) 2017 Oliver Nightingale
 */

/**
 * A lunr.Pipeline holds an ordered list of functions that are applied to every
 * token of a document entering the search index and to every query run
 * against the index.
 *
 * An instance of lunr.Index created with the lunr shortcut will contain a
 * pipeline with a stop word filter and an English language stemmer. Further
 * functions can be placed before or after either of these, and the defaults
 * can be removed.
 *
 * When the pipeline runs, each function is called in turn with a token, the
 * index of that token in the list of tokens and the list of tokens itself.
 *
 * Whatever a function returns is handed to the next function in the pipeline.
 * To keep a token out of the index a function should return undefined; the
 * remaining functions are then not called for that token.
 *
 * For serialisation of pipelines to work, every function used in a pipeline
 * instance should be registered with lunr.Pipeline so that it can be looked
 * up again when loading. Loading a serialised pipeline that refers to an
 * unregistered function throws an error.
 *
 * Registering functions is unnecessary when the pipeline is never serialised.
 *
 * @constructor
 */
lunr.Pipeline = function () {
  // ordered list of pipeline functions, first to run first
  this._stack = []
}

// label -> function lookup; created without a prototype so that labels can
// never collide with inherited Object properties
lunr.Pipeline.registeredFunctions = Object.create(null)

/**
 * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the
 * token string as well as all known metadata. A pipeline function can mutate
 * the token string or mutate (or add) metadata for a given token.
 *
 * A pipeline function can indicate that the passed token should be discarded
 * by returning undefined. The token is then not passed to any downstream
 * pipeline functions and is not added to the index.
 *
 * Multiple tokens can be returned by returning an array of tokens. Each of
 * them is passed to any downstream pipeline functions and all returned tokens
 * are added to the index.
 *
 * Any number of pipeline functions may be chained together using a
 * lunr.Pipeline.
 *
 * @interface lunr.PipelineFunction
 * @param {lunr.Token} token - A token from the document being processed.
 * @param {number} i - The index of this token in the complete list of tokens for this document/field.
 * @param {lunr.Token[]} tokens - All tokens for this document/field.
 * @returns {(lunr.Token|lunr.Token[]|undefined)}
 */
/**
 * Register a function with the pipeline.
 *
 * Functions that are used in the pipeline should be registered if the pipeline
 * needs to be serialised, or a serialised pipeline needs to be loaded.
 *
 * Registering a function does not add it to a pipeline, functions must still be
 * added to instances of the pipeline for them to be used when running a pipeline.
 *
 * The label is also stored on the function itself as `fn.label`. Registering a
 * second function under an existing label logs a warning and replaces the
 * earlier registration.
 *
 * @param {lunr.PipelineFunction} fn - The function to register.
 * @param {String} label - The label to register this function with
 */
lunr.Pipeline.registerFunction = function (fn, label) {
  // The registry is always addressed through lunr.Pipeline rather than `this`
  // so the check and the write hit the same object, and the function keeps
  // working when it is invoked detached from lunr.Pipeline.
  if (label in lunr.Pipeline.registeredFunctions) {
    lunr.utils.warn('Overwriting existing registered function: ' + label)
  }

  fn.label = label
  lunr.Pipeline.registeredFunctions[fn.label] = fn
}

/**
 * Warns if the function is not registered as a Pipeline function.
 *
 * A function counts as registered when it has a `label` and that label is
 * present in `lunr.Pipeline.registeredFunctions`.
 *
 * @param {lunr.PipelineFunction} fn - The function to check for.
 * @private
 */
lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  var isRegistered = fn.label && (fn.label in lunr.Pipeline.registeredFunctions)

  if (!isRegistered) {
    lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  }
}

/**
 * Loads a previously serialised pipeline.
 *
 * All functions to be loaded must already be registered with lunr.Pipeline.
 * If any function from the serialised data has not been registered then an
 * error will be thrown.
 *
 * @param {Object} serialised - The serialised pipeline to load: a list of
 * function labels, in pipeline order.
 * @returns {lunr.Pipeline}
 * @throws {Error} If a label has no registered function.
 */
lunr.Pipeline.load = function (serialised) {
  var pipeline = new lunr.Pipeline

  serialised.forEach(function (fnName) {
    var fn = lunr.Pipeline.registeredFunctions[fnName]

    if (fn) {
      pipeline.add(fn)
    } else {
      throw new Error('Cannot load unregistered function: ' + fnName)
    }
  })

  return pipeline
}
/**
 * Appends functions to the end of the pipeline, in the order given.
 *
 * A warning is logged for each function that has not been registered.
 *
 * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
 */
lunr.Pipeline.prototype.add = function () {
  for (var i = 0; i < arguments.length; i++) {
    var fn = arguments[i]

    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    this._stack.push(fn)
  }
}

/**
 * Inserts one function directly behind a function that is already part of
 * the pipeline.
 *
 * A warning is logged if the new function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} If existingFn is not in the pipeline.
 */
lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var existingPos = this._stack.indexOf(existingFn)

  if (existingPos == -1) {
    throw new Error('Cannot find existingFn')
  }

  this._stack.splice(existingPos + 1, 0, newFn)
}

/**
 * Inserts one function directly in front of a function that is already part
 * of the pipeline.
 *
 * A warning is logged if the new function has not been registered.
 *
 * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
 * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
 * @throws {Error} If existingFn is not in the pipeline.
 */
lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

  var existingPos = this._stack.indexOf(existingFn)

  if (existingPos == -1) {
    throw new Error('Cannot find existingFn')
  }

  this._stack.splice(existingPos, 0, newFn)
}
/**
 * Removes a function from the pipeline.
 *
 * Does nothing when the function is not part of the pipeline.
 *
 * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
 */
lunr.Pipeline.prototype.remove = function (fn) {
  var pos = this._stack.indexOf(fn)
  if (pos == -1) {
    return
  }

  this._stack.splice(pos, 1)
}

/**
 * Runs the current list of functions that make up the pipeline against the
 * passed tokens.
 *
 * The pipeline is applied stage by stage: every token is passed through the
 * first function, the collected results are then passed through the second
 * function, and so on. For each call a function receives the token, its index
 * and the full list of tokens entering that stage.
 *
 * A result of `null`, `undefined` or the empty string discards the token; an
 * array result contributes each of its elements as a separate token.
 *
 * @param {Array} tokens The tokens to run through the pipeline.
 * @returns {Array}
 */
lunr.Pipeline.prototype.run = function (tokens) {
  var stackLength = this._stack.length

  for (var i = 0; i < stackLength; i++) {
    var fn = this._stack[i],
        tokenCount = tokens.length,
        memo = []

    for (var j = 0; j < tokenCount; j++) {
      var result = fn(tokens[j], j, tokens)

      // null is part of the documented PipelineFunction contract for
      // discarding a token; letting it through would hand null to the
      // next stage.
      if (result === null || result === void 0 || result === '') continue

      // Results are appended in place; building a new array per token
      // (concat) made a stage quadratic in the number of tokens.
      if (Array.isArray(result)) {
        for (var k = 0; k < result.length; k++) {
          memo.push(result[k])
        }
      } else {
        memo.push(result)
      }
    }

    tokens = memo
  }

  return tokens
}

/**
 * Convenience method for passing a string through a pipeline and getting
 * strings out. This method takes care of wrapping the passed string in a
 * token and mapping the resulting tokens back to strings.
 *
 * @param {string} str - The string to pass through the pipeline.
 * @returns {string[]}
 */
lunr.Pipeline.prototype.runString = function (str) {
  var token = new lunr.Token (str)

  return this.run([token]).map(function (t) {
    return t.toString()
  })
}

/**
 * Resets the pipeline by removing any existing processors.
 *
 */
lunr.Pipeline.prototype.reset = function () {
  this._stack = []
}
/**
 * Produces the serialisable form of the pipeline: the labels of its
 * functions, in pipeline order.
 *
 * A warning is logged for every function that has not been registered.
 *
 * @returns {Array}
 */
lunr.Pipeline.prototype.toJSON = function () {
  var labels = []

  for (var i = 0; i < this._stack.length; i++) {
    var fn = this._stack[i]

    lunr.Pipeline.warnIfFunctionNotRegistered(fn)
    labels.push(fn.label)
  }

  return labels
}
/*!
 * lunr.Vector
 * Copyright (C) 2017 Oliver Nightingale
 */

/**
 * Vectors make up the vector space of documents and queries and provide the
 * operations needed to measure how similar two documents, or a document and
 * a query, are.
 *
 * A vector is normally created without arguments; when restoring a
 * previously dumped vector its raw elements can be handed to the constructor.
 *
 * For performance reasons the data is kept in one flat array in which every
 * element index is directly followed by its value, e.g.
 * [index, value, index, value]. This keeps the underlying array as sparse as
 * possible while still performing decently in vector calculations.
 *
 * @constructor
 * @param {Number[]} [elements] - The flat list of element index and element value pairs.
 */
lunr.Vector = function (elements) {
  // cached magnitude; 0 means "not calculated yet"
  this._magnitude = 0
  this.elements = elements ? elements : []
}
/**
 * Works out where in the flat elements array the pair for a given index
 * belongs.
 *
 * Used internally by insert and upsert. When the index is already present the
 * position of the existing pair is returned, exactly as if its value were to
 * be updated; telling an existing entry apart from a free slot is left to the
 * caller.
 *
 * @param {Number} index - The index at which the element should be inserted.
 * @returns {Number} An offset into `this.elements`.
 */
lunr.Vector.prototype.positionForIndex = function (index) {
  var elements = this.elements

  // Nothing stored yet, so the pair goes right at the front
  if (elements.length == 0) {
    return 0
  }

  // Binary search over pairs; lo/hi/mid count pairs, not array slots
  var lo = 0,
      hi = elements.length / 2,
      span = hi - lo,
      mid = Math.floor(span / 2),
      midIndex = elements[mid * 2]

  while (span > 1) {
    if (midIndex == index) {
      break
    }

    if (midIndex < index) {
      lo = mid
    } else if (midIndex > index) {
      hi = mid
    }

    span = hi - lo
    mid = lo + Math.floor(span / 2)
    midIndex = elements[mid * 2]
  }

  // Larger index: the new pair goes after the pair the search settled on
  if (midIndex < index) {
    return (mid + 1) * 2
  }

  // Same or smaller index: the new pair takes the place of that pair
  if (midIndex == index || midIndex > index) {
    return mid * 2
  }
}

/**
 * Adds a new element to the vector at the given index.
 *
 * Duplicates are not allowed: when an entry for the index already exists the
 * string "duplicate index" is thrown.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 */
lunr.Vector.prototype.insert = function (insertIdx, val) {
  var rejectDuplicate = function () {
    throw "duplicate index"
  }

  this.upsert(insertIdx, val, rejectDuplicate)
}
/**
 * Adds an element at an index, or updates the element already stored there.
 *
 * The cached magnitude is cleared in either case.
 *
 * @param {Number} insertIdx - The index at which the element should be inserted.
 * @param {Number} val - The value to be inserted into the vector.
 * @param {function} fn - Called only for updates with the existing value and
 * the requested value; its return value is stored as the new value.
 */
lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
  this._magnitude = 0

  var position = this.positionForIndex(insertIdx)

  if (this.elements[position] != insertIdx) {
    // no entry for this index yet, make room for the new pair
    this.elements.splice(position, 0, insertIdx, val)
    return
  }

  var merged = fn(this.elements[position + 1], val)
  this.elements[position + 1] = merged
}

/**
 * Returns the magnitude of this vector: the square root of the sum of the
 * squared values.
 *
 * A non-zero result is cached until the vector is next modified through
 * upsert.
 *
 * @returns {Number}
 */
lunr.Vector.prototype.magnitude = function () {
  if (this._magnitude) {
    return this._magnitude
  }

  var total = 0

  // values live at the odd positions of the flat array
  for (var pos = 1, end = this.elements.length; pos < end; pos += 2) {
    var value = this.elements[pos]
    total += value * value
  }

  this._magnitude = Math.sqrt(total)
  return this._magnitude
}

/**
 * Returns the dot product of this vector and another vector.
 *
 * Both element lists are walked in step; a pair only contributes when the
 * same index is present in both vectors.
 *
 * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
 * @returns {Number}
 */
lunr.Vector.prototype.dot = function (otherVector) {
  var left = this.elements,
      right = otherVector.elements,
      leftLength = left.length,
      rightLength = right.length,
      leftPos = 0,
      rightPos = 0,
      product = 0

  while (leftPos < leftLength && rightPos < rightLength) {
    var leftIndex = left[leftPos],
        rightIndex = right[rightPos]

    if (leftIndex < rightIndex) {
      leftPos += 2
    } else if (leftIndex > rightIndex) {
      rightPos += 2
    } else if (leftIndex == rightIndex) {
      product += left[leftPos + 1] * right[rightPos + 1]
      leftPos += 2
      rightPos += 2
    }
  }

  return product
}
/**
 * Calculates the cosine similarity between this vector and another
 * vector.
 *
 * When either vector has a magnitude of zero the similarity is reported as 0
 * instead of dividing by zero, which would yield NaN.
 *
 * @param {lunr.Vector} otherVector - The other vector to calculate the
 * similarity with.
 * @returns {Number}
 */
lunr.Vector.prototype.similarity = function (otherVector) {
  var denominator = this.magnitude() * otherVector.magnitude()

  if (denominator == 0) {
    return 0
  }

  return this.dot(otherVector) / denominator
}

/**
 * Converts the vector to an array of the elements within the vector.
 *
 * Only the values are returned, in index order; the indexes themselves are
 * dropped.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toArray = function () {
  var output = new Array (this.elements.length / 2)

  // values live at the odd positions of the flat array
  for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {
    output[j] = this.elements[i]
  }

  return output
}

/**
 * A JSON serializable representation of the vector.
 *
 * This is the flat [index, value, ...] element list itself, not a copy.
 *
 * @returns {Number[]}
 */
lunr.Vector.prototype.toJSON = function () {
  return this.elements
}
/* eslint-disable */
/*!
 * lunr.stemmer
 * Copyright (C) 2017 Oliver Nightingale
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * The stemming itself works on plain strings; the exported pipeline function
 * applies it to a token through `token.update`.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token - The string to stem
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
lunr.stemmer = (function(){
  // suffix -> replacement tables for steps 2 and 3
  var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    },

    step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    },

    c = "[^aeiou]",          // consonant
    v = "[aeiouy]",          // vowel
    C = c + "[^aeiouy]*",    // consonant sequence
    V = v + "[aeiou]*",      // vowel sequence

    mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
    meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
    mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
    s_v = "^(" + C + ")?" + v;                   // vowel in stem

  var re_mgr0 = new RegExp(mgr0);
  var re_mgr1 = new RegExp(mgr1);
  var re_meq1 = new RegExp(meq1);
  var re_s_v = new RegExp(s_v);

  var re_1a = /^(.+?)(ss|i)es$/;
  var re2_1a = /^(.+?)([^s])s$/;
  var re_1b = /^(.+?)eed$/;
  var re2_1b = /^(.+?)(ed|ing)$/;
  var re_1b_2 = /.$/;
  var re2_1b_2 = /(at|bl|iz)$/;
  var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
  var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var re_1c = /^(.+?[^aeiou])y$/;
  var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;

  var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;

  var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var re2_4 = /^(.+?)(s|t)(ion)$/;

  var re_5 = /^(.+?)e$/;
  var re_5_1 = /ll$/;
  var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");

  // None of the patterns above carries the g or y flag, so a single exec()
  // call both tests and captures; the result is reused instead of running
  // every pattern twice.
  var porterStemmer = function porterStemmer(w) {
    var stem,
      suffix,
      firstch,
      fp;

    if (w.length < 3) { return w; }

    // An initial y is upper-cased for the duration of the algorithm so that
    // it is not matched as a vowel by the patterns above.
    firstch = w.charAt(0);
    if (firstch == "y") {
      w = firstch.toUpperCase() + w.slice(1);
    }

    // Step 1a
    if (re_1a.test(w)) { w = w.replace(re_1a,"$1$2"); }
    else if (re2_1a.test(w)) { w = w.replace(re2_1a,"$1$2"); }

    // Step 1b
    fp = re_1b.exec(w);
    if (fp) {
      if (re_mgr0.test(fp[1])) {
        w = w.replace(re_1b_2,"");
      }
    } else {
      fp = re2_1b.exec(w);
      if (fp) {
        stem = fp[1];
        if (re_s_v.test(stem)) {
          w = stem;
          if (re2_1b_2.test(w)) { w = w + "e"; }
          else if (re3_1b_2.test(w)) { w = w.replace(re_1b_2,""); }
          else if (re4_1b_2.test(w)) { w = w + "e"; }
        }
      }
    }

    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    fp = re_1c.exec(w);
    if (fp) {
      stem = fp[1];
      w = stem + "i";
    }

    // Step 2
    fp = re_2.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (re_mgr0.test(stem)) {
        w = stem + step2list[suffix];
      }
    }

    // Step 3
    fp = re_3.exec(w);
    if (fp) {
      stem = fp[1];
      suffix = fp[2];
      if (re_mgr0.test(stem)) {
        w = stem + step3list[suffix];
      }
    }

    // Step 4
    fp = re_4.exec(w);
    if (fp) {
      stem = fp[1];
      if (re_mgr1.test(stem)) {
        w = stem;
      }
    } else {
      fp = re2_4.exec(w);
      if (fp) {
        stem = fp[1] + fp[2];
        if (re_mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    fp = re_5.exec(w);
    if (fp) {
      stem = fp[1];
      if (re_mgr1.test(stem) || (re_meq1.test(stem) && !(re3_5.test(stem)))) {
        w = stem;
      }
    }

    if (re_5_1.test(w) && re_mgr1.test(w)) {
      w = w.replace(re_1b_2,"");
    }

    // and turn initial Y back to y

    if (firstch == "y") {
      w = firstch.toLowerCase() + w.slice(1);
    }

    return w;
  };

  return function (token) {
    return token.update(porterStemmer);
  }
})();

lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
/*!
 * lunr.stopWordFilter
 * Copyright (C) 2017 Oliver Nightingale
 */

/**
 * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
 * list of stop words.
 *
 * The built in lunr.stopWordFilter is built using this generator and can be used
 * to generate custom stopWordFilters for applications or non English languages.
 *
 * The returned function passes a token through unchanged unless its string
 * form is one of the stop words, in which case it returns undefined.
 *
 * @param {Array} stopWords - The list of words the generated filter should reject.
 * @returns {lunr.PipelineFunction}
 * @see lunr.Pipeline
 * @see lunr.stopWordFilter
 */
lunr.generateStopWordFilter = function (stopWords) {
  // A prototype-less object is used as the lookup table so that every stop
  // word, including names such as "__proto__", is stored as a plain key and
  // nothing is inherited from Object.prototype.
  var words = stopWords.reduce(function (memo, stopWord) {
    memo[stopWord] = stopWord
    return memo
  }, Object.create(null))

  return function (token) {
    if (!token) return

    var str = token.toString()

    if (words[str] !== str) return token
  }
}
/**
 * lunr.stopWordFilter rejects common English words: a token whose string is
 * one of the words listed below does not make it through the filter.
 *
 * It is meant to be used as a function in a lunr.Pipeline. A token that is
 * rejected results in undefined being returned.
 *
 * @implements {lunr.PipelineFunction}
 * @params {lunr.Token} token - A token to check for being a stop word.
 * @returns {lunr.Token}
 * @see {@link lunr.Pipeline}
 */
lunr.stopWordFilter = lunr.generateStopWordFilter([
  'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also',
  'am', 'among', 'an', 'and', 'any', 'are', 'as', 'at',
  'be', 'because', 'been', 'but', 'by', 'can', 'cannot', 'could',
  'dear', 'did', 'do', 'does', 'either', 'else', 'ever', 'every',
  'for', 'from', 'get', 'got', 'had', 'has', 'have', 'he',
  'her', 'hers', 'him', 'his', 'how', 'however', 'i', 'if',
  'in', 'into', 'is', 'it', 'its', 'just', 'least', 'let',
  'like', 'likely', 'may', 'me', 'might', 'most', 'must', 'my',
  'neither', 'no', 'nor', 'not', 'of', 'off', 'often', 'on',
  'only', 'or', 'other', 'our', 'own', 'rather', 'said', 'say',
  'says', 'she', 'should', 'since', 'so', 'some', 'than', 'that',
  'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this',
  'tis', 'to', 'too', 'twas', 'us', 'wants', 'was', 'we',
  'were', 'what', 'when', 'where', 'which', 'while', 'who', 'whom',
  'why', 'will', 'with', 'would', 'yet', 'you', 'your'
])

lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
/*!
 * lunr.trimmer
 * Copyright (C) 2017 Oliver Nightingale
 */

/**
 * lunr.trimmer is a pipeline function that strips non word characters from
 * the start and the end of a token before it enters the index.
 *
 * "Non word" is whatever the regular expression class \W matches, so this
 * implementation may not behave correctly for non latin characters; remove it
 * from the pipeline or adapt it when indexing languages that use them.
 *
 * @static
 * @implements {lunr.PipelineFunction}
 * @param {lunr.Token} token The token to pass through the filter
 * @returns {lunr.Token}
 * @see lunr.Pipeline
 */
lunr.trimmer = function (token) {
  return token.update(function (s) {
    var withoutLeading = s.replace(/^\W+/, '')

    return withoutLeading.replace(/\W+$/, '')
  })
}

lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')
/*!
 * lunr.TokenSet
 * Copyright (C) 2017 Oliver Nightingale
 */
/**
 * A token set is used to store the unique list of all tokens
 * within an index. Token sets are also used to represent an
 * incoming query to the index, this query token set and index
 * token set are then intersected to find which tokens to look
 * up in the inverted index.
 *
 * A token set can hold multiple tokens, as in the case of the
 * index token set, or it can hold a single token as in the
 * case of a simple query token set.
 *
 * Additionally token sets are used to perform wildcard matching.
 * Leading, contained and trailing wildcards are supported, and
 * from this edit distance matching can also be provided.
 *
 * Token sets are implemented as a minimal finite state automata,
 * where both common prefixes and suffixes are shared between tokens.
 * This helps to reduce the space used for storing the token set.
 *
 * Each instance is one node of that automaton: `edges` maps a character
 * (or the wildcard "*") to the next node and `final` marks a node at which
 * a token may end.
 *
 * @constructor
 */
lunr.TokenSet = function () {
  this.final = false
  this.edges = {}
  this.id = lunr.TokenSet._nextId
  lunr.TokenSet._nextId += 1
}

/**
 * Keeps track of the next, auto increment, identifier to assign
 * to a new tokenSet.
 *
 * TokenSets require a unique identifier to be correctly minimised.
 *
 * @private
 */
lunr.TokenSet._nextId = 1

/**
 * Creates a TokenSet instance from the given sorted array of words.
 *
 * The words are fed one by one into a lunr.TokenSet.Builder, whose root node
 * is returned once the builder has been finished.
 *
 * @param {String[]} arr - A sorted array of strings to create the set from.
 * @returns {lunr.TokenSet}
 * @throws Will throw an error if the input array is not sorted (presumably
 * raised by lunr.TokenSet.Builder, which is defined elsewhere).
 */
lunr.TokenSet.fromArray = function (arr) {
  var builder = new lunr.TokenSet.Builder

  for (var i = 0, len = arr.length; i < len; i++) {
    builder.insert(arr[i])
  }

  builder.finish()
  return builder.root
}

/**
 * Creates a token set from a query clause.
 *
 * A clause that has an `editDistance` property produces a fuzzy token set,
 * any other clause is treated as a plain (possibly wildcarded) string.
 *
 * @private
 * @param {Object} clause - A single clause from lunr.Query.
 * @param {string} clause.term - The query clause term.
 * @param {number} [clause.editDistance] - The optional edit distance for the term.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromClause = function (clause) {
  if ('editDistance' in clause) {
    return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance)
  } else {
    return lunr.TokenSet.fromString(clause.term)
  }
}

/**
 * Creates a token set representing a single string with a specified
 * edit distance.
 *
 * Insertions, deletions, substitutions and transpositions are each
 * treated as an edit distance of 1.
 *
 * Increasing the allowed edit distance will have a dramatic impact
 * on the performance of both creating and intersecting these TokenSets.
 * It is advised to keep the edit distance less than 3.
 *
 * The set is built with an explicit stack of frames. A frame describes a node,
 * the part of the string that still has to be matched from that node and the
 * number of edits that may still be spent.
 *
 * @param {string} str - The string to create the token set from.
 * @param {number} editDistance - The allowed edit distance to match.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromFuzzyString = function (str, editDistance) {
  var root = new lunr.TokenSet

  // Follows the edge labelled `label` out of `node`, creating the target
  // node first when the edge does not exist yet.
  var follow = function (node, label) {
    if (!(label in node.edges)) {
      node.edges[label] = new lunr.TokenSet
    }

    return node.edges[label]
  }

  var stack = [{
    node: root,
    editsRemaining: editDistance,
    str: str
  }]

  while (stack.length) {
    var frame = stack.pop(),
        remaining = frame.str.length

    // no edit
    if (remaining > 0) {
      var noEditNode = follow(frame.node, frame.str.charAt(0))

      if (remaining == 1) {
        noEditNode.final = true
      } else {
        stack.push({
          node: noEditNode,
          editsRemaining: frame.editsRemaining,
          str: frame.str.slice(1)
        })
      }
    }

    // everything below spends an edit
    if (!(frame.editsRemaining > 0)) {
      continue
    }

    // deletion
    // skips the first remaining character, which needs a second character
    // to move on to
    if (remaining > 1) {
      var deletionNode = follow(frame.node, frame.str.charAt(1))

      if (remaining == 2) {
        deletionNode.final = true
      } else {
        stack.push({
          node: deletionNode,
          editsRemaining: frame.editsRemaining - 1,
          str: frame.str.slice(2)
        })
      }
    }

    // deletion
    // just removing the last character from the str
    if (remaining == 1) {
      frame.node.final = true
    }

    // substitution
    // replaces the first remaining character with any character
    if (remaining >= 1) {
      var substitutionNode = follow(frame.node, "*")

      if (remaining == 1) {
        substitutionNode.final = true
      } else {
        stack.push({
          node: substitutionNode,
          editsRemaining: frame.editsRemaining - 1,
          str: frame.str.slice(1)
        })
      }
    }

    // insertion
    // accepts any one extra character in front of the remaining string
    var insertionNode = follow(frame.node, "*")

    if (remaining == 0) {
      insertionNode.final = true
    } else {
      stack.push({
        node: insertionNode,
        editsRemaining: frame.editsRemaining - 1,
        str: frame.str
      })
    }

    // transposition
    // swaps the first two remaining characters. At least two characters are
    // left here, so the swapped-in first character always remains to be
    // matched and this node can never be final by itself.
    if (remaining > 1) {
      var charA = frame.str.charAt(0),
          charB = frame.str.charAt(1),
          transposeNode = follow(frame.node, charB)

      stack.push({
        node: transposeNode,
        editsRemaining: frame.editsRemaining - 1,
        str: charA + frame.str.slice(2)
      })
    }
  }

  return root
}

/**
 * Creates a TokenSet from a string.
 *
 * The string may contain one or more wildcard characters (*)
 * that will allow wildcard matching when intersecting with
 * another TokenSet.
 *
 * @param {string} str - The string to create a TokenSet from.
 * @returns {lunr.TokenSet}
 */
lunr.TokenSet.fromString = function (str) {
  var node = new lunr.TokenSet,
      root = node,
      wildcardFound = false

  /*
   * Iterates through all characters within the passed string
   * appending a node for each character.
   *
   * As soon as a wildcard character is found then a self
   * referencing edge is introduced to continually match
   * any number of any characters.
   */
  for (var i = 0, len = str.length; i < len; i++) {
    var char = str[i],
        final = (i == len - 1)

    if (char == "*") {
      wildcardFound = true
      node.edges[char] = node
      node.final = final

    } else {
      var next = new lunr.TokenSet
      next.final = final

      node.edges[char] = next
      node = next

      // TODO: is this needed anymore?
      if (wildcardFound) {
        node.edges["*"] = root
      }
    }
  }

  return root
}
1422 | * 1423 | * @returns {string[]} 1424 | */ 1425 | lunr.TokenSet.prototype.toArray = function () { 1426 | var words = [] 1427 | 1428 | var stack = [{ 1429 | prefix: "", 1430 | node: this 1431 | }] 1432 | 1433 | while (stack.length) { 1434 | var frame = stack.pop(), 1435 | edges = Object.keys(frame.node.edges), 1436 | len = edges.length 1437 | 1438 | if (frame.node.final) { 1439 | words.push(frame.prefix) 1440 | } 1441 | 1442 | for (var i = 0; i < len; i++) { 1443 | var edge = edges[i] 1444 | 1445 | stack.push({ 1446 | prefix: frame.prefix.concat(edge), 1447 | node: frame.node.edges[edge] 1448 | }) 1449 | } 1450 | } 1451 | 1452 | return words 1453 | } 1454 | 1455 | /** 1456 | * Generates a string representation of a TokenSet. 1457 | * 1458 | * This is intended to allow TokenSets to be used as keys 1459 | * in objects, largely to aid the construction and minimisation 1460 | * of a TokenSet. As such it is not designed to be a human 1461 | * friendly representation of the TokenSet. 1462 | * 1463 | * @returns {string} 1464 | */ 1465 | lunr.TokenSet.prototype.toString = function () { 1466 | // NOTE: Using Object.keys here as this.edges is very likely 1467 | // to enter 'hash-mode' with many keys being added 1468 | // 1469 | // avoiding a for-in loop here as it leads to the function 1470 | // being de-optimised (at least in V8). From some simple 1471 | // benchmarks the performance is comparable, but allowing 1472 | // V8 to optimize may mean easy performance wins in the future. 1473 | 1474 | if (this._str) { 1475 | return this._str 1476 | } 1477 | 1478 | var str = this.final ? 
'1' : '0', 1479 | labels = Object.keys(this.edges).sort(), 1480 | len = labels.length 1481 | 1482 | for (var i = 0; i < len; i++) { 1483 | var label = labels[i], 1484 | node = this.edges[label] 1485 | 1486 | str = str + label + node.id 1487 | } 1488 | 1489 | return str 1490 | } 1491 | 1492 | /** 1493 | * Returns a new TokenSet that is the intersection of 1494 | * this TokenSet and the passed TokenSet. 1495 | * 1496 | * This intersection will take into account any wildcards 1497 | * contained within the TokenSet. 1498 | * 1499 | * @param {lunr.TokenSet} b - An other TokenSet to intersect with. 1500 | * @returns {lunr.TokenSet} 1501 | */ 1502 | lunr.TokenSet.prototype.intersect = function (b) { 1503 | var output = new lunr.TokenSet, 1504 | frame = undefined 1505 | 1506 | var stack = [{ 1507 | qNode: b, 1508 | output: output, 1509 | node: this 1510 | }] 1511 | 1512 | while (stack.length) { 1513 | frame = stack.pop() 1514 | 1515 | // NOTE: As with the #toString method, we are using 1516 | // Object.keys and a for loop instead of a for-in loop 1517 | // as both of these objects enter 'hash' mode, causing 1518 | // the function to be de-optimised in V8 1519 | var qEdges = Object.keys(frame.qNode.edges), 1520 | qLen = qEdges.length, 1521 | nEdges = Object.keys(frame.node.edges), 1522 | nLen = nEdges.length 1523 | 1524 | for (var q = 0; q < qLen; q++) { 1525 | var qEdge = qEdges[q] 1526 | 1527 | for (var n = 0; n < nLen; n++) { 1528 | var nEdge = nEdges[n] 1529 | 1530 | if (nEdge == qEdge || qEdge == '*') { 1531 | var node = frame.node.edges[nEdge], 1532 | qNode = frame.qNode.edges[qEdge], 1533 | final = node.final && qNode.final, 1534 | next = undefined 1535 | 1536 | if (nEdge in frame.output.edges) { 1537 | // an edge already exists for this character 1538 | // no need to create a new node, just set the finality 1539 | // bit unless this node is already final 1540 | next = frame.output.edges[nEdge] 1541 | next.final = next.final || final 1542 | 1543 | } else { 1544 | // 
no edge exists yet, must create one 1545 | // set the finality bit and insert it 1546 | // into the output 1547 | next = new lunr.TokenSet 1548 | next.final = final 1549 | frame.output.edges[nEdge] = next 1550 | } 1551 | 1552 | stack.push({ 1553 | qNode: qNode, 1554 | output: next, 1555 | node: node 1556 | }) 1557 | } 1558 | } 1559 | } 1560 | } 1561 | 1562 | return output 1563 | } 1564 | lunr.TokenSet.Builder = function () { 1565 | this.previousWord = "" 1566 | this.root = new lunr.TokenSet 1567 | this.uncheckedNodes = [] 1568 | this.minimizedNodes = {} 1569 | } 1570 | 1571 | lunr.TokenSet.Builder.prototype.insert = function (word) { 1572 | var node, 1573 | commonPrefix = 0 1574 | 1575 | if (word < this.previousWord) { 1576 | throw new Error ("Out of order word insertion") 1577 | } 1578 | 1579 | for (var i = 0; i < word.length && i < this.previousWord.length; i++) { 1580 | if (word[i] != this.previousWord[i]) break 1581 | commonPrefix++ 1582 | } 1583 | 1584 | this.minimize(commonPrefix) 1585 | 1586 | if (this.uncheckedNodes.length == 0) { 1587 | node = this.root 1588 | } else { 1589 | node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child 1590 | } 1591 | 1592 | for (var i = commonPrefix; i < word.length; i++) { 1593 | var nextNode = new lunr.TokenSet, 1594 | char = word[i] 1595 | 1596 | node.edges[char] = nextNode 1597 | 1598 | this.uncheckedNodes.push({ 1599 | parent: node, 1600 | char: char, 1601 | child: nextNode 1602 | }) 1603 | 1604 | node = nextNode 1605 | } 1606 | 1607 | node.final = true 1608 | this.previousWord = word 1609 | } 1610 | 1611 | lunr.TokenSet.Builder.prototype.finish = function () { 1612 | this.minimize(0) 1613 | } 1614 | 1615 | lunr.TokenSet.Builder.prototype.minimize = function (downTo) { 1616 | for (var i = this.uncheckedNodes.length - 1; i >= downTo; i--) { 1617 | var node = this.uncheckedNodes[i], 1618 | childKey = node.child.toString() 1619 | 1620 | if (childKey in this.minimizedNodes) { 1621 | node.parent.edges[node.char] = 
this.minimizedNodes[childKey] 1622 | } else { 1623 | // Cache the key for this node since 1624 | // we know it can't change anymore 1625 | node.child._str = childKey 1626 | 1627 | this.minimizedNodes[childKey] = node.child 1628 | } 1629 | 1630 | this.uncheckedNodes.pop() 1631 | } 1632 | } 1633 | /*! 1634 | * lunr.Index 1635 | * Copyright (C) 2017 Oliver Nightingale 1636 | */ 1637 | 1638 | /** 1639 | * An index contains the built index of all documents and provides a query interface 1640 | * to the index. 1641 | * 1642 | * Usually instances of lunr.Index will not be created using this constructor, instead 1643 | * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be 1644 | * used to load previously built and serialized indexes. 1645 | * 1646 | * @constructor 1647 | * @param {Object} attrs - The attributes of the built search index. 1648 | * @param {Object} attrs.invertedIndex - An index of term/field to document reference. 1649 | * @param {Object} attrs.fieldVectors - Field vectors keyed by field reference. 1650 | * @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens. 1651 | * @param {string[]} attrs.fields - The names of indexed document fields. 1652 | * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms. 1653 | */ 1654 | lunr.Index = function (attrs) { 1655 | this.invertedIndex = attrs.invertedIndex 1656 | this.fieldVectors = attrs.fieldVectors 1657 | this.tokenSet = attrs.tokenSet 1658 | this.fields = attrs.fields 1659 | this.pipeline = attrs.pipeline 1660 | } 1661 | 1662 | /** 1663 | * A result contains details of a document matching a search query. 1664 | * @typedef {Object} lunr.Index~Result 1665 | * @property {string} ref - The reference of the document this result represents. 1666 | * @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
1667 | * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match. 1668 | */ 1669 | 1670 | /** 1671 | * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple 1672 | * query language which itself is parsed into an instance of lunr.Query. 1673 | * 1674 | * For programmatically building queries it is advised to directly use lunr.Query; the query language 1675 | * is best used for human-entered text rather than program-generated text. 1676 | * 1677 | * At its simplest a query can just be a single term, e.g. `hello`. Multiple terms are also supported 1678 | * and will be combined with OR, e.g. `hello world` will match documents that contain either 'hello' 1679 | * or 'world', though those that contain both will rank higher in the results. 1680 | * 1681 | * Wildcards can be included in terms to match one or more unspecified characters. These wildcards can 1682 | * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding 1683 | * wildcards will increase the number of documents that will be found but can also have a negative 1684 | * impact on query performance, especially with wildcards at the beginning of a term. 1685 | * 1686 | * Terms can be restricted to specific fields, e.g. `title:hello`; only documents with the term 1687 | * hello in the title field will match this query. Using a field not present in the index will lead 1688 | * to an error being thrown. 1689 | * 1690 | * Modifiers can also be added to terms; lunr supports edit distance and boost modifiers on terms. A term 1691 | * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported 1692 | * to provide fuzzy matching, e.g. `hello~2` will match documents with hello with an edit distance of 2. 1693 | * Avoid large values for edit distance to improve query performance.
1694 | * 1695 | * To escape special characters the backslash character '\' can be used; this allows searches to include 1696 | * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead 1697 | * of attempting to apply an edit distance of 2 to the search term "foo". 1698 | * 1699 | * @typedef {string} lunr.Index~QueryString 1700 | * @example Simple single term query 1701 | * hello 1702 | * @example Multiple term query 1703 | * hello world 1704 | * @example term scoped to a field 1705 | * title:hello 1706 | * @example term with a boost of 10 1707 | * hello^10 1708 | * @example term with an edit distance of 2 1709 | * hello~2 1710 | */ 1711 | 1712 | /** 1713 | * Performs a search against the index using lunr query syntax. 1714 | * 1715 | * Results will be returned sorted by their score; the most relevant results 1716 | * will be returned first. 1717 | * 1718 | * For more programmatic querying use lunr.Index#query. 1719 | * 1720 | * @param {lunr.Index~QueryString} queryString - A string containing a lunr query. 1721 | * @throws {lunr.QueryParseError} If the passed query string cannot be parsed. 1722 | * @returns {lunr.Index~Result[]} 1723 | */ 1724 | lunr.Index.prototype.search = function (queryString) { 1725 | return this.query(function (query) { 1726 | var parser = new lunr.QueryParser(queryString, query) 1727 | parser.parse() 1728 | }) 1729 | } 1730 | 1731 | /** 1732 | * A query builder callback provides a query object to be used to express 1733 | * the query to perform on the index. 1734 | * 1735 | * @callback lunr.Index~queryBuilder 1736 | * @param {lunr.Query} query - The query object to build up. 1737 | * @this lunr.Query 1738 | */ 1739 | 1740 | /** 1741 | * Performs a query against the index using the yielded lunr.Query object. 1742 | * 1743 | * If performing programmatic queries against the index, this method is preferred 1744 | * over lunr.Index#search so as to avoid the additional query parsing overhead.
1745 | * 1746 | * A query object is yielded to the supplied function which should be used to 1747 | * express the query to be run against the index. 1748 | * 1749 | * Note that although this function takes a callback parameter it is _not_ an 1750 | * asynchronous operation, the callback is just yielded a query object to be 1751 | * customized. 1752 | * 1753 | * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query. 1754 | * @returns {lunr.Index~Result[]} 1755 | */ 1756 | lunr.Index.prototype.query = function (fn) { 1757 | // for each query clause 1758 | // * process terms 1759 | // * expand terms from token set 1760 | // * find matching documents and metadata 1761 | // * get document vectors 1762 | // * score documents 1763 | 1764 | var query = new lunr.Query(this.fields), 1765 | matchingFields = Object.create(null), 1766 | queryVectors = Object.create(null), 1767 | termFieldCache = Object.create(null) 1768 | 1769 | fn.call(query, query) 1770 | 1771 | for (var i = 0; i < query.clauses.length; i++) { 1772 | /* 1773 | * Unless the pipeline has been disabled for this term, which is 1774 | * the case for terms with wildcards, we need to pass the clause 1775 | * term through the search pipeline. A pipeline returns an array 1776 | * of processed terms. Pipeline functions may expand the passed 1777 | * term, which means we may end up performing multiple index lookups 1778 | * for a single query term. 1779 | */ 1780 | var clause = query.clauses[i], 1781 | terms = null 1782 | 1783 | if (clause.usePipeline) { 1784 | terms = this.pipeline.runString(clause.term) 1785 | } else { 1786 | terms = [clause.term] 1787 | } 1788 | 1789 | for (var m = 0; m < terms.length; m++) { 1790 | var term = terms[m] 1791 | 1792 | /* 1793 | * Each term returned from the pipeline needs to use the same query 1794 | * clause object, e.g. the same boost and or edit distance. 
The 1795 | * simplest way to do this is to re-use the clause object but mutate 1796 | * its term property. 1797 | */ 1798 | 1799 | clause = JSON.parse(JSON.stringify(clause)) 1800 | clause.term = term 1801 | 1802 | /* 1803 | * From the term in the clause we create a token set which will then 1804 | * be used to intersect the indexes token set to get a list of terms 1805 | * to lookup in the inverted index 1806 | */ 1807 | var termTokenSet = lunr.TokenSet.fromClause(clause), 1808 | expandedTerms = this.tokenSet.intersect(termTokenSet).toArray() 1809 | 1810 | for (var j = 0; j < expandedTerms.length; j++) { 1811 | /* 1812 | * For each term get the posting and termIndex, this is required for 1813 | * building the query vector. 1814 | */ 1815 | var expandedTerm = expandedTerms[j]; 1816 | var posting = this.invertedIndex[expandedTerm]; 1817 | if (posting) { 1818 | var termIndex = posting._index; 1819 | } else { 1820 | continue; 1821 | } 1822 | for (var k = 0; k < clause.fields.length; k++) { 1823 | /* 1824 | * For each field that this query term is scoped by (by default 1825 | * all fields are in scope) we need to get all the document refs 1826 | * that have this term in that field. 1827 | * 1828 | * The posting is the entry in the invertedIndex for the matching 1829 | * term from above. 1830 | */ 1831 | var field = clause.fields[k], 1832 | fieldPosting = posting[field], 1833 | matchingDocumentRefs = Object.keys(fieldPosting), 1834 | termField = expandedTerm + "/" + field 1835 | 1836 | /* 1837 | * To support field level boosts a query vector is created per 1838 | * field. This vector is populated using the termIndex found for 1839 | * the term and a unit value with the appropriate boost applied. 1840 | * 1841 | * If the query vector for this field does not exist yet it needs 1842 | * to be created. 
1843 | */ 1844 | if (queryVectors[field] === undefined) { 1845 | queryVectors[field] = new lunr.Vector 1846 | } 1847 | 1848 | /* 1849 | * Using upsert because there could already be an entry in the vector 1850 | * for the term we are working with. In that case we just add the scores 1851 | * together. 1852 | */ 1853 | queryVectors[field].upsert(termIndex, 1 * clause.boost, function (a, b) { return a + b }) 1854 | 1855 | /** 1856 | * If we've already seen this term, field combo then we've already collected 1857 | * the matching documents and metadata, no need to go through all that again 1858 | */ 1859 | if (termFieldCache[termField]) { 1860 | continue 1861 | } 1862 | 1863 | for (var l = 0; l < matchingDocumentRefs.length; l++) { 1864 | /* 1865 | * All metadata for this term/field/document triple 1866 | * are then extracted and collected into an instance 1867 | * of lunr.MatchData ready to be returned in the query 1868 | * results 1869 | */ 1870 | var matchingDocumentRef = matchingDocumentRefs[l], 1871 | matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field), 1872 | metadata = fieldPosting[matchingDocumentRef], 1873 | fieldMatch 1874 | 1875 | if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) { 1876 | matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata) 1877 | } else { 1878 | fieldMatch.add(expandedTerm, field, metadata) 1879 | } 1880 | 1881 | } 1882 | 1883 | termFieldCache[termField] = true 1884 | } 1885 | } 1886 | } 1887 | } 1888 | 1889 | var matchingFieldRefs = Object.keys(matchingFields), 1890 | results = [], 1891 | matches = Object.create(null) 1892 | 1893 | for (var i = 0; i < matchingFieldRefs.length; i++) { 1894 | /* 1895 | * Currently we have document fields that match the query, but we 1896 | * need to return documents. The matchData and scores are combined 1897 | * from multiple fields belonging to the same document. 
1898 | * 1899 | * Scores are calculated by field, using the query vectors created 1900 | * above, and combined into a final document score using addition. 1901 | */ 1902 | var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]), 1903 | docRef = fieldRef.docRef, 1904 | fieldVector = this.fieldVectors[fieldRef], 1905 | score = queryVectors[fieldRef.fieldName].similarity(fieldVector), 1906 | docMatch 1907 | 1908 | if ((docMatch = matches[docRef]) !== undefined) { 1909 | docMatch.score += score 1910 | docMatch.matchData.combine(matchingFields[fieldRef]) 1911 | } else { 1912 | var match = { 1913 | ref: docRef, 1914 | score: score, 1915 | matchData: matchingFields[fieldRef] 1916 | } 1917 | matches[docRef] = match 1918 | results.push(match) 1919 | } 1920 | } 1921 | 1922 | /* 1923 | * Sort the results objects by score, highest first. 1924 | */ 1925 | return results.sort(function (a, b) { 1926 | return b.score - a.score 1927 | }) 1928 | } 1929 | 1930 | /** 1931 | * Prepares the index for JSON serialization. 1932 | * 1933 | * The schema for this JSON blob will be described in a 1934 | * separate JSON schema file. 
1935 | * 1936 | * @returns {Object} 1937 | */ 1938 | lunr.Index.prototype.toJSON = function () { 1939 | var invertedIndex = Object.keys(this.invertedIndex) 1940 | .sort() 1941 | .map(function (term) { 1942 | return [term, this.invertedIndex[term]] 1943 | }, this) 1944 | 1945 | var fieldVectors = Object.keys(this.fieldVectors) 1946 | .map(function (ref) { 1947 | return [ref, this.fieldVectors[ref].toJSON()] 1948 | }, this) 1949 | 1950 | return { 1951 | version: lunr.version, 1952 | fields: this.fields, 1953 | fieldVectors: fieldVectors, 1954 | invertedIndex: invertedIndex, 1955 | pipeline: this.pipeline.toJSON() 1956 | } 1957 | } 1958 | 1959 | /** 1960 | * Loads a previously serialized lunr.Index 1961 | * 1962 | * @param {Object} serializedIndex - A previously serialized lunr.Index 1963 | * @returns {lunr.Index} 1964 | */ 1965 | lunr.Index.load = function (serializedIndex) { 1966 | var attrs = {}, 1967 | fieldVectors = {}, 1968 | serializedVectors = serializedIndex.fieldVectors, 1969 | invertedIndex = {}, 1970 | serializedInvertedIndex = serializedIndex.invertedIndex, 1971 | tokenSetBuilder = new lunr.TokenSet.Builder, 1972 | pipeline = lunr.Pipeline.load(serializedIndex.pipeline) 1973 | 1974 | if (serializedIndex.version != lunr.version) { 1975 | lunr.utils.warn("Version mismatch when loading serialised index. 
Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'") 1976 | } 1977 | 1978 | for (var i = 0; i < serializedVectors.length; i++) { 1979 | var tuple = serializedVectors[i], 1980 | ref = tuple[0], 1981 | elements = tuple[1] 1982 | 1983 | fieldVectors[ref] = new lunr.Vector(elements) 1984 | } 1985 | 1986 | for (var i = 0; i < serializedInvertedIndex.length; i++) { 1987 | var tuple = serializedInvertedIndex[i], 1988 | term = tuple[0], 1989 | posting = tuple[1] 1990 | 1991 | tokenSetBuilder.insert(term) 1992 | invertedIndex[term] = posting 1993 | } 1994 | 1995 | tokenSetBuilder.finish() 1996 | 1997 | attrs.fields = serializedIndex.fields 1998 | 1999 | attrs.fieldVectors = fieldVectors 2000 | attrs.invertedIndex = invertedIndex 2001 | attrs.tokenSet = tokenSetBuilder.root 2002 | attrs.pipeline = pipeline 2003 | 2004 | return new lunr.Index(attrs) 2005 | } 2006 | /*! 2007 | * lunr.Builder 2008 | * Copyright (C) 2017 Oliver Nightingale 2009 | */ 2010 | 2011 | /** 2012 | * lunr.Builder performs indexing on a set of documents and 2013 | * returns instances of lunr.Index ready for querying. 2014 | * 2015 | * All configuration of the index is done via the builder, the 2016 | * fields to index, the document reference, the text processing 2017 | * pipeline and document scoring parameters are all set on the 2018 | * builder before indexing. 2019 | * 2020 | * @constructor 2021 | * @property {string} _ref - Internal reference to the document reference field. 2022 | * @property {string[]} _fields - Internal reference to the document fields to index. 2023 | * @property {object} invertedIndex - The inverted index maps terms to document fields. 2024 | * @property {object} documentTermFrequencies - Keeps track of document term frequencies. 2025 | * @property {object} documentLengths - Keeps track of the length of documents added to the index. 
2026 | * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing. 2027 | * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing. 2028 | * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index. 2029 | * @property {number} documentCount - Keeps track of the total number of documents indexed. 2030 | * @property {number} _b - A parameter to control field length normalization, setting this to 0 disabled normalization, 1 fully normalizes field lengths, the default value is 0.75. 2031 | * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2. 2032 | * @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space. 2033 | * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index. 2034 | */ 2035 | lunr.Builder = function () { 2036 | this._ref = "id" 2037 | this._fields = [] 2038 | this.invertedIndex = Object.create(null) 2039 | this.fieldTermFrequencies = {} 2040 | this.fieldLengths = {} 2041 | this.tokenizer = lunr.tokenizer 2042 | this.pipeline = new lunr.Pipeline 2043 | this.searchPipeline = new lunr.Pipeline 2044 | this.documentCount = 0 2045 | this._b = 0.75 2046 | this._k1 = 1.2 2047 | this.termIndex = 0 2048 | this.metadataWhitelist = [] 2049 | } 2050 | 2051 | /** 2052 | * Sets the document field used as the document reference. Every document must have this field. 2053 | * The type of this field in the document should be a string, if it is not a string it will be 2054 | * coerced into a string by calling toString. 2055 | * 2056 | * The default ref is 'id'. 2057 | * 2058 | * The ref should _not_ be changed during indexing, it should be set before any documents are 2059 | * added to the index. 
Changing it during indexing can lead to inconsistent results. 2060 | * 2061 | * @param {string} ref - The name of the reference field in the document. 2062 | */ 2063 | lunr.Builder.prototype.ref = function (ref) { 2064 | this._ref = ref 2065 | } 2066 | 2067 | /** 2068 | * Adds a field to the list of document fields that will be indexed. Every document being 2069 | * indexed should have this field. Null values for this field in indexed documents will 2070 | * not cause errors but will limit the chance of that document being retrieved by searches. 2071 | * 2072 | * All fields should be added before adding documents to the index. Adding fields after 2073 | * a document has been indexed will have no effect on already indexed documents. 2074 | * 2075 | * @param {string} field - The name of a field to index in all documents. 2076 | */ 2077 | lunr.Builder.prototype.field = function (field) { 2078 | this._fields.push(field) 2079 | } 2080 | 2081 | /** 2082 | * A parameter to tune the amount of field length normalisation that is applied when 2083 | * calculating relevance scores. A value of 0 will completely disable any normalisation 2084 | * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b 2085 | * will be clamped to the range 0 - 1. 2086 | * 2087 | * @param {number} number - The value to set for this tuning parameter. 2088 | */ 2089 | lunr.Builder.prototype.b = function (number) { 2090 | if (number < 0) { 2091 | this._b = 0 2092 | } else if (number > 1) { 2093 | this._b = 1 2094 | } else { 2095 | this._b = number 2096 | } 2097 | } 2098 | 2099 | /** 2100 | * A parameter that controls the speed at which a rise in term frequency results in term 2101 | * frequency saturation. The default value is 1.2. Setting this to a higher value will give 2102 | * slower saturation levels, a lower value will result in quicker saturation. 2103 | * 2104 | * @param {number} number - The value to set for this tuning parameter. 
2105 | */ 2106 | lunr.Builder.prototype.k1 = function (number) { 2107 | this._k1 = number 2108 | } 2109 | 2110 | /** 2111 | * Adds a document to the index. 2112 | * 2113 | * Before adding fields to the index the index should have been fully setup, with the document 2114 | * ref and all fields to index already having been specified. 2115 | * 2116 | * The document must have a field name as specified by the ref (by default this is 'id') and 2117 | * it should have all fields defined for indexing, though null or undefined values will not 2118 | * cause errors. 2119 | * 2120 | * @param {object} doc - The document to add to the index. 2121 | */ 2122 | lunr.Builder.prototype.add = function (doc) { 2123 | var docRef = doc[this._ref] 2124 | 2125 | this.documentCount += 1 2126 | 2127 | for (var i = 0; i < this._fields.length; i++) { 2128 | var fieldName = this._fields[i], 2129 | field = doc[fieldName], 2130 | tokens = this.tokenizer(field), 2131 | terms = this.pipeline.run(tokens), 2132 | fieldRef = new lunr.FieldRef (docRef, fieldName), 2133 | fieldTerms = Object.create(null) 2134 | 2135 | this.fieldTermFrequencies[fieldRef] = fieldTerms 2136 | this.fieldLengths[fieldRef] = 0 2137 | 2138 | // store the length of this field for this document 2139 | this.fieldLengths[fieldRef] += terms.length 2140 | 2141 | // calculate term frequencies for this field 2142 | for (var j = 0; j < terms.length; j++) { 2143 | var term = terms[j] 2144 | 2145 | if (fieldTerms[term] == undefined) { 2146 | fieldTerms[term] = 0 2147 | } 2148 | 2149 | fieldTerms[term] += 1 2150 | 2151 | // add to inverted index 2152 | // create an initial posting if one doesn't exist 2153 | if (this.invertedIndex[term] == undefined) { 2154 | var posting = Object.create(null) 2155 | posting["_index"] = this.termIndex 2156 | this.termIndex += 1 2157 | 2158 | for (var k = 0; k < this._fields.length; k++) { 2159 | posting[this._fields[k]] = Object.create(null) 2160 | } 2161 | 2162 | this.invertedIndex[term] = posting 2163 | 
} 2164 | 2165 | // add an entry for this term/fieldName/docRef to the invertedIndex 2166 | if (this.invertedIndex[term][fieldName][docRef] == undefined) { 2167 | this.invertedIndex[term][fieldName][docRef] = Object.create(null) 2168 | } 2169 | 2170 | // store all whitelisted metadata about this token in the 2171 | // inverted index 2172 | for (var l = 0; l < this.metadataWhitelist.length; l++) { 2173 | var metadataKey = this.metadataWhitelist[l], 2174 | metadata = term.metadata[metadataKey] 2175 | 2176 | if (this.invertedIndex[term][fieldName][docRef][metadataKey] == undefined) { 2177 | this.invertedIndex[term][fieldName][docRef][metadataKey] = [] 2178 | } 2179 | 2180 | this.invertedIndex[term][fieldName][docRef][metadataKey].push(metadata) 2181 | } 2182 | } 2183 | 2184 | } 2185 | } 2186 | 2187 | /** 2188 | * Calculates the average document length for this index 2189 | * 2190 | * @private 2191 | */ 2192 | lunr.Builder.prototype.calculateAverageFieldLengths = function () { 2193 | 2194 | var fieldRefs = Object.keys(this.fieldLengths), 2195 | numberOfFields = fieldRefs.length, 2196 | accumulator = {}, 2197 | documentsWithField = {} 2198 | 2199 | for (var i = 0; i < numberOfFields; i++) { 2200 | var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), 2201 | field = fieldRef.fieldName 2202 | 2203 | documentsWithField[field] || (documentsWithField[field] = 0) 2204 | documentsWithField[field] += 1 2205 | 2206 | accumulator[field] || (accumulator[field] = 0) 2207 | accumulator[field] += this.fieldLengths[fieldRef] 2208 | } 2209 | 2210 | for (var i = 0; i < this._fields.length; i++) { 2211 | var field = this._fields[i] 2212 | accumulator[field] = accumulator[field] / documentsWithField[field] 2213 | } 2214 | 2215 | this.averageFieldLength = accumulator 2216 | } 2217 | 2218 | /** 2219 | * Builds a vector space model of every document using lunr.Vector 2220 | * 2221 | * @private 2222 | */ 2223 | lunr.Builder.prototype.createFieldVectors = function () { 2224 | var fieldVectors 
= {}, 2225 | fieldRefs = Object.keys(this.fieldTermFrequencies), 2226 | fieldRefsLength = fieldRefs.length, 2227 | termIdfCache = Object.create(null) 2228 | 2229 | for (var i = 0; i < fieldRefsLength; i++) { 2230 | var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), 2231 | field = fieldRef.fieldName, 2232 | fieldLength = this.fieldLengths[fieldRef], 2233 | fieldVector = new lunr.Vector, 2234 | termFrequencies = this.fieldTermFrequencies[fieldRef], 2235 | terms = Object.keys(termFrequencies), 2236 | termsLength = terms.length 2237 | 2238 | for (var j = 0; j < termsLength; j++) { 2239 | var term = terms[j], 2240 | tf = termFrequencies[term], 2241 | termIndex = this.invertedIndex[term]._index, 2242 | idf, score, scoreWithPrecision 2243 | 2244 | if (termIdfCache[term] === undefined) { 2245 | idf = lunr.idf(this.invertedIndex[term], this.documentCount) 2246 | termIdfCache[term] = idf 2247 | } else { 2248 | idf = termIdfCache[term] 2249 | } 2250 | 2251 | score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (fieldLength / this.averageFieldLength[field])) + tf) 2252 | scoreWithPrecision = Math.round(score * 1000) / 1000 2253 | // Converts 1.23456789 to 1.234. 2254 | // Reducing the precision so that the vectors take up less 2255 | // space when serialised. Doing it now so that they behave 2256 | // the same before and after serialisation. Also, this is 2257 | // the fastest approach to reducing a number's precision in 2258 | // JavaScript. 
2259 | 2260 | fieldVector.insert(termIndex, scoreWithPrecision) 2261 | } 2262 | 2263 | fieldVectors[fieldRef] = fieldVector 2264 | } 2265 | 2266 | this.fieldVectors = fieldVectors 2267 | } 2268 | 2269 | /** 2270 | * Creates a token set of all tokens in the index using lunr.TokenSet 2271 | * 2272 | * @private 2273 | */ 2274 | lunr.Builder.prototype.createTokenSet = function () { 2275 | this.tokenSet = lunr.TokenSet.fromArray( 2276 | Object.keys(this.invertedIndex).sort() 2277 | ) 2278 | } 2279 | 2280 | /** 2281 | * Builds the index, creating an instance of lunr.Index. 2282 | * 2283 | * This completes the indexing process and should only be called 2284 | * once all documents have been added to the index. 2285 | * 2286 | * @returns {lunr.Index} 2287 | */ 2288 | lunr.Builder.prototype.build = function () { 2289 | this.calculateAverageFieldLengths() 2290 | this.createFieldVectors() 2291 | this.createTokenSet() 2292 | 2293 | return new lunr.Index({ 2294 | invertedIndex: this.invertedIndex, 2295 | fieldVectors: this.fieldVectors, 2296 | tokenSet: this.tokenSet, 2297 | fields: this._fields, 2298 | pipeline: this.searchPipeline 2299 | }) 2300 | } 2301 | 2302 | /** 2303 | * Applies a plugin to the index builder. 2304 | * 2305 | * A plugin is a function that is called with the index builder as its context. 2306 | * Plugins can be used to customise or extend the behaviour of the index 2307 | * in some way. A plugin is just a function, that encapsulated the custom 2308 | * behaviour that should be applied when building the index. 2309 | * 2310 | * The plugin function will be called with the index builder as its argument, additional 2311 | * arguments can also be passed when calling use. The function will be called 2312 | * with the index builder as its context. 2313 | * 2314 | * @param {Function} plugin The plugin to apply. 
2315 | */ 2316 | lunr.Builder.prototype.use = function (fn) { 2317 | var args = Array.prototype.slice.call(arguments, 1) 2318 | args.unshift(this) 2319 | fn.apply(this, args) 2320 | } 2321 | /** 2322 | * Contains and collects metadata about a matching document. 2323 | * A single instance of lunr.MatchData is returned as part of every 2324 | * lunr.Index~Result. 2325 | * 2326 | * @constructor 2327 | * @param {string} term - The term this match data is associated with 2328 | * @param {string} field - The field in which the term was found 2329 | * @param {object} metadata - The metadata recorded about this term in this field 2330 | * @property {object} metadata - A cloned collection of metadata associated with this document. 2331 | * @see {@link lunr.Index~Result} 2332 | */ 2333 | lunr.MatchData = function (term, field, metadata) { 2334 | var clonedMetadata = Object.create(null), 2335 | metadataKeys = Object.keys(metadata) 2336 | 2337 | // Cloning the metadata to prevent the original 2338 | // being mutated during match data combination. 2339 | // Metadata is kept in an array within the inverted 2340 | // index so cloning the data can be done with 2341 | // Array#slice 2342 | for (var i = 0; i < metadataKeys.length; i++) { 2343 | var key = metadataKeys[i] 2344 | clonedMetadata[key] = metadata[key].slice() 2345 | } 2346 | 2347 | this.metadata = Object.create(null) 2348 | this.metadata[term] = Object.create(null) 2349 | this.metadata[term][field] = clonedMetadata 2350 | } 2351 | 2352 | /** 2353 | * An instance of lunr.MatchData will be created for every term that matches a 2354 | * document. However only one instance is required in a lunr.Index~Result. This 2355 | * method combines metadata from another instance of lunr.MatchData with this 2356 | * objects metadata. 2357 | * 2358 | * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one. 
2359 | * @see {@link lunr.Index~Result} 2360 | */ 2361 | lunr.MatchData.prototype.combine = function (otherMatchData) { 2362 | var terms = Object.keys(otherMatchData.metadata) 2363 | 2364 | for (var i = 0; i < terms.length; i++) { 2365 | var term = terms[i], 2366 | fields = Object.keys(otherMatchData.metadata[term]) 2367 | 2368 | if (this.metadata[term] == undefined) { 2369 | this.metadata[term] = Object.create(null) 2370 | } 2371 | 2372 | for (var j = 0; j < fields.length; j++) { 2373 | var field = fields[j], 2374 | keys = Object.keys(otherMatchData.metadata[term][field]) 2375 | 2376 | if (this.metadata[term][field] == undefined) { 2377 | this.metadata[term][field] = Object.create(null) 2378 | } 2379 | 2380 | for (var k = 0; k < keys.length; k++) { 2381 | var key = keys[k] 2382 | 2383 | if (this.metadata[term][field][key] == undefined) { 2384 | this.metadata[term][field][key] = otherMatchData.metadata[term][field][key] 2385 | } else { 2386 | this.metadata[term][field][key] = this.metadata[term][field][key].concat(otherMatchData.metadata[term][field][key]) 2387 | } 2388 | 2389 | } 2390 | } 2391 | } 2392 | } 2393 | 2394 | /** 2395 | * Add metadata for a term/field pair to this instance of match data. 
2396 | * 2397 | * @param {string} term - The term this match data is associated with 2398 | * @param {string} field - The field in which the term was found 2399 | * @param {object} metadata - The metadata recorded about this term in this field 2400 | */ 2401 | lunr.MatchData.prototype.add = function (term, field, metadata) { 2402 | if (!(term in this.metadata)) { 2403 | this.metadata[term] = Object.create(null) 2404 | this.metadata[term][field] = metadata 2405 | return 2406 | } 2407 | 2408 | if (!(field in this.metadata[term])) { 2409 | this.metadata[term][field] = metadata 2410 | return 2411 | } 2412 | 2413 | var metadataKeys = Object.keys(metadata) 2414 | 2415 | for (var i = 0; i < metadataKeys.length; i++) { 2416 | var key = metadataKeys[i] 2417 | 2418 | if (key in this.metadata[term][field]) { 2419 | this.metadata[term][field][key] = this.metadata[term][field][key].concat(metadata[key]) 2420 | } else { 2421 | this.metadata[term][field][key] = metadata[key] 2422 | } 2423 | } 2424 | } 2425 | /** 2426 | * A lunr.Query provides a programmatic way of defining queries to be performed 2427 | * against a {@link lunr.Index}. 2428 | * 2429 | * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method 2430 | * so the query object is pre-initialized with the right index fields. 2431 | * 2432 | * @constructor 2433 | * @property {lunr.Query~Clause[]} clauses - An array of query clauses. 2434 | * @property {string[]} allFields - An array of all available fields in a lunr.Index. 2435 | */ 2436 | lunr.Query = function (allFields) { 2437 | this.clauses = [] 2438 | this.allFields = allFields 2439 | } 2440 | 2441 | /** 2442 | * Constants for indicating what kind of automatic wildcard insertion will be used when constructing a query clause. 2443 | * 2444 | * This allows wildcards to be added to the beginning and end of a term without having to manually do any string 2445 | * concatenation. 
2446 | * 2447 | * The wildcard constants can be bitwise combined to select both leading and trailing wildcards. 2448 | * 2449 | * @constant 2450 | * @default 2451 | * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour 2452 | * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists 2453 | * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists 2454 | * @see lunr.Query~Clause 2455 | * @see lunr.Query#clause 2456 | * @see lunr.Query#term 2457 | * @example query term with trailing wildcard 2458 | * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING }) 2459 | * @example query term with leading and trailing wildcard 2460 | * query.term('foo', { 2461 | * wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING 2462 | * }) 2463 | */ 2464 | lunr.Query.wildcard = new String ("*") 2465 | lunr.Query.wildcard.NONE = 0 2466 | lunr.Query.wildcard.LEADING = 1 2467 | lunr.Query.wildcard.TRAILING = 2 2468 | 2469 | /** 2470 | * A single clause in a {@link lunr.Query} contains a term and details on how to 2471 | * match that term against a {@link lunr.Index}. 2472 | * 2473 | * @typedef {Object} lunr.Query~Clause 2474 | * @property {string[]} fields - The fields in an index this clause should be matched against. 2475 | * @property {number} [boost=1] - Any boost that should be applied when matching this clause. 2476 | * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be. 2477 | * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline. 2478 | * @property {number} [wildcard=0] - Whether the term should have wildcards appended or prepended. 2479 | */ 2480 | 2481 | /** 2482 | * Adds a {@link lunr.Query~Clause} to this query. 
2483 | * 2484 | * Unless the clause contains the fields to be matched all fields will be matched. In addition 2485 | * a default boost of 1 is applied to the clause. 2486 | * 2487 | * @param {lunr.Query~Clause} clause - The clause to add to this query. 2488 | * @see lunr.Query~Clause 2489 | * @returns {lunr.Query} 2490 | */ 2491 | lunr.Query.prototype.clause = function (clause) { 2492 | if (!('fields' in clause)) { 2493 | clause.fields = this.allFields 2494 | } 2495 | 2496 | if (!('boost' in clause)) { 2497 | clause.boost = 1 2498 | } 2499 | 2500 | if (!('usePipeline' in clause)) { 2501 | clause.usePipeline = true 2502 | } 2503 | 2504 | if (!('wildcard' in clause)) { 2505 | clause.wildcard = lunr.Query.wildcard.NONE 2506 | } 2507 | 2508 | if ((clause.wildcard & lunr.Query.wildcard.LEADING) && (clause.term.charAt(0) != lunr.Query.wildcard)) { 2509 | clause.term = "*" + clause.term 2510 | } 2511 | 2512 | if ((clause.wildcard & lunr.Query.wildcard.TRAILING) && (clause.term.slice(-1) != lunr.Query.wildcard)) { 2513 | clause.term = "" + clause.term + "*" 2514 | } 2515 | 2516 | this.clauses.push(clause) 2517 | 2518 | return this 2519 | } 2520 | 2521 | /** 2522 | * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause} 2523 | * to the list of clauses that make up this query. 2524 | * 2525 | * @param {string} term - The term to add to the query. 2526 | * @param {Object} [options] - Any additional properties to add to the query clause. 
2527 | * @returns {lunr.Query} 2528 | * @see lunr.Query#clause 2529 | * @see lunr.Query~Clause 2530 | * @example adding a single term to a query 2531 | * query.term("foo") 2532 | * @example adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard 2533 | * query.term("foo", { 2534 | * fields: ["title"], 2535 | * boost: 10, 2536 | * wildcard: lunr.Query.wildcard.TRAILING 2537 | * }) 2538 | */ 2539 | lunr.Query.prototype.term = function (term, options) { 2540 | var clause = options || {} 2541 | clause.term = term 2542 | 2543 | this.clause(clause) 2544 | 2545 | return this 2546 | } 2547 | lunr.QueryParseError = function (message, start, end) { 2548 | this.name = "QueryParseError" 2549 | this.message = message 2550 | this.start = start 2551 | this.end = end 2552 | } 2553 | 2554 | lunr.QueryParseError.prototype = new Error 2555 | lunr.QueryLexer = function (str) { 2556 | this.lexemes = [] 2557 | this.str = str 2558 | this.length = str.length 2559 | this.pos = 0 2560 | this.start = 0 2561 | this.escapeCharPositions = [] 2562 | } 2563 | 2564 | lunr.QueryLexer.prototype.run = function () { 2565 | var state = lunr.QueryLexer.lexText 2566 | 2567 | while (state) { 2568 | state = state(this) 2569 | } 2570 | } 2571 | 2572 | lunr.QueryLexer.prototype.sliceString = function () { 2573 | var subSlices = [], 2574 | sliceStart = this.start, 2575 | sliceEnd = this.pos 2576 | 2577 | for (var i = 0; i < this.escapeCharPositions.length; i++) { 2578 | sliceEnd = this.escapeCharPositions[i] 2579 | subSlices.push(this.str.slice(sliceStart, sliceEnd)) 2580 | sliceStart = sliceEnd + 1 2581 | } 2582 | 2583 | subSlices.push(this.str.slice(sliceStart, this.pos)) 2584 | this.escapeCharPositions.length = 0 2585 | 2586 | return subSlices.join('') 2587 | } 2588 | 2589 | lunr.QueryLexer.prototype.emit = function (type) { 2590 | this.lexemes.push({ 2591 | type: type, 2592 | str: this.sliceString(), 2593 | start: this.start, 2594 | end: this.pos 2595 | }) 
2596 | 2597 | this.start = this.pos 2598 | } 2599 | 2600 | lunr.QueryLexer.prototype.escapeCharacter = function () { 2601 | this.escapeCharPositions.push(this.pos - 1) 2602 | this.pos += 1 2603 | } 2604 | 2605 | lunr.QueryLexer.prototype.next = function () { 2606 | if (this.pos >= this.length) { 2607 | return lunr.QueryLexer.EOS 2608 | } 2609 | 2610 | var char = this.str.charAt(this.pos) 2611 | this.pos += 1 2612 | return char 2613 | } 2614 | 2615 | lunr.QueryLexer.prototype.width = function () { 2616 | return this.pos - this.start 2617 | } 2618 | 2619 | lunr.QueryLexer.prototype.ignore = function () { 2620 | if (this.start == this.pos) { 2621 | this.pos += 1 2622 | } 2623 | 2624 | this.start = this.pos 2625 | } 2626 | 2627 | lunr.QueryLexer.prototype.backup = function () { 2628 | this.pos -= 1 2629 | } 2630 | 2631 | lunr.QueryLexer.prototype.acceptDigitRun = function () { 2632 | var char, charCode 2633 | 2634 | do { 2635 | char = this.next() 2636 | charCode = char.charCodeAt(0) 2637 | } while (charCode > 47 && charCode < 58) 2638 | 2639 | if (char != lunr.QueryLexer.EOS) { 2640 | this.backup() 2641 | } 2642 | } 2643 | 2644 | lunr.QueryLexer.prototype.more = function () { 2645 | return this.pos < this.length 2646 | } 2647 | 2648 | lunr.QueryLexer.EOS = 'EOS' 2649 | lunr.QueryLexer.FIELD = 'FIELD' 2650 | lunr.QueryLexer.TERM = 'TERM' 2651 | lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE' 2652 | lunr.QueryLexer.BOOST = 'BOOST' 2653 | 2654 | lunr.QueryLexer.lexField = function (lexer) { 2655 | lexer.backup() 2656 | lexer.emit(lunr.QueryLexer.FIELD) 2657 | lexer.ignore() 2658 | return lunr.QueryLexer.lexText 2659 | } 2660 | 2661 | lunr.QueryLexer.lexTerm = function (lexer) { 2662 | if (lexer.width() > 1) { 2663 | lexer.backup() 2664 | lexer.emit(lunr.QueryLexer.TERM) 2665 | } 2666 | 2667 | lexer.ignore() 2668 | 2669 | if (lexer.more()) { 2670 | return lunr.QueryLexer.lexText 2671 | } 2672 | } 2673 | 2674 | lunr.QueryLexer.lexEditDistance = function (lexer) { 2675 | 
lexer.ignore() 2676 | lexer.acceptDigitRun() 2677 | lexer.emit(lunr.QueryLexer.EDIT_DISTANCE) 2678 | return lunr.QueryLexer.lexText 2679 | } 2680 | 2681 | lunr.QueryLexer.lexBoost = function (lexer) { 2682 | lexer.ignore() 2683 | lexer.acceptDigitRun() 2684 | lexer.emit(lunr.QueryLexer.BOOST) 2685 | return lunr.QueryLexer.lexText 2686 | } 2687 | 2688 | lunr.QueryLexer.lexEOS = function (lexer) { 2689 | if (lexer.width() > 0) { 2690 | lexer.emit(lunr.QueryLexer.TERM) 2691 | } 2692 | } 2693 | 2694 | // This matches the separator used when tokenising fields 2695 | // within a document. These should match otherwise it is 2696 | // not possible to search for some tokens within a document. 2697 | // 2698 | // It is possible for the user to change the separator on the 2699 | // tokenizer so it _might_ clash with any other of the special 2700 | // characters already used within the search string, e.g. :. 2701 | // 2702 | // This means that it is possible to change the separator in 2703 | // such a way that makes some words unsearchable using a search 2704 | // string. 
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator

/**
 * Default lexer state. Reads characters until one with a special meaning
 * turns up and hands over to the state responsible for it:
 * end of input, ":" (field), "~" (edit distance), "^" (boost) or a term
 * separator. A backslash escapes the character that follows it.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state function.
 */
lunr.QueryLexer.lexText = function (lexer) {
  var current

  for (;;) {
    current = lexer.next()

    if (current == lunr.QueryLexer.EOS) {
      return lunr.QueryLexer.lexEOS
    }

    switch (current) {
      case "\\":
        // escape character: skip whatever follows and keep reading
        lexer.escapeCharacter()
        continue

      case ":":
        return lunr.QueryLexer.lexField

      case "~":
      case "^":
        // whatever was read before the modifier is a complete term
        lexer.backup()

        if (lexer.width() > 0) {
          lexer.emit(lunr.QueryLexer.TERM)
        }

        return current == "~" ? lunr.QueryLexer.lexEditDistance : lunr.QueryLexer.lexBoost
    }

    if (current.match(lunr.QueryLexer.termSeparator)) {
      return lunr.QueryLexer.lexTerm
    }
  }
}

/**
 * Turns a query string into clauses on the given query.
 *
 * @constructor
 * @param {string} str - The query string to parse.
 * @param {lunr.Query} query - The query that receives the parsed clauses.
 */
lunr.QueryParser = function (str, query) {
  this.lexer = new lunr.QueryLexer (str)
  this.lexemeIdx = 0
  this.currentClause = {}
  this.query = query
}

/**
 * Lexes the query string, then runs the parser states (starting with
 * parseFieldOrTerm) until one of them returns nothing.
 *
 * @returns {lunr.Query} The query handed to the constructor.
 */
lunr.QueryParser.prototype.parse = function () {
  this.lexer.run()
  this.lexemes = this.lexer.lexemes

  for (var step = lunr.QueryParser.parseFieldOrTerm; step; step = step(this)) {
    // each state function returns its successor
  }

  return this.query
}

/**
 * Returns the lexeme at the current position without moving past it;
 * undefined once every lexeme has been consumed.
 */
lunr.QueryParser.prototype.peekLexeme = function () {
  return this.lexemes[this.lexemeIdx]
}

/**
 * Returns the lexeme at the current position and moves on to the next one.
 */
lunr.QueryParser.prototype.consumeLexeme = function () {
  var consumed = this.peekLexeme()
  this.lexemeIdx = this.lexemeIdx + 1
  return consumed
}

/**
 * Adds the clause built so far to the query and starts a new, empty one.
 */
lunr.QueryParser.prototype.nextClause = function () {
  this.query.clause(this.currentClause)
  this.currentClause = {}
}

lunr.QueryParser.parseFieldOrTerm = function (parser) { 2784 | var lexeme = parser.peekLexeme() 2785 | 2786 | if (lexeme == undefined) { 2787 | return 2788 | } 2789 | 2790 | switch (lexeme.type) { 2791 | case lunr.QueryLexer.FIELD: 2792 | return lunr.QueryParser.parseField 2793 | case lunr.QueryLexer.TERM: 2794 | return lunr.QueryParser.parseTerm 2795 | default: 2796 | var errorMessage = "expected either a field or a term, found " + lexeme.type 2797 | 2798 | if (lexeme.str.length >= 1) { 2799 | errorMessage += " with value '" + lexeme.str + "'" 2800 | } 2801 | 2802 | throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 2803 | } 2804 | } 2805 | 2806 | lunr.QueryParser.parseField = function (parser) { 2807 | var lexeme = parser.consumeLexeme() 2808 | 2809 | if (lexeme == undefined) { 2810 | return 2811 | } 2812 | 2813 | if (parser.query.allFields.indexOf(lexeme.str) == -1) { 2814 | var possibleFields = parser.query.allFields.map(function (f) { return "'" + f + "'" }).join(', '), 2815 | errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields 2816 | 2817 | throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 2818 | } 2819 | 2820 | parser.currentClause.fields = [lexeme.str] 2821 | 2822 | var nextLexeme = parser.peekLexeme() 2823 | 2824 | if (nextLexeme == undefined) { 2825 | var errorMessage = "expecting term, found nothing" 2826 | throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 2827 | } 2828 | 2829 | switch (nextLexeme.type) { 2830 | case lunr.QueryLexer.TERM: 2831 | return lunr.QueryParser.parseTerm 2832 | default: 2833 | var errorMessage = "expecting term, found '" + nextLexeme.type + "'" 2834 | throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 2835 | } 2836 | } 2837 | 2838 | lunr.QueryParser.parseTerm = function (parser) { 2839 | var lexeme = parser.consumeLexeme() 2840 | 2841 | if (lexeme == undefined) { 2842 | return 2843 | } 2844 | 2845 
| parser.currentClause.term = lexeme.str.toLowerCase() 2846 | 2847 | if (lexeme.str.indexOf("*") != -1) { 2848 | parser.currentClause.usePipeline = false 2849 | } 2850 | 2851 | var nextLexeme = parser.peekLexeme() 2852 | 2853 | if (nextLexeme == undefined) { 2854 | parser.nextClause() 2855 | return 2856 | } 2857 | 2858 | switch (nextLexeme.type) { 2859 | case lunr.QueryLexer.TERM: 2860 | parser.nextClause() 2861 | return lunr.QueryParser.parseTerm 2862 | case lunr.QueryLexer.FIELD: 2863 | parser.nextClause() 2864 | return lunr.QueryParser.parseField 2865 | case lunr.QueryLexer.EDIT_DISTANCE: 2866 | return lunr.QueryParser.parseEditDistance 2867 | case lunr.QueryLexer.BOOST: 2868 | return lunr.QueryParser.parseBoost 2869 | default: 2870 | var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" 2871 | throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 2872 | } 2873 | } 2874 | 2875 | lunr.QueryParser.parseEditDistance = function (parser) { 2876 | var lexeme = parser.consumeLexeme() 2877 | 2878 | if (lexeme == undefined) { 2879 | return 2880 | } 2881 | 2882 | var editDistance = parseInt(lexeme.str, 10) 2883 | 2884 | if (isNaN(editDistance)) { 2885 | var errorMessage = "edit distance must be numeric" 2886 | throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 2887 | } 2888 | 2889 | parser.currentClause.editDistance = editDistance 2890 | 2891 | var nextLexeme = parser.peekLexeme() 2892 | 2893 | if (nextLexeme == undefined) { 2894 | parser.nextClause() 2895 | return 2896 | } 2897 | 2898 | switch (nextLexeme.type) { 2899 | case lunr.QueryLexer.TERM: 2900 | parser.nextClause() 2901 | return lunr.QueryParser.parseTerm 2902 | case lunr.QueryLexer.FIELD: 2903 | parser.nextClause() 2904 | return lunr.QueryParser.parseField 2905 | case lunr.QueryLexer.EDIT_DISTANCE: 2906 | return lunr.QueryParser.parseEditDistance 2907 | case lunr.QueryLexer.BOOST: 2908 | return lunr.QueryParser.parseBoost 2909 | default: 2910 | var 
errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" 2911 | throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 2912 | } 2913 | } 2914 | 2915 | lunr.QueryParser.parseBoost = function (parser) { 2916 | var lexeme = parser.consumeLexeme() 2917 | 2918 | if (lexeme == undefined) { 2919 | return 2920 | } 2921 | 2922 | var boost = parseInt(lexeme.str, 10) 2923 | 2924 | if (isNaN(boost)) { 2925 | var errorMessage = "boost must be numeric" 2926 | throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) 2927 | } 2928 | 2929 | parser.currentClause.boost = boost 2930 | 2931 | var nextLexeme = parser.peekLexeme() 2932 | 2933 | if (nextLexeme == undefined) { 2934 | parser.nextClause() 2935 | return 2936 | } 2937 | 2938 | switch (nextLexeme.type) { 2939 | case lunr.QueryLexer.TERM: 2940 | parser.nextClause() 2941 | return lunr.QueryParser.parseTerm 2942 | case lunr.QueryLexer.FIELD: 2943 | parser.nextClause() 2944 | return lunr.QueryParser.parseField 2945 | case lunr.QueryLexer.EDIT_DISTANCE: 2946 | return lunr.QueryParser.parseEditDistance 2947 | case lunr.QueryLexer.BOOST: 2948 | return lunr.QueryParser.parseBoost 2949 | default: 2950 | var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" 2951 | throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) 2952 | } 2953 | } 2954 | 2955 | /** 2956 | * export the module via AMD, CommonJS or as a browser global 2957 | * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js 2958 | */ 2959 | ;(function (root, factory) { 2960 | if (typeof define === 'function' && define.amd) { 2961 | // AMD. Register as an anonymous module. 2962 | define(factory) 2963 | } else if (typeof exports === 'object') { 2964 | /** 2965 | * Node. Does not work with strict CommonJS, but 2966 | * only CommonJS-like enviroments that support module.exports, 2967 | * like Node. 
2968 | */ 2969 | module.exports = factory() 2970 | } else { 2971 | // Browser globals (root is window) 2972 | root.lunr = factory() 2973 | } 2974 | }(this, function () { 2975 | /** 2976 | * Just return a value to define the module export. 2977 | * This example returns an object, but the module 2978 | * can return a function as the exported value. 2979 | */ 2980 | return lunr 2981 | })) 2982 | })(); 2983 | --------------------------------------------------------------------------------