├── .gitignore
├── .jshintignore
├── .jshintrc
├── .npmignore
├── .travis.yml
├── LICENSE
├── README.md
├── bin
├── dev-server.js
├── run-test.sh
└── test-browser.js
├── bower.json
├── dist
├── pouchdb.quick-search.js
└── pouchdb.quick-search.min.js
├── docs
└── extra_database.png
├── lib
├── index.js
└── pouch-utils.js
├── package.json
└── test
├── bind-polyfill.js
├── deps
├── lunr.fr.js
├── lunr.multi.js
└── lunr.stemmer.support.js
├── docs
├── test-docs-2.js
├── test-docs-3.js
├── test-docs-4.js
├── test-docs-5.js
├── test-docs-6.js
├── test-docs-7.js
├── test-docs-8.js
├── test-docs-9.js
└── test-docs.js
├── index.html
├── test.js
└── webrunner.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .DS_Store
3 | *~
4 | coverage
5 | test/test-bundle.js
6 | npm-debug.log
7 | dist
8 |
--------------------------------------------------------------------------------
/.jshintignore:
--------------------------------------------------------------------------------
1 | test/deps/*.js
2 |
--------------------------------------------------------------------------------
/.jshintrc:
--------------------------------------------------------------------------------
1 | {
2 | "curly": true,
3 | "eqeqeq": true,
4 | "immed": true,
5 | "newcap": true,
6 | "noarg": true,
7 | "sub": true,
8 | "undef": true,
9 | "unused": true,
10 | "eqnull": true,
11 | "browser": true,
12 | "node": true,
13 | "strict": true,
14 | "globalstrict": true,
15 | "globals": { "Pouch": true},
16 | "white": true,
17 | "indent": 2,
18 | "maxlen": 100,
19 | "predef": [
20 | "process",
21 | "global",
22 | "require",
23 | "console",
24 | "describe",
25 | "beforeEach",
26 | "afterEach",
27 | "it",
28 | "emit"
29 | ]
30 | }
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | .git*
2 | node_modules
3 | .DS_Store
4 | *~
5 | coverage
6 | npm-debug.log
7 | vendor/
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 |
3 | services:
4 | - couchdb
5 |
6 | node_js:
7 | - "5"
8 | sudo: false
9 | script: npm run $COMMAND
10 | before_script:
11 | - "npm install add-cors-to-couchdb"
12 | - "./node_modules/.bin/add-cors-to-couchdb"
13 |
14 | env:
15 | matrix:
16 | - COMMAND=test
17 | - CLIENT=selenium:phantomjs COMMAND=test
18 | - COMMAND=coverage
19 |
20 | branches:
21 | only:
22 | - master
23 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | PouchDB Quick Search
2 | =====
3 |
4 | [![Build Status](https://travis-ci.org/nolanlawson/pouchdb-quick-search.svg?branch=master)](https://travis-ci.org/nolanlawson/pouchdb-quick-search)
5 |
6 | ```js
7 | var pouch = new PouchDB('mydb');
8 | var doc = {_id: 'mydoc', title: "Guess who?", text: "It's-a me, Mario!"};
9 |
10 | pouch.put(doc).then(function () {
11 | return pouch.search({
12 | query: 'mario',
13 | fields: ['title', 'text'],
14 | include_docs: true,
15 | highlighting: true
16 | });
17 | }).then(function (res) {
18 | console.log(res.rows[0].doc.text); // "It's-a me, Mario!"
19 | console.log(res.rows[0].highlighting); // {"text": "It's-a me, Mario!"}
20 | });
21 | ```
22 |
23 | ([Live demo](http://bl.ocks.org/nolanlawson/5d326f3692bc65cf89fd))
24 |
25 | A very efficient and accurate full-text search engine built on top of PouchDB. Analyzes text, indexes it, and provides a simple but powerful API for querying. Ideal for PhoneGap apps or any webapp that needs offline search support.
26 |
27 | This is a local plugin, so it is not designed to work against CouchDB/Cloudant/etc. If you'd like to search against the server, use the [CouchDB Lucene plugin](https://github.com/rnewson/couchdb-lucene), [Cloudant's search indexes](https://cloudant.com/for-developers/search/), or something similar.
28 |
29 | If you need prefix search (e.g. for autocompletion), then just use PouchDB itself. The `allDocs()` and `query()` APIs plus `startkey` should give you everything you need for prefix lookup. See the [autosuggestions and prefix search](#autosuggestions-and-prefix-search) section for details.
30 |
31 | The underlying tokenization/stemming/stopword engine is [Lunr][], which is optimized for English text, using a variant of the [Porter stemmer](http://tartarus.org/~martin/PorterStemmer/index.html). To optimize for other languages, check out [lunr-languages](https://github.com/MihaiValentin/lunr-languages) and see the ["other languages"](#other-languages) section.
32 |
33 | Usage
34 | --------
35 |
36 | #### In the browser
37 |
38 | To use this plugin, include it after `pouchdb.js` in your HTML page:
39 |
40 | ```html
41 | <script src="pouchdb.js"></script>
42 | <script src="pouchdb.quick-search.js"></script>
43 | ```
44 |
45 | This plugin is also available from Bower:
46 |
47 | ```
48 | bower install pouchdb-quick-search
49 | ```
50 |
51 | #### In Node.js/Browserify/Webpack
52 |
53 | Just npm install it:
54 |
55 | ```
56 | npm install pouchdb-quick-search
57 | ```
58 |
59 | And then attach it to the `PouchDB` object:
60 |
61 | ```js
62 | var PouchDB = require('pouchdb');
63 | PouchDB.plugin(require('pouchdb-quick-search'));
64 | ```
65 |
66 | API
67 | ---------
68 |
69 | **Topics:**
70 |
71 | * [Basic queries](#basic-queries)
72 | * [Document structure](#document-structure)
73 | * [Fetching the full documents](#fetching-the-full-documents)
74 | * [Highlighting](#highlighting)
75 | * [Pagination](#pagination)
76 | * [Boosting fields](#boosting-fields)
77 | * [Minimum should match (mm)](#minimum-should-match-mm)
78 | * [Filtering documents](#filtering-documents)
79 | * [Building the index](#building-the-index)
80 | * [Deleting the index](#deleting-the-index)
81 | * [Stale queries](#stale-queries)
82 | * [Other languages](#other-languages)
83 | * [Multi-language search](#multi-language-search)
84 | * [Autosuggestions and prefix search](#autosuggestions-and-prefix-search)
85 |
86 |
87 | ### Basic queries
88 |
89 | ```js
90 | pouch.search({
91 | query: 'your query here',
92 | fields: ['title', 'text']
93 | }).then(function (res) {
94 | // handle results
95 | }).catch(function (err) {
96 | // handle error
97 | });
98 | ```
99 |
100 | **Response:**
101 |
102 | ```js
103 | { rows:
104 | [
105 | { id: 'mydoc5', score: 0.08027856564851082 },
106 | { id: 'mydoc3', score: 0.044194173824159216 },
107 | { id: 'mydoc4', score: 0.044194173824159216 }
108 | ],
109 | total_rows: 3
110 | }
111 | ```
112 |
113 | In the simplest case, you call `pouch.search()` with a `query` and a list of document `field`s to search. The results contain a list of matching document `id`s and `score`s, sorted from high to low.
114 |
115 | If any document is missing a field, then it's simply ignored. You can search one or more fields at a time.
116 |
117 | Like most of the PouchDB API, the `search()` function returns a promise. But if you like callbacks, you can also use that style:
118 |
119 | ```js
120 | pouch.search({
121 | query: 'your query here',
122 | fields: ['title', 'text']
123 | }, function (err, res) {
124 | if (err) {
125 | // handle error
126 | } else {
127 | // handle results
128 | }
129 | });
130 | ```
131 |
132 | ### Document structure
133 |
134 | Your document fields can be strings or arrays of strings. Use dots to separate deeply nested fields. Searching deeply inside arrays is supported.
135 |
136 | ```js
137 | var doc = {
138 | _id: 'mydoc',
139 | name: 'Princess Peach',
140 | likes: ['cakes', 'go-karts', 'turnips'],
141 | description: {
142 | summary: 'Can float in Mario 2.'
143 | }
144 | };
145 |
146 | pouch.put(doc).then(function () {
147 | return pouch.search({
148 | query: 'peach',
149 | fields: ['name', 'likes', 'description.summary']
150 | });
151 | });
152 | ```
153 |
154 | **Response:**
155 |
156 | ```js
157 | {
158 | "rows": [
159 | {
160 | "id": "mydoc",
161 | "score": 0.044194173824159216
162 | }
163 | ],
164 | "total_rows": 1
165 | }
166 | ```
167 |
168 | ### Fetching the full documents
169 |
170 | By default, the results only contain a list of document `id`s and `score`s. You can also use `{include_docs: true}` to get back the full documents:
171 |
172 | ```js
173 | pouch.search({
174 | query: 'kong',
175 | fields: ['title', 'text'],
176 | include_docs: true
177 | });
178 | ```
179 |
180 | **Response:**
181 |
182 | ```js
183 | {
184 | "rows": [
185 | {
186 | "doc": {
187 | "_id": "mydoc5",
188 | "_rev": "1-5252b7faa1062e74ef0881fc908274cd",
189 | "text": "This kong likes to surf!",
190 | "title": "Funky Kong"
191 | },
192 | "id": "mydoc5",
193 | "score": 0.08027856564851082
194 | },
195 | {
196 | "doc": {
197 | "_id": "mydoc3",
198 | "_rev": "1-895f4289f96485c86ab62b02603220ae",
199 | "text": "He's the leader of the bunch, you know him well.",
200 | "title": "Donkey Kong"
201 | },
202 | "id": "mydoc3",
203 | "score": 0.044194173824159216
204 | },
205 | {
206 | "doc": {
207 | "_id": "mydoc4",
208 | "_rev": "1-00117a7b1d05df952474206e51ff19a5",
209 | "text": "His coconut gun can fire in spurts.",
210 | "title": "Diddy Kong"
211 | },
212 | "id": "mydoc4",
213 | "score": 0.044194173824159216
214 | }
215 | ],
216 | "total_rows": 3
217 | }
218 | ```
219 |
220 | ### Highlighting
221 |
222 | A very handy option is `{highlighting: true}`, which returns the fields that the query matched, along with the keywords highlighted in context:
223 |
224 | ```js
225 | pouch.search({
226 | query: 'kong',
227 | fields: ['title', 'text'],
228 | highlighting: true
229 | });
230 | ```
231 |
232 | **Response:**
233 |
234 | ```js
235 | {
236 | "rows": [
237 | {
238 | "highlighting": {
239 | "text": "This kong likes to surf!",
240 | "title": "Funky Kong"
241 | },
242 | "id": "mydoc5",
243 | "score": 0.08027856564851082
244 | },
245 | {
246 | "highlighting": {
247 | "title": "Donkey Kong"
248 | },
249 | "id": "mydoc3",
250 | "score": 0.044194173824159216
251 | },
252 | {
253 | "highlighting": {
254 | "title": "Diddy Kong"
255 | },
256 | "id": "mydoc4",
257 | "score": 0.044194173824159216
258 | }
259 | ],
260 | "total_rows": 3
261 | }
262 | ```
263 |
264 | If you don't like `<strong></strong>`, you can also specify your own `highlighting_pre` and `highlighting_post` strings:
265 |
266 | ```js
267 | pouch.search({
268 | query: 'kong',
269 | fields: ['title', 'text'],
270 | highlighting: true,
271 |   highlighting_pre: '<em>',
272 |   highlighting_post: '</em>'
273 | });
274 | ```
275 |
276 | **Response:**
277 |
278 | ```js
279 | {
280 | "rows": [
281 | {
282 | "highlighting": {
283 | "text": "This kong likes to surf!",
284 | "title": "Funky Kong"
285 | },
286 | "id": "mydoc5",
287 | "score": 0.08027856564851082
288 | },
289 | {
290 | "highlighting": {
291 | "title": "Donkey Kong"
292 | },
293 | "id": "mydoc3",
294 | "score": 0.044194173824159216
295 | },
296 | {
297 | "highlighting": {
298 | "title": "Diddy Kong"
299 | },
300 | "id": "mydoc4",
301 | "score": 0.044194173824159216
302 | }
303 | ],
304 | "total_rows": 3
305 | }
306 | ```
307 |
308 | ### Pagination
309 |
310 | You can use `limit` and `skip`, just like with the `allDocs()`/`query()` API:
311 |
312 | ```js
313 | pouch.search({
314 | query: 'kong',
315 | fields: ['title', 'text'],
316 | limit: 10,
317 | skip: 20
318 | });
319 | ```
320 |
321 | The performance concerns for `skip` that apply to `allDocs()`/`query()` do not apply so much here, because no matter what, we have to read in all the doc IDs and calculate their score in order to sort them correctly. In other words, it is guaranteed that you will read the doc IDs of all matching documents into memory, no matter what values you set for `limit` and `skip`.
322 |
323 | What this will optimize, however, is the attachment of metadata like `doc` and `highlighting` – it will only be done for the subset of results that you want.
324 |
325 | ##### `total_rows`
326 |
327 | You will also get back a field, `total_rows`, which tells you how many documents you would have gotten from your query if you hadn't applied `limit`/`skip`. You can use this for a "how many pages are remaining" display during pagination.
328 |
329 |
330 | ### Boosting fields
331 |
332 | Fields may be boosted, if you pass in an object rather than an array:
333 |
334 | ```js
335 | pouch.search({
336 | query: 'kong',
337 | fields: {
338 | 'title': 1,
339 | 'text': 5
340 | }
341 | });
342 | ```
343 |
344 | The default boost is `1`. Shorter fields are naturally boosted relative to longer fields (see the algorithmic explanation below).
345 |
346 | ### Minimum should match (mm)
347 |
348 | By default, every term in a query other than stopwords _must_ appear somewhere in the document in order for it to be matched. If you want to relax this to allow just a subset of the terms to match, use the `mm` ("minimum should match") option, which is modeled after [Solr's `mm` option](https://wiki.apache.org/solr/DisMaxQParserPlugin#mm_.28Minimum_.27Should.27_Match.29).
349 |
350 | Example 1: docs must contain both the terms `'donkey'` and `'kong'`:
351 |
352 | ```js
353 | pouch.search({
354 | query: 'donkey kong',
355 | fields: ['title', 'text']
356 | });
357 | ```
358 |
359 | Example 2: docs must contain either of the terms `'donkey'` and `'kong'`:
360 |
361 | ```js
362 | pouch.search({
363 | query: 'donkey kong',
364 | fields: ['title', 'text'],
365 | mm: '50%'
366 | });
367 | ```
368 |
369 | Example 3: docs must contain at least one of the three terms `'donkey'`, `'kong'`, and `'country'`:
370 |
371 | ```js
372 | pouch.search({
373 | query: 'donkey kong country',
374 | fields: ['title', 'text'],
375 | mm: '33%'
376 | });
377 | ```
378 |
379 | The default `mm` value is `100%`. All values must be provided as a percentage (ints are okay).
380 |
381 | ### Filtering documents
382 |
383 | If you only want to index a subset of your documents, you can include a filter function that tells us which documents to skip. The filter function should return `true` for documents you want to index, and `false` for documents you want to skip. (Truthy/falsy values are also okay.)
384 |
385 | Example:
386 |
387 | ```js
388 | pouch.search({
389 | query: 'foo',
390 | fields: ['title', 'text'],
391 | filter: function (doc) {
392 | return doc.type === 'person'; // only index persons
393 | }
394 | }).then(function (info) {
395 | // handle result
396 | }).catch(function (err) {
397 | // handle error
398 | });
399 | ```
400 |
401 | The `filter` option, like `fields` and `language`, affects the identity of the underlying index, so it affects building and deleting (see building/deleting below).
402 |
403 | Thanks to [Jean-Felix Girard](https://github.com/jfgirard) for implementing this feature!
404 |
405 | ### Building the index
406 |
407 | If you only use the `search()` method as described above, then it will be slow the first time you query, because the index has to be built up.
408 |
409 | To avoid slow performance, you can explicitly tell the search plugin to build up the index using `{build: true}`:
410 |
411 | ```js
412 | pouch.search({
413 | fields: ['title', 'text'],
414 | build: true
415 | }).then(function (info) {
416 | // if build was successful, info is {"ok": true}
417 | }).catch(function (err) {
418 | // handle error
419 | });
420 | ```
421 |
422 | This will build up the index without querying it. If the database has changed since you last updated (e.g. new documents were added), then it will simply update the index with the new documents. If nothing has changed, then it won't do anything.
423 |
424 | You must at least provide the `fields` you want to index. If the language isn't English, you must pass in the `language` option. Boosts don't matter.
425 |
426 | ### Deleting the index
427 |
428 | If, for whatever reason, you need to delete an index that's been saved to disk, you can pass in `{destroy: true}` to the `search()` function, and instead of searching, it will delete the external search database.
429 |
430 | ```js
431 | pouch.search({
432 | fields: ['title', 'text'],
433 | destroy: true
434 | });
435 | ```
436 |
437 | When you do this, you _must_ at least provide the `fields`, because external databases are created and identified based on the fields you want to index. You should also provide the `language` option if the language is something other than English. I.e., for every unique `fields` combination you want to index (plus `language` if non-English), a separate database will be created especially for that query. If you open up your developer tools, you can see it; it should have a name like `<dbname>-search-<md5sum>` and look like this:
438 |
439 | ![extra database](docs/extra_database.png)
440 |
441 | ### Stale queries
442 |
443 | When you search, a [persistent map/reduce index](http://pouchdb.com/api.html#query_database) is created behind the scenes, in order to save the indexed data and provide the fastest possible queries.
444 |
445 | This means you can use the `stale` options, as in the `query()` API, to get faster but less accurate results:
446 |
447 | ```js
448 | // return immediately, update the index afterwards
449 | pouch.search({
450 | query: 'donkey kong',
451 | fields: ['title', 'text'],
452 | stale: 'update_after'
453 | });
454 | ```
455 |
456 | or
457 |
458 | ```js
459 | // return stale results immediately, without updating the index at all
460 | pouch.search({
461 | query: 'donkey kong',
462 | fields: ['title', 'text'],
463 | stale: 'ok'
464 | });
465 | ```
466 |
467 | Most likely, though, you won't want to do this unless your database is frequently changing.
468 |
469 | ### Other languages
470 |
471 | The default Lunr pipeline uses the Porter stemmer, which is optimized for English. So for instance, the words "work," "worked," "working," and "works" would all resolve to the same stem using the default settings.
472 |
473 | Obviously other languages have different morphologies (and stopwords), so to support these languages, this plugin can integrate with the [lunr-languages](https://github.com/MihaiValentin/lunr-languages) plugin.
474 |
475 | To use another language, first follow the [lunr-languages instructions](https://github.com/MihaiValentin/lunr-languages#how-to-use) to install the language of your choice.
476 |
477 | Next, use the `language` option when you search:
478 |
479 | ```js
480 | pouch.search({
481 | query: 'marche',
482 | fields: ['text'],
483 | include_docs: true,
484 | language: 'fr'
485 | });
486 | ```
487 |
488 | **Response:**
489 |
490 | ```js
491 | {
492 | "rows": [
493 | {
494 | "doc": {
495 | "_id": "french-doc",
496 | "_rev": "1-997cba2d79a6f803c6040ddbedee642f",
497 | "text": "Ça va marcher."
498 | },
499 | "id": "french-doc",
500 | "score": 0.7071067811865475
501 | }
502 | ],
503 | "total_rows": 1
504 | }
505 | ```
506 |
507 | You can still query in English:
508 |
509 | ```js
510 | pouch.search({
511 | query: 'works',
512 | fields: ['text'],
513 | include_docs: true
514 | });
515 | ```
516 |
517 | **Response:**
518 |
519 | ```js
520 | {
521 | "rows": [
522 | {
523 | "doc": {
524 | "_id": "english-doc",
525 | "_rev": "1-48f9b2f4f17fc352fa53a21dca7e188e",
526 | "text": "This will work."
527 | },
528 | "id": "english-doc",
529 | "score": 1
530 | }
531 | ],
532 | "total_rows": 1
533 | }
534 | ```
535 |
536 | If you don't specify a `language`, then the default is `'en'`. Under the hood, separate external databases will be created per language (and per `fields` definition), so you may want to keep that in mind if you're using the `destroy` and `build` options.
537 |
538 | **Note:** currently the lunr-languages plugin expects a global `lunr` object, so unfortunately you will have to include lunr as an extra dependency in your project and assign it to global (as described in the lunr-languages instructions). Hopefully this will be fixed in the future.
539 |
540 | ### Multi-language search
541 |
542 | Recently `lunr-languages` developers have added the ability to search in multiple languages at once. To be able to search from several languages:
543 |
544 | 1) You should include `lunr.multi.js` from the `lunr-languages` repository. (Currently it is available only on master; they haven't tagged a release).
545 |
546 | 2) Pass an array into `language`, for example:
547 |
548 | ```js
549 | pouch.search({
550 | query: 'marche',
551 | fields: ['text'],
552 | include_docs: true,
553 | language: ['en', 'fr']
554 | });
555 | ```
556 |
557 | The above code will search using both French and English.
558 |
559 | ### Autosuggestions and prefix search
560 |
561 | While the `pouchdb-quick-search` plugin does not provide prefix/autosuggestion support, you can trivially do it in PouchDB itself by using `allDocs()`.
562 |
563 | Just create documents with IDs equal to what you want to search for, and then use `startkey`/`endkey` plus the special high unicode character `\uffff` to search:
564 |
565 | ```js
566 | pouch.bulkDocs([
567 | {_id: 'marin'},
568 | {_id: 'mario'},
569 | {_id: 'marth'},
570 | {_id: 'mushroom'},
571 | {_id: 'zelda'}
572 | ]).then(function () {
573 | return pouch.allDocs({
574 | startkey: 'mar',
575 | endkey: 'mar\uffff'
576 | });
577 | });
578 | ```
579 |
580 | This will return all documents that start with `'mar'`, which in this case would be `'marin'`, `'mario'`, and `'marth'`.
581 |
How does it work? Well, in PouchDB and CouchDB, doc IDs are [sorted lexicographically](http://docs.couchdb.org/en/latest/couchapp/views/collation.html), hence the `\uffff` trick.
583 |
584 | Note that to handle uppercase/lowercase, you would have to insert the documents with the `_id`s already lowercase, and then search using lowercase letters as well.
585 |
586 | **Note:** You can also accomplish this using [map/reduce queries](http://pouchdb.com/guides/queries.html), and the principle is the same (including the `\uffff` trick). However, the performance may be worse than `allDocs()` because you are using a secondary index rather than the primary index.
587 |
588 | Algorithm
589 | ----
590 |
591 | This plugin uses the classic search technique of [TF-IDF](https://en.wikipedia.org/wiki/TFIDF), which strikes a nice balance between accuracy and speed. It is probably the most widely deployed search algorithm in the world.
592 |
593 | Additionally, it applies a per-field weighting based on the [DisMax](http://searchhub.org//2010/05/23/whats-a-dismax/) algorithm as used in [Apache Solr](https://lucene.apache.org/solr/), which means that short fields tend to be boosted relative to long fields. This is useful for things like e.g. web page titles and web page contents, where the words in the titles are usually more significant than words in the contents. For multi-word queries, this algorithm also has the nice effect of preferring documents that match both words, even across several fields.
594 |
595 | For more information about the algorithms that guided this implementation, refer to the [Lucene Similarity documentation](https://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/Similarity.html).
596 |
597 | Building
598 | ----
599 | npm install
600 | npm run build
601 |
602 | Testing
603 | ----
604 |
605 | ### In Node
606 |
607 | This will run the tests in Node using LevelDB:
608 |
609 | npm test
610 |
611 | You can also check for 100% code coverage using:
612 |
613 | npm run coverage
614 |
615 | If you don't like the coverage results, change the values from 100 to something else in `package.json`, or add `/*istanbul ignore */` comments.
616 |
617 |
618 | If you have mocha installed globally you can run single test with:
619 | ```
620 | TEST_DB=local mocha --reporter spec --grep search_phrase
621 | ```
622 |
623 | The `TEST_DB` environment variable specifies the database that PouchDB should use (see `package.json`).
624 |
625 | ### In the browser
626 |
627 | Run `npm run dev` and then point your favorite browser to [http://127.0.0.1:8001/test/index.html](http://127.0.0.1:8001/test/index.html).
628 |
629 | The query param `?grep=mysearch` will search for tests matching `mysearch`.
630 |
631 | ### Automated browser tests
632 |
633 | You can run e.g.
634 |
635 | CLIENT=selenium:firefox npm test
636 | CLIENT=selenium:phantomjs npm test
637 |
638 | This will run the tests automatically and the process will exit with a 0 or a 1 when it's done. Firefox uses IndexedDB, and PhantomJS uses WebSQL.
639 |
640 | [lunr]: https://github.com/olivernn/lunr.js
641 |
642 |
--------------------------------------------------------------------------------
/bin/dev-server.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | 'use strict';
4 |
5 | var COUCH_HOST = process.env.COUCH_HOST || 'http://127.0.0.1:5984';
6 | var HTTP_PORT = 8001;
7 |
8 | var Promise = require('bluebird');
9 | var request = require('request');
10 | var http_server = require("http-server");
11 | var fs = require('fs');
12 | var indexfile = "./test/test.js";
13 | var dotfile = "./test/.test-bundle.js";
14 | var outfile = "./test/test-bundle.js";
15 | var watchify = require("watchify");
16 | var browserify = require('browserify');
17 | var w = watchify(browserify(indexfile, {
18 | cache: {},
19 | packageCache: {},
20 | fullPaths: true,
21 | debug: true
22 | }));
23 |
24 | w.on('update', bundle);
25 | bundle();
26 |
27 | var filesWritten = false;
28 | var serverStarted = false;
29 | var readyCallback;
30 |
// (re)bundle the test suite via browserify/watchify. The output is
// written to a dotfile first and then renamed into place, so the HTTP
// server never serves a half-written bundle.
function bundle() {
  var wb = w.bundle();
  wb.on('error', function (err) {
    // log and keep watching; a broken bundle shouldn't kill the server
    console.error(String(err));
  });
  wb.on("end", end);
  wb.pipe(fs.createWriteStream(dotfile));

  function end() {
    // atomically swap the finished bundle into place
    fs.rename(dotfile, outfile, function (err) {
      if (err) { return console.error(err); }
      console.log('Updated:', outfile);
      filesWritten = true;
      checkReady();
    });
  }
}
48 |
// configure CouchDB CORS, then start the static HTTP server.
// `callback` (optional) fires once both the server is listening and
// the test bundle has been written (see checkReady).
function startServers(callback) {
  readyCallback = callback;
  // enable CORS globally, because it's easier this way

  var corsValues = {
    '/_config/httpd/enable_cors': 'true',
    '/_config/cors/origins': '*',
    '/_config/cors/credentials': 'true',
    '/_config/cors/methods': 'PROPFIND, PROPPATCH, COPY, MOVE, DELETE, ' +
      'MKCOL, LOCK, UNLOCK, PUT, GETLIB, VERSION-CONTROL, CHECKIN, ' +
      'CHECKOUT, UNCHECKOUT, REPORT, UPDATE, CANCELUPLOAD, HEAD, ' +
      'OPTIONS, GET, POST',
    '/_config/cors/headers':
      'Cache-Control, Content-Type, Depth, Destination, ' +
      'If-Modified-Since, Overwrite, User-Agent, X-File-Name, ' +
      'X-File-Size, X-Requested-With, accept, accept-encoding, ' +
      'accept-language, authorization, content-type, origin, referer'
  };

  Promise.all(Object.keys(corsValues).map(function (key) {
    var value = corsValues[key];
    // wrap request() in a real promise: its bare return value is a
    // stream, which Promise.all would resolve immediately without
    // waiting for (or noticing errors from) the actual HTTP PUT
    return new Promise(function (resolve, reject) {
      request({
        method: 'put',
        url: COUCH_HOST + key,
        body: JSON.stringify(value)
      }, function (err, res) {
        if (err) {
          return reject(err);
        }
        resolve(res);
      });
    });
  })).then(function () {
    // wait for the server to actually be listening before logging
    return new Promise(function (resolve) {
      http_server.createServer().listen(HTTP_PORT, resolve);
    });
  }).then(function () {
    console.log('Tests: http://127.0.0.1:' + HTTP_PORT + '/test/index.html');
    serverStarted = true;
    checkReady();
  }).catch(function (err) {
    if (err) {
      console.log(err);
      process.exit(1);
    }
  });
}
88 |
// fire the ready callback only after both the test bundle has been
// written and the HTTP server is listening (whichever finishes last
// triggers it, via the flags set elsewhere)
function checkReady() {
  if (!filesWritten || !serverStarted) {
    return;
  }
  if (readyCallback) {
    readyCallback();
  }
}
94 |
// when executed directly, start immediately; when require()d (e.g. by
// bin/test-browser.js), expose a start() hook instead
if (require.main === module) {
  startServers();
} else {
  module.exports.start = startServers;
}
100 |
--------------------------------------------------------------------------------
/bin/run-test.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# CLIENT selects the test target: "node" (the default) runs the suite
# in Node via `npm run test-node`; anything else (e.g.
# CLIENT=selenium:firefox) runs the browser suite instead.
: ${CLIENT:="node"}

if [ "$CLIENT" == "node" ]; then
  npm run test-node
else
  npm run test-browser
fi
10 |
--------------------------------------------------------------------------------
/bin/test-browser.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | 'use strict';
3 |
4 | var wd = require('wd');
5 | var sauceConnectLauncher = require('sauce-connect-launcher');
6 | var selenium = require('selenium-standalone');
7 | var querystring = require("querystring");
8 |
9 | var devserver = require('./dev-server.js');
10 |
11 | var testTimeout = 30 * 60 * 1000;
12 |
13 | var username = process.env.SAUCE_USERNAME;
14 | var accessKey = process.env.SAUCE_ACCESS_KEY;
15 |
16 | // process.env.CLIENT is a colon seperated list of
17 | // (saucelabs|selenium):browserName:browserVerion:platform
18 | var tmp = (process.env.CLIENT || 'selenium:firefox').split(':');
19 | var client = {
20 | runner: tmp[0] || 'selenium',
21 | browser: tmp[1] || 'firefox',
22 | version: tmp[2] || null, // Latest
23 | platform: tmp[3] || null
24 | };
25 |
26 | var testUrl = 'http://127.0.0.1:8001/test/index.html';
27 | var qs = {};
28 |
29 | var sauceClient;
30 | var sauceConnectProcess;
31 | var tunnelId = process.env.TRAVIS_JOB_NUMBER || 'tunnel-' + Date.now();
32 |
33 | if (client.runner === 'saucelabs') {
34 | qs.saucelabs = true;
35 | }
36 | if (process.env.GREP) {
37 | qs.grep = process.env.GREP;
38 | }
39 | if (process.env.ADAPTERS) {
40 | qs.adapters = process.env.ADAPTERS;
41 | }
42 | if (process.env.ES5_SHIM || process.env.ES5_SHIMS) {
43 | qs.es5shim = true;
44 | }
45 | testUrl += '?';
46 | testUrl += querystring.stringify(qs);
47 |
48 | if (process.env.TRAVIS &&
49 | client.browser !== 'firefox' &&
50 | client.browser !== 'phantomjs' &&
51 | process.env.TRAVIS_SECURE_ENV_VARS === 'false') {
52 | console.error('Not running test, cannot connect to saucelabs');
53 | process.exit(1);
54 | return;
55 | }
56 |
// log the failure, close the browser session, then exit non-zero.
// Previously process.exit(3) was called immediately after the async
// quit(), racing it; now we exit only once quit() has settled
// (whether it succeeded or not).
function testError(e) {
  console.error(e);
  console.error('Doh, tests failed');
  function bail() {
    process.exit(3);
  }
  sauceClient.quit().then(bail, bail);
}
63 |
// exit the process: non-zero only when tests failed and we're not in
// perf mode (PERF runs ignore failures)
function postResult(result) {
  var shouldFail = !process.env.PERF && result.failed;
  process.exit(shouldFail ? 1 : 0);
}
67 |
// report results, shut down the browser session and (when running via
// Sauce Labs) the tunnel process, then exit with the appropriate code
function testComplete(result) {
  console.log(result);

  sauceClient.quit().then(function () {
    if (sauceConnectProcess) {
      // close the tunnel before exiting, so Sauce Labs records the job
      sauceConnectProcess.close(function () {
        postResult(result);
      });
    } else {
      postResult(result);
    }
  });
}
81 |
// install (if needed) and start a local selenium server, then create
// the webdriver client and invoke `callback`
function startSelenium(callback) {
  // Start selenium
  var opts = {version: '2.42.0'};
  selenium.install(opts, function (err) {
    if (err) {
      console.error('Failed to install selenium');
      process.exit(1);
    }
    selenium.start(opts, function (err) {
      // previously this error was silently ignored, leading to
      // confusing downstream failures when the server didn't start
      if (err) {
        console.error('Failed to start selenium');
        console.error(err);
        return process.exit(1);
      }
      sauceClient = wd.promiseChainRemote();
      callback();
    });
  });
}
96 |
// open a Sauce Connect tunnel, then create the remote webdriver
// client pointed at the local tunnel endpoint and invoke `callback`
function startSauceConnect(callback) {

  var options = {
    username: username,
    accessKey: accessKey,
    tunnelIdentifier: tunnelId
  };

  // note: the second callback argument used to be named `process`,
  // shadowing the Node global — so on error, `process.exit(1)` was
  // called on the (undefined) tunnel handle and crashed instead of
  // exiting cleanly
  sauceConnectLauncher(options, function (err, connectProcess) {
    if (err) {
      console.error('Failed to connect to saucelabs');
      console.error(err);
      return process.exit(1);
    }
    sauceConnectProcess = connectProcess;
    sauceClient = wd.promiseChainRemote("localhost", 4445, username, accessKey);
    callback();
  });
}
116 |
// drive the browser to the test page and poll window.results
// (populated by the in-page test runner) until the run completes,
// fails, or errors out
function startTest() {

  console.log('Starting', client);

  var opts = {
    browserName: client.browser,
    version: client.version,
    platform: client.platform,
    tunnelTimeout: testTimeout,
    name: client.browser + ' - ' + tunnelId,
    'max-duration': 60 * 30,
    'command-timeout': 599,
    'idle-timeout': 599,
    'tunnel-identifier': tunnelId
  };

  sauceClient.init(opts).get(testUrl, function () {

    /* jshint evil: true */
    // poll every 10 seconds; eval() runs inside the browser context
    var interval = setInterval(function () {
      sauceClient.eval('window.results', function (err, results) {
        if (err) {
          clearInterval(interval);
          testError(err);
        } else if (results.completed || results.failures.length) {
          clearInterval(interval);
          testComplete(results);
        } else {
          // still running; log progress
          console.log('=> ', results);
        }
      });
    }, 10 * 1000);
  });
}
151 |
// boot the dev server (bundles the tests and serves the static
// files), then kick off the appropriate runner per CLIENT
devserver.start(function () {
  if (client.runner === 'saucelabs') {
    startSauceConnect(startTest);
  } else {
    startSelenium(startTest);
  }
});
159 |
--------------------------------------------------------------------------------
/bower.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pouchdb-quick-search",
3 | "version": "1.1.0",
4 | "description": "PouchDB Quick Search - persisted full-text search for PouchDB",
5 | "main": "dist/pouchdb.quick-search.js",
6 | "homepage": "https://github.com/nolanlawson/pouchdb-quick-search",
7 | "authors": [
8 | "Nolan Lawson "
9 | ],
10 | "moduleType": [
11 | "node"
12 | ],
13 | "keywords": [
14 | "pouchdb",
15 | "search",
16 | "fts",
17 | "full-text",
18 | "quick"
19 | ],
20 | "license": "Apache 2",
21 | "ignore": [
22 | "**/.*",
23 | "node_modules",
24 | "bower_components",
25 | "test",
26 | "tests",
27 | "vendor"
28 | ]
29 | }
30 |
--------------------------------------------------------------------------------
/docs/extra_database.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pouchdb-community/pouchdb-quick-search/ec2ce7b75f07ea5c3a3216eff20e84621e9ce4e1/docs/extra_database.png
--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
// Use a fork of pouchdb-mapreduce, which allows us
// deeper control over what's persisted, without needing ddocs
var mapReduce = require('pouchdb-mapreduce-no-ddocs');
// re-export every method from the map/reduce fork (e.g. _search_query)
// so registering this plugin also registers the query machinery
Object.keys(mapReduce).forEach(function (key) {
  exports[key] = mapReduce[key];
});
9 |
10 | var utils = require('./pouch-utils');
11 | var lunr = require('lunr');
12 | var uniq = require('uniq');
13 | var Promise = utils.Promise;
14 | var stringify = require('json-stable-stringify');
15 |
16 | var indexes = {};
17 |
18 | var TYPE_TOKEN_COUNT = 'a';
19 | var TYPE_DOC_INFO = 'b';
20 |
// two-argument sum, used as a reduce() callback when totalling
// per-field scores
function add(a, b) {
  return a + b;
}
24 |
// run the text through Lunr's tokenizer and pipeline, producing the
// (non-unique) token stream. In the future, we might expand this to
// do more than just English. Note this relies on a private Lunr API,
// which is why the Lunr dependency version is pegged.
function getTokenStream(text, index) {
  var rawTokens = lunr.tokenizer(text);
  return index.pipeline.run(rawTokens);
}
32 |
// given an object containing the field name and/or
// a deepField definition (a dotted path pre-split into an array)
// plus the doc, return the text for indexing
function getText(fieldBoost, doc) {
  var text;
  if (!fieldBoost.deepField) {
    text = doc[fieldBoost.field];
  } else { // "Enhance."
    text = doc;
    for (var i = 0, len = fieldBoost.deepField.length; i < len; i++) {
      if (Array.isArray(text)) {
        text = text.map(handleNestedObjectArrayItem(fieldBoost, fieldBoost.deepField.slice(i)));
        // the recursive map() above already consumed the entire
        // remaining path (deepField.slice(i)) for every element.
        // Previously the loop kept running, re-applying the remaining
        // keys to the already-resolved strings and producing
        // undefined for paths of length >= i + 2 with a mid-path
        // array; stop here instead.
        break;
      } else {
        text = text && text[fieldBoost.deepField[i]];
      }
    }
  }
  if (text) {
    if (Array.isArray(text)) {
      // multiple values are indexed as one space-separated string
      text = text.join(' ');
    } else if (typeof text !== 'string') {
      // e.g. numbers/booleans are indexed via their string form
      text = text.toString();
    }
  }
  return text;
}

// returns a mapper that resolves the remaining deepField path against
// a single array element, recursing through getText so nested
// arrays/objects are handled uniformly
function handleNestedObjectArrayItem(fieldBoost, deepField) {
  return function (one) {
    // build a derived fieldBoost with the shortened path; only the
    // properties getText reads are copied (no need for a deep extend)
    return getText({
      field: fieldBoost.field,
      boost: fieldBoost.boost,
      deepField: deepField
    }, one);
  };
}
67 |
// build the map function handed to map/reduce. For each doc it emits
// two kinds of rows: one TYPE_TOKEN_COUNT row per token occurrence
// (value = field index, omitted when there's a single field), and one
// TYPE_DOC_INFO row carrying the per-field length norms.
function createMapFunction(fieldBoosts, index, filter, db) {
  // with a single field the field index is implied, so we skip the
  // value to save disk space
  var multiField = fieldBoosts.length > 1;

  return function (doc, emit) {
    if (isFiltered(doc, filter, db)) {
      return;
    }

    var fieldLenNorms = [];

    fieldBoosts.forEach(function (fieldBoost, fieldIdx) {
      var text = getText(fieldBoost, doc);
      if (!text) { // no tokens in this field
        fieldLenNorms.push(0);
        return;
      }
      var tokens = getTokenStream(text, index);
      tokens.forEach(function (token) {
        emit(TYPE_TOKEN_COUNT + token, multiField ? fieldIdx : undefined);
      });
      // Lucene-style field-length norm: sqrt(number of terms)
      fieldLenNorms.push(Math.sqrt(tokens.length));
    });

    emit(TYPE_DOC_INFO + doc._id, fieldLenNorms);
  };
}
105 |
// main entry point: run a search, or build/destroy the persisted
// index, depending on the options.
//
// opts (see README for full semantics):
//   query / q     - the query string
//   fields        - array of field names, or map of field -> boost
//   mm            - "minimum should match" percentage, e.g. '75%'
//   include_docs, highlighting, destroy, build, stale, limit, skip,
//   language, filter
exports.search = utils.toPromise(function (opts, callback) {
  var pouch = this;
  // deep-clone so we never mutate the caller's options object
  opts = utils.extend(true, {}, opts);
  var q = opts.query || opts.q;
  // minimum-should-match, normalized from e.g. '75%' to 0.75; defaults
  // to 1, i.e. all query terms must match
  var mm = 'mm' in opts ? (parseFloat(opts.mm) / 100) : 1; // e.g. '75%'
  var fields = opts.fields;
  var highlighting = opts.highlighting;
  var includeDocs = opts.include_docs;
  var destroy = opts.destroy;
  var stale = opts.stale;
  var limit = opts.limit;
  var build = opts.build;
  var skip = opts.skip || 0;
  var language = opts.language || 'en';
  var filter = opts.filter;

  // normalize the array form of `fields` to the map form
  // (field name -> boost)
  if (Array.isArray(fields)) {
    var fieldsMap = {};
    fields.forEach(function (field) {
      fieldsMap[field] = 1; // default boost
    });
    fields = fieldsMap;
  }

  // precompute, per field, the boost and the (optional) pre-split
  // dotted path used for deep lookups
  var fieldBoosts = Object.keys(fields).map(function (field) {
    var deepField = field.indexOf('.') !== -1 && field.split('.');
    return {
      field: field,
      deepField: deepField,
      boost: fields[field]
    };
  });

  // lazily create and cache one lunr pipeline per language (an array
  // of languages is keyed by its comma-joined string form)
  var index = indexes[language];
  if (!index) {
    index = indexes[language] = lunr();
    if (Array.isArray(language)) {
      // requires the lunr-languages "multi" plugin on the global lunr
      index.use(global.lunr['multiLanguage'].apply(this, language));
    } else if (language !== 'en') {
      // requires the matching lunr-languages plugin on the global lunr
      index.use(global.lunr[language]);
    }
  }

  // the index we save as a separate database is uniquely identified
  // by the fields the user want to index (boost doesn't matter)
  // plus the tokenizer

  var indexParams = {
    language: language,
    fields: fieldBoosts.map(function (x) {
      return x.field;
    }).sort()
  };

  if (filter) {
    indexParams.filter = filter.toString();
  }

  var persistedIndexName = 'search-' + utils.MD5(stringify(indexParams));

  var mapFun = createMapFunction(fieldBoosts, index, filter, pouch);

  var queryOpts = {
    saveAs: persistedIndexName
  };
  if (destroy) {
    // tear down the persisted index database and bail out early
    queryOpts.destroy = true;
    return pouch._search_query(mapFun, queryOpts, callback);
  } else if (build) {
    // just build/refresh the index without running an actual query
    delete queryOpts.stale; // update immediately
    queryOpts.limit = 0;
    pouch._search_query(mapFun, queryOpts).then(function () {
      callback(null, {ok: true});
    }).catch(callback);
    return;
  }

  // it shouldn't matter if the user types the same
  // token more than once, in fact I think even Lucene does this
  // special cases like boingo boingo and mother mother are rare
  var queryTerms = uniq(getTokenStream(q, index));
  if (!queryTerms.length) {
    // e.g. the query was empty or consisted entirely of stopwords
    return callback(null, {total_rows: 0, rows: []});
  }
  queryOpts.keys = queryTerms.map(function (queryTerm) {
    return TYPE_TOKEN_COUNT + queryTerm;
  });

  if (typeof stale === 'string') {
    queryOpts.stale = stale;
  }

  // search algorithm, basically classic TF-IDF
  //
  // step 1: get the doc+fields associated with the terms in the query
  // step 2: get the doc-len-norms of those document fields
  // step 3: calculate document scores using tf-idf
  //
  // note that we follow the Lucene convention (established in
  // DefaultSimilarity.java) of computing doc-len-norm (in our case, technically
  // field-lennorm) as Math.sqrt(numTerms),
  // which is an optimization that avoids having to look up every term
  // in that document and fully recompute its scores based on tf-idf
  // More info:
  // https://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/Similarity.html
  //

  // step 1
  pouch._search_query(mapFun, queryOpts).then(function (res) {

    if (!res.rows.length) {
      return callback(null, {total_rows: 0, rows: []});
    }
    var total_rows = 0;
    // docId -> (array indexed by field) -> (query term -> term count)
    var docIdsToFieldsToQueryTerms = {};
    // query term -> document frequency
    var termDFs = {};

    res.rows.forEach(function (row) {
      // strip the one-char TYPE_TOKEN_COUNT prefix to recover the term
      var term = row.key.substring(1);
      // row.value is the field index; it was omitted when there's only
      // one field, so default to 0
      var field = row.value || 0;

      // calculate termDFs
      if (!(term in termDFs)) {
        termDFs[term] = 1;
      } else {
        termDFs[term]++;
      }

      // calculate docIdsToFieldsToQueryTerms
      if (!(row.id in docIdsToFieldsToQueryTerms)) {
        var arr = docIdsToFieldsToQueryTerms[row.id] = [];
        for (var i = 0; i < fieldBoosts.length; i++) {
          arr[i] = {};
        }
      }

      var docTerms = docIdsToFieldsToQueryTerms[row.id][field];
      if (!(term in docTerms)) {
        docTerms[term] = 1;
      } else {
        docTerms[term]++;
      }
    });

    // apply the minimum should match (mm)
    if (queryTerms.length > 1) {
      Object.keys(docIdsToFieldsToQueryTerms).forEach(function (docId) {
        // collect the distinct query terms this doc matched, across
        // all of its fields
        var allMatchingTerms = {};
        var fieldsToQueryTerms = docIdsToFieldsToQueryTerms[docId];
        Object.keys(fieldsToQueryTerms).forEach(function (field) {
          Object.keys(fieldsToQueryTerms[field]).forEach(function (term) {
            allMatchingTerms[term] = true;
          });
        });
        var numMatchingTerms = Object.keys(allMatchingTerms).length;
        var matchingRatio = numMatchingTerms / queryTerms.length;
        // floor to 2 decimal places before comparing, so e.g. 2/3
        // matched terms still satisfies mm = '66%'
        if ((Math.floor(matchingRatio * 100) / 100) < mm) {
          delete docIdsToFieldsToQueryTerms[docId]; // ignore this doc
        }
      });
    }

    if (!Object.keys(docIdsToFieldsToQueryTerms).length) {
      return callback(null, {total_rows: 0, rows: []});
    }

    var keys = Object.keys(docIdsToFieldsToQueryTerms).map(function (docId) {
      return TYPE_DOC_INFO + docId;
    });

    // note: deliberately shadows the outer queryOpts; this second
    // query fetches the TYPE_DOC_INFO rows (field-length norms)
    var queryOpts = {
      saveAs: persistedIndexName,
      keys: keys,
      stale: stale
    };

    // step 2
    return pouch._search_query(mapFun, queryOpts).then(function (res) {

      var docIdsToFieldsToNorms = {};
      res.rows.forEach(function (row) {
        docIdsToFieldsToNorms[row.id] = row.value;
      });
      // step 3
      // now we have all information, so calculate scores
      var rows = calculateDocumentScores(queryTerms, termDFs,
        docIdsToFieldsToQueryTerms, docIdsToFieldsToNorms, fieldBoosts);
      return rows;
    }).then(function (rows) {
      total_rows = rows.length;
      // filter before fetching docs or applying highlighting
      // for a slight optimization, since for now we've only fetched ids/scores
      return (typeof limit === 'number' && limit >= 0) ?
        rows.slice(skip, skip + limit) : skip > 0 ? rows.slice(skip) : rows;
    }).then(function (rows) {
      if (includeDocs) {
        return applyIncludeDocs(pouch, rows);
      }
      return rows;
    }).then(function (rows) {
      if (highlighting) {
        return applyHighlighting(pouch, opts, rows, fieldBoosts, docIdsToFieldsToQueryTerms);
      }
      return rows;

    }).then(function (rows) {
      callback(null, {total_rows: total_rows, rows: rows});
    });
  }).catch(callback);
});
316 |
317 |
318 | // returns a sorted list of scored results, like:
319 | // [{id: {...}, score: 0.2}, {id: {...}, score: 0.1}];
320 | //
321 | // some background: normally this would be implemented as cosine similarity
322 | // using tf-idf, which is equal to
323 | // dot-product(q, d) / (norm(q) * norm(doc))
324 | // (although there is no point in calculating the query norm,
325 | // because all we care about is the relative score for a given query,
326 | // so we ignore it, lucene does this too)
327 | //
328 | //
329 | // but instead of straightforward cosine similarity, here I implement
330 | // the dismax algorithm, so the doc score is the
331 | // sum of its fields' scores, and this is done on a per-query-term basis,
332 | // then the maximum score for each of the query terms is the one chosen,
333 | // i.e. max(sumOfQueryTermScoresForField1, sumOfQueryTermScoresForField2, etc.)
334 | //
335 |
function calculateDocumentScores(queryTerms, termDFs, docIdsToFieldsToQueryTerms,
                                 docIdsToFieldsToNorms, fieldBoosts) {

  var results = Object.keys(docIdsToFieldsToQueryTerms).map(function (docId) {

    var fieldsToQueryTerms = docIdsToFieldsToQueryTerms[docId];
    var fieldsToNorms = docIdsToFieldsToNorms[docId];

    // dismax: for each query term, sum the per-field tf-idf
    // contributions; the doc's score is the best of those sums
    var bestScore = 0;
    queryTerms.forEach(function (queryTerm) {
      var termScore = 0;
      fieldsToQueryTerms.forEach(function (countsForField, fieldIdx) {
        if (!(queryTerm in countsForField)) {
          return; // term absent from this field, contributes nothing
        }
        var df = termDFs[queryTerm];
        var tf = countsForField[queryTerm];
        var docWeight = tf / df;   // TF-IDF for doc
        var queryWeight = 1 / df;  // TF-IDF for query, count assumed 1
        var boost = fieldBoosts[fieldIdx].boost;
        // see the cosine-similarity equation in the comment above
        termScore += (docWeight * queryWeight * boost) / fieldsToNorms[fieldIdx];
      });
      if (termScore > bestScore) {
        bestScore = termScore;
      }
    });

    return {
      id: docId,
      score: bestScore
    };
  });

  // highest score first
  results.sort(function (a, b) {
    return b.score - a.score;
  });

  return results;
}
378 |
// fetch the full document for every scored row and attach it in place
// as row.doc; resolves to the same rows array for chaining
function applyIncludeDocs(pouch, rows) {
  var fetches = rows.map(function (row) {
    return pouch.get(row.id);
  });
  return Promise.all(fetches).then(function (docs) {
    for (var i = 0; i < docs.length; i++) {
      rows[i].doc = docs[i];
    }
    return rows;
  });
}
390 |
// create a convenient object showing highlighting results
// this is designed to be like solr's highlighting feature, so it
// should return something like
// {'fieldname': 'here is some highlighted text.'}
//
function applyHighlighting(pouch, opts, rows, fieldBoosts,
                           docIdsToFieldsToQueryTerms) {

  var pre = opts.highlighting_pre || '';
  var post = opts.highlighting_post || '';

  return Promise.all(rows.map(function (row) {

    return Promise.resolve().then(function () {
      // reuse the doc when include_docs already fetched it
      if (row.doc) {
        return row.doc;
      }
      return pouch.get(row.id);
    }).then(function (doc) {
      row.highlighting = {};
      docIdsToFieldsToQueryTerms[row.id].forEach(function (queryTerms, i) {
        var fieldBoost = fieldBoosts[i];
        var fieldName = fieldBoost.field;
        var text = getText(fieldBoost, doc);
        // TODO: this is fairly naive highlighting code; could improve
        // the regex
        Object.keys(queryTerms).forEach(function (queryTerm) {
          // NOTE(review): queryTerm is interpolated into the pattern
          // unescaped; lunr tokens are normally alphanumeric, but
          // confirm no regex metacharacters can reach this point
          var regex = new RegExp('(' + queryTerm + '[a-z]*)', 'gi');
          var replacement = pre + '$1' + post;
          text = text.replace(regex, replacement);
          row.highlighting[fieldName] = text;
        });
      });
    });
  })).then(function () {
    return rows;
  });
}
429 |
// returns true when the doc should be excluded from the index
// (i.e. a filter exists and rejects the doc, or the filter threw).
// kept as its own tiny function so the try/catch doesn't deoptimize
// the hot map function that calls it
function isFiltered(doc, filter, db) {
  if (!filter) {
    return false;
  }
  try {
    return !filter(doc);
  } catch (err) {
    // surface user-filter errors on the db's event emitter, and
    // treat the doc as filtered out
    db.emit('error', err);
    return true;
  }
}
440 |
/* istanbul ignore next */
// when loaded via a script tag (the prebuilt dist bundle), register
// automatically against the global PouchDB
if (typeof window !== 'undefined' && window.PouchDB) {
  window.PouchDB.plugin(exports);
}
445 |
--------------------------------------------------------------------------------
/lib/pouch-utils.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var Promise = require('pouchdb-promise');
4 | /* istanbul ignore next */
5 | exports.once = function (fun) {
6 | var called = false;
7 | return exports.getArguments(function (args) {
8 | if (called) {
9 | console.trace();
10 | throw new Error('once called more than once');
11 | } else {
12 | called = true;
13 | fun.apply(this, args);
14 | }
15 | });
16 | };
17 | /* istanbul ignore next */
18 | exports.getArguments = function (fun) {
19 | return function () {
20 | var len = arguments.length;
21 | var args = new Array(len);
22 | var i = -1;
23 | while (++i < len) {
24 | args[i] = arguments[i];
25 | }
26 | return fun.call(this, args);
27 | };
28 | };
29 | /* istanbul ignore next */
// wrap a node-style (err, result) function so it returns a promise,
// while still supporting an optional trailing callback from the
// caller. The wrapped `func` always receives a callback created here;
// `promise.cancel` is a no-op stub for API compatibility.
exports.toPromise = function (func) {
  //create the function we will be returning
  return exports.getArguments(function (args) {
    var self = this;
    var tempCB = (typeof args[args.length - 1] === 'function') ? args.pop() : false;
    // if the last argument is a function, assume its a callback
    var usedCB;
    if (tempCB) {
      // if it was a callback, create a new callback which calls it,
      // but do so async so we don't trap any errors
      usedCB = function (err, resp) {
        process.nextTick(function () {
          tempCB(err, resp);
        });
      };
    }
    var promise = new Promise(function (fulfill, reject) {
      try {
        // guard against func invoking its callback more than once
        var callback = exports.once(function (err, mesg) {
          if (err) {
            reject(err);
          } else {
            fulfill(mesg);
          }
        });
        // create a callback for this invocation
        // apply the function in the orig context
        args.push(callback);
        func.apply(self, args);
      } catch (e) {
        // synchronous throws from func become rejections
        reject(e);
      }
    });
    // if there is a callback, call it back
    if (usedCB) {
      promise.then(function (result) {
        usedCB(null, result);
      }, usedCB);
    }
    promise.cancel = function () {
      return this;
    };
    return promise;
  });
};
75 |
76 | exports.inherits = require('inherits');
77 | exports.Promise = Promise;
78 |
79 | var crypto = require('crypto');
80 | var md5 = require('md5-jkmyers');
81 | exports.MD5 = function (string) {
82 | /* istanbul ignore if */
83 | if (process.browser) {
84 | return md5(string);
85 | }
86 | return crypto.createHash('md5').update(string).digest('hex');
87 | };
88 |
89 | exports.extend = require('pouchdb-extend');
90 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pouchdb-quick-search",
3 | "version": "1.3.0",
4 | "description": "PouchDB Quick Search - persisted full-text search for PouchDB",
5 | "main": "lib/index.js",
6 | "repository": {
7 | "type": "git",
8 | "url": "git://github.com/nolanlawson/pouchdb-quick-search.git"
9 | },
10 | "keywords": [
11 | "pouch",
12 | "pouchdb",
13 | "plugin",
14 | "seed",
15 | "couch",
16 | "couchdb"
17 | ],
18 | "author": "",
19 | "license": "Apache-2.0",
20 | "bugs": {
21 | "url": "https://github.com/nolanlawson/pouchdb-quick-search/issues"
22 | },
23 | "scripts": {
24 | "test-node": "istanbul test ./node_modules/mocha/bin/_mocha test/test.js",
25 | "test-browser": "./bin/test-browser.js",
26 | "jshint": "jshint -c .jshintrc lib test/test.js",
27 | "test": "npm run jshint && bash ./bin/run-test.sh",
28 | "build": "mkdirp dist && browserify . -p bundle-collapser/plugin -t es3ify -s PouchQuickSearch > dist/pouchdb.quick-search.js && npm run min",
29 | "min": "uglifyjs dist/pouchdb.quick-search.js -mc > dist/pouchdb.quick-search.min.js",
30 | "dev": "browserify test/test.js > test/test-bundle.js && npm run dev-server",
31 | "dev-server": "./bin/dev-server.js",
32 | "coverage": "npm test --coverage && istanbul check-coverage --lines 100 --function 100 --statements 100 --branches 100"
33 | },
34 | "dependencies": {
35 | "argsarray": "0.0.1",
36 | "es3ify": "^0.1.3",
37 | "inherits": "~2.0.1",
38 | "json-stable-stringify": "^1.0.1",
39 | "lie": "^2.6.0",
40 | "lunr": "0.7.1",
41 | "md5-jkmyers": "0.0.1",
42 | "pouchdb-extend": "^0.1.0",
43 | "pouchdb-mapreduce-no-ddocs": "^2.3.2",
44 | "pouchdb-promise": "5.4.4",
45 | "uniq": "^1.0.1"
46 | },
47 | "devDependencies": {
48 | "bluebird": "^1.0.7",
49 | "browserify": "^12.0.2",
50 | "bundle-collapser": "^1.2.1",
51 | "chai": "^3.5.0",
52 | "chai-as-promised": "^5.3.0",
53 | "http-server": "~0.5.5",
54 | "istanbul": "^0.2.7",
55 | "jshint": "~2.8.0",
56 | "mkdirp": "^0.5.1",
57 | "mocha": "^2.4.5",
58 | "phantomjs-prebuilt": "^2.1.7",
59 | "pouchdb-memory": "^1.1.0",
60 | "request": "^2.36.0",
61 | "sauce-connect-launcher": "^0.14.0",
62 | "selenium-standalone": "^5.1.0",
63 | "uglify-js": "^2.4.13",
64 | "watchify": "^3.1.0",
65 | "wd": "^0.2.21"
66 | },
67 |   "files": [
68 | "lib",
69 | "dist"
70 | ]
71 | }
72 |
--------------------------------------------------------------------------------
/test/bind-polyfill.js:
--------------------------------------------------------------------------------
(function () {
  'use strict';
  // Minimal Function.prototype.bind polyfill for PhantomJS, based on the
  // MDN reference implementation.
  // In the future, we should do ES5_SHIM=true like pouchdb.
  if (!Function.prototype.bind) {
    Function.prototype.bind = function (oThis) {
      if (typeof this !== "function") {
        // closest thing possible to the ECMAScript 5
        // internal IsCallable function
        throw new TypeError("Function.prototype.bind - what is trying to be bound is not callable");
      }

      var aArgs = Array.prototype.slice.call(arguments, 1),
        fToBind = this,
        fNOP = function () {},
        fBound = function () {
          // When invoked via `new`, `this` is an instance of fNOP and must
          // win over the bound `this` value. The previous check
          // (`this instanceof fNOP && oThis`) wrongly fell back to `oThis`
          // for constructor calls whenever `oThis` was falsy (e.g.
          // `fn.bind(null)`), so fields were set on the wrong object.
          return fToBind.apply(this instanceof fNOP
            ? this
            : oThis,
            aArgs.concat(Array.prototype.slice.call(arguments)));
        };

      fNOP.prototype = this.prototype;
      fBound.prototype = new fNOP();

      return fBound;
    };
  }
})();
--------------------------------------------------------------------------------
/test/deps/lunr.fr.js:
--------------------------------------------------------------------------------
1 | /*!
2 | * Lunr languages, `French` language
3 | * https://github.com/MihaiValentin/lunr-languages
4 | *
5 | * Copyright 2014, Mihai Valentin
6 | * http://www.mozilla.org/MPL/
7 | */
8 | /*!
9 | * based on
10 | * Snowball JavaScript Library v0.3
11 | * http://code.google.com/p/urim/
12 | * http://snowball.tartarus.org/
13 | *
14 | * Copyright 2010, Oleg Mazko
15 | * http://www.mozilla.org/MPL/
16 | */
17 |
18 | /**
19 | * export the module via AMD, CommonJS or as a browser global
20 | * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
21 | */
22 | ;
23 | (function(root, factory) {
24 | if (typeof define === 'function' && define.amd) {
25 | // AMD. Register as an anonymous module.
26 | define(factory)
27 | } else if (typeof exports === 'object') {
28 | /**
29 | * Node. Does not work with strict CommonJS, but
30 | * only CommonJS-like environments that support module.exports,
31 | * like Node.
32 | */
33 | module.exports = factory()
34 | } else {
35 | // Browser globals (root is window)
36 | factory()(root.lunr);
37 | }
38 | }(this, function() {
39 | /**
40 | * Just return a value to define the module export.
41 | * This example returns an object, but the module
42 | * can return a function as the exported value.
43 | */
44 | return function(lunr) {
45 | /* throw error if lunr is not yet included */
46 | if ('undefined' === typeof lunr) {
47 | throw new Error('Lunr is not present. Please include / require Lunr before this script.');
48 | }
49 |
50 | /* throw error if lunr stemmer support is not yet included */
51 | if ('undefined' === typeof lunr.stemmerSupport) {
52 | throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.');
53 | }
54 |
55 | /* register specific locale function */
    // Intended to be used as a lunr plugin (e.g. via idx.use): replaces the
    // default pipeline with the French trimmer / stop word filter / stemmer.
    lunr.fr = function() {
      this.pipeline.reset();
      this.pipeline.add(
        lunr.fr.trimmer,
        lunr.fr.stopWordFilter,
        lunr.fr.stemmer
      );
    };
64 |
65 | /* lunr trimmer function */
66 | lunr.fr.wordCharacters = "A-Za-z\xAA\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA7AD\uA7B0-\uA7B7\uA7F7-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB64\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A";
67 | lunr.fr.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.fr.wordCharacters);
68 |
69 | lunr.Pipeline.registerFunction(lunr.fr.trimmer, 'trimmer-fr');
70 |
71 | /* lunr stemmer function */
72 | lunr.fr.stemmer = (function() {
73 | /* create the wrapped stemmer object */
74 | var Among = lunr.stemmerSupport.Among,
75 | SnowballProgram = lunr.stemmerSupport.SnowballProgram,
76 | st = new function FrenchStemmer() {
77 | var a_0 = [new Among("col", -1, -1), new Among("par", -1, -1),
78 | new Among("tap", -1, -1)
79 | ],
80 | a_1 = [new Among("", -1, 4),
81 | new Among("I", 0, 1), new Among("U", 0, 2), new Among("Y", 0, 3)
82 | ],
83 | a_2 = [
84 | new Among("iqU", -1, 3), new Among("abl", -1, 3),
85 | new Among("I\u00E8r", -1, 4), new Among("i\u00E8r", -1, 4),
86 | new Among("eus", -1, 2), new Among("iv", -1, 1)
87 | ],
88 | a_3 = [
89 | new Among("ic", -1, 2), new Among("abil", -1, 1),
90 | new Among("iv", -1, 3)
91 | ],
92 | a_4 = [new Among("iqUe", -1, 1),
93 | new Among("atrice", -1, 2), new Among("ance", -1, 1),
94 | new Among("ence", -1, 5), new Among("logie", -1, 3),
95 | new Among("able", -1, 1), new Among("isme", -1, 1),
96 | new Among("euse", -1, 11), new Among("iste", -1, 1),
97 | new Among("ive", -1, 8), new Among("if", -1, 8),
98 | new Among("usion", -1, 4), new Among("ation", -1, 2),
99 | new Among("ution", -1, 4), new Among("ateur", -1, 2),
100 | new Among("iqUes", -1, 1), new Among("atrices", -1, 2),
101 | new Among("ances", -1, 1), new Among("ences", -1, 5),
102 | new Among("logies", -1, 3), new Among("ables", -1, 1),
103 | new Among("ismes", -1, 1), new Among("euses", -1, 11),
104 | new Among("istes", -1, 1), new Among("ives", -1, 8),
105 | new Among("ifs", -1, 8), new Among("usions", -1, 4),
106 | new Among("ations", -1, 2), new Among("utions", -1, 4),
107 | new Among("ateurs", -1, 2), new Among("ments", -1, 15),
108 | new Among("ements", 30, 6), new Among("issements", 31, 12),
109 | new Among("it\u00E9s", -1, 7), new Among("ment", -1, 15),
110 | new Among("ement", 34, 6), new Among("issement", 35, 12),
111 | new Among("amment", 34, 13), new Among("emment", 34, 14),
112 | new Among("aux", -1, 10), new Among("eaux", 39, 9),
113 | new Among("eux", -1, 1), new Among("it\u00E9", -1, 7)
114 | ],
115 | a_5 = [
116 | new Among("ira", -1, 1), new Among("ie", -1, 1),
117 | new Among("isse", -1, 1), new Among("issante", -1, 1),
118 | new Among("i", -1, 1), new Among("irai", 4, 1),
119 | new Among("ir", -1, 1), new Among("iras", -1, 1),
120 | new Among("ies", -1, 1), new Among("\u00EEmes", -1, 1),
121 | new Among("isses", -1, 1), new Among("issantes", -1, 1),
122 | new Among("\u00EEtes", -1, 1), new Among("is", -1, 1),
123 | new Among("irais", 13, 1), new Among("issais", 13, 1),
124 | new Among("irions", -1, 1), new Among("issions", -1, 1),
125 | new Among("irons", -1, 1), new Among("issons", -1, 1),
126 | new Among("issants", -1, 1), new Among("it", -1, 1),
127 | new Among("irait", 21, 1), new Among("issait", 21, 1),
128 | new Among("issant", -1, 1), new Among("iraIent", -1, 1),
129 | new Among("issaIent", -1, 1), new Among("irent", -1, 1),
130 | new Among("issent", -1, 1), new Among("iront", -1, 1),
131 | new Among("\u00EEt", -1, 1), new Among("iriez", -1, 1),
132 | new Among("issiez", -1, 1), new Among("irez", -1, 1),
133 | new Among("issez", -1, 1)
134 | ],
135 | a_6 = [new Among("a", -1, 3),
136 | new Among("era", 0, 2), new Among("asse", -1, 3),
137 | new Among("ante", -1, 3), new Among("\u00E9e", -1, 2),
138 | new Among("ai", -1, 3), new Among("erai", 5, 2),
139 | new Among("er", -1, 2), new Among("as", -1, 3),
140 | new Among("eras", 8, 2), new Among("\u00E2mes", -1, 3),
141 | new Among("asses", -1, 3), new Among("antes", -1, 3),
142 | new Among("\u00E2tes", -1, 3), new Among("\u00E9es", -1, 2),
143 | new Among("ais", -1, 3), new Among("erais", 15, 2),
144 | new Among("ions", -1, 1), new Among("erions", 17, 2),
145 | new Among("assions", 17, 3), new Among("erons", -1, 2),
146 | new Among("ants", -1, 3), new Among("\u00E9s", -1, 2),
147 | new Among("ait", -1, 3), new Among("erait", 23, 2),
148 | new Among("ant", -1, 3), new Among("aIent", -1, 3),
149 | new Among("eraIent", 26, 2), new Among("\u00E8rent", -1, 2),
150 | new Among("assent", -1, 3), new Among("eront", -1, 2),
151 | new Among("\u00E2t", -1, 3), new Among("ez", -1, 2),
152 | new Among("iez", 32, 2), new Among("eriez", 33, 2),
153 | new Among("assiez", 33, 3), new Among("erez", 32, 2),
154 | new Among("\u00E9", -1, 2)
155 | ],
156 | a_7 = [new Among("e", -1, 3),
157 | new Among("I\u00E8re", 0, 2), new Among("i\u00E8re", 0, 2),
158 | new Among("ion", -1, 1), new Among("Ier", -1, 2),
159 | new Among("ier", -1, 2), new Among("\u00EB", -1, 4)
160 | ],
161 | a_8 = [
162 | new Among("ell", -1, -1), new Among("eill", -1, -1),
163 | new Among("enn", -1, -1), new Among("onn", -1, -1),
164 | new Among("ett", -1, -1)
165 | ],
166 | g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0,
167 | 0, 0, 0, 0, 0, 128, 130, 103, 8, 5
168 | ],
169 | g_keep_with_s = [1, 65, 20, 0,
170 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
171 | ],
172 | I_p2, I_p1, I_pV, sbp = new SnowballProgram();
173 | this.setCurrent = function(word) {
174 | sbp.setCurrent(word);
175 | };
176 | this.getCurrent = function() {
177 | return sbp.getCurrent();
178 | };
179 |
        // If the character at the cursor is c1 and the character after it is
        // a vowel (g_v), replace c1 with c2 (e.g. "u" -> "U") and restore the
        // cursor to v_1. Returns true when the substitution happened.
        function habr1(c1, c2, v_1) {
          if (sbp.eq_s(1, c1)) {
            sbp.ket = sbp.cursor;
            if (sbp.in_grouping(g_v, 97, 251)) {
              sbp.slice_from(c2);
              sbp.cursor = v_1;
              return true;
            }
          }
          return false;
        }
191 |
        // Like habr1, but replaces a matched c1 with c2 unconditionally
        // (no following-vowel check), then restores the cursor to v_1.
        function habr2(c1, c2, v_1) {
          if (sbp.eq_s(1, c1)) {
            sbp.ket = sbp.cursor;
            sbp.slice_from(c2);
            sbp.cursor = v_1;
            return true;
          }
          return false;
        }
201 |
202 | function r_prelude() {
203 | var v_1, v_2;
204 | while (true) {
205 | v_1 = sbp.cursor;
206 | if (sbp.in_grouping(g_v, 97, 251)) {
207 | sbp.bra = sbp.cursor;
208 | v_2 = sbp.cursor;
209 | if (habr1("u", "U", v_1))
210 | continue;
211 | sbp.cursor = v_2;
212 | if (habr1("i", "I", v_1))
213 | continue;
214 | sbp.cursor = v_2;
215 | if (habr2("y", "Y", v_1))
216 | continue;
217 | }
218 | sbp.cursor = v_1;
219 | sbp.bra = v_1;
220 | if (!habr1("y", "Y", v_1)) {
221 | sbp.cursor = v_1;
222 | if (sbp.eq_s(1, "q")) {
223 | sbp.bra = sbp.cursor;
224 | if (habr2("u", "U", v_1))
225 | continue;
226 | }
227 | sbp.cursor = v_1;
228 | if (v_1 >= sbp.limit)
229 | return;
230 | sbp.cursor++;
231 | }
232 | }
233 | }
234 |
        // Advance the cursor to just past the next vowel and then just past
        // the following non-vowel (the Snowball R1/R2 boundary scan).
        // Returns true if the end of the word was reached first.
        function habr3() {
          while (!sbp.in_grouping(g_v, 97, 251)) {
            if (sbp.cursor >= sbp.limit)
              return true;
            sbp.cursor++;
          }
          while (!sbp.out_grouping(g_v, 97, 251)) {
            if (sbp.cursor >= sbp.limit)
              return true;
            sbp.cursor++;
          }
          return false;
        }
248 |
249 | function r_mark_regions() {
250 | var v_1 = sbp.cursor;
251 | I_pV = sbp.limit;
252 | I_p1 = I_pV;
253 | I_p2 = I_pV;
254 | if (sbp.in_grouping(g_v, 97, 251) && sbp.in_grouping(g_v, 97, 251) && sbp.cursor < sbp.limit)
255 | sbp.cursor++;
256 | else {
257 | sbp.cursor = v_1;
258 | if (!sbp.find_among(a_0, 3)) {
259 | sbp.cursor = v_1;
260 | do {
261 | if (sbp.cursor >= sbp.limit) {
262 | sbp.cursor = I_pV;
263 | break;
264 | }
265 | sbp.cursor++;
266 | } while (!sbp.in_grouping(g_v, 97, 251));
267 | }
268 | }
269 | I_pV = sbp.cursor;
270 | sbp.cursor = v_1;
271 | if (!habr3()) {
272 | I_p1 = sbp.cursor;
273 | if (!habr3())
274 | I_p2 = sbp.cursor;
275 | }
276 | }
277 |
278 | function r_postlude() {
279 | var among_var, v_1;
280 | while (true) {
281 | v_1 = sbp.cursor;
282 | sbp.bra = v_1;
283 | among_var = sbp.find_among(a_1, 4);
284 | if (!among_var)
285 | break;
286 | sbp.ket = sbp.cursor;
287 | switch (among_var) {
288 | case 1:
289 | sbp.slice_from("i");
290 | break;
291 | case 2:
292 | sbp.slice_from("u");
293 | break;
294 | case 3:
295 | sbp.slice_from("y");
296 | break;
297 | case 4:
298 | if (sbp.cursor >= sbp.limit)
299 | return;
300 | sbp.cursor++;
301 | break;
302 | }
303 | }
304 | }
305 |
        // Snowball region checks: true when the cursor lies at or beyond the
        // start of region RV / R1 / R2 respectively (set by r_mark_regions).
        function r_RV() {
          return I_pV <= sbp.cursor;
        }

        function r_R1() {
          return I_p1 <= sbp.cursor;
        }

        function r_R2() {
          return I_p2 <= sbp.cursor;
        }
317 |
318 | function r_standard_suffix() {
319 | var among_var, v_1;
320 | sbp.ket = sbp.cursor;
321 | among_var = sbp.find_among_b(a_4, 43);
322 | if (among_var) {
323 | sbp.bra = sbp.cursor;
324 | switch (among_var) {
325 | case 1:
326 | if (!r_R2())
327 | return false;
328 | sbp.slice_del();
329 | break;
330 | case 2:
331 | if (!r_R2())
332 | return false;
333 | sbp.slice_del();
334 | sbp.ket = sbp.cursor;
335 | if (sbp.eq_s_b(2, "ic")) {
336 | sbp.bra = sbp.cursor;
337 | if (!r_R2())
338 | sbp.slice_from("iqU");
339 | else
340 | sbp.slice_del();
341 | }
342 | break;
343 | case 3:
344 | if (!r_R2())
345 | return false;
346 | sbp.slice_from("log");
347 | break;
348 | case 4:
349 | if (!r_R2())
350 | return false;
351 | sbp.slice_from("u");
352 | break;
353 | case 5:
354 | if (!r_R2())
355 | return false;
356 | sbp.slice_from("ent");
357 | break;
358 | case 6:
359 | if (!r_RV())
360 | return false;
361 | sbp.slice_del();
362 | sbp.ket = sbp.cursor;
363 | among_var = sbp.find_among_b(a_2, 6);
364 | if (among_var) {
365 | sbp.bra = sbp.cursor;
366 | switch (among_var) {
367 | case 1:
368 | if (r_R2()) {
369 | sbp.slice_del();
370 | sbp.ket = sbp.cursor;
371 | if (sbp.eq_s_b(2, "at")) {
372 | sbp.bra = sbp.cursor;
373 | if (r_R2())
374 | sbp.slice_del();
375 | }
376 | }
377 | break;
378 | case 2:
379 | if (r_R2())
380 | sbp.slice_del();
381 | else if (r_R1())
382 | sbp.slice_from("eux");
383 | break;
384 | case 3:
385 | if (r_R2())
386 | sbp.slice_del();
387 | break;
388 | case 4:
389 | if (r_RV())
390 | sbp.slice_from("i");
391 | break;
392 | }
393 | }
394 | break;
395 | case 7:
396 | if (!r_R2())
397 | return false;
398 | sbp.slice_del();
399 | sbp.ket = sbp.cursor;
400 | among_var = sbp.find_among_b(a_3, 3);
401 | if (among_var) {
402 | sbp.bra = sbp.cursor;
403 | switch (among_var) {
404 | case 1:
405 | if (r_R2())
406 | sbp.slice_del();
407 | else
408 | sbp.slice_from("abl");
409 | break;
410 | case 2:
411 | if (r_R2())
412 | sbp.slice_del();
413 | else
414 | sbp.slice_from("iqU");
415 | break;
416 | case 3:
417 | if (r_R2())
418 | sbp.slice_del();
419 | break;
420 | }
421 | }
422 | break;
423 | case 8:
424 | if (!r_R2())
425 | return false;
426 | sbp.slice_del();
427 | sbp.ket = sbp.cursor;
428 | if (sbp.eq_s_b(2, "at")) {
429 | sbp.bra = sbp.cursor;
430 | if (r_R2()) {
431 | sbp.slice_del();
432 | sbp.ket = sbp.cursor;
433 | if (sbp.eq_s_b(2, "ic")) {
434 | sbp.bra = sbp.cursor;
435 | if (r_R2())
436 | sbp.slice_del();
437 | else
438 | sbp.slice_from("iqU");
439 | break;
440 | }
441 | }
442 | }
443 | break;
444 | case 9:
445 | sbp.slice_from("eau");
446 | break;
447 | case 10:
448 | if (!r_R1())
449 | return false;
450 | sbp.slice_from("al");
451 | break;
452 | case 11:
453 | if (r_R2())
454 | sbp.slice_del();
455 | else if (!r_R1())
456 | return false;
457 | else
458 | sbp.slice_from("eux");
459 | break;
460 | case 12:
461 | if (!r_R1() || !sbp.out_grouping_b(g_v, 97, 251))
462 | return false;
463 | sbp.slice_del();
464 | break;
465 | case 13:
466 | if (r_RV())
467 | sbp.slice_from("ant");
468 | return false;
469 | case 14:
470 | if (r_RV())
471 | sbp.slice_from("ent");
472 | return false;
473 | case 15:
474 | v_1 = sbp.limit - sbp.cursor;
475 | if (sbp.in_grouping_b(g_v, 97, 251) && r_RV()) {
476 | sbp.cursor = sbp.limit - v_1;
477 | sbp.slice_del();
478 | }
479 | return false;
480 | }
481 | return true;
482 | }
483 | return false;
484 | }
485 |
        // Remove "i"-series verb suffixes (table a_5) found within region RV,
        // but only when preceded by a non-vowel. Backward matching is
        // temporarily limited to RV and restored afterwards.
        // Returns true if a suffix matched (even when nothing was deleted).
        function r_i_verb_suffix() {
          var among_var, v_1;
          if (sbp.cursor < I_pV)
            return false;
          v_1 = sbp.limit_backward;
          sbp.limit_backward = I_pV;
          sbp.ket = sbp.cursor;
          among_var = sbp.find_among_b(a_5, 35);
          if (!among_var) {
            sbp.limit_backward = v_1;
            return false;
          }
          sbp.bra = sbp.cursor;
          if (among_var == 1) {
            if (!sbp.out_grouping_b(g_v, 97, 251)) {
              sbp.limit_backward = v_1;
              return false;
            }
            sbp.slice_del();
          }
          sbp.limit_backward = v_1;
          return true;
        }
509 |
510 | function r_verb_suffix() {
511 | var among_var, v_2, v_3;
512 | if (sbp.cursor < I_pV)
513 | return false;
514 | v_2 = sbp.limit_backward;
515 | sbp.limit_backward = I_pV;
516 | sbp.ket = sbp.cursor;
517 | among_var = sbp.find_among_b(a_6, 38);
518 | if (!among_var) {
519 | sbp.limit_backward = v_2;
520 | return false;
521 | }
522 | sbp.bra = sbp.cursor;
523 | switch (among_var) {
524 | case 1:
525 | if (!r_R2()) {
526 | sbp.limit_backward = v_2;
527 | return false;
528 | }
529 | sbp.slice_del();
530 | break;
531 | case 2:
532 | sbp.slice_del();
533 | break;
534 | case 3:
535 | sbp.slice_del();
536 | v_3 = sbp.limit - sbp.cursor;
537 | sbp.ket = sbp.cursor;
538 | if (sbp.eq_s_b(1, "e")) {
539 | sbp.bra = sbp.cursor;
540 | sbp.slice_del();
541 | } else
542 | sbp.cursor = sbp.limit - v_3;
543 | break;
544 | }
545 | sbp.limit_backward = v_2;
546 | return true;
547 | }
548 |
549 | function r_residual_suffix() {
550 | var among_var, v_1 = sbp.limit - sbp.cursor,
551 | v_2, v_4, v_5;
552 | sbp.ket = sbp.cursor;
553 | if (sbp.eq_s_b(1, "s")) {
554 | sbp.bra = sbp.cursor;
555 | v_2 = sbp.limit - sbp.cursor;
556 | if (sbp.out_grouping_b(g_keep_with_s, 97, 232)) {
557 | sbp.cursor = sbp.limit - v_2;
558 | sbp.slice_del();
559 | } else
560 | sbp.cursor = sbp.limit - v_1;
561 | } else
562 | sbp.cursor = sbp.limit - v_1;
563 | if (sbp.cursor >= I_pV) {
564 | v_4 = sbp.limit_backward;
565 | sbp.limit_backward = I_pV;
566 | sbp.ket = sbp.cursor;
567 | among_var = sbp.find_among_b(a_7, 7);
568 | if (among_var) {
569 | sbp.bra = sbp.cursor;
570 | switch (among_var) {
571 | case 1:
572 | if (r_R2()) {
573 | v_5 = sbp.limit - sbp.cursor;
574 | if (!sbp.eq_s_b(1, "s")) {
575 | sbp.cursor = sbp.limit - v_5;
576 | if (!sbp.eq_s_b(1, "t"))
577 | break;
578 | }
579 | sbp.slice_del();
580 | }
581 | break;
582 | case 2:
583 | sbp.slice_from("i");
584 | break;
585 | case 3:
586 | sbp.slice_del();
587 | break;
588 | case 4:
589 | if (sbp.eq_s_b(2, "gu"))
590 | sbp.slice_del();
591 | break;
592 | }
593 | }
594 | sbp.limit_backward = v_4;
595 | }
596 | }
597 |
        // If the word ends in one of the doubled-consonant patterns in a_8
        // ("ell", "eill", "enn", "onn", "ett"), delete the final character.
        function r_un_double() {
          var v_1 = sbp.limit - sbp.cursor;
          if (sbp.find_among_b(a_8, 5)) {
            sbp.cursor = sbp.limit - v_1;
            sbp.ket = sbp.cursor;
            if (sbp.cursor > sbp.limit_backward) {
              sbp.cursor--;
              sbp.bra = sbp.cursor;
              sbp.slice_del();
            }
          }
        }
610 |
        // After at least one trailing non-vowel, rewrite a final accented
        // "\u00E9" or "\u00E8" to a plain "e".
        function r_un_accent() {
          var v_1, v_2 = 1;
          while (sbp.out_grouping_b(g_v, 97, 251))
            v_2--;
          if (v_2 <= 0) {
            sbp.ket = sbp.cursor;
            v_1 = sbp.limit - sbp.cursor;
            if (!sbp.eq_s_b(1, "\u00E9")) {
              sbp.cursor = sbp.limit - v_1;
              if (!sbp.eq_s_b(1, "\u00E8"))
                return;
            }
            sbp.bra = sbp.cursor;
            sbp.slice_from("e");
          }
        }
627 |
        // Run the suffix-removal steps in order: standard suffixes, then
        // "i"-verb suffixes, then verb suffixes; if none fired, fall back to
        // residual suffixes and stop. When one of the first three fired, also
        // normalise a trailing marker "Y" back to "i", or "\u00E7" to "c".
        function habr5() {
          if (!r_standard_suffix()) {
            sbp.cursor = sbp.limit;
            if (!r_i_verb_suffix()) {
              sbp.cursor = sbp.limit;
              if (!r_verb_suffix()) {
                sbp.cursor = sbp.limit;
                r_residual_suffix();
                return;
              }
            }
          }
          sbp.cursor = sbp.limit;
          sbp.ket = sbp.cursor;
          if (sbp.eq_s_b(1, "Y")) {
            sbp.bra = sbp.cursor;
            sbp.slice_from("i");
          } else {
            sbp.cursor = sbp.limit;
            if (sbp.eq_s_b(1, "\u00E7")) {
              sbp.bra = sbp.cursor;
              sbp.slice_from("c");
            }
          }
        }
653 | this.stem = function() {
654 | var v_1 = sbp.cursor;
655 | r_prelude();
656 | sbp.cursor = v_1;
657 | r_mark_regions();
658 | sbp.limit_backward = v_1;
659 | sbp.cursor = sbp.limit;
660 | habr5();
661 | sbp.cursor = sbp.limit;
662 | r_un_double();
663 | sbp.cursor = sbp.limit;
664 | r_un_accent();
665 | sbp.cursor = sbp.limit_backward;
666 | r_postlude();
667 | return true;
668 | }
669 | };
670 |
671 | /* and return a function that stems a word for the current locale */
672 | return function(word) {
673 | st.setCurrent(word);
674 | st.stem();
675 | return st.getCurrent();
676 | }
677 | })();
678 |
679 | lunr.Pipeline.registerFunction(lunr.fr.stemmer, 'stemmer-fr');
680 |
681 | /* stop word filter function */
    lunr.fr.stopWordFilter = function(token) {
      // Pass the token through only when it is not a French stop word;
      // returning undefined causes the pipeline to drop the token.
      if (lunr.fr.stopWordFilter.stopWords.indexOf(token) === -1) {
        return token;
      }
    };
687 |
688 | lunr.fr.stopWordFilter.stopWords = new lunr.SortedSet();
689 | lunr.fr.stopWordFilter.stopWords.length = 164;
690 |
691 | // The space at the beginning is crucial: It marks the empty string
692 | // as a stop word. lunr.js crashes during search when documents
693 | // processed by the pipeline still contain the empty string.
694 | lunr.fr.stopWordFilter.stopWords.elements = ' ai aie aient aies ait as au aura aurai auraient aurais aurait auras aurez auriez aurions aurons auront aux avaient avais avait avec avez aviez avions avons ayant ayez ayons c ce ceci celà ces cet cette d dans de des du elle en es est et eu eue eues eurent eus eusse eussent eusses eussiez eussions eut eux eûmes eût eûtes furent fus fusse fussent fusses fussiez fussions fut fûmes fût fûtes ici il ils j je l la le les leur leurs lui m ma mais me mes moi mon même n ne nos notre nous on ont ou par pas pour qu que quel quelle quelles quels qui s sa sans se sera serai seraient serais serait seras serez seriez serions serons seront ses soi soient sois soit sommes son sont soyez soyons suis sur t ta te tes toi ton tu un une vos votre vous y à étaient étais était étant étiez étions été étée étées étés êtes'.split(' ');
695 |
696 | lunr.Pipeline.registerFunction(lunr.fr.stopWordFilter, 'stopWordFilter-fr');
697 | };
698 | }))
--------------------------------------------------------------------------------
/test/deps/lunr.multi.js:
--------------------------------------------------------------------------------
1 | /**
2 | * export the module via AMD, CommonJS or as a browser global
3 | * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
4 | */
5 | ;(function (root, factory) {
6 | if (typeof define === 'function' && define.amd) {
7 | // AMD. Register as an anonymous module.
8 | define(factory)
9 | } else if (typeof exports === 'object') {
10 | /**
11 | * Node. Does not work with strict CommonJS, but
12 | * only CommonJS-like environments that support module.exports,
13 | * like Node.
14 | */
15 | module.exports = factory()
16 | } else {
17 | // Browser globals (root is window)
18 | factory()(root.lunr);
19 | }
20 | }(this, function () {
21 | /**
22 | * Just return a value to define the module export.
23 | * This example returns an object, but the module
24 | * can return a function as the exported value.
25 | */
26 | return function(lunr) {
27 | /* Set up the pipeline for indexing content in multiple languages. The
28 | corresponding lunr.{lang} files must be loaded before calling this
29 | function; English ('en') is built in.
30 |
31 | Returns: a lunr plugin for use in your indexer.
32 |
33 | Known drawback: every word will be stemmed with stemmers for every
34 | language. This could mean that sometimes words that have the same
35 | stemming root will not be stemmed as such.
36 | */
37 | lunr.multiLanguage = function(/* lang1, lang2, ... */) {
38 | var languages = Array.prototype.slice.call(arguments);
39 | var nameSuffix = languages.join('-');
40 | var wordCharacters = "";
41 | var pipeline = [];
42 | for (var i = 0; i < languages.length; ++i) {
43 | if (languages[i] == 'en') {
44 | wordCharacters += '\\w';
45 | pipeline.unshift(lunr.stopWordFilter);
46 | pipeline.push(lunr.stemmer);
47 | } else {
48 |
49 | wordCharacters += lunr[languages[i]].wordCharacters;
50 | pipeline.unshift(lunr[languages[i]].stopWordFilter);
51 | pipeline.push(lunr[languages[i]].stemmer);
52 | }
53 | };
54 | var multiTrimmer = lunr.trimmerSupport.generateTrimmer(wordCharacters);
55 | lunr.Pipeline.registerFunction(multiTrimmer, 'lunr-multi-trimmer-' + nameSuffix);
56 | pipeline.unshift(multiTrimmer);
57 |
58 | return function() {
59 | this.pipeline.reset();
60 | this.pipeline.add.apply(this.pipeline, pipeline);
61 | };
62 | }
63 | }
64 | }));
65 |
--------------------------------------------------------------------------------
/test/deps/lunr.stemmer.support.js:
--------------------------------------------------------------------------------
1 | /*!
2 | * Snowball JavaScript Library v0.3
3 | * http://code.google.com/p/urim/
4 | * http://snowball.tartarus.org/
5 | *
6 | * Copyright 2010, Oleg Mazko
7 | * http://www.mozilla.org/MPL/
8 | */
9 |
10 | /**
11 | * export the module via AMD, CommonJS or as a browser global
12 | * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
13 | */
14 | ;(function (root, factory) {
15 | if (typeof define === 'function' && define.amd) {
16 | // AMD. Register as an anonymous module.
17 | define(factory)
18 | } else if (typeof exports === 'object') {
19 | /**
20 | * Node. Does not work with strict CommonJS, but
21 | * only CommonJS-like environments that support module.exports,
22 | * like Node.
23 | */
24 | module.exports = factory()
25 | } else {
26 | // Browser globals (root is window)
27 | factory()(root.lunr);
28 | }
29 | }(this, function () {
30 | /**
31 | * Just return a value to define the module export.
32 | * This example returns an object, but the module
33 | * can return a function as the exported value.
34 | */
35 | return function(lunr) {
36 | /* provides utilities for the included stemmers */
37 | lunr.stemmerSupport = {
38 | Among: function(s, substring_i, result, method) {
39 | this.toCharArray = function(s) {
40 | var sLength = s.length, charArr = new Array(sLength);
41 | for (var i = 0; i < sLength; i++)
42 | charArr[i] = s.charCodeAt(i);
43 | return charArr;
44 | };
45 |
46 | if ((!s && s != "") || (!substring_i && (substring_i != 0)) || !result)
47 | throw ("Bad Among initialisation: s:" + s + ", substring_i: "
48 | + substring_i + ", result: " + result);
49 | this.s_size = s.length;
50 | this.s = this.toCharArray(s);
51 | this.substring_i = substring_i;
52 | this.result = result;
53 | this.method = method;
54 | },
55 | SnowballProgram: function() {
56 | var current;
57 | return {
58 | bra : 0,
59 | ket : 0,
60 | limit : 0,
61 | cursor : 0,
62 | limit_backward : 0,
63 | setCurrent : function(word) {
64 | current = word;
65 | this.cursor = 0;
66 | this.limit = word.length;
67 | this.limit_backward = 0;
68 | this.bra = this.cursor;
69 | this.ket = this.limit;
70 | },
71 | getCurrent : function() {
72 | var result = current;
73 | current = null;
74 | return result;
75 | },
76 | in_grouping : function(s, min, max) {
77 | if (this.cursor < this.limit) {
78 | var ch = current.charCodeAt(this.cursor);
79 | if (ch <= max && ch >= min) {
80 | ch -= min;
81 | if (s[ch >> 3] & (0X1 << (ch & 0X7))) {
82 | this.cursor++;
83 | return true;
84 | }
85 | }
86 | }
87 | return false;
88 | },
89 | in_grouping_b : function(s, min, max) {
90 | if (this.cursor > this.limit_backward) {
91 | var ch = current.charCodeAt(this.cursor - 1);
92 | if (ch <= max && ch >= min) {
93 | ch -= min;
94 | if (s[ch >> 3] & (0X1 << (ch & 0X7))) {
95 | this.cursor--;
96 | return true;
97 | }
98 | }
99 | }
100 | return false;
101 | },
102 | out_grouping : function(s, min, max) {
103 | if (this.cursor < this.limit) {
104 | var ch = current.charCodeAt(this.cursor);
105 | if (ch > max || ch < min) {
106 | this.cursor++;
107 | return true;
108 | }
109 | ch -= min;
110 | if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) {
111 | this.cursor++;
112 | return true;
113 | }
114 | }
115 | return false;
116 | },
117 | out_grouping_b : function(s, min, max) {
118 | if (this.cursor > this.limit_backward) {
119 | var ch = current.charCodeAt(this.cursor - 1);
120 | if (ch > max || ch < min) {
121 | this.cursor--;
122 | return true;
123 | }
124 | ch -= min;
125 | if (!(s[ch >> 3] & (0X1 << (ch & 0X7)))) {
126 | this.cursor--;
127 | return true;
128 | }
129 | }
130 | return false;
131 | },
132 | eq_s : function(s_size, s) {
133 | if (this.limit - this.cursor < s_size)
134 | return false;
135 | for (var i = 0; i < s_size; i++)
136 | if (current.charCodeAt(this.cursor + i) != s.charCodeAt(i))
137 | return false;
138 | this.cursor += s_size;
139 | return true;
140 | },
141 | eq_s_b : function(s_size, s) {
142 | if (this.cursor - this.limit_backward < s_size)
143 | return false;
144 | for (var i = 0; i < s_size; i++)
145 | if (current.charCodeAt(this.cursor - s_size + i) != s
146 | .charCodeAt(i))
147 | return false;
148 | this.cursor -= s_size;
149 | return true;
150 | },
151 | find_among : function(v, v_size) {
152 | var i = 0, j = v_size, c = this.cursor, l = this.limit, common_i = 0, common_j = 0, first_key_inspected = false;
153 | while (true) {
154 | var k = i + ((j - i) >> 1), diff = 0, common = common_i < common_j
155 | ? common_i
156 | : common_j, w = v[k];
157 | for (var i2 = common; i2 < w.s_size; i2++) {
158 | if (c + common == l) {
159 | diff = -1;
160 | break;
161 | }
162 | diff = current.charCodeAt(c + common) - w.s[i2];
163 | if (diff)
164 | break;
165 | common++;
166 | }
167 | if (diff < 0) {
168 | j = k;
169 | common_j = common;
170 | } else {
171 | i = k;
172 | common_i = common;
173 | }
174 | if (j - i <= 1) {
175 | if (i > 0 || j == i || first_key_inspected)
176 | break;
177 | first_key_inspected = true;
178 | }
179 | }
180 | while (true) {
181 | var w = v[i];
182 | if (common_i >= w.s_size) {
183 | this.cursor = c + w.s_size;
184 | if (!w.method)
185 | return w.result;
186 | var res = w.method();
187 | this.cursor = c + w.s_size;
188 | if (res)
189 | return w.result;
190 | }
191 | i = w.substring_i;
192 | if (i < 0)
193 | return 0;
194 | }
195 | },
196 | find_among_b : function(v, v_size) {
197 | var i = 0, j = v_size, c = this.cursor, lb = this.limit_backward, common_i = 0, common_j = 0, first_key_inspected = false;
198 | while (true) {
199 | var k = i + ((j - i) >> 1), diff = 0, common = common_i < common_j
200 | ? common_i
201 | : common_j, w = v[k];
202 | for (var i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
203 | if (c - common == lb) {
204 | diff = -1;
205 | break;
206 | }
207 | diff = current.charCodeAt(c - 1 - common) - w.s[i2];
208 | if (diff)
209 | break;
210 | common++;
211 | }
212 | if (diff < 0) {
213 | j = k;
214 | common_j = common;
215 | } else {
216 | i = k;
217 | common_i = common;
218 | }
219 | if (j - i <= 1) {
220 | if (i > 0 || j == i || first_key_inspected)
221 | break;
222 | first_key_inspected = true;
223 | }
224 | }
225 | while (true) {
226 | var w = v[i];
227 | if (common_i >= w.s_size) {
228 | this.cursor = c - w.s_size;
229 | if (!w.method)
230 | return w.result;
231 | var res = w.method();
232 | this.cursor = c - w.s_size;
233 | if (res)
234 | return w.result;
235 | }
236 | i = w.substring_i;
237 | if (i < 0)
238 | return 0;
239 | }
240 | },
241 | replace_s : function(c_bra, c_ket, s) {
242 | var adjustment = s.length - (c_ket - c_bra), left = current
243 | .substring(0, c_bra), right = current.substring(c_ket);
244 | current = left + s + right;
245 | this.limit += adjustment;
246 | if (this.cursor >= c_ket)
247 | this.cursor += adjustment;
248 | else if (this.cursor > c_bra)
249 | this.cursor = c_bra;
250 | return adjustment;
251 | },
252 | slice_check : function() {
253 | if (this.bra < 0 || this.bra > this.ket || this.ket > this.limit
254 | || this.limit > current.length)
255 | throw ("faulty slice operation");
256 | },
257 | slice_from : function(s) {
258 | this.slice_check();
259 | this.replace_s(this.bra, this.ket, s);
260 | },
261 | slice_del : function() {
262 | this.slice_from("");
263 | },
264 | insert : function(c_bra, c_ket, s) {
265 | var adjustment = this.replace_s(c_bra, c_ket, s);
266 | if (c_bra <= this.bra)
267 | this.bra += adjustment;
268 | if (c_bra <= this.ket)
269 | this.ket += adjustment;
270 | },
// Return the text of the current [bra, ket) slice.
slice_to : function() {
	this.slice_check();
	return current.substring(this.bra, this.ket);
},
// Backward string comparison against the text before the cursor;
// thin wrapper over eq_s_b.
eq_v_b : function(s) {
	return this.eq_s_b(s.length, s);
}
278 | };
279 | }
280 | };
281 |
// Helper for building language-specific trimmers for lunr pipelines.
lunr.trimmerSupport = {
  /**
   * Generate a trimmer that strips leading and trailing characters
   * which are NOT part of the given word-character class.
   *
   * @param {String} wordCharacters body of a regex character class
   *   (may contain ranges such as "A-Za-z"); it is interpolated
   *   verbatim, not escaped.
   * @returns {Function} token -> token with non-word edges removed
   */
  generateTrimmer: function(wordCharacters) {
    // Missing statement semicolons added; the originals relied on ASI.
    var startRegex = new RegExp("^[^" + wordCharacters + "]+");
    var endRegex = new RegExp("[^" + wordCharacters + "]+$");

    return function(token) {
      return token
        .replace(startRegex, '')
        .replace(endRegex, '');
    };
  }
};
294 | }
295 | }));
296 |
--------------------------------------------------------------------------------
/test/docs/test-docs-2.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 | var doc1 = {
3 | "_id": "1",
4 | "title": "III",
5 | "text": "some text"
6 | };
7 | var doc2 = {
8 | "_id": "2",
9 | "title": "III",
10 | "text": "some text"
11 | };
12 |
13 | module.exports = [doc1, doc2];
--------------------------------------------------------------------------------
/test/docs/test-docs-3.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 | var doc1 = {
3 | "_id": "1",
4 | "title": "This title is about Yoshi",
5 | "text": "This text is about Mario but it's much longer, so it shouldn't be weighted so much."
6 | };
7 | var doc2 = {
8 | "_id": "2",
9 | "title": "This title is about Mario",
10 | "text": "This text is about Yoshi, but it's much longer, so it shouldn't be weighted so much."
11 | };
12 | var doc3 = {
13 | _id: '3',
14 | 'title': 'this is about an albino',
15 | 'text': 'and this is about an elephant'
16 | };
17 | var doc4 = {
18 | _id: '4',
19 | 'title': 'this is about an albino',
20 | 'text': 'and this is about an albino'
21 | };
22 |
23 |
24 | module.exports = [doc1, doc2, doc3, doc4];
--------------------------------------------------------------------------------
/test/docs/test-docs-4.js:
--------------------------------------------------------------------------------
'use strict';

// Forty docs (20 "yoshi_*", 20 "mario_*") whose titles repeat the key
// word more often for lower indices, so earlier docs should rank
// higher — used by the limit/skip ordering tests.
var docs = [];
for (var i = 0; i < 20; i++) {
  var yoshiTitle = 'This title is about Yoshi';
  var marioTitle = 'This title is about Mario';
  // earlier ones are more strongly weighted
  for (var j = 0; j < (20 - i); j++) {
    yoshiTitle += ' Yoshi';
    marioTitle += ' Mario';
  }
  docs.push({ _id: 'yoshi_' + i, title: yoshiTitle });
  docs.push({ _id: 'mario_' + i, title: marioTitle });
}

module.exports = docs;
22 |
--------------------------------------------------------------------------------
/test/docs/test-docs-5.js:
--------------------------------------------------------------------------------
'use strict';

// Fixture docs exercising deep field paths ('deep.structure.text'),
// string arrays, numeric values, and invalid (null/empty) values in
// the indexer.
var docs = [
  {
    _id: '1',
    list: ['much', 'text', 'goes', 'in this array, you see']
  },
  {
    _id: '2',
    deep: {
      structure: {
        text: 'here is some copy about a squirrel'
      }
    }
  },
  {
    _id: '3',
    aNumber: 1
  },
  {
    _id: '4',
    invalid: null
  },
  {
    _id: '5',
    invalid: {}
  },
  {
    _id: '7',
    deep: {
      foo: null
    }
  },
  {
    // Fix: this doc previously duplicated _id '2', so bulkDocs rejected
    // it with a conflict and the null-structure case was never actually
    // stored or indexed. '8' matches the id sequence in test-docs-9.
    _id: '8',
    deep: {
      structure: null
    }
  }
];

module.exports = docs;
43 |
--------------------------------------------------------------------------------
/test/docs/test-docs-6.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | module.exports = [
4 | {
5 | _id: '1',
6 | text: 'words mentioned a lot words'
7 | },
8 | {
9 | _id: '2',
10 | text: 'words mentioned a lot happy'
11 | },
12 | ];
--------------------------------------------------------------------------------
/test/docs/test-docs-7.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | module.exports = [
4 | {
5 | _id : '1',
6 | text : 'This is text in English about the spleen, which is an organ.'
7 | },
8 | {
9 | _id: '2',
10 | text: 'Ça c\'est du texte français qui parle du spleen, ce qui veut dire ennui.'
11 | },
12 | {
13 | _id: '3',
14 | text: 'I am working.'
15 | }
16 | ];
--------------------------------------------------------------------------------
/test/docs/test-docs-8.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 | /* jshint ignore:start */
3 | var doc1 = {
4 | "_id": "1",
5 | "name": "javascript",
6 | "type": "dynamic",
7 | "category": "PL",
8 | "desc": "The most popular!"
9 | };
10 |
11 | var doc2 = {
12 | "_id": "2",
13 | "name": "scala-js",
14 | "type": "static",
15 | "category": "PL",
16 | "desc": "The new cool kid!"
17 | };
18 |
19 | var doc3 = {
20 | "_id": "3",
21 | "name": "clojure",
22 | "type": "dynamic",
23 | "category": "PL",
24 | "desc": "Rich Hickey Master piece!"
25 | };
26 |
27 | module.exports = [doc1, doc2, doc3];
28 | /* jshint ignore:end */
--------------------------------------------------------------------------------
/test/docs/test-docs-9.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var docs = [
4 | {
5 | _id: '1',
6 | list: ['much', 'text', 'goes', 'in this array, you see']
7 | },
8 | {
9 | _id: '2',
10 | nested: {
11 | array: [{
12 | aField: 'something'
13 | }]
14 | }
15 | },
16 | {
17 | _id: '3',
18 | aNumber : 1
19 | },
20 | {
21 | _id: '4',
22 | invalid: null
23 | },
24 | {
25 | _id: '5',
26 | invalid: {}
27 | },
28 | {
29 | _id: '7',
30 | nested: {
31 | foo: null
32 | }
33 | },
34 | {
35 | _id: '8',
36 | nested: {
37 | array: null
38 | }
39 | },
40 | {
41 | _id: '9',
42 | nested: {
43 | array: []
44 | }
45 | },
46 | {
47 | _id: '10',
48 | nested: {
49 | array: [{
50 | aField: 'something else'
51 | },{
52 | aField: 'something different'
53 | },{
54 | aField: 'foobar'
55 | }]
56 | }
57 | }
58 | ];
59 |
60 | module.exports = docs;
--------------------------------------------------------------------------------
/test/docs/test-docs.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 | /* jshint ignore:start */
3 | var doc1 = {
4 | "_id": "1",
5 | "chapter": "240",
6 | "title": "III",
7 | "href": "/Laws/GeneralLaws/PartIII/TitleIII/Chapter240/Section1",
8 | "text": "If the record title of land is clouded by an adverse claim, or by the possibility thereof, a person in possession of such land claiming an estate of freehold therein or an unexpired term of not less than ten years, and a person who by force of the covenants in a deed or otherwise may be liable in damages, if such claim should be sustained, may file a petition in the land court stating his interest, describing the land, the claims and the possible adverse claimants so far as known to him, and praying that such claimants may be summoned to show cause why they should not bring an action to try such claim. If no better description can be given, they may be described generally, as the heirs of A B or the like. Two or more persons having separate and distinct parcels of land in the same county and holding under the same source of title, or persons having separate and distinct interests in the same parcel or parcels, may join in a petition against the same supposed claimants. If the supposed claimants are residents of the commonwealth, the petition may be inserted like a declaration in a writ, and served by a copy, like a writ of original summons. Whoever is in the enjoyment of an easement shall be held to be in possession of land within the meaning of this section",
9 | "section": "1",
10 | "part": "III",
11 | "type": "general",
12 | "desc": "Petition to compel adverse claimant to try title"
13 | };
14 |
15 | var doc2 = {
16 | "_id": "2",
17 | "chapter": "240",
18 | "title": "III",
19 | "href": "/Laws/GeneralLaws/PartIII/TitleIII/Chapter240/Section10",
20 | "text": "After all the defendants have been served with process or notified as provided in section seven and after the appointment of a guardian ad litem or next friend, if such appointment has been made, the court may proceed as though all defendants had been actually served with process. Such action shall be a proceeding in rem against the land, and a judgment establishing or declaring the validity, nature or extent of the plaintiff’s title may be entered, and shall operate directly on the land and have the force of a release made by or on behalf of all defendants of all claims inconsistent with the title established or declared thereby. This and the four preceding sections shall not prevent the court from also exercising jurisdiction in personam against defendants actually served with process who are personally amenable to its judgments",
21 | "section": "10",
22 | "part": "III",
23 | "type": "general",
24 | "desc": "Proceeding in rem; effect of judgment"
25 | };
26 |
27 | var doc3 = {
28 | "_id": "3",
29 | "chapter": "240",
30 | "title": "III",
31 | "href": "/Laws/GeneralLaws/PartIII/TitleIII/Chapter240/Section10A",
32 | "text": "The superior court and the land court shall have concurrent jurisdiction of a civil action by any person or persons claiming an estate of freehold, or an unexpired term of not less than ten years, in land subject to a restriction described in section twenty-six of chapter one hundred and eighty-four, to determine and declare whether and in what manner and to what extent and for the benefit of what land the restriction is then enforceable, whether or not a violation has occurred or is threatened. The complaint shall state the names and addresses, so far as known to the plaintiff or plaintiffs, of the owners of the subject parcels as to which the determination is sought, of the owners of any benefited land and of any persons benefited other than persons interested in benefited land. There shall be filed therewith (1) a certified copy of the instrument or instruments imposing the restriction, or of a representative instrument if there are many and the complaint includes a summary of the remainder, and (2) a plan or sketch showing the approximate locations of the parcels as to which the determination is sought, and the other parcel or parcels, if any, which may have the benefit of the restriction, and the ways, public or open to public use, upon which the respective parcels abut or nearest thereto, and the street numbers, if any, of such parcels",
33 | "section": "10A",
34 | "part": "III",
35 | "type": "general",
36 | "desc": "Restrictions on land; determination; jurisdiction; petition"
37 | };
38 | module.exports = [doc1, doc2, doc3];
39 | /* jshint ignore:end */
--------------------------------------------------------------------------------
/test/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Mocha Tests
6 |
7 |
8 |
9 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
/*jshint expr:true */
'use strict';

// In-memory PouchDB adapter so the tests need no external CouchDB.
var Pouch = require('pouchdb-memory');
var uniq = require('uniq');

//
// your plugin goes here
//
var helloPlugin = require('../');
Pouch.plugin(helloPlugin);

var chai = require('chai');
chai.use(require("chai-as-promised"));

//
// more variables you might want
//
var should = chai.should(); // var should = chai.should();
require('bluebird'); // var Promise = require('bluebird');

// have to make this global for the languages plugin, sadly
global.lunr = require('lunr');
require('./deps/lunr.stemmer.support')(global.lunr);
require('./deps/lunr.fr')(global.lunr);
require('./deps/lunr.multi')(global.lunr);
// TEST_DB may be a comma-separated list of database names/URLs; an
// http URL selects the remote ('http') variant of the suite.
var dbs;
if (process.browser) {
  dbs = 'testdb' + Math.random();
} else {
  dbs = process.env.TEST_DB || 'testdb';
}

// `tests` is a hoisted function declaration, so calling it before the
// fixture requires below is safe: the docs are only read inside mocha
// callbacks, long after this module has finished loading.
dbs.split(',').forEach(function (db) {
  var dbType = /^http/.test(db) ? 'http' : 'local';
  tests(db, dbType);
});

var docs = require('./docs/test-docs');
var docs2 = require('./docs/test-docs-2');
var docs3 = require('./docs/test-docs-3');
var docs4 = require('./docs/test-docs-4');
var docs5 = require('./docs/test-docs-5');
var docs6 = require('./docs/test-docs-6');
var docs7 = require('./docs/test-docs-7');
var docs8 = require('./docs/test-docs-8');
var docs9 = require('./docs/test-docs-9');
48 |
49 | function tests(dbName, dbType) {
50 |
51 | var db;
52 |
53 | describe(dbType + ': search test suite', function () {
54 | this.timeout(30000);
55 |
    // Fresh database per test, destroyed afterwards, so indexes built
    // by one test never leak into the next.
    beforeEach(function () {
      db = new Pouch(dbName);
      return db;
    });
    afterEach(function () {
      return db.destroy();
    });
63 |
    // Smoke tests: single hit, zero hits, tied scores, tf-idf ordering.
    it('basic search', function () {
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'sketch'
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.length.should.equal(1);
        res.rows[0].id.should.equal('3');
        res.rows[0].score.should.be.above(0);
      });
    });

    it('basic search - zero results', function () {
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'fizzbuzz'
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.length.should.equal(0);
      });
    });

    it('basic search - equal scores', function () {
      // docs2 holds two content-identical documents.
      return db.bulkDocs({docs: docs2}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'text'
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.length.should.equal(2);
        res.rows[0].score.should.equal(res.rows[1].score);
      });
    });

    it('basic search - ordering', function () {

      // the word "court" is used once in the first doc,
      // twice in the second, and twice in the third,
      // but the third is longest, so tf-idf should give us
      // 2 3 1

      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'court'
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.length.should.equal(3);
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['2', '3', '1'], 'got incorrect doc order: ' + JSON.stringify(res));
      });
    });
122 |
    // "Minimum should match" (mm) tests: the percentage of query terms
    // that must appear in a doc for it to count as a hit.
    it('search with mm=100% and 1/2 match', function () {

      // if mm (minimum should match) is 100%, that means all terms in the
      // query must be present in the document. I find this most intuitive,
      // so it's the default

      // docs 1 and 2 both contain the word 'title', but only 1 contains
      // both of the words 'title' and 'clouded'

      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'clouded title',
          mm: '100%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });

    it('search with mm=50% and 2/2 match', function () {
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'clouded title',
          mm: '50%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });

    it('search with mm=1% and 1/3 match', function () {
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'clouded nonsenseword anothernonsenseword',
          mm: '1%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });

    // Boundary checks around 1/3 = 33.33…%: 34% excludes a 1-of-3
    // match, 33% includes it.
    it('search with mm=34% and 1/3 match', function () {
      // should be rounded down to two decimal places ala Solr
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'clouded nonsenseword anothernonsenseword',
          mm: '34%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal([], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });
    it('search with mm=34% and 2/3 match', function () {
      // should be rounded down to two decimal places ala Solr
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'clouded title anothernonsenseword',
          mm: '34%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });
    it('search with mm=33% and 1/3 match', function () {
      // should be rounded down to two decimal places ala Solr
      return db.bulkDocs({docs: docs}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'clouded nonsenseword anothernonsenseword',
          mm: '33%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });
215 |
    // Scoring-shape tests: field-length normalization and dismax.
    it('should weight short fields more strongly', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'yoshi'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score, 'score should be higher');
      });
    });

    it('should weight short fields more strongly part 2', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'mario'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['2', '1'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score, 'score should be higher');
      });
    });

    it('should use dismax weighting', function () {
      // see http://lucene.apache.org/core/3_0_3/api/core/org/apache/
      // lucene/search/DisjunctionMaxQuery.html
      // for why this example makes sense

      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: ['title', 'text', 'desc'],
          query: 'albino elephant',
          mm: '50%'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['3', '4'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score, 'score should be higher');
      });
    });

    it('should work with one field only', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: ['text'],
          query: 'mario'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });
275 |
    // Index lifecycle: destroying an index, stale reads, explicit build.
    it('should be able to delete', function () {
      var opts = {
        fields: ['text'],
        query: 'mario'
      };
      return db.bulkDocs({docs: docs3}).then(function () {
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1'], 'got incorrect docs: ' + JSON.stringify(res));
        // destroy:true tears the index down; stale:'ok' afterwards must
        // therefore see nothing.
        opts.destroy = true;
        delete opts.query;
        return db.search(opts);
      }).then(function () {
        opts.stale = 'ok';
        opts.destroy = false;
        opts.query = 'mario';
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(0, 'expect no search results for stale=ok');
      });
    });

    it('gives zero results when stale', function () {
      var opts = {
        fields: ['text', 'title'],
        query: 'mario',
        stale: 'ok'
      };
      return db.bulkDocs({docs: docs3}).then(function () {
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(0, 'no results after stale=ok');
        opts.stale = 'update_after';
        return db.search(opts);
      }).then(function (res) {
        // update_after kicks off indexing in the background, so any
        // result count from 0 to 2 is acceptable here.
        res.rows.length.should.be.within(0, 2, 'no results after stale=update_after');
        delete opts.stale;
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(2, 'got results eventually');
      });
    });

    it('can explicitly build an index', function () {
      var opts = {
        fields: ['text', 'title'],
        build: true
      };
      return db.bulkDocs({docs: docs3}).then(function () {
        return db.search(opts);
      }).then(function (info) {
        info.should.deep.equal({ok: true});
        delete opts.build;
        opts.query = 'mario';
        opts.stale = 'ok';
        return db.search(opts);
      }).then(function (res) {
        // stale:'ok' still finds results because build:true already
        // populated the index.
        res.rows.should.have.length(2, 'got results after building');
      });
    });
337 |
    // Index identity, stopwords, and per-field boost weights.
    it('uniquely IDs same fields with different order', function () {
      var opts = {
        fields: ['text', 'title'],
        query: 'mario'
      };
      return db.bulkDocs({docs: docs3}).then(function () {
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['2', '1'], 'got incorrect docs: ' + JSON.stringify(res));
        // Same fields, reordered, with stale:'ok' — must hit the same
        // already-built index, proving field order doesn't change its id.
        opts = {
          fields: ['title', 'text'],
          query: 'mario',
          stale: 'ok'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['2', '1'], 'got incorrect docs: ' + JSON.stringify(res));
      });
    });

    it('should work with pure stopwords', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: ['text'],
          query: 'to be or not to be'
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(0);
      });
    });

    it('allows you to weight fields', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: {'text': 10, 'title': 1},
          query: 'mario'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score);
      });
    });

    it('allows you to weight fields part 2', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: {'text': 10, 'title': 1},
          query: 'yoshi'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['2', '1'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score);
      });
    });
399 |
    // Highlighting and include_docs options.
    // NOTE(review): the expected highlight strings below contain no
    // markup even though highlighting is enabled — confirm against the
    // plugin's highlighting_pre/post defaults; markup may have been
    // stripped from this copy of the file.
    it('allows you to highlight', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: {'text': 1, 'title': 1},
          query: 'yoshi',
          highlighting: true
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score);
        var hls = res.rows.map(function (x) { return x.highlighting; });
        hls.should.deep.equal([
          {title: 'This title is about Yoshi'},
          {text: "This text is about Yoshi, but it's " +
            "much longer, so it shouldn't be weighted so much."}
        ]);
      });
    });
    it('allows you to highlight with custom tags', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: {'text': 1, 'title': 1},
          query: 'yoshi',
          highlighting: true,
          highlighting_pre: '',
          highlighting_post: ''
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score);
        var hls = res.rows.map(function (x) { return x.highlighting; });
        hls.should.deep.equal([
          {title: 'This title is about Yoshi'},
          {text: "This text is about Yoshi, but it's " +
            "much longer, so it shouldn't be weighted so much."}
        ]);
      });
    });
    it('supports include_docs', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: {'text': 1, 'title': 1},
          q: 'yoshi', // 'q' is accepted as an alias for 'query' here
          include_docs: true
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
        var docs = res.rows.map(function (x) {
          return {
            _id: x.doc._id,
            text: x.doc.text,
            title: x.doc.title
          };
        });
        docs.should.deep.equal(docs3.slice(0, 2));
      });
    });
463 | it("doesn't highlight or include docs by default", function () {
464 | return db.bulkDocs({docs: docs3}).then(function () {
465 | var opts = {
466 | fields: {'text': 1, 'title': 1},
467 | q: 'yoshi'
468 | };
469 | return db.search(opts);
470 | }).then(function (res) {
471 | var ids = res.rows.map(function (x) { return x.id; });
472 | ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
473 | should.not.exist(ids[0].doc);
474 | should.not.exist(ids[0].highlighting);
475 | });
476 | });
    // highlighting and include_docs are independent options; both at
    // once must yield both row decorations.
    it('can highlight and include docs at the same time', function () {
      return db.bulkDocs({docs: docs3}).then(function () {
        var opts = {
          fields: {'text': 1, 'title': 1},
          query: 'yoshi',
          highlighting: true,
          include_docs: true
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2'], 'got incorrect docs: ' + JSON.stringify(res));
        res.rows[0].score.should.not.equal(res.rows[1].score);
        var hls = res.rows.map(function (x) { return x.highlighting; });
        hls.should.deep.equal([
          {title: 'This title is about Yoshi'},
          {text: "This text is about Yoshi, but it's " +
            "much longer, so it shouldn't be weighted so much."}
        ]);
        var docs = res.rows.map(function (x) {
          return {
            _id: x.doc._id,
            text: x.doc.text,
            title: x.doc.title
          };
        });
        docs.should.deep.equal(docs3.slice(0, 2));
      });
    });
506 |
    // Pagination: limit/skip over the 40-doc docs4 fixture, whose
    // scores strictly decrease from yoshi_0 to yoshi_19.
    it('supports limit', function () {
      return db.bulkDocs({docs: docs4}).then(function () {
        var opts = {
          fields: ['text', 'title'],
          query: 'yoshi',
          limit: 5
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(5);
        // all 5 scores distinct, so the ordering below is meaningful
        uniq(res.rows.map(function (x) { return x.score; })).should.have.length(5);
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['yoshi_0', 'yoshi_1', 'yoshi_2', 'yoshi_3', 'yoshi_4']);
      });
    });

    it('supports skip', function () {
      return db.bulkDocs({docs: docs4}).then(function () {
        var opts = {
          fields: ['text', 'title'],
          query: 'yoshi',
          skip: 15
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(5);
        uniq(res.rows.map(function (x) { return x.score; })).should.have.length(5);
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['yoshi_15', 'yoshi_16', 'yoshi_17', 'yoshi_18', 'yoshi_19']);
      });
    });

    it('supports limit and skip', function () {
      return db.bulkDocs({docs: docs4}).then(function () {
        var opts = {
          fields: ['text', 'title'],
          query: 'yoshi',
          skip: 10,
          limit: 5
        };
        return db.search(opts);
      }).then(function (res) {
        res.rows.should.have.length(5);
        uniq(res.rows.map(function (x) { return x.score; })).should.have.length(5);
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['yoshi_10', 'yoshi_11', 'yoshi_12', 'yoshi_13', 'yoshi_14']);
      });
    });
555 |
    // Field-shape handling: dotted deep paths, arrays of nested
    // objects, string arrays, invalid values, numbers, repetition.
    it('allows searching deep fields', function () {
      return db.bulkDocs({docs: docs5}).then(function () {
        var opts = {
          fields: ['deep.structure.text'],
          query: 'squirrels'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['2']);
      });
    });
    it('allows searching from an array of nested objects', function () {
      return db.bulkDocs({docs: docs9}).then(function () {
        var opts = {
          fields: ['nested.array.aField'],
          query: 'something'
        };
        return db.search(opts);
      }).then(function (res) {
        // order by score isn't asserted here; lexicographic sort then
        // reverse yields the fixed ['2', '10'] ordering
        var ids = res.rows.map(function (x) { return x.id; }).sort().reverse();
        ids.should.deep.equal(['2', '10']);
      });
    });
    it('allows searching string arrays', function () {
      return db.bulkDocs({docs: docs5}).then(function () {
        var opts = {
          fields: ['list'],
          query: 'array'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1']);
      });
    });
    it('does nothing when the field is invalid', function () {
      return db.bulkDocs({docs: docs5}).then(function () {
        var opts = {
          fields: ['invalid'],
          query: 'foo'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal([]);
      });
    });
    it('can use numbers as field values', function () {
      return db.bulkDocs({docs: docs5}).then(function () {
        var opts = {
          fields: ['aNumber'],
          query: '1'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['3']);
      });
    });
    it('weights higher when words are mentioned more than once', function () {
      return db.bulkDocs({docs: docs6}).then(function () {
        var opts = {
          fields: ['text'],
          query: 'word'
        };
        return db.search(opts);
      }).then(function (res) {
        var ids = res.rows.map(function (x) { return x.id; });
        ids.should.deep.equal(['1', '2']);
        res.rows[0].score.should.not.equal(res.rows[1].score);
      });
    });
629 |
630 | it('indexes english and french simultaneously', function () {
631 | return db.bulkDocs({docs: docs7}).then(function () {
632 | var opts = {
633 | fields: ['text'],
634 | query: 'parlera',
635 | language: 'fr'
636 | };
637 | return db.search(opts);
638 | }).then(function (res) {
639 | var ids = res.rows.map(function (x) { return x.id; });
640 | ids.should.deep.equal(['2']);
641 | return db.search({
642 | fields: ['text'],
643 | query: 'parlera', // parlera -> parle, wouldn't work in English
644 | language: 'en',
645 | stale: 'ok'
646 | });
647 | }).then(function (res) {
648 | res.rows.should.have.length(0);
649 | return db.search({
650 | fields: ['text'],
651 | query: 'spleen',
652 | language: 'en',
653 | stale: 'ok'
654 | });
655 | }).then(function (res) {
656 | res.rows.should.have.length(0);
657 | return db.search({
658 | fields: ['text'],
659 | query: 'spleen',
660 | language: 'en'
661 | });
662 | }).then(function (res) {
663 | var ids = res.rows.map(function (x) { return x.id; }).sort();
664 | ids.should.deep.equal(['1', '2']);
665 | return db.search({
666 | fields: ['text'],
667 | query: 'works', // working -> works, wouldn't work in French
668 | language: 'en'
669 | });
670 | }).then(function (res) {
671 | var ids = res.rows.map(function (x) { return x.id; }).sort();
672 | ids.should.deep.equal(['3']);
673 | return db.search({
674 | fields: ['text'],
675 | query: 'works',
676 | stale: 'ok' // no lang specified, default should be english
677 | });
678 | }).then(function (res) {
679 | var ids = res.rows.map(function (x) { return x.id; }).sort();
680 | ids.should.deep.equal(['3']);
681 | return db.search({
682 | fields: ['text'],
683 | query: 'parlera',
684 | language: 'fr'
685 | });
686 | }).then(function (res) {
687 | var ids = res.rows.map(function (x) { return x.id; });
688 | ids.should.deep.equal(['2']);
689 | return db.search({
690 | fields: ['text'],
691 | query: 'parlera',
692 | language: ['en','fr']
693 | });
694 | }).then(function(res) {
695 | var ids = res.rows.map(function (x) { return x.id; });
696 | ids.should.deep.equal(['2']);
697 | return db.search({
698 | fields: ['text'],
699 | query: 'spleen',
700 | language: ['en','fr']
701 | });
702 | }).then(function(res) {
703 | var ids = res.rows.map(function (x) { return x.id; }).sort();
704 | ids.should.deep.equal(['1', '2']);
705 | return db.search({
706 | fields: ['text'],
707 | query: 'works',
708 | language: ['en','fr']
709 | });
710 | }).then(function(res) {
711 | var ids = res.rows.map(function (x) { return x.id; }).sort();
712 | ids.should.deep.equal(['3']);
713 | });
714 | });
715 |
716 | it('search with filter', function () {
717 |
718 | // the word "court" is used in all 3 docs
719 | // but we filter out the doc._id === "2"
720 |
721 | return db.bulkDocs({docs: docs}).then(function () {
722 | var opts = {
723 | fields: ['title', 'text', 'desc'],
724 | query: 'court',
725 | filter: function (doc) { return doc._id !== "2"; }
726 | };
727 | return db.search(opts);
728 | }).then(function (res) {
729 | res.rows.length.should.equal(2);
730 | var ids = res.rows.map(function (x) { return x.id; });
731 | ids.should.deep.equal(['3', '1']);
732 | });
733 | });
734 |
735 | it('search with filter - Error thrown ', function () {
736 |
737 | //the filter function will throw an Error for
738 | //one doc, which filter it out.
739 |
740 | var error;
741 |
742 | //filter function throw an error ?
743 | db.on('error', function (err) {
744 | error = err;
745 | });
746 |
747 | return db.bulkDocs({docs: docs}).then(function () {
748 | var opts = {
749 | fields: ['title', 'text', 'desc'],
750 | query: 'court',
751 | filter: function (doc) { if (doc._id === '1') { throw new Error("oups"); } return true; }
752 | };
753 | return db.search(opts);
754 | }).then(function (res) {
755 | res.rows.length.should.equal(2);
756 | var ids = res.rows.map(function (x) { return x.id; });
757 | ids.should.deep.equal(['2', '3']);
758 | error.should.have.property('message', 'oups');
759 | });
760 | });
761 |
762 | it('total_rows', function () {
763 |
764 | return db.bulkDocs({docs: docs8}).then(function () {
765 | var opts = {
766 | fields: ['category'],
767 | query: 'PL'
768 | };
769 | return db.search(opts);
770 | }).then(function (res) {
771 | res.total_rows.should.equal(3);
772 | });
773 | });
774 |
775 | it('total_rows with filter and limit', function () {
776 |
777 | return db.bulkDocs({docs: docs8}).then(function () {
778 | var opts = {
779 | fields: ['category'],
780 | query: 'PL',
781 | limit: 1,
782 | filter: function (doc) { return doc.type !== "static"; }
783 | };
784 | return db.search(opts);
785 | }).then(function (res) {
786 | res.total_rows.should.equal(2);
787 | });
788 | });
789 |
790 | });
791 | }
792 |
--------------------------------------------------------------------------------
/test/webrunner.js:
--------------------------------------------------------------------------------
/* global mocha: true */

(function () {
  'use strict';

  // Collect mocha outcomes on window.results so an external poller
  // (e.g. the selenium harness) can read pass/fail state from the page.
  var results = window.results = {
    lastPassed: '',
    passed: 0,
    failed: 0,
    failures: []
  };

  var runner = mocha.run();

  runner.on('pass', function (test) {
    results.lastPassed = test.title;
    results.passed += 1;
  });

  runner.on('fail', function (test) {
    results.failed += 1;
    results.failures.push({
      title: test.title,
      message: test.err.message,
      stack: test.err.stack
    });
  });

  runner.on('end', function () {
    results.completed = true;
    // NOTE(review): bumping `passed` on completion looks deliberate
    // (guarantees passed > 0 once the run ends) — confirm with the harness.
    results.passed += 1;
  });
})();
32 |
33 |
34 |
--------------------------------------------------------------------------------