├── .editorconfig
├── .eslintignore
├── .eslintrc
├── .gitignore
├── LICENSE
├── README.md
├── example
├── example.html
├── example.js
├── helper
│ ├── arrayHelper.js
│ ├── cryptoHelper.js
│ ├── encryptionHelper.js
│ ├── serializeHelper.js
│ └── stringHelper.js
├── index.html
├── package.json
└── rollup.config.js
├── karma.conf.js
├── package.json
└── src
├── helper
├── array.js
├── array.spec.js
├── idb.js
├── idb.spec.js
├── lru.js
├── lru.spec.js
├── scoring.js
├── scoring.spec.js
├── sizeof.js
├── sizeof.spec.js
├── tokenize.js
├── tokenize.spec.js
├── variableByteCodes.js
├── variableByteCodes.spec.js
├── wildcard.js
└── wildcard.spec.js
├── index.esm.js
├── index.spec.js
├── master.js
├── query
├── grammar.peg
├── grammar.spec.js
├── parser.js
├── parser.spec.js
├── query.js
└── query.spec.js
└── store
├── keyValueStore.js
├── metadataStore.js
├── positionsStore.js
├── postingsStore.js
├── postingsStore.spec.js
├── transposeStore.js
├── transposeStore.spec.js
└── wildcardStore.js
/.editorconfig:
--------------------------------------------------------------------------------
1 | # editorconfig.org
2 | root = true
3 |
4 | [*]
5 | indent_style = space
6 | indent_size = 4
7 | end_of_line = lf
8 | charset = utf-8
9 | trim_trailing_whitespace = true
10 | insert_final_newline = true
11 |
12 | [*.md]
13 | trim_trailing_whitespace = false
14 |
--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | grammar.js
2 |
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "eslint:recommended",
3 | "env": {
4 | "es6": true,
5 | "browser": true,
6 | "jasmine": true
7 | },
8 | "globals": {
9 | },
10 | "parserOptions": {
11 | "ecmaVersion": 8,
12 | "sourceType": "module",
13 | "ecmaFeatures": {
14 | "experimentalObjectRestSpread": true
15 | }
16 | },
17 | "rules": {
18 | "no-console": "off",
19 | "indent": [
20 | 2,
21 | 4,
22 | {
23 | "SwitchCase": 1
24 | }
25 | ],
26 | "no-unused-vars": [
27 | "error"
28 | ],
29 | "quotes": [
30 | 2,
31 | "single"
32 | ],
33 | "linebreak-style": [
34 | 2,
35 | "unix"
36 | ],
37 | "semi": [
38 | 2,
39 | "never"
40 | ]
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .cache
2 | .DS_Store
3 | .forever
4 | .node-gyp
5 | .npm
6 | .idea
7 | node_modules
8 | npm-debug.log
9 | dist
10 | package-lock.json
11 | src/query/grammar.js
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # encrypted-search
2 |
3 | This library provides encrypted-search functionality for the browser.
4 |
5 | The library requires the support of Promises, async/await, modules, and IndexedDB
6 |
7 | ## Browser support
8 | Chrome, Safari, Firefox, Edge, IE11
9 |
10 | ## Usage
11 |
12 | ```javascript
13 | import { create, query, parse, tokenize } from 'encrypted-search'
14 |
15 | // The encryption helpers. The hash is used for the key name. Encrypt and decrypt for the values.
16 | const transformers = {
17 | property: (tableId, input) => input,
18 | serialize: (tableId, key, value) => value,
19 | deserialize: (tableId, key, value) => value
20 | }
21 |
22 | const index = create({ transformers })
23 |
24 | await index.store('123', tokenize('this is a long string that can be searched'))
25 |
26 | const searchString = '(this << is & "long string") | can'
27 | const results = await query(index.search, index.wildcard, parse(searchString))
28 | // Returns
29 | [
30 | {
31 | "id": "123",
32 | "keywords": [
33 | "this",
34 | "is",
35 | "long",
36 | "string",
37 | "that",
38 | "can",
39 | "be",
40 | "searched"
41 | ],
42 | "match": [
43 | "this",
44 | "is",
45 | "long",
46 | "string"
47 | ]
48 | }
49 | ]
50 |
51 | //
52 | index.close()
53 | ```
54 |
55 | ## Query syntax
56 | It has support for multiple search operators.
57 |
58 | | Operator | Example | Matches documents that|
59 | |-------------|---------------------------------------------------------|-------------------------------------------------------------------|
60 | | AND | these words must appear / these & words & must & appear | contain all keywords |
61 | | OR | these | words | can | appear | contain any keywords |
62 | | PHRASE | "these words appear in order" | contain all keywords in exact order |
63 | | NOT | hello !world | contain hello but not world |
64 | | QUORUM | "good fast cheap"/2 | contain at least 2 keywords |
65 | | PROXIMITY | "close by"~2 | contain all keywords with no more than 2 words between them |
66 | | BEFORE | before << after | contain all keywords and in order |
67 | | WILDCARD | af* | contain the wildcarded keyword |
68 | | COMBINATION | (these words | any o*der) << after | fulfil the query in combination |
69 |
70 | ## Default Options
71 | TODO
72 |
73 |
74 | ## Example
75 |
76 | Example available in the example/ folder
77 |
78 | ## Author
79 |
80 | Mattias Svanström (@mmso) - ProtonMail
81 |
--------------------------------------------------------------------------------
/example/example.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Example
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/example/example.js:
--------------------------------------------------------------------------------
1 | import { create, query, parse, tokenize } from '../src/index.esm'
2 | import createEncryptionHelper from './helper/encryptionHelper'
3 |
4 | const key = new Uint8Array(32)
5 | const salt = new Uint8Array(32)
6 |
/**
 * Build a keydown handler that runs a search when Enter is pressed and
 * renders the results into the given element.
 * @param {Object} index Search index exposing `search` and `wildcard`.
 * @param {HTMLElement} $results Element that receives the rendered JSON.
 * @returns {Function} Async keydown event handler.
 */
const createKeydownHandler = (index, $results) => {
    return async (e) => {
        const { key, target } = e
        // Only react to the Enter key; let every other key through.
        if (key !== 'Enter') {
            return
        }
        e.preventDefault()

        // Consume the query text and clear the input for the next search.
        const value = target.value
        target.value = ''

        const results = await query(index.search, index.wildcard, parse(value))
        $results.innerHTML = JSON.stringify(results, null, 2)
    }
}
22 |
/**
 * Bootstrap the example page: build the DOM, create an encrypted index,
 * store a few sample documents and wire up the search input.
 */
const init = async () => {
    document.body.innerHTML = `


`

    // key/salt are the module-level demo values (all zero bytes).
    const transformers = createEncryptionHelper(key, salt)
    const index = create({ transformers })

    // Seed the index with sample documents keyed by id.
    await index.store('123', tokenize('this is a long string that can be searched'))
    await index.store('124', tokenize('this is another document that is inserted into the index'))
    await index.store('125', tokenize('this is the last document'))

    const $input = document.body.querySelector('input')
    const $results = document.body.querySelector('.results')

    const onKeydown = createKeydownHandler(index, $results)
    $input.addEventListener('keydown', onKeydown)
    $input.focus()
}
43 |
44 | document.addEventListener('DOMContentLoaded', init)
45 |
--------------------------------------------------------------------------------
/example/helper/arrayHelper.js:
--------------------------------------------------------------------------------
/**
 * Concatenate two Uint8Arrays into a new one.
 * @param {Uint8Array} a
 * @param {Uint8Array} b
 * @returns {Uint8Array} New array holding the bytes of a followed by b.
 */
export const concatUint8Array = (a, b) => {
    const joined = new Uint8Array(a.byteLength + b.byteLength)
    joined.set(new Uint8Array(a))
    joined.set(new Uint8Array(b), a.byteLength)
    return joined
}
13 |
/**
 * Convert a binary string (one character per byte, e.g. the output of
 * encodeUtf8) to a Uint8Array of its char codes.
 * @param {String} string
 * @return {Uint8Array}
 */
export const stringToUint8Array = (string) => {
    const length = string.length
    const bytes = new Uint8Array(length)
    for (let i = 0; i < length; ++i) {
        bytes[i] = string.charCodeAt(i)
    }
    return bytes
}
26 |
/**
 * Convert a Uint8Array to a binary string (one character per byte).
 * Processed in 16 KiB chunks to stay under the argument-count limit
 * of Function.prototype.apply.
 * @param {Uint8Array} array
 * @returns {string}
 */
export const uint8ArrayToString = (array) => {
    const CHUNK = 1 << 14
    const total = array.length
    const parts = []
    for (let offset = 0; offset < total; offset += CHUNK) {
        const end = offset + CHUNK < total ? offset + CHUNK : total
        // eslint-disable-next-line prefer-spread
        parts.push(String.fromCharCode.apply(String, array.subarray(offset, end)))
    }
    return parts.join('')
}
42 |
43 |
--------------------------------------------------------------------------------
/example/helper/cryptoHelper.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable max-len */
2 | import nacl from 'tweetnacl'
3 | import { Pbkdf2HmacSha256 } from 'asmCrypto.js'
4 | import { concatUint8Array } from './arrayHelper'
5 |
6 | const ITERATIONS = 10
7 | const DKLEN = 16
8 | const KEY_LENGTH = 32
9 | const SALT_LENGTH = 32
10 | export const NONCE_LENGTH = 24
11 |
// eslint-disable-next-line import/prefer-default-export
/**
 * Fill a typed array with cryptographically secure random values, trying
 * the available global crypto providers in turn (window, worker self,
 * IE11 msCrypto).
 * @param {Uint8Array} buf Buffer to fill in place.
 * @returns {Uint8Array} The same buffer.
 * @throws {Error} If no secure random number generator is available.
 */
export const getRandomValues = (buf) => {
    // Guard with typeof so this throws the explicit Error below rather
    // than a ReferenceError in contexts where `window` (workers) or
    // `self` is not defined.
    if (typeof window !== 'undefined' && window.crypto && window.crypto.getRandomValues) {
        window.crypto.getRandomValues(buf)
        return buf
    }
    if (typeof self !== 'undefined' && self.crypto && self.crypto.getRandomValues) {
        self.crypto.getRandomValues(buf)
        return buf
    }
    if (typeof window !== 'undefined' && window.msCrypto && window.msCrypto.getRandomValues) {
        window.msCrypto.getRandomValues(buf)
        return buf
    }
    throw new Error('No secure random number generator available.')
}
28 |
/**
 * Get a hash using a key-stretching algorithm (PBKDF2-HMAC-SHA256).
 * @param {Uint8Array} data Input to stretch.
 * @param {Uint8Array} salt Salt for the derivation.
 * @param {Number} iterations PBKDF2 iteration count (default 10).
 * @param {Number} dklen Derived key length in bytes (default 16).
 * @returns {Uint8Array}
 */
export const hash = (data, salt, iterations = ITERATIONS, dklen = DKLEN) => {
    return Pbkdf2HmacSha256(data, salt, iterations, dklen)
}
40 |
/**
 * Encrypt data using a non-deterministic algorithm: a fresh random nonce
 * is generated per call and prepended to the nacl secretbox ciphertext.
 * @param {Uint8Array} data Plaintext; falsy input returns undefined.
 * @param {Uint8Array} key Secret key for nacl.secretbox.
 * @returns {Uint8Array | undefined} nonce || ciphertext, or undefined.
 */
export const encrypt = (data, key) => {
    if (!data) {
        return
    }
    const nonce = getRandomValues(new Uint8Array(NONCE_LENGTH))
    const ciphertext = nacl.secretbox(data, nonce, key)
    return concatUint8Array(nonce, ciphertext)
}
55 |
/**
 * Decrypt data produced by encrypt: the first NONCE_LENGTH bytes are the
 * nonce, the remainder is the secretbox ciphertext.
 * @param {Uint8Array} data nonce || ciphertext; falsy input returns undefined.
 * @param {Uint8Array} key Secret key for nacl.secretbox.open.
 * @returns {Uint8Array | null | undefined} Plaintext, or whatever
 * nacl.secretbox.open returns on authentication failure (null per its docs),
 * or undefined for falsy input.
 */
export const decrypt = (data, key) => {
    if (!data) {
        return
    }
    const nonce = data.slice(0, NONCE_LENGTH)
    const ciphertext = data.slice(NONCE_LENGTH, data.length)
    return nacl.secretbox.open(ciphertext, nonce, key)
}
71 |
// Generate a fresh random 32-byte secret key.
export const generateKey = () => getRandomValues(new Uint8Array(KEY_LENGTH))
// Generate a fresh random 32-byte hashing salt.
export const generateSalt = () => getRandomValues(new Uint8Array(SALT_LENGTH))
74 |
--------------------------------------------------------------------------------
/example/helper/encryptionHelper.js:
--------------------------------------------------------------------------------
1 | import { decrypt, encrypt, hash } from './cryptoHelper'
2 | import { stringToUint8Array } from './arrayHelper'
3 | import { encodeUtf8 } from './stringHelper'
4 | import { TABLES } from '../../src/master'
5 | import { readJson, readUint32, writeJson, writeUint32 } from './serializeHelper'
6 |
/**
 * Create the transformers used by the index: per-table key hashing and
 * value serialization with encryption.
 * @param {Uint8Array} encryptionKey Symmetric key for value encryption.
 * @param {Uint8Array} hashSalt Salt for hashing table keys.
 * @returns {{property: Function, serialize: Function, deserialize: Function}}
 */
export default (encryptionKey, hashSalt) => {
    // Hash table keys, except for the lexicon and ids tables whose keys
    // are used as-is.
    const property = (table, key) => {
        if (table === TABLES.LEXICON) {
            return key
        }
        if (table === TABLES.IDS) {
            return key
        }
        return hash(stringToUint8Array(encodeUtf8(key)), hashSalt)
    }

    // Serialize a value to bytes before encryption (JSON for all tables).
    const writeTable = (table, key, data) => {
        return writeJson(data)
    }

    const serialize = (table, key, data) => {
        if (typeof data === 'undefined') {
            return
        }
        // BUG FIX: was `table === TABLES.POSTINGS || TABLES.POSITIONS || TABLES.WILDCARDS`,
        // which is always truthy, so every table skipped encryption.
        if (table === TABLES.POSTINGS || table === TABLES.POSITIONS || table === TABLES.WILDCARDS) {
            return data
        }
        if (table === TABLES.LEXICON_INVERSE || table === TABLES.IDS_INVERSE) {
            return writeUint32(data)
        }
        return encrypt(writeTable(table, key, data), encryptionKey)
    }

    // Inverse of writeTable.
    const readTable = (table, key, data) => {
        return readJson(data)
    }

    const deserialize = (table, key, data) => {
        if (typeof data === 'undefined') {
            return
        }
        // BUG FIX: same always-truthy condition as in serialize.
        if (table === TABLES.POSTINGS || table === TABLES.POSITIONS || table === TABLES.WILDCARDS) {
            return data
        }
        if (table === TABLES.LEXICON_INVERSE || table === TABLES.IDS_INVERSE) {
            return readUint32(data)
        }
        return readTable(table, key, decrypt(data, encryptionKey))
    }

    return {
        property,
        serialize,
        deserialize
    }
}
58 |
--------------------------------------------------------------------------------
/example/helper/serializeHelper.js:
--------------------------------------------------------------------------------
/**
 * Serialization helpers: JSON <-> Uint8Array plus compact little-endian
 * integer encoding for the inverse lookup tables.
 */
import { stringToUint8Array, uint8ArrayToString } from './arrayHelper'
import { decodeUtf8, encodeUtf8 } from './stringHelper'

/**
 * Write a javascript object to a Uint8Array (UTF-8 encoded JSON).
 * @param {Object} data
 * @returns {Uint8Array}
 */
export const writeJson = (data) => stringToUint8Array(encodeUtf8(JSON.stringify(data)))

/**
 * Read a Uint8Array (UTF-8 encoded JSON) back into a javascript value.
 * @param {Uint8Array} data
 * @returns {any}
 */
export const readJson = (data) => JSON.parse(decodeUtf8(uint8ArrayToString(data)))
17 |
/**
 * Number of bytes (1-4) needed to store the unsigned value.
 * @param {Number} val
 * @returns {Number}
 */
const getByteLength = (val) => {
    if (val <= 0xFF) {
        return 1
    }
    if (val <= 0xFFFF) {
        return 2
    }
    if (val <= 0xFFFFFF) {
        return 3
    }
    return 4
}

/**
 * Write a number into a variable-length (1-4 bytes) little-endian Uint8Array.
 * @param {Number} val
 * @returns {Uint8Array}
 */
export const writeUint32 = (val) => {
    const length = getByteLength(val)
    const bytes = new Uint8Array(length)
    let rest = val
    for (let i = 0; i < length; ++i) {
        bytes[i] = rest & 0xFF
        rest >>>= 8
    }
    return bytes
}
50 |
/**
 * Read a variable-length (1-4 bytes) little-endian Uint8Array into a number.
 * @param {Uint8Array} buf
 * @returns {number}
 */
export const readUint32 = (buf) => {
    const length = buf.length
    if (length <= 0) {
        return 0
    }
    let value = buf[0]
    if (length >= 2) {
        value |= buf[1] << 8
    }
    if (length >= 3) {
        value |= buf[2] << 16
    }
    if (length >= 4) {
        // Multiply rather than shift so the top byte stays unsigned.
        value += buf[3] * 0x1000000
    }
    return value
}
74 |
--------------------------------------------------------------------------------
/example/helper/stringHelper.js:
--------------------------------------------------------------------------------
/**
 * Convert a native javascript string to a string of utf8 bytes
 * (one character per byte).
 * @param {String} str The string to convert
 * @returns {String} A valid sequence of utf8 bytes
 */
export const encodeUtf8 = (str) => unescape(encodeURIComponent(str))
7 |
/**
 * Convert a string of utf8 bytes to a native javascript string.
 * @param {String} utf8 A valid sequence of utf8 bytes
 * @returns {String} A native javascript string
 */
export const decodeUtf8 = (utf8) => {
    try {
        return decodeURIComponent(escape(utf8))
    } catch (e) {
        // Malformed utf8 input: return it untouched instead of throwing.
        return utf8
    }
}
20 |
--------------------------------------------------------------------------------
/example/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Examples
6 |
7 |
8 | Example
9 |
10 |
11 |
--------------------------------------------------------------------------------
/example/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "example",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "dev": "npx rollup -c rollup.config.js -w"
8 | },
9 | "author": "",
10 | "license": "ISC",
11 | "devDependencies": {
12 | "rollup": "^0.62.0",
13 | "rollup-plugin-serve": "^0.4.2",
14 | "rollup-plugin-commonjs": "^9.1.3",
15 | "rollup-plugin-node-resolve": "^3.3.0"
16 | },
17 | "dependencies": {
18 | "asmcrypto.js": "^2.3.0",
19 | "tweetnacl": "^1.0.0"
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/example/rollup.config.js:
--------------------------------------------------------------------------------
import serve from 'rollup-plugin-serve'

// Rollup build for the example bundle: resolves node modules, converts
// CommonJS dependencies (ignoring the node 'crypto' shim) and serves the
// output locally during development.
export default [
    {
        input: './example.js',
        output: {
            name: 'MasterTabExample',
            file: 'dist/example.js',
            format: 'iife',
            interop: false,
            strict: false
        },
        plugins: [
            require('rollup-plugin-node-resolve')({
            }),
            require('rollup-plugin-commonjs')({
                ignoreGlobal: true,
                ignore: [ 'crypto' ]
            }),
            serve('')
        ]
    }
]
24 |
--------------------------------------------------------------------------------
/karma.conf.js:
--------------------------------------------------------------------------------
// Resolve the Chromium binary bundled with puppeteer for headless runs.
process.env.CHROME_BIN = require('puppeteer').executablePath()

/**
 * Karma configuration: bundles all src spec files through rollup and
 * runs them once in headless Chrome with Jasmine.
 */
module.exports = (config) => {
    config.set({
        basePath: '.',
        frameworks: ['jasmine'],
        client: {
            jasmine: {
                random: true,
                stopOnFailure: false,
                failFast: false,
                timeoutInterval: 2000
            }
        },
        files: [
            'src/**/*.spec.js'
        ],
        preprocessors: {
            'src/**/*.spec.js': ['rollup']
        },
        rollupPreprocessor: {
            output: {
                format: 'iife',
                name: 'Test',
                sourcemap: 'inline'
            },
            plugins: [
                require('rollup-plugin-node-resolve')({
                }),
                require('rollup-plugin-commonjs')({
                    ignoreGlobal: true,
                    ignore: [ 'crypto' ]
                })
            ]
        },
        logLevel: config.LOG_INFO,
        autoWatch: false,
        browsers: ['ChromeHeadless'],
        singleRun: true
    })
}
42 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "encrypted-search",
3 | "version": "1.0.0",
4 | "description": "Encrypted search index for the browser",
5 | "license": "MIT",
6 | "author": "Mattias Svanström ",
7 | "main": "src/index.esm.js",
8 | "scripts": {
9 | "lint": "eslint src/*.js src/**/*.js",
10 | "test": "npm run lint && npx karma start karma.conf.js",
11 | "build": "npx pegjs src/query/grammar.peg",
12 | "postinstall": "npm run build"
13 | },
14 | "dependencies": {
15 | "latenize": "^0.2.0"
16 | },
17 | "devDependencies": {
18 | "eslint": "^5.1.0",
19 | "jasmine": "^3.1.0",
20 | "jasmine-core": "^3.1.0",
21 | "karma": "^3.0.0",
22 | "karma-chrome-launcher": "^2.2.0",
23 | "karma-jasmine": "^1.1.2",
24 | "karma-rollup-preprocessor": "^6.0.0",
25 | "puppeteer": "^1.6.0",
26 | "rollup": "^0.64.1",
27 | "rollup-plugin-commonjs": "^9.1.3",
28 | "rollup-plugin-node-resolve": "^3.3.0",
29 | "asmcrypto.js": "^2.3.0",
30 | "tweetnacl": "^1.0.0"
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/helper/array.js:
--------------------------------------------------------------------------------
// Default helpers: identity extraction/transformation and strict equality.
const defaultExtractor = (a) => a
const defaultComparator = (a, b) => a === b
const defaultTransformer = (a) => a
4 |
/**
 * Get the values of an array that are unique by the extracted key,
 * keeping the first occurrence of each key.
 * @param {Array} array
 * @param {Function} extractor Reads the comparison key from a value.
 * @return {Array}
 */
export const unique = (array, extractor = defaultExtractor) => {
    const seen = new Set()
    return array.filter((value) => {
        const key = extractor(value)
        if (seen.has(key)) {
            return false
        }
        seen.add(key)
        return true
    })
}
26 |
/**
 * Flatten an array one level.
 * @param {Array} array
 * @return {Array}
 */
export const flatten = (array = []) => [].concat(...array)
33 |
/**
 * Remove from a every value whose extracted key also appears in b.
 * @param {Array} a The first array.
 * @param {Array} b The second array.
 * @param {Function} extractor Reads the comparison key from a value.
 * @returns {Array}
 */
export const minus = (a = [], b = [], extractor = defaultExtractor) => {
    const excluded = new Set(b.map(extractor))
    return a.filter((value) => !excluded.has(extractor(value)))
}
51 |
/**
 * Intersect two arrays, ignoring duplicates (compared by extracted key).
 * For each key present in both arrays, the transformer combines the first
 * matching value of a with the first matching value of b; falsy transformed
 * values are dropped.
 * @param {Array} a The first array.
 * @param {Array} b The second array.
 * @param {Function} extractor Reads the comparison key from a value.
 * @param {Function} transformer A transformer function to transform the two values.
 * @returns {Array}
 */
export const intersect = (a = [], b = [], extractor = defaultExtractor, transformer = defaultTransformer) => {
    // Index b by key once (first occurrence wins, matching the previous
    // findIndex semantics) — O(n + m) instead of the former O(n * m).
    const byKey = new Map()
    for (const value of b) {
        const key = extractor(value)
        if (!byKey.has(key)) {
            byKey.set(key, value)
        }
    }
    // Deduplicate a inline while intersecting.
    const seen = new Set()
    const result = []
    for (const cur of a) {
        const key = extractor(cur)
        if (seen.has(key)) {
            continue
        }
        seen.add(key)
        if (!byKey.has(key)) {
            continue
        }
        const transformed = transformer(cur, byKey.get(key))
        if (transformed) {
            result.push(transformed)
        }
    }
    return result
}
74 |
/**
 * Join two arrays, ignoring duplicates (compared by extracted key).
 * Values present in both arrays are combined with the transformer (falsy
 * results are dropped); values present in only one array are passed to the
 * transformer with a single argument.
 * @param {Array} a The first array.
 * @param {Array} b The second array.
 * @param {Function} extractor A extractor function read the value to compare
 * @param {Function} transformer A transformer function to transform the two values.
 * @returns {Array}
 */
export const union = (a = [], b = [], extractor = defaultExtractor, transformer = defaultTransformer) => {
    const duplicates = {}
    // Each half is deduplicated, so a key can occur at most twice in the
    // combined array: once from a and once from b.
    const union = [...unique(a, extractor), ...unique(b, extractor)]
    return union
        .reduce((acc, cur, index) => {
            // Look for the partner occurrence of the same key (always at a
            // different index; findIndex resolves to the first occurrence).
            const idx = union.findIndex((x, findex) => index !== findex && extractor(cur) === extractor(x))
            if (idx === -1) {
                acc.push(transformer(cur))
                return acc
            }
            // The first member of a pair records its own index below; when
            // the second member is visited, its findIndex points back to the
            // first index, so this check skips the already-merged partner.
            if (duplicates[idx]) {
                return acc
            }
            duplicates[index] = true
            const transformedValue = transformer(cur, union[idx])
            if (transformedValue) {
                acc.push(transformedValue)
            }
            return acc
        }, [])
}
104 |
/**
 * Find the first index at which b occurs as a contiguous subsequence of a.
 * @param {Array} a Array to search in.
 * @param {Array} b Subsequence to search for.
 * @param {Function} comparator Compares a value of a with a value of b.
 * @returns {Number} Start index of the match in a, or -1. An empty b
 * returns -1 (historical behaviour relied on by callers).
 */
export const contains = (a = [], b = [], comparator = defaultComparator) => {
    const m = b.length
    const n = a.length
    if (m === 0) {
        return -1
    }
    // Restart the scan at every candidate start position. The previous
    // implementation advanced the outer cursor past a partial match and
    // could miss overlapping matches (e.g. ['a','a','a','b'] vs
    // ['a','a','b'] returned -1 instead of 1).
    for (let start = 0; start <= n - m; start++) {
        let j = 0
        while (j < m && comparator(a[start + j], b[j])) {
            j++
        }
        if (j === m) {
            return start
        }
    }
    return -1
}
134 |
/**
 * Returns whether an array a contains any element in array b before any element in array c.
 * @param {Array} a The array containing the elements.
 * @param {Array} b The array containing the elements that should be placed before c.
 * @param {Array} c The array containing the elements that should be placed after b.
 * @param {Function} comparator A comparator function to compare a value in array a with a value in array b.
 * @returns {Boolean}
 */
export const ordered = (a = [], b = [], c = [], comparator = defaultComparator) => {
    return b.some((x, i) => {
        // Position of this "before" element in a (-1 when absent).
        let idxB = a.findIndex((y) => comparator(x, y))
        // True when some "after" element sits at or beyond idxB.
        // NOTE(review): when neither element is found, both indexes are -1
        // and -1 >= -1 still counts as ordered — confirm this is intended.
        const any = c.some((y) => {
            let idxC = a.findIndex((z) => comparator(y, z))
            return idxC >= idxB
        })
        // No-op guard: `some` would return false here anyway on the last
        // iteration when `any` is false.
        if (!any && i === b.length - 1) {
            return false
        }
        return any
    })
}
156 |
/**
 * Returns whether no element of array b is present in array a.
 * @param {Array} a The array containing the elements.
 * @param {Array} b The array containing the elements which can not exist in a.
 * @param {Function} comparator Compares a value of b with a value of a.
 * @returns {Boolean}
 */
export const notContains = (a = [], b = [], comparator = defaultComparator) => {
    const anyPresent = b.some((x) => a.some((y) => comparator(x, y)))
    return !anyPresent
}
172 |
/**
 * Returns whether the elements of b found in a are separated by fewer than
 * n words in total (sum of the gaps between consecutive match positions).
 * @param {Array} a The array containing the elements.
 * @param {Array} b The array containing the elements in a to compare.
 * @param {Number} n The total gap allowed between the elements in b.
 * @param {Function} comparator Compares a value of b with a value of a.
 * @returns {Boolean}
 */
export const proximity = (a = [], b = [], n, comparator = defaultComparator) => {
    const indices = b
        .map((keyword) => a.findIndex((y) => comparator(keyword, y)))
        .filter((x) => x !== -1)
        // BUG FIX: sort numerically — the default sort compares as strings
        // and mis-orders indices once the haystack has 10+ elements.
        .sort((x, y) => x - y)
    const totalGap = indices.reduce((agg, x, i, arr) => {
        if (i === arr.length - 1) {
            return agg
        }
        return agg + ((arr[i + 1] - 1) - x)
    }, 0)
    return totalGap < n
}
194 |
/**
 * Returns whether at least n elements in b exist in a.
 * (Exported name kept as `quorom` for backwards compatibility.)
 * @param {Array} a The array containing the elements.
 * @param {Array} b The array containing the elements to search.
 * @param {Number} n The total amount that must exist.
 * @param {Function} comparator Compares a value of a with a value of b.
 * @returns {Boolean}
 */
export const quorom = (a = [], b = [], n, comparator = defaultComparator) => {
    let hits = 0
    return b.some((keyword) => {
        const matched = a.some((y) => comparator(y, keyword))
        if (matched) {
            hits++
        }
        // Short-circuits as soon as the quorum is reached.
        return hits >= n
    })
}
212 |
/**
 * Convert an array of values into a gaps (delta-encoded) array, in place.
 * The input is sorted numerically first.
 * (Inline for performance)
 * @param {Array} arr
 * @returns {Array} The same, mutated array.
 */
export const getGapsArray = (arr = []) => {
    if (arr.length <= 1) {
        return arr
    }
    arr.sort((x, y) => x - y)
    // Walk backwards so each delta is computed before its base is replaced.
    for (let i = arr.length - 1; i >= 1; --i) {
        arr[i] -= arr[i - 1]
    }
    return arr
}
232 |
/**
 * Convert a gaps (delta-encoded) array back into absolute values,
 * in place (running prefix sums).
 * (Inline for performance)
 * @param {Array} arr
 * @returns {Array} The same, mutated array.
 */
export const getArrayGaps = (arr = []) => {
    if (arr.length <= 1) {
        return arr
    }
    let running = arr[0]
    for (let i = 1; i < arr.length; ++i) {
        running += arr[i]
        arr[i] = running
    }
    return arr
}
248 |
/**
 * Insert an item into a gaps (delta-encoded) array, keeping the encoded
 * values sorted.
 * @param {Array} array
 * @param {Number} id
 * @returns {Array|undefined} Returns undefined if the item already exists
 */
export const insertIntoGapsArray = (array = [], id) => {
    const len = array.length
    if (len === 0) {
        return [id]
    }

    // Walk the deltas, reconstructing absolute values, until we pass id.
    let i = 0
    let currentValue = 0
    let prevValue = 0
    do {
        currentValue = prevValue + array[i]

        // Exact match: the id is already encoded.
        if (currentValue === id) {
            return
        }

        // Found the insertion point: id belongs before position i.
        if (id < currentValue) {
            break
        }

        prevValue = currentValue

        i++
    } while (i < len)

    if (i === 0) {
        // New smallest value: it becomes the base, and the old first
        // delta is rebased against it.
        array.unshift(id)
        array[1] = array[1] - id
    } else if (i === len) {
        // New largest value: append its delta from the previous largest.
        array.push(id - prevValue)
    } else {
        // Middle insertion: splice in the delta from the predecessor and
        // rebase the successor's delta against id.
        array.splice(i, 0, id - prevValue)
        array[i + 1] = currentValue - id
    }

    return array
}
292 |
/**
 * Remove an item from a gaps (delta-encoded) array, preserving the
 * encoding of the remaining values.
 * @param {Array} array
 * @param {Number} id
 * @returns {Array|undefined} Returns undefined if the item was not found
 */
export const removeFromGapsArray = (array = [], id) => {
    const len = array.length
    if (len === 0) {
        return []
    }

    // Walk the deltas, reconstructing absolute values, until id is found.
    let i = 0
    let currentValue = 0
    let prevValue = 0
    do {
        currentValue = prevValue + array[i]
        if (currentValue === id) {
            break
        }
        prevValue = currentValue
        i++
    } while (i < len)

    // Not found: the loop ran off the end.
    if (i === len) {
        return
    }
    // Last element: just drop its delta.
    if (i === len - 1) {
        array.splice(i, 1)
        return array
    }
    // First element: the successor becomes the new base value.
    if (i === 0) {
        array.splice(0, 1)
        array[0] = currentValue + array[0]
        return array
    }

    // Middle element: fold its delta into the successor's delta.
    array.splice(i, 1)
    array[i] = (currentValue + array[i]) - prevValue

    return array
}
329 |
/**
 * Swap two positions of an array in place.
 * @param {Array} result
 * @param {Number} i
 * @param {Number} j
 */
const swap = (result, i, j) => {
    const [x, y] = [result[j], result[i]]
    result[i] = x
    result[j] = y
}
341 |
/**
 * Return a shuffled copy of an array (Fisher-Yates over a slice);
 * the input array is left untouched.
 * @param {Array} array
 * @returns {Array}
 */
export const shuffle = (array) => {
    const result = array.slice()
    for (let i = result.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1))
        const tmp = result[i]
        result[i] = result[j]
        result[j] = tmp
    }
    return result
}
350 |
/**
 * Shuffle two parallel arrays in place with the same permutation, so the
 * pairing arrayA[i] / arrayB[i] is preserved.
 * @param {Array} arrayA Drives the length of the shuffle.
 * @param {Array} arrayB Assumed to be at least as long as arrayA.
 */
export const mutablyShuffleTwo = (arrayA, arrayB) => {
    const l = arrayA.length
    for (let i = l - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1))
        swap(arrayA, i, j)
        swap(arrayB, i, j)
    }
}
359 |
--------------------------------------------------------------------------------
/src/helper/array.spec.js:
--------------------------------------------------------------------------------
1 | import {
2 | unique,
3 | flatten,
4 | intersect,
5 | union,
6 | contains,
7 | proximity,
8 | ordered,
9 | quorom,
10 | insertIntoGapsArray, removeFromGapsArray, getGapsArray, getArrayGaps, minus
11 | } from './array'
12 |
// Unit tests for the array helpers: flatten/unique/set operations,
// subarray search, ordering/proximity/quorom predicates and the
// delta ("gaps") encoding of sorted number arrays.
describe('array', () => {
    it('should flatten an array', () => {
        expect(flatten([[1, 2, 3], [4, 5, 6]])).toEqual([1, 2, 3, 4, 5, 6])
    })

    it('should only take unique items', () => {
        expect(unique([1, 1, 2, 2, 3, 4])).toEqual([1, 2, 3, 4])
    })

    it('should minus an array', () => {
        expect(minus([1,2,3,4], [3,1])).toEqual([2,4])
        expect(minus([], [3,1])).toEqual([])
    })

    it('should not find a subarray', () => {
        expect(contains(['a', 'b', 'c'], ['b', 'c', 'd'])).toEqual(-1)
        expect(contains(['a'], ['b', 'c', 'd'])).toEqual(-1)
        expect(contains(['a'], [])).toEqual(-1)
        expect(contains(['a', 'b', 'a'], ['a', 'a'])).toEqual(-1)
    })

    it('should find a subarray', () => {
        expect(contains(['a', 'b', 'c'], ['b', 'c'])).toEqual(1)
        expect(contains(['a', 'a', 'a'], ['a', 'a'])).toEqual(0)
    })

    it('should find a subarray with wildcards', () => {
        // The custom comparator treats '*' in the needle as matching any item.
        const cb = (a, b) => a === b || b === '*'
        expect(contains(['foo', 'a', 'b', 'c', 'bar'], ['*', 'c'], cb)).toEqual(2)
    })

    it('should union two arrays uniquely', () => {
        const a = [{ id: 1 }, { id: 2 }]
        const b = [{ id: 1 }, { id: 3 }]
        const extractor = (a) => a.id
        expect(union(a, b, extractor)).toEqual([{ id: 1 }, { id: 2 }, { id: 3 }])
    })

    it('should intersect two arrays', () => {
        const a = [{ id: 1 }, { id: 2 }]
        const b = [{ id: 1 }, { id: 3 }]
        const extractor = (a) => a.id
        expect(intersect(a, b, extractor)).toEqual([{ id: 1 }])
    })

    it('should intersect two arrays uniquely', () => {
        const a = [{ id: 1 }, { id: 1 }, { id: 2 }, { id: 1 }]
        const b = [{ id: 1 }, { id: 3 }, { id: 2 }, { id: 1 }, { id: 2 }]
        const extractor = (a) => a.id
        expect(intersect(a, b, extractor)).toEqual([{ id: 1 }, { id: 2 }])
    })

    it('should intersect and transform', () => {
        const a = [{ id: 1, result: ['abc', 'def'] }, { id: 2 },]
        const b = [{ id: 1, result: ['def', 'fgh'] },]
        const extractor = (a) => a.id
        // The transformer merges the two matching items into one result row.
        const transformer = (a, { result = [] }) => ({
            ...a,
            result: a.result.concat(result)
        })
        expect(intersect(a, b, extractor, transformer))
            .toEqual([{ id: 1, result: ['abc', 'def', 'def', 'fgh'] }])
    })

    it('should union and transform', () => {
        const extractor = (a) => a.id
        const transformer = (a, { result = [] } = {}) => ({
            ...a,
            result: a.result.concat(result)
        })
        const a = [{ id: 1, result: ['abc', 'def'] }, { id: 2, result: ['123'] }]
        const b = [{ id: 1, result: ['cde', 'fgh'] }]
        expect(union(a, b, extractor, transformer))
            .toEqual([{ id: 1, result: ['abc', 'def', 'cde', 'fgh'] }, { id: 2, result: ['123'] }])

    })

    it('should return true if one item is before', () => {
        expect(ordered(['aaa', 'bbb', 'ccc'], ['aaa'], ['bbb'])).toBeTruthy()
    })

    it('should return true if one item is before and the other is not', () => {
        expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['ccc', 'aaa'], ['bbb'])).toBeTruthy()
        expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['aaa', 'ccc'], ['bbb'])).toBeTruthy()
    })

    it('should return false if no item is before', () => {
        expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['ccc', 'eee'], ['aaa', 'bbb'])).toBeFalsy()
        expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['eee', 'ccc', 'bbb'], ['aaa'])).toBeFalsy()
        expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], [], ['aaa'])).toBeFalsy()
        expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['aaa'], [])).toBeFalsy()
    })

    it('should find proximity', () => {
        expect(proximity(['cat', 'aaa', 'dog', 'mouse'], ['mouse'], 1)).toBeTruthy()
        expect(proximity(['cat', 'aaa', 'dog', 'mouse'], ['mouse', 'cat', 'dog'], 2)).toBeTruthy()
        expect(proximity(['cat', 'aaa', 'dog', 'mouse', 'bbb'], ['cat', 'dog', 'mouse'], 2)).toBeTruthy()
        expect(proximity(['cat', 'aaa', 'bbb', 'ccc', 'dog', 'eee', 'fff', 'mouse'], ['cat', 'dog', 'mouse'], 6)).toBeTruthy()
    })

    it('should not find proximity', () => {
        expect(proximity(['cat', 'aaa', 'dog', 'mouse'], ['cat', 'dog', 'mouse'], 1)).toBeFalsy()
        expect(proximity(['cat', 'aaa', 'bbb', 'ccc', 'dog', 'eee', 'fff', 'mouse'], ['cat', 'dog', 'mouse'], 5)).toBeFalsy()
    })

    it('should find quorom', () => {
        expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog'], 1)).toBeTruthy()
        expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog'], 2)).toBeTruthy()
    })

    it('should not find quorom', () => {
        expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog'], 3)).toBeFalsy()
        expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog', 'aaa'], 3)).toBeFalsy()
    })

    // A "gaps" array stores a sorted list of numbers as the deltas between
    // consecutive (sorted) values, e.g. values [1, 5, 10] -> gaps [1, 4, 5].
    it('should get a gaps array', () => {
        expect(getGapsArray([1,2,3])).toEqual([1,1,1])
        expect(getGapsArray([6,2,3])).toEqual([2,1,3])
        expect(getGapsArray([10,5,1])).toEqual([1,4,5])
    })

    it('should get an array from gaps', () => {
        expect(getArrayGaps([1,1,1])).toEqual([1,2,3])
        expect(getArrayGaps([2,1,3])).toEqual([2,3,6])
        expect(getArrayGaps([1,4,5])).toEqual([1,5,10])
    })

    it('should insert into gaps array', () => {
        expect(insertIntoGapsArray([], 2)).toEqual([2])
        expect(insertIntoGapsArray([2], 1)).toEqual([1, 1])
        expect(insertIntoGapsArray([2], 3)).toEqual([2, 1])
        expect(insertIntoGapsArray([2, 3], 3)).toEqual([2, 1, 2])
        expect(insertIntoGapsArray([5, 5, 10, 10, 20], 25)).toEqual([5, 5, 10, 5, 5, 20])
        expect(insertIntoGapsArray([5, 5, 10, 10, 20], 6)).toEqual([5, 1, 4, 10, 10, 20])

        // Inserting a value that is already present yields undefined.
        expect(insertIntoGapsArray([2, 3], 2)).toBeUndefined()
        expect(insertIntoGapsArray([2, 5], 2)).toBeUndefined()
    })

    it('should remove from gaps array', () => {
        expect(removeFromGapsArray([2, 1, 1], 3)).toEqual([2, 2])
        expect(removeFromGapsArray([2], 2)).toEqual([])
        expect(removeFromGapsArray([2, 1], 3)).toEqual([2])
        expect(removeFromGapsArray([2, 3, 1], 6)).toEqual([2, 3])
        expect(removeFromGapsArray([5, 5, 10, 10, 20], 5)).toEqual([10, 10, 10, 20])
        // Removing a value that is not present yields undefined.
        expect(removeFromGapsArray([2, 3], 3)).toBeUndefined()
    })
})
161 |
--------------------------------------------------------------------------------
/src/helper/idb.js:
--------------------------------------------------------------------------------
export const READONLY = 'readonly'
export const READWRITE = 'readwrite'

/**
 * Open an indexedDB database, promisified.
 * Resolves with the open database, rejects with the request error.
 * @param {indexedDB} indexedDB The indexedDB factory to use.
 * @param {String} name Database name.
 * @param {Number} version Schema version.
 * @param {Function} upgrade Called as (db, oldVersion, upgradeTransaction) when an upgrade is needed.
 * @returns {Promise}
 */
export const open = (indexedDB, name, version, upgrade) => {
    return new Promise((resolve, reject) => {
        const openRequest = indexedDB.open(name, version)
        openRequest.onupgradeneeded = (event) => {
            upgrade(openRequest.result, event.oldVersion, openRequest.transaction)
        }
        openRequest.onsuccess = () => resolve(openRequest.result)
        openRequest.onerror = () => reject(openRequest.error)
    })
}
20 |
21 | /**
22 | * Convert an idb transaction to a promise.
23 | * @param {IDBTransaction} tx
24 | * @returns {Promise}
25 | */
26 | export const transaction = (tx) => {
27 | return new Promise((resolve, reject) => {
28 | tx.oncomplete = () => resolve()
29 | tx.onerror = () => reject(tx.error)
30 | tx.onabort = () => reject(tx.error)
31 | })
32 | }
33 |
34 | /**
35 | * Convert an idb request to a promise.
36 | * @param {IDBRequest} request
37 | * @returns {Promise}
38 | */
39 | export const request = (request) => {
40 | return new Promise((resolve, reject) => {
41 | request.onsuccess = () => resolve(request.result)
42 | request.onerror = () => reject(request.error)
43 | })
44 | }
45 |
46 | /**
47 | * Delete a database.
48 | * @param {indexedDB} indexedDB
49 | * @param {String} dbName
50 | * @returns {Promise}
51 | */
52 | export const deleteDb = async (indexedDB, dbName) => {
53 | const req = indexedDB.deleteDatabase(dbName)
54 |
55 | return new Promise((resolve, reject) => {
56 | req.onsuccess = resolve
57 | req.onerror = reject
58 | req.onblocked = reject
59 | })
60 | }
61 |
62 | /**
63 | * Open the database with closure timeout. This is to prevent the connection staying open constantly.
64 | * Which can be bad because it can prevent updates to the database from other tabs.
65 | * @param {Function} open Function to open the database.
66 | * @param {Number} closeTimeout Timeout after which to close the connection.
67 | * @returns {Object}
68 | */
69 | export const openWithClosure = (open, closeTimeout) => {
70 | const state = {
71 | dbHandle: undefined,
72 | closeHandle: undefined,
73 | closed: false
74 | }
75 |
76 | const clearCloseHandle = () => {
77 | if (!state.closeHandle) {
78 | return
79 | }
80 | clearTimeout(state.closeHandle)
81 | state.closeHandle = undefined
82 | }
83 |
84 | const clearCloseDatabase = () => {
85 | if (!state.dbHandle) {
86 | return
87 | }
88 | state.dbHandle.close()
89 | state.dbHandle = undefined
90 | }
91 |
92 | const close = () => {
93 | clearCloseHandle()
94 | clearCloseDatabase()
95 | }
96 |
97 | const getTransaction = async (storeNames, mode = 'readonly') => {
98 | if (state.closed) {
99 | throw new Error('Database has been closed')
100 | }
101 |
102 | clearCloseHandle()
103 | state.closeHandle = setTimeout(close, closeTimeout)
104 |
105 | if (!state.dbHandle) {
106 | state.dbHandle = await open()
107 | }
108 |
109 | return state.dbHandle.transaction(storeNames, mode)
110 | }
111 |
112 | return {
113 | getTransaction,
114 | close: () => {
115 | state.closed = true
116 | close()
117 | }
118 | }
119 | }
120 |
121 |
--------------------------------------------------------------------------------
/src/helper/idb.spec.js:
--------------------------------------------------------------------------------
1 | import { open, openWithClosure, request, transaction, deleteDb } from './idb'
2 |
/**
 * Resolve (with undefined) after the given number of milliseconds.
 * @param {Number} time
 * @returns {Promise}
 */
const delay = (time) => new Promise((resolve) => {
    setTimeout(resolve, time)
})
4 |
// Unit tests for the idb promise wrappers, run against the real browser
// indexedDB (except for the auto-closure test, which uses spies).
describe('idb helper', () => {
    const STORE_NAME = 'test-os'

    // Open (creating on first run) a test database with a single store.
    const setup = () => {
        return open(indexedDB, 'test', 1, (db) => {
            db.createObjectStore(STORE_NAME)
        })
    }

    afterAll(() => deleteDb(indexedDB, 'test'))

    it('should open a db promisified', async () => {
        const db = await setup()
        db.close()
    })

    it('should open a db with auto closure', async () => {
        const mock = {
            transaction: jasmine.createSpy('transaction'),
            close: jasmine.createSpy('close')
        }
        const setup = jasmine.createSpy('setup')
            .and
            .returnValue(mock)
        const { close, getTransaction } = openWithClosure(setup, 100)

        // The connection is opened lazily on the first transaction.
        expect(setup)
            .toHaveBeenCalledTimes(0)
        await getTransaction('woot')
        expect(setup)
            .toHaveBeenCalledTimes(1)

        // After the idle timeout the connection should have been closed.
        await delay(100)

        expect(mock.close)
            .toHaveBeenCalledTimes(1)

        // A later transaction transparently re-opens the connection.
        await getTransaction('woot')

        await delay(100)

        expect(mock.close)
            .toHaveBeenCalledTimes(2)
        expect(setup)
            .toHaveBeenCalledTimes(2)

        close()

        // Once closed permanently, transactions must be rejected.
        await getTransaction('woot')
            .catch((e) => {
                expect(e.message)
                    .toEqual('Database has been closed')
            })
    })

    it('should throw if it has been closed', async () => {
        const { close, getTransaction } = openWithClosure(setup, 100)

        await getTransaction(STORE_NAME)

        close()

        const error = await getTransaction(STORE_NAME)
            .catch((e) => e)
        expect(error.message)
            .toEqual('Database has been closed')
    })

    it('should put and get a value promisified', async () => {
        const db = await setup()

        const tx = db.transaction(STORE_NAME, 'readwrite')

        tx.objectStore(STORE_NAME)
            .put('bar', 'foo')

        tx.objectStore(STORE_NAME)
            .put('bar2', 'foo2')

        await transaction(tx)

        expect(await request(db.transaction(STORE_NAME, 'readonly')
            .objectStore(STORE_NAME)
            .get('foo')))
            .toEqual('bar')

        expect(await request(db.transaction(STORE_NAME, 'readonly')
            .objectStore(STORE_NAME)
            .get('foo2')))
            .toEqual('bar2')

        db.close()
    })
})
99 |
--------------------------------------------------------------------------------
/src/helper/lru.js:
--------------------------------------------------------------------------------
// Factory for a fixed-capacity key/value cache backed by a Map plus an
// intrusive doubly-linked list threaded through the map entries.
// `head` is the most recently written key, `tail` the oldest; each map
// entry holds { value, prev, next } where `next` points towards head
// (more recent) and `prev` towards tail (older).
// NOTE(review): get() does not refresh an entry's position, so eviction
// order follows set() recency only, not read recency — confirm this is
// the intended semantics for an "LRU".
export default ({ max = 10000 } = {}) => {
    const map = new Map()
    let head
    let tail
    let length = 0

    // Drop every entry and reset the list pointers.
    const clear = () => {
        map.clear()
        head = tail = undefined
        length = 0
    }

    // Detach `key` from the linked list given its neighbours. The caller
    // removes the key from the map (remove) or re-links it at the head (set).
    const unlink = function (key, prev, next) {
        length--

        // Last element removed: the list is now empty.
        if (length === 0) {
            head = tail = undefined
            return
        }

        // Removing the head: the previous (older) element becomes the head.
        if (head === key) {
            head = prev
            map.get(head).next = undefined
            return
        }

        // Removing the tail: the next (newer) element becomes the tail.
        if (tail === key) {
            tail = next
            map.get(tail).prev = undefined
            return
        }

        // Interior element: bridge the two neighbours.
        if (prev) {
            map.get(prev).next = next
        }
        if (next) {
            map.get(next).prev = prev
        }
    }

    // Remove a key from the cache, if present.
    const remove = (key) => {
        if (!map.has(key)) {
            return
        }
        const element = map.get(key)
        map.delete(key)
        unlink(key, element.prev, element.next)
    }

    // Look up a value; returns undefined when absent. Does not update
    // the entry's recency (see NOTE above).
    const get = (key) => {
        const element = map.get(key)
        if (!element) {
            return
        }
        return element.value
    }

    // Insert or update a key, moving it to the head of the list and
    // evicting the tail when at capacity. Returns the stored value.
    const set = (key, value) => {
        let element

        if (map.has(key)) {
            element = map.get(key)
            element.value = value

            // Already the most recent entry: nothing to re-link.
            if (key === head) {
                return value
            }

            // Detach so the entry can be re-attached at the head below.
            unlink(key, element.prev, element.next)
        } else {
            element = {
                value,
                next: undefined,
                prev: head
            }
            map.set(key, element)

            // At capacity: evict the oldest entry to make room.
            if (length === max) {
                remove(tail)
            }
        }

        // (Re-)attach the entry at the head. prev/next must be assigned
        // here for the existing-key path; for brand-new entries this
        // repeats the literal above (and picks up a head changed by the
        // eviction when max === 1).
        length++
        element.next = undefined
        element.prev = head

        if (head) {
            map.get(head).next = key
        }

        head = key

        if (!tail) {
            tail = key
        }

        return value
    }

    return { set, get, remove, clear }
}
102 |
103 |
--------------------------------------------------------------------------------
/src/helper/lru.spec.js:
--------------------------------------------------------------------------------
1 | import createLru from './lru'
2 |
// Unit tests for the fixed-capacity cache factory.
describe('lru', () => {
    it('should set and get', () => {
        const lru = createLru()
        lru.set('a', 'woot')
        expect(lru.get('a')).toBe('woot')
    })

    // The oldest inserted key is evicted once capacity is exceeded.
    it('should evict', () => {
        const lru = createLru({ max: 2 })
        lru.set('a', 'woot')
        lru.set('b', 'b')
        lru.set('c', 'c')
        expect(lru.get('a')).toBeUndefined()
        expect(lru.get('b')).toBe('b')
        expect(lru.get('c')).toBe('c')
    })

    // Repeatedly setting existing keys must not inflate the length or
    // corrupt the linked list: only 'a' should ever be evicted.
    it('should set', () => {
        const lru = createLru({ max: 3 })
        lru.set('a', 'a')
        lru.set('b', 'b')
        lru.set('b', 'b')
        lru.set('b', 'b')
        lru.set('b', 'b')
        lru.set('b', 'b')
        lru.set('b', 'b')
        lru.set('c', 'c')
        lru.set('b', 'b')
        lru.set('c', 'c')
        lru.set('b', 'b')
        lru.set('b', 'b')
        lru.set('d', 'd')
        lru.set('d', 'd')
        lru.set('d', 'd')
        lru.set('d', 'd')
        expect(lru.get('a')).toBeUndefined()
        expect(lru.get('b')).toBe('b')
        expect(lru.get('c')).toBe('c')
        expect(lru.get('d')).toBe('d')
    })
})
44 |
--------------------------------------------------------------------------------
/src/helper/scoring.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Calculate the idf score for a term.
3 | * @param {Number} N The number of documents in the corpus.
4 | * @param {Number} df The number of times this term occurs in the corpus.
5 | * @returns {number}
6 | */
7 | const idf = (N, df) => (df > 0 ? Math.log10(N / df) : 0)
8 |
9 | /**
10 | * Calculate the weight of the term.
11 | * @param {Number} tf The number of times this term occurs in a single document.
12 | * @returns {Number}
13 | */
14 | const wt = (tf) => (tf > 0 ? 1 + Math.log10(tf) : 0)
15 |
16 | /**
17 | * Generate a ranking based on cosine similarity scoring.
18 | * https://nlp.stanford.edu/IR-book/html/htmledition/computing-vector-scores-1.html
19 | * @param {Array} terms The terms in the query.
20 | * @param {Array} termsToIds Each keyword mapped to a list of document IDs.
21 | * @param {Number} N The number of documents in this corpus.
22 | * @param {Object} idsToTerms Result id to list of terms.
23 | */
24 | export default ({ terms = [], termsToIds = [], N = 0, idsToTerms = {} }) => {
25 | if (!Array.isArray(terms) || !Array.isArray(termsToIds) || terms.length !== termsToIds.length) {
26 | throw new Error('Keyword array exception')
27 | }
28 | if (termsToIds.some((keywordToIds = []) => !Array.isArray(keywordToIds))) {
29 | throw new Error('Keyword to IDs array exception')
30 | }
31 | if (Object.keys(idsToTerms).some((id) => !Array.isArray(idsToTerms[id] || []))) {
32 | throw new Error('IDs to terms array exception')
33 | }
34 |
35 | const result = {}
36 |
37 | terms.forEach((keyword, i) => {
38 | const keywordToIds = termsToIds[i] || []
39 | const termFrequencyInCorpus = keywordToIds.length
40 | const inverseTermDocumentFrequency = idf(N, termFrequencyInCorpus)
41 | const queryTermWeight = (1 / terms.length) * inverseTermDocumentFrequency
42 |
43 | keywordToIds.forEach((id) => {
44 | const documentKeywords = idsToTerms[id] || []
45 |
46 | const termFrequencyInDocument = documentKeywords
47 | .filter((documentKeyword) => documentKeyword === keyword)
48 | .length
49 | const documentTermWeight = wt(termFrequencyInDocument) * inverseTermDocumentFrequency
50 | const score = documentTermWeight * queryTermWeight
51 |
52 | result[id] = (result[id] || 0) + score
53 | })
54 | })
55 |
56 | // Normalize scores.
57 | Object.keys(result).forEach((id) => {
58 | const documentKeywords = idsToTerms[id] || []
59 | const len = documentKeywords.length
60 | result[id] = len > 0 ? result[id] / len : 0
61 | })
62 |
63 | return result
64 | }
65 |
--------------------------------------------------------------------------------
/src/helper/scoring.spec.js:
--------------------------------------------------------------------------------
1 | import scoring from './scoring'
2 |
// Unit tests for the cosine-similarity scoring helper. Scores are only
// compared relatively where the absolute values are not meaningful.
describe('scoring', () => {
    it('should give score 0 to queries that match nothing', () => {
        const scores = scoring({
            terms: ['abc'],
            termsToIds: [[123]],
            N: 1000,
            idsToTerms: {
                123: ['foo']
            }
        })
        expect(scores[123])
            .toEqual(0)
    })

    it('should give score to queries that match exactly', () => {
        const scores = scoring({
            terms: ['foo'],
            termsToIds: [[123]],
            N: 10,
            idsToTerms: {
                123: ['foo']
            }
        })
        expect(scores[123])
            .toEqual(1)
    })

    it('should give the same score to documents that match exactly', () => {
        const scores = scoring({
            terms: ['foo', 'bar'],
            termsToIds: [[123, 124], [123, 125]],
            N: 1000,
            idsToTerms: {
                123: ['foo', 'bar'],
                125: ['bar'],
                124: ['foo']
            }
        })
        expect(scores[123] === scores[124] && scores[123] === scores[125])
            .toBeTruthy()
    })

    it('should give higher score to documents that contain both terms', () => {
        const scores = scoring({
            terms: ['foo', 'bar'],
            termsToIds: [[123, 124], [123, 125]],
            N: 1000,
            idsToTerms: {
                123: ['foo', 'bar'],
                125: ['bar', 'the'],
                124: ['foo', 'the']
            }
        })
        expect(scores[123] > scores[124] && scores[123] > scores[125])
            .toBeTruthy()
    })

    // Longer documents are penalized by the length normalization.
    it('should give higher score to documents where the term is rare', () => {
        const scores = scoring({
            terms: ['foo'],
            termsToIds: [[123, 124, 125]],
            N: 1000,
            idsToTerms: {
                123: ['foo'],
                124: ['foo', 'the'],
                125: ['foo', 'the', 'an']
            }
        })
        expect(scores[123] > scores[124] && scores[124] > scores[125])
            .toBeTruthy()
    })

    // Same as above but with a second, rarer query term boosting doc 123.
    it('should give higher score to documents where the term is rare', () => {
        const scores = scoring({
            terms: ['foo', 'bar'],
            termsToIds: [[123, 124, 125], [123]],
            N: 1000,
            idsToTerms: {
                123: ['foo', 'bar'],
                124: ['foo', 'the'],
                125: ['foo', 'the', 'an']
            }
        })
        expect(scores[123] > scores[124] && scores[123] > scores[125])
            .toBeTruthy()
    })
})
90 |
--------------------------------------------------------------------------------
/src/helper/sizeof.js:
--------------------------------------------------------------------------------
// Approximate byte cost per primitive type.
const typeSizes = {
    boolean: () => 4,
    number: () => 8,
    string: (item) => 2 * item.length // 2 bytes per UTF-16 code unit
}

/**
 * Approximate the in-memory size of a value, in bytes.
 * Walks arrays and plain objects (counting keys and values), reads
 * byteLength from buffers/typed arrays, and prices primitives via
 * typeSizes. Cyclic object graphs are handled by tracking visited objects.
 * Fixes: the previous `if (!value)` guard skipped ALL falsy values, so
 * 0, false and NaN were counted as 0 bytes; only null/undefined are
 * skipped now. Visited objects are tracked in a Set instead of an
 * O(n) indexOf scan.
 * @param {*} object The value to measure.
 * @returns {Number} Approximate size in bytes.
 */
const sizeof = (object) => {
    const seen = new Set()
    const stack = [object]
    let bytes = 0

    while (stack.length) {
        const value = stack.pop()

        // Skip only null/undefined; 0, false and '' are real values.
        if (value === null || value === undefined) {
            continue
        }

        const type = typeof value

        // ArrayBuffers, typed arrays and DataViews report their size directly.
        if (value.byteLength) {
            bytes += value.byteLength
            continue
        }

        if (Array.isArray(value)) {
            value.forEach((v) => stack.push(v))
            continue
        }

        // Count each object once to survive shared references and cycles.
        if (type === 'object' && !seen.has(value)) {
            seen.add(value)

            Object.keys(value).forEach((key) => {
                stack.push(key)
                stack.push(value[key])
            })
        }

        if (typeSizes[type]) {
            bytes += typeSizes[type](value)
        }
    }

    return bytes
}

export default sizeof
46 |
--------------------------------------------------------------------------------
/src/helper/sizeof.spec.js:
--------------------------------------------------------------------------------
1 | import sizeof from './sizeof'
2 |
// Unit tests for the approximate in-memory size calculation.
describe('sizeof', () => {
    it('should get sizeof for string', () => {
        // Strings are counted as 2 bytes per UTF-16 code unit.
        expect(sizeof('str')).toBe(2 * 3)
    })

    it('should get sizeof for number', () => {
        expect(sizeof(552)).toBe(8)
    })

    it('should get sizeof for array', () => {
        expect(sizeof(['str', 552])).toBe(8 + 6)
    })

    it('should get sizeof for object', () => {
        // Both the key and the value are counted.
        expect(sizeof({ 'str': 552 })).toBe(8 + 6)
    })

    it('should get sizeof for typed array', () => {
        // Buffers and typed arrays report their byteLength directly.
        expect(sizeof(new ArrayBuffer(32))).toBe(32)
        expect(sizeof(new Uint8Array(32))).toBe(32)
        expect(sizeof(new Int32Array(32))).toBe(128)
    })
})
26 |
--------------------------------------------------------------------------------
/src/helper/tokenize.js:
--------------------------------------------------------------------------------
1 | import latenize from 'latenize'
2 |
3 | /**
4 | * Transform a string into a token.
5 | * @param content
6 | * @returns {string}
7 | */
8 | export const transform = (content = '') =>
9 | latenize(content)
10 | .toLowerCase()
11 | .trim()
12 |
13 | /**
14 | * Turn a string of words into tokens. All characters in the tokens are lowercased and normalized in their latin form.
15 | * NOTE: string.normalize is not supported by IE https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
16 | * so using a lookup table through latenize.
17 | * Transforms e.g. 'foo bàr' to ['foo', 'bar']
18 | * @param {String} content
19 | * @param {Number} len
20 | * @param {Boolean} stripSpecial
21 | * @return {Array}
22 | */
23 | export default (content = '', len = 2, stripSpecial = true) =>
24 | (stripSpecial ? content.replace(/[!"#()%<>:;{}[\]/\\|?.,'`´*¨°^±≈§∞$£@©€™~–…›‹¸˛]/g, ' ') : content)
25 | .split(/[\s]+/)
26 | .map(transform)
27 | .filter((s) => s.length >= len)
28 |
--------------------------------------------------------------------------------
/src/helper/tokenize.spec.js:
--------------------------------------------------------------------------------
1 | import tokenize from './tokenize'
2 |
// Unit tests for the tokenizer: lowercasing, latinization, punctuation
// stripping and minimum token length.
describe('tokenizer', () => {
    it('should should convert single-token strings', () => {
        expect(tokenize('word')).toEqual(['word'])
    })

    it('should should convert multi-token words', () => {
        expect(tokenize('lorem ipsum')).toEqual(['lorem', 'ipsum'])
    })

    it('should should remove one-letter tokens', () => {
        expect(tokenize('a word')).toEqual(['word'])
    })

    it('should should remove punctuation', () => {
        expect(tokenize('hello, this is me.')).toEqual(['hello', 'this', 'is', 'me'])
    })

    it('should convert cyrillic', () => {
        expect(tokenize('Артём Риженков')).toEqual(['artyom', 'rizhenkov'])
    })

    it('should remove brackets', () => {
        // Fixed: the expected list previously contained 'pretty', which does
        // not appear anywhere in the input and made the assertion unsatisfiable.
        expect(tokenize('the (color) red ({is}) [some] people say')).toEqual(['the', 'color', 'red', 'is', 'some', 'people', 'say'])
    })

    it('should remove special characters', () => {
        expect(tokenize('the /color/ %red% \\is \'pretty\' ´some`? *people* say!')).toEqual(['the', 'color', 'red', 'is', 'pretty', 'some', 'people', 'say'])
    })

    it('should latenise characters', () => {
        expect(tokenize('crème brulée is so good. åäöàüèé?')).toEqual(['creme', 'brulee', 'is', 'so', 'good', 'aaoauee'])
    })
})
36 |
--------------------------------------------------------------------------------
/src/helper/variableByteCodes.js:
--------------------------------------------------------------------------------
// Without the sign bit to be able to use bitwise operators.
const MAX_INT32 = 0b1111111111111111111111111111111

// High bit of a byte: marks the final byte of an encoded number.
const ENCODING = 0b10000000
// Low 7 bits of a byte: the payload carried per encoded byte.
const MAX_BITS = 0b01111111

// Scratch buffer reused by writeUint32; safe because encoding is synchronous.
const TMP = new Uint8Array(5)
const EMPTY = new Uint8Array(0)

// Worst-case bytes for one encoded number, used to size the encode buffer:
// a forced 5-byte low part plus up to 4 bytes for the 22-bit high part of a
// 53-bit value. The previous value of 8 overflowed the buffer for numbers
// >= 2^52, whose high part needs 4 bytes (5 + 4 = 9 > 8); 10 adds margin.
const MAX_BYTES_PER_NUMBER = 10
// A 31-bit part needs at most ceil(31 / 7) = 5 bytes.
const MAX_BYTES_PER_PART = 5

// Recombine the high and low 31-bit parts into a single JS number.
const unsignedToDouble = (high, low) => (high * (MAX_INT32 + 1)) + low
14 |
/**
 * Write `number` into `buffer` at `offset` as 7-bit groups, most
 * significant group first. Groups are collected least-significant-first
 * in the shared scratch buffer TMP, then copied out reversed — safe
 * because encoding is fully synchronous.
 * @param {Number} number Unsigned value of at most 31 bits.
 * @param {Uint8Array} buffer Destination buffer.
 * @param {Number} offset Write position.
 * @param {Boolean} force When true always emit MAX_BYTES_PER_PART bytes
 *                        (zero padded) instead of the minimal length.
 * @returns {Number} The number of bytes written.
 */
const writeUint32 = (number, buffer, offset, force = false) => {
    let next = number
    let length = 1

    // Split into 7-bit groups; `length` tracks the highest non-zero group
    // so trailing zero bytes can be omitted (a value of 0 still takes one byte).
    for (let byteIndex = 0; byteIndex < MAX_BYTES_PER_PART; ++byteIndex) {
        TMP[byteIndex] = next & MAX_BITS
        next = next >> 7
        if (TMP[byteIndex] > 0) {
            length = byteIndex + 1
        }
    }

    // Copy out in reverse so the most significant group comes first.
    let totalBytes = force ? MAX_BYTES_PER_PART : length
    for (let i = offset + totalBytes - 1, j = 0; i >= offset; --i, ++j) {
        buffer[i] = TMP[j]
    }

    return totalBytes
}
34 |
35 | /**
36 | * Encode a number. Takes into account the low and high part of a number in JavaScript.
37 | * If the high part exists, the low part is padded with 0s for 5 bytes. Otherwise the
38 | * low part only uses as many bytes as it requires.
39 | * @param {Uint8Array} buffer
40 | * @param {Number} number
41 | * @param {Number} offset
42 | * @returns {number}
43 | */
44 | export const vbEncodeNumber = (buffer, number, offset) => {
45 | const low = number & MAX_INT32
46 | const high = (number > MAX_INT32) ? (number - low) / (MAX_INT32 + 1) : 0
47 |
48 | const lowLength = writeUint32(low, buffer, offset, high > 0)
49 | const highLength = high > 0 ? writeUint32(high, buffer, offset + lowLength) : 0
50 |
51 | return lowLength + highLength
52 | }
53 |
54 | /**
55 | * Encode an array of numbers in a variable byte-list encoding.
56 | * See {@link https://nlp.stanford.edu/IR-book/html/htmledition/variable-byte-codes-1.html}
57 | * @param {Array} numbers
58 | * @returns {Uint8Array}
59 | */
60 | export const vbEncode = (numbers = []) => {
61 | if (numbers.length === 0) {
62 | return EMPTY
63 | }
64 |
65 | const guessLength = numbers.length * MAX_BYTES_PER_NUMBER
66 | const bytes = new Uint8Array(guessLength)
67 |
68 | let totalLength = 0
69 |
70 | for (let i = 0; i < numbers.length; ++i) {
71 | const number = numbers[i]
72 | const length = vbEncodeNumber(bytes, number, totalLength)
73 | // Set last bit to denote end of number
74 | bytes[totalLength + length - 1] = bytes[totalLength + length - 1] | ENCODING
75 | totalLength += length
76 | }
77 |
78 | return bytes.slice(0, totalLength)
79 | }
80 |
81 | /**
82 | * Decode an Uint8Array into an array of numbers.
83 | * @param {Uint8Array} bytes
84 | * @returns {Array}
85 | */
86 | export const vbDecode = (bytes) => {
87 | if (!bytes || bytes.length === 0) {
88 | return []
89 | }
90 |
91 | const numbers = []
92 | let low = 0
93 | let offset = 1
94 | let part = 0
95 | let multi = false
96 | let bits = 0
97 |
98 | for (let i = 0; i < bytes.length; ++i) {
99 | const value = bytes[i]
100 | const byte = value & MAX_BITS
101 |
102 | let shift = bits === 31 ? 3 : 7
103 |
104 | part = (part << shift) | byte
105 | bits += shift
106 | offset++
107 |
108 | if (offset === MAX_BYTES_PER_PART + 1) {
109 | low = part
110 | part = 0
111 | multi = true
112 | bits = 0
113 | }
114 |
115 | if (value & ENCODING) {
116 | const number = multi ? unsignedToDouble(part, low) : part
117 | numbers.push(number)
118 | low = 0
119 | part = 0
120 | offset = 1
121 | multi = false
122 | bits = 0
123 | }
124 | }
125 |
126 | return numbers
127 | }
128 |
--------------------------------------------------------------------------------
/src/helper/variableByteCodes.spec.js:
--------------------------------------------------------------------------------
1 | import { vbDecode, vbEncode } from './variableByteCodes'
2 |
// Unit tests for the variable byte encoding: exact byte layouts for small
// inputs plus encode/decode round-trips up to 52-bit values.
describe('vb', () => {
    // 1 fits one byte: payload 1 plus the terminator bit (128 | 1 = 129).
    it('should encode 1 bit number', () => {
        expect(vbEncode([1]))
            .toEqual(new Uint8Array([129]))
    })

    it('should encode 32 bit number', () => {
        expect(vbEncode([0b11111111111111111111111111111111]))
            .toEqual(new Uint8Array([7, 127, 127, 127, 127, 129]))
    })

    it('should encode 8 bit number', () => {
        expect(vbEncode([0b11111111]))
            .toEqual(new Uint8Array([1, 255]))
    })

    it('should encode 7 bit number ', () => {
        expect(vbEncode([0b1111111]))
            .toEqual(new Uint8Array([255]))
    })

    // Round-trip helper: decoding an encoding must reproduce the input.
    const expectDecode = (arr = []) => expect(vbDecode(vbEncode(arr)))
        .toEqual(arr)

    it('should decode and encode numbers correctly', () => {
        expectDecode([128])
        expectDecode([1,128,2,128,3])
        expectDecode([5000, 100, 0])
        expectDecode([4294967295])
        expectDecode([17,1,1,1,1,1,1])
    })

    it('should decode and encode all unsigned 2 exponent numbers up to 51 bits correctly', () => {
        const array = []
        let onebit = 1
        let allbits = 1
        // For each bit width, test both the single-bit value 2^i and the
        // all-ones value 2^(i+1) - 1.
        for (let i = 1; i <= 51; ++i) {
            onebit = onebit * 2
            allbits = onebit + (onebit - 1)
            array.push(onebit)
            array.push(allbits)
        }
        expectDecode(array)
    })
})
48 |
49 |
--------------------------------------------------------------------------------
/src/helper/wildcard.js:
--------------------------------------------------------------------------------
export const MIN_WILDCARD_LEN = 2

/**
 * Check whether a string contains any wildcard character ('*' or '?').
 * @param {String} string
 * @returns {boolean}
 */
export const hasWildcard = (string = '') => {
    for (const character of string) {
        if (character === '*' || character === '?') {
            return true
        }
    }
    return false
}
17 |
18 | /**
19 | * Split a string in n-grams.
20 | * @param {Number} n Number of n-grams
21 | * @param {String} value
22 | * @returns {Array}
23 | */
24 | export const ngram = (n, value) => {
25 | if (!value || !value.charAt) {
26 | return []
27 | }
28 | let index = value.length - n + 1
29 | if (index < 1) {
30 | return []
31 | }
32 | const result = Array(index)
33 | while (index--) {
34 | result[index] = value.substr(index, n)
35 | }
36 | return result
37 | }
38 |
39 | /**
40 | * Split a token in ngrams, with padding.
41 | * @param {String} value
42 | * @returns {Array}
43 | */
44 | export const splitTokenPadding = (value = '') => {
45 | if (value.length <= MIN_WILDCARD_LEN - 1) {
46 | return []
47 | }
48 | return ngram(MIN_WILDCARD_LEN + 1, `^${value}$`)
49 | }
50 |
51 |
52 | /**
53 | * Extract a wildcard key to query for.
54 | * @param {String} value
55 | * @returns {string}
56 | */
57 | const extractQueryToken = (value = '') => {
58 | let start = -1
59 | let n = 0
60 | for (let i = 0; i < value.length; ++i) {
61 | const c = value[i]
62 | const wildcard = c === '*' || c === '?'
63 | if (start === -1) {
64 | if (wildcard) {
65 | continue
66 | }
67 | start = i
68 | n++
69 | } else {
70 | if (!wildcard) {
71 | n++
72 | if (n === MIN_WILDCARD_LEN + 1) {
73 | break
74 | }
75 | } else {
76 | start = -1
77 | n = 0
78 | }
79 | }
80 | }
81 | if (n !== MIN_WILDCARD_LEN + 1) {
82 | throw new Error('Could not parse wildcard query')
83 | }
84 | return value.substr(start, n)
85 | }
86 |
87 | export const extractQueryTokenPadding = (value = '') => extractQueryToken(`^${value}$`)
88 |
/**
 * Match a wildcard pattern against a string.
 * '*' matches any run of characters (including none), '?' matches
 * exactly one character. Uses a rolling dynamic-programming row over
 * the pattern positions.
 * @param {String} string
 * @param {String} pattern
 * @returns {Boolean}
 */
export const wildcardMatch = (string, pattern) => {
    // A lone '*' matches everything, including the empty string.
    if (pattern === '*') {
        return true
    }

    const stringLength = string.length
    const patternLength = pattern.length

    let starCount = 0
    let questionCount = 0
    for (let k = 0; k < patternLength; k++) {
        if (pattern[k] === '*') {
            starCount++
        } else if (pattern[k] === '?') {
            questionCount++
        }
    }

    // Without any '*' the lengths must agree.
    if (starCount === 0 && patternLength !== stringLength) {
        return false
    }
    // No wildcards at all: plain string comparison.
    if (starCount === 0 && questionCount === 0) {
        return string === pattern
    }

    // current[j] is truthy when pattern[0..j) matches the prefix of the
    // string consumed so far. A leading run of '*' matches the empty prefix.
    const current = Array(patternLength + 1).fill(0)
    current[0] = 1
    for (let j = 1; j <= patternLength; j++) {
        if (pattern[j - 1] === '*') {
            current[j] = current[j - 1]
        }
    }

    // previous holds the row for the prior string position.
    const previous = Array(patternLength + 1).fill(0)

    for (let i = 1; i <= stringLength; i++) {
        for (let j = 0; j <= patternLength; j++) {
            previous[j] = current[j]
            if (j === 0) {
                // An empty pattern cannot match a non-empty string.
                current[j] = 0
            } else if (pattern[j - 1] === '*') {
                // '*' matches nothing (current[j]) or one more
                // character (current[j - 1], already updated this row).
                current[j] = current[j] || current[j - 1]
            } else if (pattern[j - 1] === '?' || pattern[j - 1] === string[i - 1]) {
                // Consume one character; carry over the diagonal.
                current[j] = previous[j - 1]
            } else {
                current[j] = 0
            }
        }
    }

    return !!current[patternLength]
}
160 |
161 |
--------------------------------------------------------------------------------
/src/helper/wildcard.spec.js:
--------------------------------------------------------------------------------
import { extractQueryTokenPadding, ngram, splitTokenPadding, wildcardMatch } from './wildcard'

// Specs for the wildcard helper: n-gram splitting, query-token
// extraction and glob-style (*, ?) pattern matching.
describe('wildcard', () => {

    it('should split in n-grams', () => {
        expect(ngram(3, 'hello')).toEqual(['hel', 'ell', 'llo'])
    })

    // Padded form surrounds the token with '^' and '$' markers.
    it('should split in 3-grams with padding', () => {
        expect(splitTokenPadding('castle')).toEqual(['^ca', 'cas', 'ast', 'stl', 'tle', 'le$'])
    })

    it('should return empty array on short input', () => {
        expect(splitTokenPadding('a')).toEqual([])
    })

    // Queries without 3 consecutive literal characters (after padding)
    // cannot produce a lookup key and must throw.
    it('should throw bad input', () => {
        expect(() => extractQueryTokenPadding('r*')).toThrow(new Error('Could not parse wildcard query'))
        expect(() => extractQueryTokenPadding('*ab*')).toThrow(new Error('Could not parse wildcard query'))
    })

    it('should get one query token from a query', () => {
        expect(extractQueryTokenPadding('re?')).toEqual('^re')
        expect(extractQueryTokenPadding('?ed')).toEqual('ed$')
        expect(extractQueryTokenPadding('red*')).toEqual('^re')
        expect(extractQueryTokenPadding('*ired')).toEqual('ire')
        expect(extractQueryTokenPadding('***ired*')).toEqual('ire')
    })

    it('should match wildcard query', () => {
        expect(wildcardMatch('ab', 'aa')).toBeFalsy()
        expect(wildcardMatch('aa', 'aa')).toBeTruthy()
        expect(wildcardMatch('', '*')).toBeTruthy()
        expect(wildcardMatch('asd', '*')).toBeTruthy()
        expect(wildcardMatch('a', '??')).toBeFalsy()
        expect(wildcardMatch('a', '*?')).toBeTruthy()
        expect(wildcardMatch('ab', '*?')).toBeTruthy()
        expect(wildcardMatch('abc', '*?')).toBeTruthy()
        expect(wildcardMatch('ab', '?*?')).toBeTruthy()
        expect(wildcardMatch('ab', '*?*?*')).toBeTruthy()
        expect(wildcardMatch('abcde', '?*b*?*d*?')).toBeTruthy()
        expect(wildcardMatch('relevance', 'r*v*n*ce')).toBeTruthy()
        expect(wildcardMatch('relelelel', 're*le*el')).toBeTruthy()
        expect(wildcardMatch('relevance', 're*')).toBeTruthy()
        expect(wildcardMatch('relevance', 'ae*')).toBeFalsy()
        expect(wildcardMatch('relevance', '*e')).toBeTruthy()
        expect(wildcardMatch('relevance', '*ce')).toBeTruthy()
        expect(wildcardMatch('relevance', '*ee')).toBeFalsy()
        expect(wildcardMatch('relevance', 'rel?vance')).toBeTruthy()
        expect(wildcardMatch('relevance', 'rele*vance')).toBeTruthy()
        expect(wildcardMatch('relevance', 'rele****vance')).toBeTruthy()
        expect(wildcardMatch('abcccd', '*ccd')).toBeTruthy()
        expect(wildcardMatch('mississipissippi', '*issip*ss*')).toBeTruthy()
        // Literal '*' characters inside the subject string are treated as
        // ordinary characters; only the pattern's '*'/'?' are special.
        expect(wildcardMatch('xxxx*zzzzzzzzy*f', 'xxxx*zzy*fffff')).toBeFalsy()
        expect(wildcardMatch('xxxx*zzzzzzzzy*f', 'xxx*zzy*f')).toBeTruthy()
        expect(wildcardMatch('xxxxzzzzzzzzyf', 'xxxx*zzy*fffff')).toBeFalsy()
        expect(wildcardMatch('xxxxzzzzzzzzyf', 'xxxx*zzy*f')).toBeTruthy()
        // Pathological backtracking cases; must stay fast with the DP.
        expect(wildcardMatch('abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab', '*a*b*ba*ca*aaaa*fa*ga*ggg*b*')).toBeTruthy()
        expect(wildcardMatch('aaabbaabbaab', '*aabbaa*a*')).toBeTruthy()
        expect(wildcardMatch('a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*', 'a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*')).toBeTruthy()
        expect(wildcardMatch('aaaaaaaaaaaaaaaaa', '*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*')).toBeTruthy()
        expect(wildcardMatch('aaaaaaaaaaaaaaaa', '*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*')).toBeFalsy()
        expect(wildcardMatch('abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn', 'abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*')).toBeFalsy()
        expect(wildcardMatch('abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn', 'abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*')).toBeTruthy()
        expect(wildcardMatch('abc*abcd*abcd*abc*abcd', 'abc*abc*abc*abc*abc')).toBeFalsy()
        expect(wildcardMatch('abc*abcd*abcd*abc*abcd*abcd*abc*abcd*abc*abc*abcd', 'abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd')).toBeTruthy()
        expect(wildcardMatch('abc', '********a********b********c********')).toBeTruthy()
    })
})
70 |
--------------------------------------------------------------------------------
/src/index.esm.js:
--------------------------------------------------------------------------------
// Public API surface of the library (ES module build).

// Tokenization helpers.
export { default as tokenize, transform } from './helper/tokenize'

// Result scoring helper.
export { default as scoring } from './helper/scoring'

// Query executor and the operator constants it understands.
export {
    default as query,
    AND,
    PHRASE,
    BEFORE,
    OR,
    KEYWORD,
    PHRASE_ALL,
    PHRASE_QUOROM,
    PHRASE_PROXIMITY
} from './query/query'

// Parse a raw query string into the tree consumed by query().
export { default as parse } from './query/parser'

// Index factory and the object-store table ids.
export { default as create, TABLES } from './master'
20 |
--------------------------------------------------------------------------------
/src/index.spec.js:
--------------------------------------------------------------------------------
1 | import { open, transaction, request } from './helper/idb'
2 | import { create as createIndex, tokenize, parse, query } from './index.esm'
3 | import createEncryptionHelper from '../example/helper/encryptionHelper'
4 | import { vbDecode } from './helper/variableByteCodes'
5 | import { TABLES, DEFAULT_NAMES } from './master'
6 | import { getArrayGaps } from './helper/array'
7 |
// Shared fixture: one document id and the tokens of its body.
const MOCK = {
    ID: '123',
    TOKENS: tokenize('this is my body')
}

const DB_NAME = 'index'
const DB_VERSION = 1

// Zeroed key/salt are sufficient for tests; the transformers
// (presumably encrypting — see example/helper/encryptionHelper) are
// applied both by the index and by the direct IndexedDB reads below.
const indexKey = new Uint8Array(32)
const indexSalt = new Uint8Array(32)
const transformers = createEncryptionHelper(indexKey, indexSalt)
19 |
// End-to-end specs: store/search/remove/stats through the public index
// API, with direct (transformed) IndexedDB reads to verify on-disk state.
describe('index', () => {
    const getIndex = () => createIndex({ transformers })

    // Invert DEFAULT_NAMES: map an object-store name back to its table id.
    const tableNameToId = Object.keys(TABLES).reduce((prev, cur) => {
        prev[DEFAULT_NAMES[TABLES[cur]]] = TABLES[cur]
        return prev
    }, {})

    // Delete one record straight from IndexedDB, bypassing the index API,
    // to simulate stale/corrupted state.
    const removeValue = async (tableId, key) => {
        const tableName = DEFAULT_NAMES[tableId]
        const db = await open(indexedDB, DB_NAME, DB_VERSION)
        const tx = db.transaction(tableName, 'readwrite')
        const promise = transaction(tx)
        tx.objectStore(tableName).delete(transformers.property(tableNameToId[tableName], key))
        await promise
        db.close()
    }

    const getDb = async (tableName) => {
        const db = await open(indexedDB, DB_NAME, DB_VERSION)
        const tx = db.transaction(tableName, 'readwrite')
        return { db, tx }
    }

    // Read and decode a single record: postings/wildcards are
    // gap-encoded variable-byte lists, positions are variable-byte lists.
    const getValue = async (tableId, key) => {
        const tableName = DEFAULT_NAMES[tableId]
        const { tx, db } = await getDb(tableName)
        const data = await request(tx.objectStore(tableName).get(transformers.property(tableNameToId[tableName], key)))
        db.close()

        const value = transformers.deserialize(tableId, key, data)

        if (tableId === TABLES.POSTINGS || tableId === TABLES.WILDCARDS) {
            if (!value) {
                return
            }
            return getArrayGaps(vbDecode(value))
        }

        if (tableId === TABLES.POSITIONS) {
            if (!value) {
                return
            }
            return vbDecode(value)
        }

        return value
    }

    const getInternalDocId = (id) => getValue(TABLES.IDS_INVERSE, id)
    const getMultiple = (tableId) => async (keys) => {
        const tableName = DEFAULT_NAMES[tableId]
        const { tx, db } = await getDb(tableName)
        const data = await Promise.all(keys.map((term) =>
            request(tx.objectStore(tableName).get(transformers.property(tableId, term))))
        )
        db.close()
        return data
    }

    const getInternalTermIds = getMultiple(TABLES.LEXICON_INVERSE)
    const getTerms = getMultiple(TABLES.LEXICON)

    describe('store', () => {
        let index
        let internalDocId
        let internalTermIds

        beforeAll(async () => {
            index = getIndex()
            await index.store(MOCK.ID, MOCK.TOKENS)
            internalDocId = await getInternalDocId(MOCK.ID)
            internalTermIds = await getInternalTermIds(MOCK.TOKENS)
        })

        afterAll(async () => {
            await index.clear()
            index.close()
        })

        it('should store a link between the keywords and data id', async () => {
            const value = await getValue(TABLES.POSTINGS, internalTermIds[0])
            expect(value[0])
                .toEqual(internalDocId)
        })

        it('should store a link between the data and keywords', async () => {
            const value = await getValue(TABLES.POSITIONS, internalDocId)
            expect(value.sort())
                .toEqual(internalTermIds.sort())
        })

        // n-grams of the padded token '^this$' must map back to the terms
        // containing them ('is$' is shared by 'this' and 'is').
        it('should store wildcard information', async () => {
            const values = await Promise.all([
                getValue(TABLES.WILDCARDS, '^th').then(getTerms),
                getValue(TABLES.WILDCARDS, 'thi').then(getTerms),
                getValue(TABLES.WILDCARDS, 'his').then(getTerms),
                getValue(TABLES.WILDCARDS, 'is$').then(getTerms)
            ])
            expect(values)
                .toEqual([['this'], ['this'], ['this'], ['this', 'is']])
        })
    })

    describe('search', () => {
        let index

        const bodyA = 'hello this is a really long fluffy text abc'
        const bodyB = 'i just started using this secure email app this hello'
        const bodyC = 'hello this is a really good app abc'

        beforeAll(async () => {
            index = getIndex()
            await Promise.all([
                index.store('123', tokenize(bodyA)),
                index.store('124', tokenize(bodyB)),
                index.store('125', tokenize(bodyC)),
                index.store('150', tokenize('random text')),
                index.store('160', tokenize('redemption rededicate')),
                index.store('161', tokenize('redundancy retired rediscover'))
            ])
        })

        afterAll(async () => {
            await index.clear()
            index.close()
        })

        const mapIds = ({ result = [] }) => result.map(({ id }) => id)

        it('should not return any result for keywords that do not exist', async () => {
            expect(mapIds(await index.search(['foo'])))
                .toEqual([])
        })

        it('should return A, B and C', async () => {
            expect(mapIds(await index.search(tokenize('hello this'))))
                .toEqual(['123', '124', '125'])
        })

        it('should return result for keywords that exist', async () => {
            const result = await index.search(['hello'])
            expect(mapIds(result))
                .toEqual(['123', '124', '125'])
        })

        it('should return A for the query fluffy', async () => {
            const result = await index.search(['fluffy'])
            expect(mapIds(result))
                .toEqual(['123'])
        })

        it('should return B and C for the query app', async () => {
            const result = await index.search(['app'])
            expect(mapIds(result))
                .toEqual(['124', '125'])
        })

        it('should return with an extra id key', async () => {
            const { result } = await index.search(['hello', 'secure'])
            expect(result.map((result) => result.id))
                .toEqual(['123', '124', '125'])
        })

        // Search is an OR over terms; `match` lists which of the searched
        // terms each document actually contains.
        it('should return with an extra match key', async () => {
            const { result } = await index.search(['hello', 'secure'])
            expect(result.map((result) => result.match))
                .toEqual([['hello'], ['hello', 'secure'], ['hello']])
        })

        it('should return unique keywords for the wildcard query', async () => {
            const result = await index.wildcard('re*')
            expect(result).toEqual(['really', 'redemption', 'rededicate', 'redundancy', 'retired', 'rediscover'])
        })

        it('should return keywords that match', async () => {
            const result = await index.wildcard('red*')
            expect(result).toEqual(['redemption', 'rededicate', 'redundancy', 'rediscover'])
        })

        it('should return keywords in the end', async () => {
            const result = await index.wildcard('*ed')
            expect(result).toEqual(['started', 'retired'])
        })

        it('should return keywords', async () => {
            const result = await index.wildcard('*ndo*')
            expect(result).toEqual(['random'])
        })

        // Deleting the positions record directly simulates stale postings;
        // the next search must clean them up.
        it('should clean stale data', async () => {
            const id = '199'
            await index.store(id, tokenize('unicorn zebra'))
            const internalId = await getInternalDocId('199')
            const [unicornId, zebraId] = await getInternalTermIds(['unicorn', 'zebra'])

            expect(mapIds(await index.search(['unicorn', 'zebra']))).toEqual([id])
            expect(await getValue(TABLES.POSTINGS, unicornId)).toEqual([internalId])
            expect(await getValue(TABLES.POSTINGS, zebraId)).toEqual([internalId])

            await removeValue(TABLES.POSITIONS, internalId)

            expect(mapIds(await index.search(['unicorn', 'zebra']))).toEqual([])
            expect(await getValue(TABLES.POSTINGS, unicornId)).toBeUndefined()
            expect(await getValue(TABLES.POSTINGS, zebraId)).toBeUndefined()
        })

        it('should search with query', async () => {
            const results = query(index.search, index.wildcard, parse('red*'))
            expect((await results).map(({ id }) => id)).toEqual(['160', '161'])
        })
    })

    describe('remove one', () => {
        let index
        let internalId
        let internalTermId

        beforeAll(async () => {
            index = getIndex()
            await index.store(MOCK.ID, MOCK.TOKENS)

            internalId = await getInternalDocId(MOCK.ID)
            internalTermId = await getInternalTermIds(MOCK.TOKENS)

            await index.remove(MOCK.ID)
        })

        afterAll(async () => {
            await index.clear()
            index.close()
        })

        it('should remove data', async () => {
            const value = await getValue(TABLES.POSITIONS, internalId)
            expect(value)
                .toBeUndefined()
        })

        it('should remove the link between the keywords and data', async () => {
            const value = await getValue(TABLES.POSTINGS, internalTermId[0])
            expect(value)
                .toBeUndefined()
        })
    })

    describe('remove multiple', () => {
        let index
        let internalId
        let internalId2
        const ID2 = '321'
        const body2 = 'this is my body'

        beforeAll(async () => {
            index = getIndex()

            // 'removed' is unique to the first document: its wildcard data
            // must disappear, while shared terms must survive.
            await index.store(MOCK.ID, MOCK.TOKENS.concat('removed'))
            await index.store(ID2, tokenize(body2))

            internalId = await getInternalDocId(MOCK.ID)
            internalId2 = await getInternalDocId(ID2)

            await index.remove(MOCK.ID)
        })

        afterAll(async () => {
            await index.clear()
            index.close()
        })

        it('should remove the first instance', async () => {
            const value = await getValue(TABLES.POSITIONS, internalId)
            expect(value)
                .toBeUndefined()
            const value2 = await getValue(TABLES.POSITIONS, internalId2)
            expect(value2)
                .not.toBeUndefined()
        })

        it('should remove the link between the keywords and the first message id', async () => {
            const terms = await getInternalTermIds(tokenize(body2))
            await Promise.all(terms.map((async (token) => {
                const value = await getValue(TABLES.POSTINGS, token)
                expect(value)
                    .toEqual([internalId2])
            })))
        })

        it('should keep wildcard information', async () => {
            const values = await Promise.all([
                getValue(TABLES.WILDCARDS, '^th').then(getTerms),
                getValue(TABLES.WILDCARDS, 'thi').then(getTerms),
                getValue(TABLES.WILDCARDS, 'his').then(getTerms),
                getValue(TABLES.WILDCARDS, 'is$').then(getTerms)
            ])
            expect(values)
                .toEqual([['this'], ['this'], ['this'], ['this', 'is']])
        })

        it('should remove unique wildcard information', async () => {
            const values = await Promise.all([
                getValue(TABLES.WILDCARDS, '^re'),
                getValue(TABLES.WILDCARDS, 'rem'),
                getValue(TABLES.WILDCARDS, 'emo'),
                getValue(TABLES.WILDCARDS, 'mov'),
                getValue(TABLES.WILDCARDS, 'ove'),
                getValue(TABLES.WILDCARDS, 'ved'),
                getValue(TABLES.WILDCARDS, 'ed$')
            ])
            expect(values)
                .toEqual([undefined, undefined, undefined, undefined, undefined, undefined, undefined])
        })
    })

    describe('stats', () => {
        let index

        beforeAll(async () => {
            index = getIndex()
            await index.clear()
            await index.store(MOCK.ID, MOCK.TOKENS)
        })

        afterAll(async () => {
            await index.clear()
            index.close()
        })

        it('should get stats', async () => {
            const stats = await index.stats()
            expect(stats.total).toBe(28)
        })

        it('should get number of terms indexed', async () => {
            expect(await index.numberOfTerms()).toBe(4)
        })
    })
})
358 |
359 |
--------------------------------------------------------------------------------
/src/master.js:
--------------------------------------------------------------------------------
1 | import { open as openDb, transaction, READWRITE, openWithClosure } from './helper/idb'
2 |
3 | import createPostingsStore from './store/postingsStore'
4 | import createPositionsStore from './store/positionsStore'
5 | import createWildcardStore from './store/wildcardStore'
6 | import createTransposeStore from './store/transposeStore'
7 | import createKeyValueStore, { withTransformers } from './store/keyValueStore'
8 |
9 | import { flatten, mutablyShuffleTwo, unique } from './helper/array'
10 | import { wildcardMatch } from './helper/wildcard'
11 |
// Schema version of the underlying IndexedDB database.
const DB_VERSION = 1

// Numeric identifiers for each object store; also used as the `id`
// argument handed to the transformers.
export const TABLES = {
    LEXICON: 1,
    LEXICON_INVERSE: 2,

    IDS: 3,
    IDS_INVERSE: 4,

    POSTINGS: 5,
    POSITIONS: 6,
    WILDCARDS: 7
}

// Default database and object-store names; overridable via options.names.
export const DEFAULT_NAMES = {
    db: 'index',
    [TABLES.LEXICON]: 'lexicon',
    [TABLES.LEXICON_INVERSE]: 'lexicon_inverse',
    [TABLES.IDS]: 'ids',
    [TABLES.IDS_INVERSE]: 'ids_inverse',
    [TABLES.POSTINGS]: 'postings',
    [TABLES.POSITIONS]: 'positions',
    [TABLES.WILDCARDS]: 'wildcards'
}
36 |
// IndexedDB upgrade callback factory: on first creation (oldVersion 0)
// create every object store using the configured names.
const upgradeDb = (names) => (db, oldVersion) => {
    switch (oldVersion) {
        case 0: {
            [
                TABLES.LEXICON,
                TABLES.IDS,
                TABLES.LEXICON_INVERSE,
                TABLES.IDS_INVERSE,
                TABLES.POSTINGS,
                TABLES.POSITIONS,
                TABLES.WILDCARDS
            ].forEach((table) => db.createObjectStore(names[table]))
            break
        }
    }
}
53 |
// Validate a document id: must be a truthy string or number
// (so 0 and '' are rejected).
const assertId = (id) => {
    if (!id) {
        return false
    }
    const type = typeof id
    return type === 'string' || type === 'number'
}
58 |
// Identity transformers used when no encryption/serialization hooks
// are supplied via options.transformers.
const DEFAULT_TRANSFORMERS = {
    property: (id, key) => key,
    serialize: (id, key, value) => value,
    deserialize: (id, key, value) => value
}
64 |
/**
 * Create the encrypted search index.
 * @param {Object} options
 * @param {Object} [options.names] Overrides for the database/object-store names.
 * @param {Object} [options.transformers] property/serialize/deserialize hooks
 *  applied to every record (e.g. for encryption); defaults to identity.
 * @param {Number} [options.closeTimeout=15000] Timeout (ms) handed to
 *  openWithClosure — presumably closes an idle connection; confirm in the idb helper.
 * @returns {Object} Index API: search, wildcard, store, remove, clear,
 *  numberOfTerms, stats, close.
 */
export default (options = {}) => {
    const names = { ...DEFAULT_NAMES, ...options.names, }
    const transformers = { ...DEFAULT_TRANSFORMERS, ...options.transformers }
    const closeTimeout = options.closeTimeout || 15000

    const open = () => openDb(indexedDB, names.db, DB_VERSION, upgradeDb(names))

    const { getTransaction, close } = openWithClosure(open, closeTimeout)

    // term -> internal term id (and inverse).
    const lexiconStore = createTransposeStore(
        withTransformers(
            TABLES.LEXICON,
            createKeyValueStore(names[TABLES.LEXICON]),
            transformers
        ),
        withTransformers(
            TABLES.LEXICON_INVERSE,
            createKeyValueStore(names[TABLES.LEXICON_INVERSE]),
            transformers
        ),
        getTransaction
    )

    // external document id -> internal doc id (and inverse).
    const idsStore = createTransposeStore(
        withTransformers(
            TABLES.IDS,
            createKeyValueStore(names[TABLES.IDS]),
            transformers
        ),
        withTransformers(
            TABLES.IDS_INVERSE,
            createKeyValueStore(names[TABLES.IDS_INVERSE]),
            transformers
        ),
        getTransaction
    )

    // term id -> list of doc ids containing it.
    const postingsStore = createPostingsStore(
        withTransformers(
            TABLES.POSTINGS,
            createKeyValueStore(names[TABLES.POSTINGS]),
            transformers,
        ),
        getTransaction
    )

    // doc id -> list of term ids it contains.
    const positionsStore = createPositionsStore(
        withTransformers(
            TABLES.POSITIONS,
            createKeyValueStore(names[TABLES.POSITIONS]),
            transformers,
        ),
        getTransaction
    )

    // n-gram -> terms, for wildcard queries.
    const wildcardStore = createWildcardStore(
        withTransformers(
            TABLES.WILDCARDS,
            createKeyValueStore(names[TABLES.WILDCARDS]),
            transformers
        ),
        getTransaction
    )

    /**
     * Clean stale data from the postings table when performing a search.
     * It relies on the fact that a term returned an id which does not exist in the positions table.
     * @param {Array} positions
     * @param {Array} ids
     * @param {Array} terms
     */
    const cleanStaleData = async (positions, ids, terms) => {
        // NOTE(review): the reduce callback's `terms` parameter shadows the
        // outer `terms` argument; only the removeBulk call below uses the
        // outer one. Consider renaming to avoid confusion.
        const staleIds = positions.reduce((agg, terms, i) => {
            // Detecting stale data.
            if (terms.length === 0) {
                agg.push(ids[i])
            }
            return agg
        }, [])

        if (!staleIds.length) {
            return
        }

        staleIds.forEach((id) => postingsStore.removeBulk(terms, id))
    }

    /**
     * Find data based on the terms.
     * @param {Array} searchTerms Terms to search
     * @return {Promise}
     */
    const search = async (searchTerms = []) => {
        if (!Array.isArray(searchTerms)) {
            throw new Error('Terms must be an array')
        }

        // terms -> term ids -> doc ids -> the term ids of each doc.
        const uniqueSearchTerms = unique(searchTerms)
        const uniqueTransposedTerms = await lexiconStore.bulk(uniqueSearchTerms)
        const { idsToTerms, termsToIds, ids } = await postingsStore.getBulk(uniqueTransposedTerms)
        const positions = await positionsStore.getBulk(ids)
        const termIds = unique(flatten(positions))

        // Fire-and-forget: stale-postings cleanup is deliberately not awaited.
        cleanStaleData(positions, ids, uniqueTransposedTerms)

        // Map internal ids back to the external ids and terms.
        const [idsTransposed, termsTransposed] = await Promise.all([
            idsStore.from(ids),
            lexiconStore.from(termIds)
        ])

        const termIdsToTerm = termIds.reduce((agg, cur, i) => {
            agg[cur] = termsTransposed[i]
            return agg
        }, {})

        const result = positions
            .reduce((agg, terms, i) => {
                // Ignore stale data.
                if (terms.length === 0) {
                    return agg
                }

                const id = idsTransposed[i]
                const match = idsToTerms[i]
                const transposedTerms = terms.map((term) => termIdsToTerm[term])
                const transposedMatch = match.map((term) => termIdsToTerm[term])

                agg.push({
                    _id: ids[i],
                    _terms: terms,
                    _match: match,
                    terms: transposedTerms,
                    match: transposedMatch,
                    id,
                })

                return agg
            }, [])

        return {
            result,
            ids,
            termsToIds,
            idsToTerms
        }
    }

    /**
     * Store terms.
     * @param {String|Number} id
     * @param {Array} terms
     * @return {Promise}
     */
    const store = async (id, terms) => {
        if (!assertId(id)) {
            throw new Error('ID required')
        }
        if (!Array.isArray(terms)) {
            throw new Error('Terms must be an array')
        }
        if (terms.length === 0) {
            return
        }

        const [[transposedId], transposedTerms] = await Promise.all([
            idsStore.bulk([id]),
            lexiconStore.bulk(terms)
        ])

        const uniqueTerms = unique(terms)
        const uniqueTransposedTerms = unique(transposedTerms)

        // Randomize the array to prevent row-lock contention
        mutablyShuffleTwo(uniqueTerms, uniqueTransposedTerms)

        return Promise.all([
            postingsStore.insertBulk(uniqueTransposedTerms, transposedId),
            positionsStore.insert(transposedId, transposedTerms),
            wildcardStore.insertBulk(uniqueTerms, uniqueTransposedTerms)
        ])
    }

    /**
     * Remove a document. Also deletes all terms related to the document as well.
     * Returns a promise that resolves to a list of terms that were fully deleted.
     * @param {String|Number} id
     * @returns {Promise}
     */
    const remove = async (id) => {
        if (!assertId(id)) {
            throw new Error('ID required')
        }

        const [transposedId] = await idsStore.bulk([id])

        const terms = await positionsStore.get(transposedId)

        // removals[i] is truthy when the posting list for uniqueTerms[i]
        // became empty — only then is the wildcard data deleted too.
        const uniqueTerms = unique(terms)
        const removals = await postingsStore.removeBulk(uniqueTerms, transposedId)
        const removedTerms = uniqueTerms.filter((term, i) => removals[i])

        return Promise.all([
            lexiconStore.from(removedTerms)
                .then((termsTransposed) => wildcardStore.removeBulk(termsTransposed, removedTerms)),
            positionsStore.remove(transposedId)
        ])
    }

    /**
     * Perform a wildcard query.
     * Candidate terms come from the n-gram store and are then filtered
     * with a full pattern match to drop false positives.
     * @param {String} query Wildcard query pattern.
     * @returns {Promise}
     */
    const wildcard = async (query) => {
        const terms = await wildcardStore.get(query)
        const termsTransposed = await lexiconStore.from(terms)
        return termsTransposed
            .filter((token) => wildcardMatch(token, query))
    }

    /**
     * Clear all tables.
     * @return {Promise}
     */
    const clear = async () => {
        const stores = [postingsStore, positionsStore, wildcardStore, lexiconStore, idsStore]

        const tx = await getTransaction([
            ...flatten(stores.map((store) => store.name)),
        ], READWRITE)

        const promise = transaction(tx)
        stores.forEach((store) => store.clear(tx))
        return promise
    }

    /**
     * Returns stats on all tables.
     * @returns {Promise} Per-store { total, size } plus the grand totals.
     */
    const stats = async () => {

        const get = async (store) => {
            const tx = await getTransaction(store.name)
            return Promise.all([store.count(tx), store.size(tx)])
        }

        const result = await Promise.all([postingsStore, positionsStore, wildcardStore, lexiconStore, idsStore].map(get))

        const getStats = (total, size) => ({ total, size })
        const getStatsResult = ([total, size]) => getStats(total, size)

        const postings = getStatsResult(result[0])
        const positions = getStatsResult(result[1])
        const wildcards = getStatsResult(result[2])
        const lexicon = getStatsResult(result[3])
        const ids = getStatsResult(result[4])

        return {
            postings,
            positions,
            wildcards,
            lexicon,
            ids,
            // Grand total across all stores, spread into the top level.
            ...getStats(
                result.reduce((prev, cur) => prev + cur[0], 0),
                result.reduce((prev, cur) => prev + cur[1], 0)
            )
        }
    }

    /**
     * Return the number of terms currently indexed.
     * @returns {Promise}
     */
    const numberOfTerms = async () => {
        const tx = await getTransaction(postingsStore.name)
        return postingsStore.count(tx)
    }

    return {
        search,
        wildcard,
        store,
        remove,
        clear,
        numberOfTerms,
        stats,
        close
    }
}
361 |
--------------------------------------------------------------------------------
/src/query/grammar.peg:
--------------------------------------------------------------------------------
// PEG.js grammar for the search query language.
// Operators: implicit/explicit AND (&), OR (|), ORDER (<<), NOT (!/-),
// quoted PHRASE with ~n / /n modifiers, and ^/$/* keyword anchors.
{
    // True when c is one of the NOT operator characters.
    function compareNot(c) {
        return c === '-' || c === '!';
    }

    // Index of the first NOT character in o, or -1 when absent.
    // NOTE(review): returns false (not -1) for non-strings; callers only
    // pass joined strings so this branch looks unreachable — confirm.
    function notIdx(o) {
        if (typeof o !== 'string') {
            return false;
        }
        for (var i = 0; i < o.length; ++i) {
            if (compareNot(o.charAt(i))) {
                return i;
            }
        }
        return -1;
    }

    // True when o starts with a NOT character.
    // NOTE(review): not referenced by any visible rule action.
    function checkNot(o) {
        if (typeof o !== 'string') {
            return false;
        }
        return compareNot(o.charAt(0));
    }
}

start
    = And

// AND node: ['&', left, right, leftNegated, rightNegated].
// Negating both operands at once is rejected.
And
    = nl:NOT a:Order AND nr:NOT b:And {
        if (nl && nr) {
            error('Unexpected NOT in AND query');
        }
        return ['&', a, b, nl ? true : false, nr ? true : false];
    }
    / Order

// ORDER node: ['<<', first, second].
Order
    = a:Or ORDER b:Order {
        return ['<<', a, b];
    }
    / Or

// OR node: ['|', left, right].
Or
    = a:Atom OR b:Or {
        return ['|', a, b];
    }
    / Atom

Atom
    = Expression
    / Phrase
    / Keyword

Expression
    = '(' _ expr:start _ ')' { return expr; }

// Keyword node: ['w', value, startAnchor, endAnchor].
Keyword
    = _ a:KEYWORD_START_OPERATOR b:[^ \t)(<|&$]+ c:KEYWORD_END_OPERATOR {
        var value = b.join('');
        if (notIdx(value) !== -1) {
            error('Unexpected NOT in WORD');
        }
        if (value === '*') {
            error('Unexpected wildcard, only supported in PHRASE query')
        }
        return ['w', value, a ? a : '', c ? c : ''];
    }

// Phrase node: ['"', value, anchors, modifier, n] where modifier is
// '~' (proximity) or '/' (quorum) and n its numeric argument.
Phrase
    = '"' x:PHRASE_START_OPERATOR a:[^"$]+ y:PHRASE_END_OPERATOR '"' b:PHRASE_OPERATOR {
        var value = a.join('');
        if (notIdx(value) !== -1) {
            error('Unexpected NOT in PHRASE query');
        }
        var extra = b.length ? b[0] : ''
        var n = b.length ? parseInt(b[1].join(''), 10) : 0
        return ['"', value, (x ? x : '') + (y ? y : ''), extra, n];
    }

NOT
    = [!-]?

OR
    = _ '|' _

ORDER
    = _ '<<' _

// Whitespace between atoms acts as an implicit AND.
AND
    = _ '&' _ / [ \t]+

KEYWORD_START_OPERATOR
    = '^'? / '*'?

KEYWORD_END_OPERATOR
    = '$'? / '*'?

PHRASE_OPERATOR
    = '~' [0-9]+ / '/' [0-9]+ / ''

PHRASE_START_OPERATOR
    = '^'?

PHRASE_END_OPERATOR
    = '$'?

_ "whitespace"
    = [ \t]*
110 |
111 |
112 |
--------------------------------------------------------------------------------
/src/query/grammar.spec.js:
--------------------------------------------------------------------------------
1 | import { parse } from './grammar'
2 |
// Builders mirroring the AST node shapes emitted by the grammar parser.
const KEYWORD = (keyword) => ['w', keyword, '', '']
const AND = (expr1, expr2, not1 = false, not2 = false) => ['&', expr1, expr2, not1, not2]
const OR = (expr1, expr2) => ['|', expr1, expr2]
const BETWEEN = (expr1, expr2) => ['<<', expr1, expr2]
const PHRASE = (phrase, extra = '', n = 0, modifier = '') => ['"', phrase, modifier, extra, n]
8 |
describe('grammar', () => {
    it('should default to AND query', () => {
        expect(parse('cat mouse')).toEqual(AND(KEYWORD('cat'), KEYWORD('mouse')))
    })

    it('should parse AND queries', () => {
        expect(parse('cat & mouse')).toEqual(AND(KEYWORD('cat'), KEYWORD('mouse')))
    })

    it('should parse OR queries', () => {
        expect(parse('cat | mouse')).toEqual(OR(KEYWORD('cat'), KEYWORD('mouse')))
    })

    it('should parse ORDER queries', () => {
        expect(parse('cat << mouse')).toEqual(BETWEEN(KEYWORD('cat'), KEYWORD('mouse')))
    })

    it('should parse PHRASE queries', () => {
        expect(parse('hello "cat mouse"')).toEqual(AND(KEYWORD('hello'), PHRASE('cat mouse')))
    })

    it('should parse PHRASE/n queries', () => {
        expect(parse('"cat mouse"/10')).toEqual(PHRASE('cat mouse', '/', 10))
    })

    it('should parse PHRASE~n queries', () => {
        expect(parse('"cat mouse"~10')).toEqual(PHRASE('cat mouse', '~', 10))
    })

    // The expected trees below pin the grammar's associativity: implicit
    // AND nests to the right, and OR binds its operands before the
    // surrounding AND chain.
    it('should parse AND and OR queries', () => {
        expect(parse('looking for cat | mouse')).toEqual(AND(KEYWORD('looking'), AND(KEYWORD('for'), OR(KEYWORD('cat'), KEYWORD('mouse')))))
    })

    it('should parse ORDER and OR queries', () => {
        expect(parse('cat << mouse | dog')).toEqual(BETWEEN(KEYWORD('cat'), OR(KEYWORD('mouse'), KEYWORD('dog'))))
    })

    it('should parse complex ORDER queries', () => {
        expect(parse('(bag of words) << "phrase here" << red|blue|green')).toEqual(
            BETWEEN(
                AND(KEYWORD('bag'),
                    AND(KEYWORD('of'),
                        KEYWORD('words'))
                ),
                BETWEEN(
                    PHRASE('phrase here'),
                    OR(KEYWORD('red'),
                        OR(KEYWORD('blue'),
                            KEYWORD('green')
                        )
                    )
                )
            )
        )
    })

    it('should parse queries in order', () => {
        expect(parse('partridge << turtle doves << French hens')).toEqual(
            AND(
                BETWEEN(
                    KEYWORD('partridge'),
                    KEYWORD('turtle')
                ),
                AND(
                    BETWEEN(
                        KEYWORD('doves'),
                        KEYWORD('French'),
                    ),
                    KEYWORD('hens')
                )
            )
        )
    })

    it('should parse grouping queries', () => {
        expect(parse('(looking for) | (cat mouse)')).toEqual(
            OR(
                AND(KEYWORD('looking'), KEYWORD('for')),
                AND(KEYWORD('cat'), KEYWORD('mouse'))
            )
        )
    })

    it('should parse grouping and OR queries', () => {
        expect(parse('(looking for) | cat')).toEqual(
            OR(
                AND(
                    KEYWORD('looking'),
                    KEYWORD('for')
                ),
                KEYWORD('cat')
            )
        )
    })

    // NOT ('!'/'-') is carried as boolean flags on the enclosing AND node,
    // not on the keyword itself.
    it('should parse NOT queries', () => {
        expect(parse('!hello world')).toEqual(AND(KEYWORD('hello'), KEYWORD('world'), true, false))
        expect(parse('hello -world')).toEqual(AND(KEYWORD('hello'), KEYWORD('world'), false, true))
    })

    it('should parse complex NOT queries', () => {
        expect(parse('hello -(or | query)')).toEqual(
            AND(
                KEYWORD('hello'),
                OR(
                    KEYWORD('or'),
                    KEYWORD('query')
                ),
                false,
                true
            )
        )
        expect(parse('aaa -(bbb -(ccc ddd))')).toEqual(AND(KEYWORD('aaa'), AND(KEYWORD('bbb'), AND(KEYWORD('ccc'), KEYWORD('ddd')), false, true), false, true))
    })

    // NOT is only legal on one side of an AND; everywhere else the grammar
    // raises a SyntaxError with a specific message.
    it('should throw when using illegal NOT', () => {
        expect(() => parse('-cat')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
        expect(() => parse('cat | -dog')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
        expect(() => parse('-cat -dog')).toThrow(new SyntaxError('Unexpected NOT in AND query'))
        expect(() => parse('!cat | dog')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
        expect(() => parse('!cat | !dog')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
        expect(() => parse('cat << -dog')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
        expect(() => parse('"cat !dog"')).toThrow(new SyntaxError('Unexpected NOT in PHRASE query'))
        expect(() => parse('"cat !dog"/~10')).toThrow(new SyntaxError('Unexpected NOT in PHRASE query'))
        expect(() => parse('c!at')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
        expect(() => parse('!c!at')).toThrow(new SyntaxError('Unexpected NOT in WORD'))
    })

    it('should throw when using illegal WILDCARD', () => {
        expect(() => parse('hello *')).toThrow(new SyntaxError('Unexpected wildcard, only supported in PHRASE query'))
    })
})
141 |
--------------------------------------------------------------------------------
/src/query/parser.js:
--------------------------------------------------------------------------------
1 | import { parse as parseGrammar } from './grammar'
2 | import { KEYWORD, PHRASE } from './query'
3 | import defaultTokenize, { transform as defaultTransform } from '../helper/tokenize'
4 |
/**
 * Recursively rewrite a parsed query branch in place:
 * keyword values go through `transform`, phrase strings are tokenized,
 * and operator nodes recurse into both operands.
 * @param {Array} node parsed query branch
 * @param {Function} tokenize
 * @param {Function} transform
 */
const fixQueryBranch = (node, tokenize, transform) => {
    if (!node) {
        return
    }
    const [operator] = node
    if (operator === KEYWORD) {
        node[1] = transform(node[1])
        return
    }
    if (operator === PHRASE) {
        node[1] = tokenize(node[1], 0, false)
        return
    }
    // Operator node: fix whichever operands are sub-branches.
    ;[node[1], node[2]].forEach((branch) => {
        if (Array.isArray(branch)) {
            queryFixer(branch, tokenize, transform)
        }
    })
}
24 |
// Fix a parsed branch in place (see fixQueryBranch) and return the same array.
const queryFixer = (query, tokenize, transform) => {
    fixQueryBranch(query, tokenize, transform)
    return query
}
29 |
30 | export default (query = '', tokenize = defaultTokenize, transform = defaultTransform) => queryFixer(parseGrammar(query), tokenize, transform)
31 |
--------------------------------------------------------------------------------
/src/query/parser.spec.js:
--------------------------------------------------------------------------------
1 | import parse from './parser'
2 |
describe('parser', () => {
    // The default transform normalizes keywords ('hèllö' -> 'hello').
    it('should transform keywords', () => {
        expect(parse('hèllö')).toEqual(['w', 'hello', '', ''])
    })

    // Phrase payloads become token arrays instead of raw strings.
    it('should change a phrase into tokens', () => {
        expect(parse('"hello you there"')).toEqual(['"', ['hello', 'you', 'there'], '', '', 0])
    })
})
12 |
--------------------------------------------------------------------------------
/src/query/query.js:
--------------------------------------------------------------------------------
1 | import {
2 | intersect,
3 | minus,
4 | union,
5 | unique,
6 | ordered,
7 | contains,
8 | proximity,
9 | quorom
10 | } from '../helper/array'
11 | import { hasWildcard, wildcardMatch } from '../helper/wildcard'
12 |
// Branch operators produced by the grammar.
export const AND = '&'
export const OR = '|'
export const BEFORE = '<<'
// Leaf operators.
export const PHRASE = '"'
export const KEYWORD = 'w'

// Phrase sub-operators: match all words, words within distance n ('~'),
// or at least n of the words ('/', spelled "quorom" project-wide).
export const PHRASE_ALL = ''
export const PHRASE_PROXIMITY = '~'
export const PHRASE_QUOROM = '/'
22 |
/**
 * A phrase operator is valid when it is the plain "match all" operator,
 * or a proximity/quorum operator paired with a non-negative number.
 * @param {String} type
 * @param {Number} n
 * @returns {Boolean}
 */
const validatePhraseOperator = (type, n) => {
    if (type === PHRASE_ALL) {
        return true
    }
    if (type === PHRASE_PROXIMITY || type === PHRASE_QUOROM) {
        return typeof n === 'number' && n >= 0
    }
    return false
}
34 |
/**
 * Whether the phrase anchor modifier is one of the supported
 * combinations of '^' (start) and '$' (end).
 * @param {String} modifier
 * @returns {Boolean}
 */
const validatePhraseModifier = (modifier = '') => ['', '$', '^', '^$'].includes(modifier)
46 |
/**
 * Whether the keyword anchor operator is supported: none, '^' or '$'.
 * @param {String} operator
 * @returns {Boolean}
 */
const validateKeywordOperator = (operator) => ['', '^', '$'].includes(operator)
57 |
/**
 * Whether the branch operator is one the evaluator understands.
 * @param {String} operator
 * @returns {Boolean}
 */
const validateQueryOperator = (operator) => [AND, OR, BEFORE, PHRASE, KEYWORD].includes(operator)
70 |
71 | const getResults = ({ result }) => result
72 |
/**
 * Expand a keyword for search: wildcard keywords expand through the
 * wildcard lookup, plain keywords are searched as-is.
 * @param {Function} wildcard
 * @param {String} keyword
 * @returns {Promise<Array>}
 */
const getQueryKeywords = (wildcard, keyword) =>
    hasWildcard(keyword) ? wildcard(keyword) : Promise.resolve([keyword])
79 |
/**
 * Pick the keywords to actually search a phrase with: the first
 * non-wildcard word when one exists, otherwise the expansion of the
 * first (wildcard) word.
 * @param {Function} wildcard
 * @param {Array} keywords
 * @returns {Promise<Array>}
 */
const getPhraseQueryKeywords = (wildcard, keywords) => {
    const idx = keywords.findIndex((keyword) => !hasWildcard(keyword))
    return idx === -1 ? wildcard(keywords[0]) : Promise.resolve([keywords[idx]])
}
87 |
// Strict equality between two values.
const equalityComparator = (left, right) => left === right

// Fix a comparator's right-hand side, yielding a one-argument predicate.
const curriedComparator = (comparator, fixed) => (value) => comparator(value, fixed)

// Predicate that accepts everything.
const trueCb = () => true

// Pull the internal document id off a search result.
const resultExtractor = (result = {}) => result._id
95 |
96 | const resultTransformer = (a, { match = [] } = {}) => ({ ...a, match: unique(a.match.concat(match)) })
97 |
/**
 * Merge transformer for BEFORE ('<<'): keeps a result only when the
 * left-hand matches appear before the right-hand matches according to
 * `ordered`; returns undefined to drop the result otherwise.
 */
const beforeTransformer = (left, right) => {
    const { _terms: terms = [], _match: leftMatch = [] } = left
    const { _match: rightMatch = [], match: rightVisible = [] } = right
    if (!ordered(terms, leftMatch, rightMatch)) {
        return undefined
    }
    return {
        ...left,
        _match: unique(left._match.concat(rightMatch)),
        match: unique(left.match.concat(rightVisible))
    }
}
110 |
// AND NOT: results from A whose id does not appear in B.
const handleResultsAndNot = (resultsA, resultsB) => minus(resultsA, resultsB, resultExtractor)

// AND: intersection by id, merging visible matches.
const handleResultsAnd = (resultsA, resultsB) => intersect(resultsA, resultsB, resultExtractor, resultTransformer)

// OR: union by id, merging visible matches.
const handleResultsOr = (resultsA, resultsB) => union(resultsA, resultsB, resultExtractor, resultTransformer)

// BEFORE: intersection by id, keeping only correctly-ordered results.
const handleResultsBefore = (resultsA, resultsB) => intersect(resultsA, resultsB, resultExtractor, beforeTransformer)
118 |
/**
 * Combine the evaluated results of a branch's two operands according to
 * its operator. NOT flags are only meaningful for AND.
 * @throws {Error} on an operator with no result combinator
 */
const handleBranchResults = (queryOperator, resultsA, resultsB, notA, notB) => {
    if (queryOperator === AND) {
        if (!notA && !notB) {
            return handleResultsAnd(resultsA, resultsB)
        }
        // `a !b` keeps a-results minus b-results (and vice versa when notA).
        const keep = notA ? resultsB : resultsA
        const drop = notA ? resultsA : resultsB
        return handleResultsAndNot(keep, drop)
    }
    if (queryOperator === OR) {
        return handleResultsOr(resultsA, resultsB)
    }
    if (queryOperator === BEFORE) {
        return handleResultsBefore(resultsA, resultsB)
    }
    throw new Error(`Invalid operator ${queryOperator} in branch`)
}
134 |
// Predicate builders over a result's term list: check the comparator
// against the first term, the last term, or both. Empty lists never match.
const filterFirst = (comparator) => (terms = []) => {
    if (terms.length === 0) {
        return false
    }
    return comparator(terms[0])
}

const filterLast = (comparator) => (terms = []) => {
    if (terms.length === 0) {
        return false
    }
    return comparator(terms[terms.length - 1])
}

const filterFirstLast = (comparatorFirst, comparatorLast) => (terms = []) => {
    if (terms.length === 0) {
        return false
    }
    return comparatorFirst(terms[0]) && comparatorLast(terms[terms.length - 1])
}
143 |
144 | const getPhraseFilter = (comparator, modifier, keywords = []) => {
145 | if (modifier === '' || keywords.length === 0) {
146 | return trueCb
147 | }
148 | switch (modifier) {
149 | case '$':
150 | return filterLast(
151 | curriedComparator(comparator, keywords[keywords.length - 1]))
152 | case '^':
153 | return filterFirst(
154 | curriedComparator(comparator, keywords[0]))
155 | case '^$':
156 | return filterFirstLast(
157 | curriedComparator(comparator, keywords[0]),
158 | curriedComparator(comparator, keywords[keywords.length - 1]))
159 | }
160 | }
161 |
/**
 * Evaluate a PHRASE leaf ('"').
 * Searches the index on one representative keyword (see
 * getPhraseQueryKeywords), then filters the candidates locally against
 * the full keyword list.
 * @param {Function} search index lookup for a list of keywords
 * @param {Function} wildcard wildcard keyword expansion
 * @param {Array} keywords tokenized phrase words (may contain wildcards)
 * @param {String} phraseModifier '', '^', '$' or '^$' anchors
 * @param {String} phraseOperator '' (all), '~' (proximity) or '/' (quorum)
 * @param {Number} n distance / quorum argument for '~' and '/'
 * @returns {Promise<Array>}
 * @throws {Error} on a malformed phrase query
 */
const handlePhrase = async (search, wildcard, keywords, phraseModifier, phraseOperator, n) => {
    if (!Array.isArray(keywords) || keywords.length === 0 || !validatePhraseOperator(phraseOperator, n) || !validatePhraseModifier(phraseModifier)) {
        throw new Error('Malformed phrase query')
    }

    const queryKeywords = await getPhraseQueryKeywords(wildcard, keywords)

    const results = getResults(await search(queryKeywords))
    // Keywords may contain wildcards, so terms are compared with wildcardMatch.
    const comparator = wildcardMatch
    const filter = getPhraseFilter(comparator, phraseModifier, keywords)

    switch (phraseOperator) {
        case PHRASE_QUOROM: {
            // A quorum of 1 is satisfied by any search hit, so filtering is
            // skipped entirely. NOTE(review): this also skips the '^'/'$'
            // anchor filter for n === 1 — confirm that is intended.
            if (n === 1) {
                return results
            }
            return results
                .filter(({ terms: resultTerms = [] }) => filter(resultTerms) && quorom(resultTerms, keywords, n, comparator))
        }
        case PHRASE_PROXIMITY: {
            return results
                .filter(({ terms: resultTerms = [] }) => filter(resultTerms) && proximity(resultTerms, keywords, n, comparator))
        }
        case PHRASE_ALL: {
            return results
                .filter(({ terms: resultTerms = [] }) => filter(resultTerms) && contains(resultTerms, keywords, comparator) !== -1)
        }
    }
}
191 |
/**
 * Evaluate a KEYWORD leaf ('w').
 * @param {Function} search index lookup for a list of keywords
 * @param {Function} wildcard wildcard keyword expansion
 * @param {String} keyword
 * @param {String} keywordStartOperator '' or '^' (anchor to first term)
 * @param {String} keywordEndOperator '' or '$' (anchor to last term)
 * @returns {Promise<Array>}
 * @throws {Error} on a malformed keyword
 */
const handleKeyword = async (search, wildcard, keyword, keywordStartOperator, keywordEndOperator) => {
    // `keyword.charAt` doubles as a cheap "is a string" check.
    if (!keyword.charAt || !validateKeywordOperator(keywordStartOperator) || !validateKeywordOperator(keywordEndOperator) || keyword === '*') {
        throw new Error('Malformed keyword')
    }

    const keywords = await getQueryKeywords(wildcard, keyword)
    const results = getResults(await search(keywords))

    if (keywordStartOperator === '^' || keywordEndOperator === '$') {
        // Anchored keyword: keep results where some matched keyword equals
        // the document's first (resp. last) term.
        // NOTE(review): assumes `_terms` is the document's full term list and
        // `_match` the keywords that matched it — confirm against the search
        // helper's result shape.
        return results
            .filter(({ _terms: _terms = [], _match = [] }) => {
                const start = keywordStartOperator === '^' ?
                    curriedComparator(equalityComparator, _terms[0]) : trueCb

                const end = keywordEndOperator === '$' ?
                    curriedComparator(equalityComparator, _terms[_terms.length - 1]) : trueCb

                return _terms.length > 0 && _match.length > 0 &&
                    _match.some((matchedKeyword) =>
                        start(matchedKeyword) && end(matchedKeyword))
            })
    }

    return results
}
217 |
/**
 * Recursively evaluate a parsed query branch.
 * Leaves (KEYWORD/PHRASE) are delegated to their handlers; operator
 * nodes evaluate both operands and combine the results.
 * @throws {Error} on unknown operators, misplaced NOT flags, or
 * operands that are not branches
 */
const evaluateBranch = async (search, wildcard, queryOperator, a, b, c, d) => {
    if (!validateQueryOperator(queryOperator)) {
        throw new Error(`Invalid operator ${queryOperator} in branch`)
    }

    switch (queryOperator) {
        case KEYWORD:
            return handleKeyword(search, wildcard, a, b, c)
        case PHRASE:
            return handlePhrase(search, wildcard, a, b, c, d)
    }

    // NOT flags (c/d) are only meaningful on an AND branch.
    if ((c || d) && queryOperator !== AND) {
        throw new Error('Invalid NOT in AND query')
    }

    if (!Array.isArray(a) || !Array.isArray(b)) {
        throw new Error('Unrecognized branch')
    }

    const resultsA = await evaluateBranch(search, wildcard, ...a)
    const resultsB = await evaluateBranch(search, wildcard, ...b)
    return handleBranchResults(queryOperator, resultsA, resultsB, c, d)
}
243 |
/**
 * Evaluate a parsed query tree against the index.
 * @param {Function} search performs the index lookup for a list of keywords
 * @param {Function} wildcard expands a wildcard keyword into concrete keywords
 * @param {Array} query parsed query branch (e.g. from query/parser)
 * @returns {Promise<Array>} matching results
 * @throws {Error} when the query is not a parsed branch array
 */
export default (search, wildcard, query) => {
    if (!Array.isArray(query)) {
        throw new Error('Invalid query')
    }
    return evaluateBranch(search, wildcard, ...query)
}
250 |
--------------------------------------------------------------------------------
/src/query/query.spec.js:
--------------------------------------------------------------------------------
1 | import { create as createIndex, tokenize, query, parse } from '../index.esm'
2 | import createEncryptionHelper from '../../example/helper/encryptionHelper'
3 |
// Deterministic all-zero key/salt keep the encrypted index stable across runs.
const indexKey = new Uint8Array(32)
const indexSalt = new Uint8Array(32)

const transformers = createEncryptionHelper(indexKey, indexSalt)

describe('query', () => {
    const getIndex = () => createIndex({ transformers })

    let index

    // Fixture documents: 12x free text, 20x numeric sequences (for order and
    // anchor tests), 30x sentences (for proximity and quorum tests).
    beforeAll(async () => {
        index = await getIndex()
        await index.clear()

        await index.store('123', tokenize('hello world!'))
        await index.store('124', tokenize('cat aaa bbb ccc mouse ddd dog'))
        await index.store('125', tokenize('cat aaa bbb mouse ccc dog'))
        await index.store('126', tokenize('cat aaa mouse bbb dog'))
        await index.store('127', tokenize('cat aaa mouse dog'))
        await index.store('128', tokenize('aaa mouse cat'))
        await index.store('129', tokenize('hello'))

        await index.store('200', tokenize('1 2 3 4 5 6 7', 1))
        await index.store('201', tokenize('1 4 5 7', 1))
        await index.store('202', tokenize('5 6 7 1 2 3 4', 1))
        await index.store('203', tokenize('7 6 5 4 3 2 1', 1))
        await index.store('204', tokenize('1 2 3 4 5 6 7 8 9 10', 1))
        await index.store('205', tokenize('1 2 3 4 5 6 7 10 9 8', 1))
        await index.store('206', tokenize('11 12 13 14 15 16', 1))
        await index.store('207', tokenize('12 13 14 15 16', 1))

        await index.store('300', tokenize('Achilles catches the tortoise', 1))
        await index.store('301', tokenize('Tortoise caught by Achilles', 1))
        await index.store('302', tokenize('Achilles caught the green tortoise', 1))
        await index.store('303', tokenize('rock paper scissor', 1))
        await index.store('304', tokenize('rock paper etc scissor', 1))
    })

    afterAll(async () => {
        await index.clear()
        index.close()
    })

    const mapIds = (result = []) => result.map(({ id }) => id)
    const sort = (array = []) => {
        array.sort()
        return array
    }

    // Run a query string end-to-end (parse -> evaluate) and return the
    // sorted ids of matching documents.
    const search = async (string) => sort(mapIds(await query(index.search, index.wildcard, parse(string))))

    it('should return results for a simple word', async () => {
        expect(await search('hello'))
            .toEqual(sort(['123', '129']))
    })

    it('should return results for a wildcard query', async () => {
        expect(await search('he*'))
            .toEqual(sort(['123', '129']))
    })

    it('should return results for a wildcard query with AND', async () => {
        expect(await search('ro* *sso*'))
            .toEqual(sort(['303', '304']))
    })

    it('should return results for a wildcard query with OR', async () => {
        expect(await search('to* | *ell*'))
            .toEqual(sort(['300', '301', '302', '123', '129']))
    })

    it('should return results for a AND query', async () => {
        expect(await search('hello world'))
            .toEqual(sort(['123']))
    })

    it('should return results for a AND NOT query', async () => {
        expect(await search('hello !world'))
            .toEqual(sort(['129']))
    })

    it('should return results for a AND NOT query with wildcard', async () => {
        expect(await search('he* !wor*'))
            .toEqual(sort(['129']))
    })

    it('should return results for keyword modifiers query', async () => {
        expect(await search('^hello$'))
            .toEqual(sort(['129']))
        expect(await search('^aaa'))
            .toEqual(sort(['128']))
        expect(await search('cat$'))
            .toEqual(sort(['128']))
    })

    it('should return results for a OR query', async () => {
        expect(await search('hello | cat'))
            .toEqual(sort(['123', '129', '124', '125', '126', '127', '128']))
    })

    it('should return results for a PHRASE query with single wildcard', async () => {
        expect(await search('"cat * mouse"'))
            .toEqual(sort(['126', '127']))
    })

    it('should return results for a PHRASE query with wildcard', async () => {
        expect(await search('"ca* * *mou*"'))
            .toEqual(sort(['126', '127']))
    })

    it('should return results for a PHRASE query with phrase modifier', async () => {
        expect(await search('"^he*$"'))
            .toEqual(sort(['129']))
        expect(await search('"^hello$"'))
            .toEqual(sort(['129']))
        expect(await search('"^5 6"'))
            .toEqual(sort(['202']))
        expect(await search('"6 7$"'))
            .toEqual(sort(['200']))
        expect(await search('"^12 13 14 15 16$"'))
            .toEqual(sort(['207']))
    })

    it('should return results for a PHRASE query', async () => {
        expect(await search('"cat aaa mouse"'))
            .toEqual(['126', '127'])
    })

    it('should return results for a PROXIMITY query', async () => {
        expect(await search('"achilles tortoise"~3'))
            .toEqual(['300', '301'])
        expect(await search('"rock paper scissor"~1'))
            .toEqual(['303'])
    })

    it('should return results for a QUOROM query', async () => {
        expect(await search('"achilles tortoise"/2'))
            .toEqual(sort(['300', '301', '302']))
    })

    it('should return results for a QUOROM query with wildcard', async () => {
        expect(await search('"ach* tortoise"/2'))
            .toEqual(sort(['300', '301', '302']))
    })

    it('should return empty results for a QUOROM query', async () => {
        expect(await search('"achilles tortoise rock"/3'))
            .toEqual([])
    })

    it('should return results for a BEFORE and PHRASE query', async () => {
        expect(await search('cat << "aaa mouse"'))
            .toEqual(['126', '127'])
    })

    it('should return results for a BEFORE and PHRASE query with wildcard', async () => {
        expect(await search('ca* << "aa* *ous*"'))
            .toEqual(['126', '127'])
    })

    it('should return results for a PHRASE and BEFORE query', async () => {
        expect(await search('"aaa mouse" << dog'))
            .toEqual(['126', '127'])
    })

    it('should return results for a BEFORE query', async () => {
        expect(await search('mouse << bbb'))
            .toEqual(['126'])
        expect(await search('mouse << cat'))
            .toEqual(['128'])
    })

    it('should return results for a complex BEFORE query', async () => {
        expect(await search('1 << 4 << 5 << (6 | 7)'))
            .toEqual(sort(['200', '201', '204', '205']))
    })

    it('should return results for a complex query', async () => {
        expect(await search('1 << ((4 << (5 << ((6 !10) | (7 !10) | "8 9 10"))))'))
            .toEqual(sort(['200', '201', '204']))
        expect(await search('(1 << 4 << 5) << ((6 !10)| (7 !10) | "8 9 10")'))
            .toEqual(sort(['200', '201', '204']))
    })

    it('should return results for a OR, keyword and AND query', async () => {
        expect(await search('hello | (cat ddd)'))
            .toEqual(sort(['123', '129', '124']))
    })

    it('should return results for a OR, AND and AND query', async () => {
        expect(await search('(hello world) | (cat ddd)'))
            .toEqual(sort(['123', '124']))
    })

    it('should return empty results for a AND and AND query', async () => {
        expect(await search('hello world cat ddd'))
            .toEqual([])
    })

    it('should return results for a AND, AND and OR query', async () => {
        expect(await search('cat ddd (cat | ddd)'))
            .toEqual(['124'])
    })
})
208 |
--------------------------------------------------------------------------------
/src/store/keyValueStore.js:
--------------------------------------------------------------------------------
1 | import { request } from '../helper/idb'
2 | import sizeof from '../helper/sizeof'
3 |
/**
 * Enhance a key-value store with transformer functions: keys are derived
 * through `property`, and values are serialized on write and
 * deserialized on read.
 * @param {Number} id
 * @param {Object} store
 * @param {Object} transformers
 * @param {Function} transformers.property derives the stored key
 * @param {Function} transformers.serialize value transform on put
 * @param {Function} transformers.deserialize value transform on get
 * @returns {Object}
 */
export const withTransformers = (id, store, { property, serialize, deserialize }) => ({
    ...store,
    put: (tx, value, key) => store.put(tx, serialize(id, key, value), property(id, key)),
    get: async (tx, key) => deserialize(id, key, await store.get(tx, property(id, key))),
    remove: (tx, key) => store.remove(tx, property(id, key))
})
29 |
/**
 * Create an idb key-value store with transaction support.
 * @param {String} tableName
 * @returns {Object}
 */
export default (tableName = '') => {
    const objectStore = (tx) => tx.objectStore(tableName)
    return {
        name: tableName,
        count: (tx) => request(objectStore(tx).count()),
        size: (tx) =>
            new Promise((resolve, reject) => {
                // Walk the whole table with a cursor, summing key + value sizes.
                const cursorRequest = objectStore(tx).openCursor()
                let total = 0
                cursorRequest.onerror = () => reject(cursorRequest.error)
                cursorRequest.onsuccess = (event) => {
                    const cursor = event.target.result
                    if (!cursor) {
                        resolve(total)
                        return
                    }
                    total += sizeof(cursor.value) + sizeof(cursor.key)
                    cursor.continue()
                }
            }),
        // put/remove/clear return the raw IDBRequest so callers can await the
        // enclosing transaction instead; get/count resolve through request().
        put: (tx, value, key) => objectStore(tx).put(value, key),
        get: (tx, key) => request(objectStore(tx).get(key)),
        remove: (tx, key) => objectStore(tx).delete(key),
        clear: (tx) => objectStore(tx).clear()
    }
}
70 |
--------------------------------------------------------------------------------
/src/store/metadataStore.js:
--------------------------------------------------------------------------------
1 | import { READWRITE, transaction } from '../helper/idb'
2 |
/**
 * Metadata database helper.
 * @param {Object} store key-value store (get/put/remove take (tx, key, ...))
 * @param {Function} getTransaction
 * @returns {Object}
 */
const createMetadataStore = (store, getTransaction) => {
    const table = [store.name]

    /**
     * Get a key from the table.
     * @param {String} key
     * @returns {Promise}
     */
    const get = async (key) => {
        return store.get(await getTransaction(table), key)
    }

    /**
     * Set a key in the table.
     * @param {String} key
     * @param {*} value
     * @returns {Promise} resolves when the transaction completes
     */
    const set = async (key, value) => {
        const tx = await getTransaction(table, READWRITE)
        const promise = transaction(tx)
        store.put(tx, value, key)
        return promise
    }

    /**
     * Get and set the next incrementing ID number.
     * Starts at 0 when the key has never been set.
     * @param {String} key
     * @returns {Promise<Number>}
     */
    const getAndSetId = async (key) => {
        const tx = await getTransaction(table, READWRITE)
        // Fix: arguments were swapped (store.get(key, tx)), so the read
        // always returned undefined and the counter reset to 0 on every call.
        const value = await store.get(tx, key)
        const newValue = (value === undefined ? -1 : value) + 1
        store.put(tx, newValue, key)
        return newValue
    }

    /**
     * Remove a key from the table.
     * @param {String} key
     * @returns {Promise} resolves when the transaction completes
     */
    const remove = async (key) => {
        const tx = await getTransaction(table, READWRITE)
        const promise = transaction(tx)
        store.remove(tx, key)
        return promise
    }

    return {
        get,
        set,
        getAndSetId,
        remove,
        name: store.name,
        clear: store.clear
    }
}

export default createMetadataStore
68 |
--------------------------------------------------------------------------------
/src/store/positionsStore.js:
--------------------------------------------------------------------------------
1 | import { READWRITE, request, transaction } from '../helper/idb'
2 | import { vbDecode, vbEncode } from '../helper/variableByteCodes'
3 |
/**
 * Position database helper.
 * Handles all logic around storing keywords.
 * @param {Object} store
 * @param {Function} getTransaction
 * @returns {Object}
 */
export default (store, getTransaction) => {
    const table = store.name

    /**
     * Read and decode the stored terms list for an id.
     * @param {Number} id
     * @param {IDBTransaction} tx
     * @returns {Promise<Array>}
     */
    const readList = async (id, tx) => vbDecode(await store.get(tx, id))

    /**
     * Get the terms list for an id.
     * @param {Number} id
     * @returns {Promise<Array>}
     */
    const get = async (id) => readList(id, await getTransaction(table))

    /**
     * Get the terms lists for multiple ids on one transaction.
     * @param {Array} ids
     * @returns {Promise<Array[]>}
     */
    const getBulk = async (ids) => {
        const tx = await getTransaction(table)
        return Promise.all(ids.map((id) => readList(id, tx)))
    }

    /**
     * Encode and store the terms list for an id.
     * @param {Number} id
     * @param {Array} terms
     * @return {Promise}
     */
    const insert = async (id, terms) => {
        const tx = await getTransaction(table, READWRITE)
        return request(store.put(tx, vbEncode(terms), id))
    }

    /**
     * Remove the terms list for an id; resolves when the transaction completes.
     * @param {Number} id
     * @return {Promise}
     */
    const remove = async (id) => {
        const tx = await getTransaction(table, READWRITE)
        const done = transaction(tx)
        store.remove(tx, id)
        return done
    }

    return {
        insert,
        get,
        getBulk,
        remove,
        name: store.name,
        count: store.count,
        size: store.size,
        clear: store.clear
    }
}
79 |
--------------------------------------------------------------------------------
/src/store/postingsStore.js:
--------------------------------------------------------------------------------
1 | import { insertIntoGapsArray, removeFromGapsArray } from '../helper/array'
2 | import { vbDecode, vbEncode } from '../helper/variableByteCodes'
3 | import { READWRITE, request, transaction } from '../helper/idb'
4 |
5 | /**
6 | * Postings database helper.
7 | * Handles all logic around storing keywords.
8 | * @param {Object} store
9 | * @param {Function} getTransaction
10 | * @returns {Object}
11 | */
12 | export default (store, getTransaction) => {
/**
 * Get the decoded posting list for a term.
 * @param {Number} term
 * @param {IDBTransaction} tx
 * @returns {Promise<Array>}
 */
const getList = async (term, tx) => vbDecode(await store.get(tx, term))
23 |
/**
 * Encode and store the posting list for a term.
 * @param {Number} term
 * @param {Array} list
 * @param {IDBTransaction} tx
 */
const setList = (term, list, tx) => store.put(tx, vbEncode(list), term)
33 |
/**
 * Insert an id into a term's posting list, keeping links unique.
 * @param {Number} term
 * @param {Number} id
 * @param {IDBTransaction} tx
 */
const insert = async (term, id, tx) => {
    const current = await getList(term, tx)
    const updated = insertIntoGapsArray(current, id)
    // insertIntoGapsArray yields nothing when the id is already linked.
    if (updated) {
        setList(term, updated, tx)
    }
}
// Like `insert`, but takes an already-fetched posting list and returns the
// underlying put request (if any) so callers can await it.
const insert2 = (term, id, result, tx) => {
    const updated = insertIntoGapsArray(result, id)
    // Only allow unique links
    if (!updated) {
        return
    }
    return setList(term, updated, tx)
}
61 |
62 |
/**
 * Get the posting list for a single term.
 * @param {Number} term
 * @returns {Promise<Array>}
 */
const get = async (term) => getList(term, await getTransaction(store.name))
72 |
/**
 * Get the matching posting lists for several terms plus an inverted
 * id -> terms mapping over the union of their ids.
 * @param {Array} terms
 * @returns {Promise<Object>} { ids, idsToTerms, termsToIds }
 */
const getBulk = async (terms) => {
    const tx = await getTransaction(store.name)
    const postingLists = await Promise.all(terms.map((term) => getList(term, tx)))

    const ids = []
    const idsToTerms = []
    // id -> index into ids/idsToTerms. Replaces the former O(n²)
    // Array#indexOf scan over the accumulated ids with an O(1) lookup.
    const indexById = new Map()

    postingLists.forEach((list, i) => {
        const term = terms[i]
        let id = 0

        for (let j = 0; j < list.length; ++j) {
            id += list[j] // Stored as gap array

            const idx = indexById.get(id)
            if (idx === undefined) {
                indexById.set(id, ids.length)
                ids.push(id)
                idsToTerms.push([term])
            } else {
                idsToTerms[idx].push(term)
            }
        }
    })

    return { ids, idsToTerms, termsToIds: postingLists }
}
102 |
/**
 * Remove a keyword-id mapping.
 * If it was the only id, remove the keyword completely.
 * @param {Number} term
 * @param {Number} id
 * @param {IDBTransaction} tx
 * @returns {Promise<Boolean>} true when the term no longer stores any ids
 */
const removeLink = async (term, id, tx) => {
    const currentValues = await getList(term, tx)

    if (!currentValues.length) {
        return true
    }

    const prunedValues = removeFromGapsArray(currentValues, id)
    if (!prunedValues) {
        // The id was not linked to this term; nothing to write.
        return false
    }

    if (!prunedValues.length) {
        // Last id for this keyword: drop the keyword entry entirely.
        store.remove(tx, term)
        return true
    }

    setList(term, prunedValues, tx)
    return false
}
132 |
/**
 * Remove a list of keyword-id mappings in one transaction.
 * @param {Array} terms
 * @param {Number} id
 * @return {Promise<Array>} per-term flags as returned by removeLink
 */
// eslint-disable-next-line no-unused-vars
const removeBulk = async (terms, id) => {
    const tx = await getTransaction(store.name, READWRITE)
    const promise = transaction(tx)
    // Kick off every removal up front on the shared transaction. Collecting
    // the promises (instead of the old fire-and-forget async forEach, whose
    // `.then(result[i] = ...)` raced the transaction promise) guarantees the
    // returned array is fully populated.
    const results = terms.map((term) => removeLink(term, id, tx))
    await promise
    return Promise.all(results)
}
151 |
    /**
     * Remove a list of keyword-id mappings in one transaction.
     * Reads all posting lists up front, then issues the writes and waits
     * only for the last request instead of the whole transaction.
     * @param {Array} terms
     * @param {Number} id
     * @returns {Promise} resolves to per-term flags; true when the keyword no longer exists
     */
    const removeBulk2 = async (terms, id) => {
        const tx = await getTransaction(store.name, READWRITE)
        // Batch all reads first so the writes below can be issued back-to-back.
        const postingLists = await Promise.all(terms.map((term) => getList(term, tx)))
        const result = []
        let req
        for (let i = 0; i < terms.length; ++i) {
            const term = terms[i]
            const oldValues = postingLists[i]

            const newValues = removeFromGapsArray(oldValues, id)
            // NOTE(review): removeLink returns false when removeFromGapsArray
            // yields nothing for a non-empty list, but this path reports true —
            // confirm which semantics callers expect.
            if (!newValues) {
                result[i] = true
                continue
            }

            // If it's empty, remove the keyword.
            if (newValues.length === 0) {
                result[i] = true
                req = store.remove(tx, term)
                continue
            }

            result[i] = false
            req = setList(term, newValues, tx)
        }

        // No write request was issued (e.g. empty terms): nothing to wait for.
        if (!req) {
            return result
        }

        await request(req)
        return result
    }
185 |
186 | // eslint-disable-next-line no-unused-vars
187 | const insertBulk = async (terms, id) => {
188 | const tx = await getTransaction(store.name, READWRITE)
189 | const promise = transaction(tx)
190 | terms.forEach((term) => insert(term, id, tx))
191 | return promise
192 | }
193 |
194 | /**
195 | * Insert bulk, only waits for the last request rather than the transaction.
196 | * It's supposedly faster, but data consistency guarantees?
197 | * @param {Array} terms
198 | * @param {Number} id
199 | * @returns {Promise}
200 | */
201 | const insertBulk2 = async (terms, id) => {
202 | if (terms.length === 0) {
203 | return
204 | }
205 |
206 | const tx = await getTransaction(store.name, READWRITE)
207 | const postingLists = await Promise.all(terms.map((term) => getList(term, tx)))
208 |
209 | let req = undefined
210 | for (let i = 0; i < terms.length; ++i) {
211 | const insertRequest = insert2(terms[i], id, postingLists[i], tx)
212 |
213 | if (insertRequest) {
214 | req = insertRequest
215 | }
216 | }
217 |
218 | if (!req) {
219 | return
220 | }
221 |
222 | return request(req)
223 | }
224 |
    // Public surface of the postings store. The "2" variants are exposed:
    // they wait on the last request instead of the whole transaction.
    return {
        get,
        insertBulk: insertBulk2,
        getBulk,
        removeBulk: removeBulk2,
        name: store.name,
        count: store.count,
        size: store.size,
        clear: store.clear
    }
}
236 |
--------------------------------------------------------------------------------
/src/store/postingsStore.spec.js:
--------------------------------------------------------------------------------
1 | import { open } from '../helper/idb'
2 | import createPostingsStore from './postingsStore'
3 | import createKeyValueStore from './keyValueStore'
4 | import { getArrayGaps } from '../helper/array'
5 |
6 | const tableName = 'postings'
7 |
// Migration callback for the test database: create the object store on
// the very first open (version 0 -> 1).
const upgradeDb = (db, oldVersion) => {
    if (oldVersion === 0) {
        db.createObjectStore(tableName)
    }
}
16 |
describe('postings', () => {
    let db
    let store
    let getTransaction

    // Thin wrapper so the store helpers can open transactions on demand.
    const createGetTransaction = (db) => (tables, mode) => db.transaction(tables, mode)

    beforeAll(async () => {
        // Fresh database for the suite; version 1 triggers upgradeDb.
        db = await open(indexedDB, 'postings', 1, upgradeDb)

        getTransaction = createGetTransaction(db)

        store = createPostingsStore(
            createKeyValueStore(tableName),
            getTransaction
        )
    })

    afterAll(async () => {
        // Clean the table and release the connection.
        const tx = getTransaction([tableName], 'readwrite')
        await store.clear(tx)
        db.close()
    })

    it('should handle multiple calls with proper locking', async () => {
        // Fire 1000 concurrent inserts for the same three terms; every id
        // must appear exactly once, in insertion order, in each posting list.
        // NOTE(review): `promises` is never awaited — the assertions rely on
        // the reads being ordered after all writes; confirm that guarantee.
        const promises = []
        const expectation = []
        for (let i = 0; i < 1000; ++i) {
            promises.push(store.insertBulk([1,2,3], i))
            expectation.push(i)
        }
        expect(getArrayGaps(await store.get(1))).toEqual(expectation)
        expect(getArrayGaps(await store.get(2))).toEqual(expectation)
        expect(getArrayGaps(await store.get(3))).toEqual(expectation)
    })
})
53 |
--------------------------------------------------------------------------------
/src/store/transposeStore.js:
--------------------------------------------------------------------------------
1 | import { READWRITE, request } from '../helper/idb'
2 |
/**
 * Transpose store: maintains a bidirectional value <-> numeric-id mapping
 * across two key-value stores.
 * @param {Object} aStore store written with put(tx, value, newId)
 * @param {Object} bStore store written with put(tx, newId, value)
 * @param {Function} getTransaction
 * @returns {Object}
 */
export default (aStore, bStore, getTransaction) => {
    // Sentinel key in bStore holding the next id to hand out; shaped so it
    // cannot collide with real values.
    const idKey = '*_=% id %=_*'
    const table = [aStore.name, bStore.name]

    /**
     * Resolve ids back through aStore.
     * @param {Array} bs ids to look up
     * @returns {Promise}
     */
    const from = async (bs = []) => {
        const tx = await getTransaction(table)
        return Promise.all(bs.map((b) => aStore.get(tx, b)))
    }

    /**
     * Map values to numeric ids, allocating sequential ids for unseen values.
     * @param {Array} as values to map
     * @returns {Promise} ids, positionally matching `as`
     */
    const bulk = async (as = []) => {
        const tx = await getTransaction(table, READWRITE)
        // Next id to allocate; starts at 1 on an empty store.
        const initialId = (await bStore.get(tx, idKey)) || 1
        let id = initialId
        // Pre-fetch existing ids; entries stay undefined for new values.
        const result = await Promise.all(as.map((a) => bStore.get(tx, a)))
        const seen = new Map()
        let requestA
        let requestB

        for (let i = 0; i < result.length; ++i) {
            const iid = result[i]

            if (iid) {
                continue
            }

            // Duplicates...
            const a = as[i]
            if (seen.has(a)) {
                result[i] = seen.get(a)
                continue
            }

            const newId = id++

            seen.set(a, newId)
            result[i] = newId

            // Write both directions of the mapping in the same transaction.
            requestA = aStore.put(tx, a, newId)
            requestB = bStore.put(tx, newId, a)
        }

        // Something was allocated: persist the next free id.
        if (id !== initialId) {
            requestB = bStore.put(tx, id, idKey)
        }

        // Wait on the last requests only, not the whole transaction.
        // requestA set implies requestB set (either the pair write or idKey).
        if (requestA) {
            await Promise.all([request(requestA), request(requestB)])
        }

        return result
    }

    /**
     * Sum a stat ('count' or 'size') over both underlying stores.
     * @param {String} type
     * @returns {Function} async (tx) => Number
     */
    const stat = (type = 'count') => async (tx) => {
        const result = await Promise.all([aStore[type](tx), bStore[type](tx)])
        return result.reduce((agg, cur) => agg + cur, 0)
    }

    return {
        name: table,
        bulk,
        from,
        count: stat('count'),
        size: stat('size'),
        clear: (tx) => {
            aStore.clear(tx)
            bStore.clear(tx)
        },
    }
}
72 |
--------------------------------------------------------------------------------
/src/store/transposeStore.spec.js:
--------------------------------------------------------------------------------
1 | import { open } from '../helper/idb'
2 | import createTransposeStore from './transposeStore'
3 | import createKeyValueStore from './keyValueStore'
4 |
const tableName = 'lexicon' // forward store of the transpose pair
const tableNameI = 'lexicon-i' // inverse store of the transpose pair
7 |
// Migration callback for the test database: create both object stores on
// the very first open (version 0 -> 1).
const upgradeDb = (db, oldVersion) => {
    if (oldVersion === 0) {
        db.createObjectStore(tableName)
        db.createObjectStore(tableNameI)
    }
}
17 |
describe('transpose', () => {
    let db
    let store
    let getTransaction

    // Thin wrapper so the store helpers can open transactions on demand.
    const createGetTransaction = (db) => (tables, mode) => db.transaction(tables, mode)

    beforeAll(async () => {
        // Fresh database for the suite; version 1 triggers upgradeDb.
        db = await open(indexedDB, 'transpose', 1, upgradeDb)

        getTransaction = createGetTransaction(db)

        store = createTransposeStore(
            createKeyValueStore(tableName),
            createKeyValueStore(tableNameI),
            getTransaction
        )
    })

    afterAll(async () => {
        // Clean both tables and release the connection.
        const tx = getTransaction([tableName, tableNameI], 'readwrite')
        await store.clear(tx)
        db.close()
    })

    it('should transpose', async () => {
        // Ids are allocated sequentially from 1, reused for known values,
        // and shared between duplicates within a single bulk call.
        expect(await store.bulk(['123'])).toEqual([1])
        expect(await store.bulk(['abc', '123', 'def'])).toEqual([2, 1, 3])
        expect(await store.bulk(['xxx', 'xxx'])).toEqual([4, 4])
        expect(await store.bulk(['123'])).toEqual([1])
    })
})
50 |
--------------------------------------------------------------------------------
/src/store/wildcardStore.js:
--------------------------------------------------------------------------------
1 | import { extractQueryTokenPadding, splitTokenPadding } from '../helper/wildcard'
2 | import { getArrayGaps, getGapsArray, unique } from '../helper/array'
3 | import { READWRITE, request, transaction } from '../helper/idb'
4 | import { vbDecode, vbEncode } from '../helper/variableByteCodes'
5 |
6 | /**
7 | * Split a list of terms to a list of splitted tokens -> term id map.
8 | * @param {Array} stringTerms
9 | * @param {Array} terms
10 | * @returns {Object}
11 | */
12 | const splitToMap = (stringTerms, terms) => {
13 | return stringTerms.reduce((acc, stringTerm, i) => {
14 | const tokens = splitTokenPadding(stringTerm)
15 | tokens.forEach((token) => {
16 | if (!acc[token]) {
17 | acc[token] = []
18 | }
19 | acc[token].push(terms[i])
20 | })
21 | return acc
22 | }, {})
23 | }
24 |
/**
 * Wildcard database helper.
 * Handles all logic around storing and finding wildcards.
 * @param {Object} store
 * @param {Function} getTransaction
 * @returns {Object}
 */
export default (store, getTransaction) => {
    const table = store.name

    /**
     * Read the term-id list stored under a token.
     * @param {String} token
     * @param {IDBTransaction} tx
     * @returns {Promise}
     */
    const getList = async (token, tx) => {
        const result = await store.get(tx, token)
        // Lists are persisted as variable-byte encoded gap arrays.
        return getArrayGaps(vbDecode(result))
    }

    /**
     * Persist a term-id list under a token.
     * @param {String} token
     * @param {Array} list
     * @param {IDBTransaction} tx
     * @returns {IDBRequest}
     */
    const setList = (token, list, tx) => {
        return store.put(tx, vbEncode(getGapsArray(list)), token)
    }

    /**
     * Insert a token-keyword mapping (read-modify-write, deduplicated).
     * @param {String} token
     * @param {Array} terms
     * @param {IDBTransaction} tx
     */
    const insertLink = async (token = '', terms = [], tx) => {
        const oldValues = await getList(token, tx)
        const newValues = unique(oldValues.concat(terms))
        setList(token, newValues, tx)
    }

    /**
     * Store wildcards <-> terms, resolving when the transaction commits.
     * @param {Array} stringTerms
     * @param {Array} terms
     * @returns {Promise}
     */
    // eslint-disable-next-line no-unused-vars
    const insertBulk = async (stringTerms, terms) => {
        const tx = await getTransaction(table, READWRITE)
        const map = splitToMap(stringTerms, terms)
        const promise = transaction(tx)
        Object.keys(map)
            .forEach((token) => insertLink(token, map[token], tx))
        return promise
    }

    /**
     * Merge terms into an already-read list and write it back.
     * @param {String} token
     * @param {Array} oldValues
     * @param {Array} terms
     * @param {IDBTransaction} tx
     * @returns {IDBRequest}
     */
    const insertLink2 = (token = '', oldValues, terms = [], tx) => {
        const newValues = unique(oldValues.concat(terms))
        return setList(token, newValues, tx)
    }

    /**
     * Store wildcards <-> terms, waiting only for the last write request
     * rather than the whole transaction (supposedly faster).
     * @param {Array} stringTerms
     * @param {Array} terms
     * @returns {Promise}
     */
    const insertBulk2 = async (stringTerms, terms) => {
        const map = splitToMap(stringTerms, terms)
        const keys = Object.keys(map)
        // Nothing to write: avoid opening a transaction at all.
        if (!keys.length) {
            return
        }
        const tx = await getTransaction(table, READWRITE)
        // Batch all reads first so the writes can be issued back-to-back.
        const links = await Promise.all(keys.map((key) => getList(key, tx)))

        let req

        for (let i = 0; i < links.length; ++i) {
            req = insertLink2(keys[i], links[i], map[keys[i]], tx)
        }

        if (!req) {
            return
        }

        return request(req)
    }

    /**
     * Get a list of term ids from a wildcard pattern.
     * @param {String} query Wildcard pattern
     * @returns {Promise>}
     */
    const get = async (query) => {
        const queryToken = extractQueryTokenPadding(query)
        const tx = await getTransaction(table)
        return getList(queryToken, tx)
    }

    /**
     * Remove a keyword-id mapping.
     * If it was the only id, remove the keyword completely.
     * @param {String} token
     * @param {Array} terms
     * @param {IDBTransaction} tx
     * @returns {Promise}
     */
    const removeLink = async (token = '', terms = [], tx) => {
        const oldValues = await getList(token, tx)
        const newValues = oldValues.filter((aTerm) => !terms.some((term) => term === aTerm))
        if (newValues.length === 0) {
            store.remove(tx, token)
            return
        }
        setList(token, newValues, tx)
    }

    /**
     * Remove a list of keyword-id mappings, resolving when the transaction commits.
     * @param {Array} stringTerms
     * @param {Array} terms
     * @returns {Promise}
     */
    // eslint-disable-next-line no-unused-vars
    const removeBulk = async (stringTerms = [], terms) => {
        const map = splitToMap(stringTerms, terms)
        const tx = await getTransaction(table, READWRITE)
        const promise = transaction(tx)
        Object.keys(map)
            .forEach((token) => removeLink(token, map[token], tx))
        return promise
    }

    /**
     * Remove a list of keyword-id mappings, waiting only for the last write
     * request rather than the whole transaction.
     * @param {Array} stringTerms
     * @param {Array} terms
     * @returns {Promise}
     */
    const removeBulk2 = async (stringTerms = [], terms) => {
        const map = splitToMap(stringTerms, terms)
        const keys = Object.keys(map)
        // Check for emptiness BEFORE opening a readwrite transaction,
        // consistent with insertBulk2 (previously the transaction was opened
        // first and then abandoned).
        if (!keys.length) {
            return
        }
        const tx = await getTransaction(table, READWRITE)
        const lists = await Promise.all(keys.map((key) => getList(key, tx)))
        let req
        for (let i = 0; i < keys.length; ++i) {
            const token = keys[i]
            const oldValues = lists[i]
            const tokenTerms = map[token]

            const newValues = oldValues.filter((aTerm) => !tokenTerms.some((term) => term === aTerm))
            // Nothing left under this token: drop it completely.
            if (newValues.length === 0) {
                req = store.remove(tx, token)
                continue
            }
            req = setList(token, newValues, tx)
        }
        return request(req)
    }

    // Public surface; the "2" variants are exposed.
    return {
        insertBulk: insertBulk2,
        get,
        removeBulk: removeBulk2,
        name: store.name,
        count: store.count,
        size: store.size,
        clear: store.clear
    }
}
188 |
--------------------------------------------------------------------------------