├── .editorconfig ├── .eslintignore ├── .eslintrc ├── .gitignore ├── LICENSE ├── README.md ├── example ├── example.html ├── example.js ├── helper │ ├── arrayHelper.js │ ├── cryptoHelper.js │ ├── encryptionHelper.js │ ├── serializeHelper.js │ └── stringHelper.js ├── index.html ├── package.json └── rollup.config.js ├── karma.conf.js ├── package.json └── src ├── helper ├── array.js ├── array.spec.js ├── idb.js ├── idb.spec.js ├── lru.js ├── lru.spec.js ├── scoring.js ├── scoring.spec.js ├── sizeof.js ├── sizeof.spec.js ├── tokenize.js ├── tokenize.spec.js ├── variableByteCodes.js ├── variableByteCodes.spec.js ├── wildcard.js └── wildcard.spec.js ├── index.esm.js ├── index.spec.js ├── master.js ├── query ├── grammar.peg ├── grammar.spec.js ├── parser.js ├── parser.spec.js ├── query.js └── query.spec.js └── store ├── keyValueStore.js ├── metadataStore.js ├── positionsStore.js ├── postingsStore.js ├── postingsStore.spec.js ├── transposeStore.js ├── transposeStore.spec.js └── wildcardStore.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | [.md] 13 | trim_trailing_whitespace = false 14 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | grammar.js 2 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "eslint:recommended", 3 | "env": { 4 | "es6": true, 5 | "browser": true, 6 | "jasmine": true 7 | }, 8 | "globals": { 9 | }, 10 | "parserOptions": { 11 | "ecmaVersion": 8, 12 | "sourceType": "module", 13 | "ecmaFeatures": { 14 | 
"experimentalObjectRestSpread": true 15 | } 16 | }, 17 | "rules": { 18 | "no-console": "off", 19 | "indent": [ 20 | 2, 21 | 4, 22 | { 23 | "SwitchCase": 1 24 | } 25 | ], 26 | "no-unused-vars": [ 27 | "error" 28 | ], 29 | "quotes": [ 30 | 2, 31 | "single" 32 | ], 33 | "linebreak-style": [ 34 | 2, 35 | "unix" 36 | ], 37 | "semi": [ 38 | 2, 39 | "never" 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | .DS_Store 3 | .forever 4 | .node-gyp 5 | .npm 6 | .idea 7 | node_modules 8 | npm-debug.log 9 | dist 10 | package-lock.json 11 | src/query/grammar.js 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # encrypted-search 2 | 3 | This library provides encrypted-search functionality for the browser. 4 | 5 | The library requires the support of Promises, async/await, modules, and IndexedDB 6 | 7 | ## Browser support 8 | Chrome, Safari, Firefox, Edge, IE11 9 | 10 | ## Usage 11 | 12 | ```javascript 13 | import { create, query, parse, tokenize } from 'encrypted-search' 14 | 15 | // The encryption helpers. The hash is used for the key name. Encrypt and decrypt for the values. 16 | const transformers = { 17 | property: (tableId, input) => input, 18 | serialize: (tableId, key, value) => value, 19 | deserialize: (tableId, key, value) => value 20 | } 21 | 22 | const index = create({ transformers }) 23 | 24 | await index.store('123', tokenize('this is a long string that can be searched')) 25 | 26 | const searchString = '(this << is & "long string") | can' 27 | const results = await query(index.search, index.wildcard, parse(searchString)) 28 | // Returns 29 | [ 30 | { 31 | "id": "123", 32 | "keywords": [ 33 | "this", 34 | "is", 35 | "long", 36 | "string", 37 | "that", 38 | "can", 39 | "be", 40 | "searched" 41 | ], 42 | "match": [ 43 | "this", 44 | "is", 45 | "long", 46 | "string" 47 | ] 48 | } 49 | ] 50 | 51 | // 52 | index.close() 53 | ``` 54 | 55 | ## Query syntax 56 | It has support for multiple search operators. 
57 | 58 | | Operator | Example | Matches documents that| 59 | |-------------|---------------------------------------------------------|-------------------------------------------------------------------| 60 | | AND | these words must appear / these & words & must & appear | contain all keywords | 61 | | OR | these | words | can | appear | contain any keywords | 62 | | PHRASE | "these words appear in order" | contain all keywords in exact order | 63 | | NOT | hello !world | contain hello but not world | 64 | | QUORUM | "good fast cheap"/2 | contain at least 2 keywords | 65 | | PROXIMITY | "close by"~2 | contain all keywords with no less than 2 words between them | 66 | | BEFORE | before << after | contain all keywords and in order | 67 | | WILDCARD | af* | contain the wildcarded keyword | 68 | | COMBINATION | (these words | any o*der) << after | fulfil the query in combination | 69 | 70 | ## Default Options 71 | TODO 72 | 73 | 74 | ## Example 75 | 76 | Example available in the example/ folder 77 | 78 | ## Author 79 | 80 | Mattias Svanström (@mmso) - ProtonMail 81 | -------------------------------------------------------------------------------- /example/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Example 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /example/example.js: -------------------------------------------------------------------------------- 1 | import { create, query, parse, tokenize } from '../src/index.esm' 2 | import createEncryptionHelper from './helper/encryptionHelper' 3 | 4 | const key = new Uint8Array(32) 5 | const salt = new Uint8Array(32) 6 | 7 | const createKeydownHandler = (index, $results) => { 8 | return async (e) => { 9 | const { key, target } = e 10 | if (key !== 'Enter') { 11 | return 12 | } 13 | e.preventDefault() 14 | 15 | const value = target.value 16 | target.value = '' 17 | 18 | const results = await 
query(index.search, index.wildcard, parse(value)) 19 | $results.innerHTML = JSON.stringify(results, null, 2) 20 | } 21 | } 22 | 23 | const init = async () => { 24 | document.body.innerHTML = ` 25 | 26 |
27 | ` 28 | 29 | const transformers = createEncryptionHelper(key, salt) 30 | const index = create({ transformers }) 31 | 32 | await index.store('123', tokenize('this is a long string that can be searched')) 33 | await index.store('124', tokenize('this is another document that is inserted into the index')) 34 | await index.store('125', tokenize('this is the last document')) 35 | 36 | const $input = document.body.querySelector('input') 37 | const $results = document.body.querySelector('.results') 38 | 39 | const onKeydown = createKeydownHandler(index, $results) 40 | $input.addEventListener('keydown', onKeydown) 41 | $input.focus() 42 | } 43 | 44 | document.addEventListener('DOMContentLoaded', init) 45 | -------------------------------------------------------------------------------- /example/helper/arrayHelper.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Concatenate two Uint8Arrays. 3 | * @param {Uint8Array} a 4 | * @param {Uint8Array} b 5 | * @returns {Uint8Array} 6 | */ 7 | export const concatUint8Array = (a, b) => { 8 | const result = new Uint8Array(a.byteLength + b.byteLength) 9 | result.set(new Uint8Array(a), 0) 10 | result.set(new Uint8Array(b), a.byteLength) 11 | return result 12 | } 13 | 14 | /** 15 | * Convert a string to a Uint8Array containing a UTF-8 string data. 16 | * @param {String} string 17 | * @return {Uint8Array} 18 | */ 19 | export const stringToUint8Array = (string) => { 20 | const result = new Uint8Array(string.length) 21 | for (let i = 0; i < string.length; i++) { 22 | result[i] = string.charCodeAt(i) 23 | } 24 | return result 25 | } 26 | 27 | /** 28 | * Convert a Uint8Array to a string. 
29 | * @param {Uint8Array} array 30 | * @returns {string} 31 | */ 32 | export const uint8ArrayToString = (array) => { 33 | const result = [] 34 | const bs = 1 << 14 35 | const j = array.length 36 | for (let i = 0; i < j; i += bs) { 37 | // eslint-disable-next-line prefer-spread 38 | result.push(String.fromCharCode.apply(String, array.subarray(i, i + bs < j ? i + bs : j))) 39 | } 40 | return result.join('') 41 | } 42 | 43 | -------------------------------------------------------------------------------- /example/helper/cryptoHelper.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable max-len */ 2 | import nacl from 'tweetnacl' 3 | import { Pbkdf2HmacSha256 } from 'asmCrypto.js' 4 | import { concatUint8Array } from './arrayHelper' 5 | 6 | const ITERATIONS = 10 7 | const DKLEN = 16 8 | const KEY_LENGTH = 32 9 | const SALT_LENGTH = 32 10 | export const NONCE_LENGTH = 24 11 | 12 | // eslint-disable-next-line import/prefer-default-export 13 | export const getRandomValues = (buf) => { 14 | if (window.crypto && window.crypto.getRandomValues) { 15 | window.crypto.getRandomValues(buf) 16 | return buf 17 | } 18 | if (self.crypto && self.crypto.getRandomValues) { 19 | self.crypto.getRandomValues(buf) 20 | return buf 21 | } 22 | if (window.msCrypto && window.msCrypto.getRandomValues) { 23 | window.msCrypto.getRandomValues(buf) 24 | return buf 25 | } 26 | throw new Error('No secure random number generator available.') 27 | } 28 | 29 | /** 30 | * Get a hash using a key-stretching algorithm. 31 | * @param {Uint8Array} data 32 | * @param {Uint8Array} salt 33 | * @param {Number} iterations 34 | * @param {Number} dklen 35 | * @returns {Uint8Array} 36 | */ 37 | export const hash = (data, salt, iterations = ITERATIONS, dklen = DKLEN) => { 38 | return Pbkdf2HmacSha256(data, salt, iterations, dklen) 39 | } 40 | 41 | /** 42 | * Encrypt data using a non-deterministic algorithm. 
43 | * @param {Uint8Array} data 44 | * @param {Uint8Array} key 45 | * @returns {Uint8Array | undefined} 46 | */ 47 | export const encrypt = (data, key) => { 48 | if (!data) { 49 | return 50 | } 51 | const nonce = getRandomValues(new Uint8Array(NONCE_LENGTH)) 52 | const ciphertext = nacl.secretbox(data, nonce, key) 53 | return concatUint8Array(nonce, ciphertext) 54 | } 55 | 56 | /** 57 | * Decrypt data with the given key. 58 | * @param {Uint8Array} data 59 | * @param {Uint8Array} key 60 | * @param {Number} nonceLength 61 | * @returns {Uint8Array | undefined} 62 | */ 63 | export const decrypt = (data, key) => { 64 | if (!data) { 65 | return 66 | } 67 | const nonce = data.slice(0, NONCE_LENGTH) 68 | const ciphertext = data.slice(NONCE_LENGTH, data.length) 69 | return nacl.secretbox.open(ciphertext, nonce, key) 70 | } 71 | 72 | export const generateKey = () => getRandomValues(new Uint8Array(KEY_LENGTH)) 73 | export const generateSalt = () => getRandomValues(new Uint8Array(SALT_LENGTH)) 74 | -------------------------------------------------------------------------------- /example/helper/encryptionHelper.js: -------------------------------------------------------------------------------- 1 | import { decrypt, encrypt, hash } from './cryptoHelper' 2 | import { stringToUint8Array } from './arrayHelper' 3 | import { encodeUtf8 } from './stringHelper' 4 | import { TABLES } from '../../src/master' 5 | import { readJson, readUint32, writeJson, writeUint32 } from './serializeHelper' 6 | 7 | export default (encryptionKey, hashSalt) => { 8 | const property = (table, key) => { 9 | if (table === TABLES.LEXICON) { 10 | return key 11 | } 12 | if (table === TABLES.IDS) { 13 | return key 14 | } 15 | return hash(stringToUint8Array(encodeUtf8(key)), hashSalt) 16 | } 17 | 18 | const writeTable = (table, key, data) => { 19 | return writeJson(data) 20 | } 21 | 22 | const serialize = (table, key, data) => { 23 | if (typeof data === 'undefined') { 24 | return 25 | } 26 | if (table === 
TABLES.POSTINGS || table === TABLES.POSITIONS || table === TABLES.WILDCARDS) { 27 | return data 28 | } 29 | if (table === TABLES.LEXICON_INVERSE || table === TABLES.IDS_INVERSE) { 30 | return writeUint32(data) 31 | } 32 | return encrypt(writeTable(table, key, data), encryptionKey) 33 | } 34 | 35 | const readTable = (table, key, data) => { 36 | return readJson(data) 37 | } 38 | 39 | const deserialize = (table, key, data) => { 40 | if (typeof data === 'undefined') { 41 | return 42 | } 43 | if (table === TABLES.POSTINGS || table === TABLES.POSITIONS || table === TABLES.WILDCARDS) { 44 | return data 45 | } 46 | if (table === TABLES.LEXICON_INVERSE || table === TABLES.IDS_INVERSE) { 47 | return readUint32(data) 48 | } 49 | return readTable(table, key, decrypt(data, encryptionKey)) 50 | } 51 | 52 | return { 53 | property, 54 | serialize, 55 | deserialize 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /example/helper/serializeHelper.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Write a javascript object to a Uint8Array. 3 | * @param {Object} data 4 | * @returns {Uint8Array} 5 | */ 6 | import { stringToUint8Array, uint8ArrayToString } from './arrayHelper' 7 | import { decodeUtf8, encodeUtf8 } from './stringHelper' 8 | 9 | export const writeJson = (data) => stringToUint8Array(encodeUtf8(JSON.stringify(data))) 10 | 11 | /** 12 | * Read a Uint8Array to a javascript object. 13 | * @param data 14 | * @returns {any} 15 | */ 16 | export const readJson = (data) => JSON.parse(decodeUtf8(uint8ArrayToString(data))) 17 | 18 | const getByteLength = (val) => { 19 | if (val <= 255) { 20 | return 1 21 | } 22 | if (val <= 65535) { 23 | return 2 24 | } 25 | if (val <= 16777215) { 26 | return 3 27 | } 28 | return 4 29 | } 30 | 31 | /** 32 | * Write a number into a variable 4 bytes Uint8Array. 
33 | * @param {Number} val 34 | * @returns {Uint8Array} 35 | */ 36 | export const writeUint32 = (val) => { 37 | const len = getByteLength(val) 38 | const buf = new Uint8Array(len) 39 | 40 | let num = val 41 | for (let i = 0; i < len; ++i) { 42 | buf[i] = num 43 | if (len === i + 1) { 44 | break 45 | } 46 | num >>>= 8 47 | } 48 | return buf 49 | } 50 | 51 | /** 52 | * Read a variable 4 bytes Uint8Array into a number. 53 | * @param {Uint8Array} buf 54 | * @returns {number} 55 | */ 56 | export const readUint32 = (buf) => { 57 | if (buf.length <= 0) { 58 | return 0 59 | } 60 | let val = buf[0] 61 | if (buf.length === 1) { 62 | return val 63 | } 64 | val |= buf[1] << 8 65 | if (buf.length === 2) { 66 | return val 67 | } 68 | val |= buf[2] << 16 69 | if (buf.length === 3) { 70 | return val 71 | } 72 | return val + buf[3] * 0x1000000 73 | } 74 | -------------------------------------------------------------------------------- /example/helper/stringHelper.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Convert a native javascript string to a string of utf8 bytes 3 | * @param {String} str The string to convert 4 | * @returns {String} A valid squence of utf8 bytes 5 | */ 6 | export const encodeUtf8 = (str) => unescape(encodeURIComponent(str)) 7 | 8 | /** 9 | * Convert a string of utf8 bytes to a native javascript string 10 | * @param {String} utf8 A valid squence of utf8 bytes 11 | * @returns {String} A native javascript string 12 | */ 13 | export const decodeUtf8 = (utf8) => { 14 | try { 15 | return decodeURIComponent(escape(utf8)) 16 | } catch (e) { 17 | return utf8 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /example/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Examples 6 | 7 | 8 | Example 9 | 10 | 11 | -------------------------------------------------------------------------------- 
/example/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "example", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "dev": "npx rollup -c rollup.config.js -w" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "rollup": "^0.62.0", 13 | "rollup-plugin-serve": "^0.4.2", 14 | "rollup-plugin-commonjs": "^9.1.3", 15 | "rollup-plugin-node-resolve": "^3.3.0" 16 | }, 17 | "dependencies": { 18 | "asmcrypto.js": "^2.3.0", 19 | "tweetnacl": "^1.0.0" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /example/rollup.config.js: -------------------------------------------------------------------------------- 1 | import serve from 'rollup-plugin-serve' 2 | 3 | export default [ 4 | { 5 | input: './example.js', 6 | output: { 7 | name: 'MasterTabExample', 8 | file: 'dist/example.js', 9 | format: 'iife', 10 | interop: false, 11 | strict: false 12 | }, 13 | plugins: [ 14 | require('rollup-plugin-node-resolve')({ 15 | }), 16 | require('rollup-plugin-commonjs')({ 17 | ignoreGlobal: true, 18 | ignore: [ 'crypto' ] 19 | }), 20 | serve('') 21 | ] 22 | } 23 | ] 24 | -------------------------------------------------------------------------------- /karma.conf.js: -------------------------------------------------------------------------------- 1 | process.env.CHROME_BIN = require('puppeteer').executablePath() 2 | 3 | module.exports = (config) => { 4 | config.set({ 5 | basePath: '.', 6 | frameworks: ['jasmine'], 7 | client: { 8 | jasmine: { 9 | random: true, 10 | stopOnFailure: false, 11 | failFast: false, 12 | timeoutInterval: 2000 13 | } 14 | }, 15 | files: [ 16 | 'src/**/*.spec.js' 17 | ], 18 | preprocessors: { 19 | 'src/**/*.spec.js': ['rollup'] 20 | }, 21 | rollupPreprocessor: { 22 | output: { 23 | format: 'iife', 24 | name: 'Test', 25 | sourcemap: 'inline' 26 | }, 27 | plugins: [ 28 | 
require('rollup-plugin-node-resolve')({ 29 | }), 30 | require('rollup-plugin-commonjs')({ 31 | ignoreGlobal: true, 32 | ignore: [ 'crypto' ] 33 | }) 34 | ] 35 | }, 36 | logLevel: config.LOG_INFO, 37 | autoWatch: false, 38 | browsers: ['ChromeHeadless'], 39 | singleRun: true 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "encrypted-search", 3 | "version": "1.0.0", 4 | "description": "Encrypted search index for the browser", 5 | "license": "MIT", 6 | "author": "Mattias Svanström ", 7 | "main": "src/index.esm.js", 8 | "scripts": { 9 | "lint": "eslint src/*.js src/**/*.js", 10 | "test": "npm run lint && npx karma start karma.conf.js", 11 | "build": "npx pegjs src/query/grammar.peg", 12 | "postinstall": "npm run build" 13 | }, 14 | "dependencies": { 15 | "latenize": "^0.2.0" 16 | }, 17 | "devDependencies": { 18 | "eslint": "^5.1.0", 19 | "jasmine": "^3.1.0", 20 | "jasmine-core": "^3.1.0", 21 | "karma": "^3.0.0", 22 | "karma-chrome-launcher": "^2.2.0", 23 | "karma-jasmine": "^1.1.2", 24 | "karma-rollup-preprocessor": "^6.0.0", 25 | "puppeteer": "^1.6.0", 26 | "rollup": "^0.64.1", 27 | "rollup-plugin-commonjs": "^9.1.3", 28 | "rollup-plugin-node-resolve": "^3.3.0", 29 | "asmcrypto.js": "^2.3.0", 30 | "tweetnacl": "^1.0.0" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/helper/array.js: -------------------------------------------------------------------------------- 1 | const defaultExtractor = (a) => a 2 | const defaultComparator = (a, b) => a === b 3 | const defaultTransformer = (a) => a 4 | 5 | /** 6 | * Get unique values from an array. 
7 | * @param {Array} array 8 | * @param {Function} extractor 9 | * @return {Array} 10 | */ 11 | export const unique = (array, extractor = defaultExtractor) => { 12 | const seen = new Set() 13 | const length = array.length 14 | const result = [] 15 | for (let i = 0; i < length; i++) { 16 | const value = array[i] 17 | const extract = extractor(value) 18 | if (seen.has(extract)) { 19 | continue 20 | } 21 | seen.add(extract) 22 | result.push(value) 23 | } 24 | return result 25 | } 26 | 27 | /** 28 | * Flatten an array one level. 29 | * @param {Array} array 30 | * @return {Array} 31 | */ 32 | export const flatten = (array = []) => Array.prototype.concat(...array) 33 | 34 | /** 35 | * 36 | * @param {Array} a The first array. 37 | * @param {Array} b The second array. 38 | * @param {Function} extractor 39 | * @returns {Array} 40 | */ 41 | export const minus = (a = [], b = [], extractor = defaultExtractor) => { 42 | const other = new Set(b.map(extractor)) 43 | return a.reduce((prev, cur) => { 44 | const val = extractor(cur) 45 | if (!other.has(val)) { 46 | prev.push(cur) 47 | } 48 | return prev 49 | }, []) 50 | } 51 | 52 | /** 53 | * Intersect two arrays. Ignoring any duplicates. 54 | * @param {Array} a The first array. 55 | * @param {Array} b The second array. 56 | * @param {Function} extractor 57 | * @param {Function} transformer A transformer function to transform the two values. 58 | * @returns {Array} 59 | */ 60 | export const intersect = (a = [], b = [], extractor = defaultExtractor, transformer = defaultTransformer) => { 61 | return unique(a, extractor) 62 | .reduce((acc, cur) => { 63 | const idx = b.findIndex((x) => extractor(cur) === extractor(x)) 64 | if (idx === -1) { 65 | return acc 66 | } 67 | const transformedValue = transformer(cur, b[idx]) 68 | if (transformedValue) { 69 | acc.push(transformedValue) 70 | } 71 | return acc 72 | }, []) 73 | } 74 | 75 | /** 76 | * Join two arrays. Ignoring any duplicates. 77 | * @param {Array} a The first array. 
78 | * @param {Array} b The second array. 79 | * @param {Function} extractor A extractor function read the value to compare 80 | * @param {Function} transformer A transformer function to transform the two values. 81 | * @returns {Array} 82 | */ 83 | export const union = (a = [], b = [], extractor = defaultExtractor, transformer = defaultTransformer) => { 84 | const duplicates = {} 85 | const union = [...unique(a, extractor), ...unique(b, extractor)] 86 | return union 87 | .reduce((acc, cur, index) => { 88 | const idx = union.findIndex((x, findex) => index !== findex && extractor(cur) === extractor(x)) 89 | if (idx === -1) { 90 | acc.push(transformer(cur)) 91 | return acc 92 | } 93 | if (duplicates[idx]) { 94 | return acc 95 | } 96 | duplicates[index] = true 97 | const transformedValue = transformer(cur, union[idx]) 98 | if (transformedValue) { 99 | acc.push(transformedValue) 100 | } 101 | return acc 102 | }, []) 103 | } 104 | 105 | /** 106 | * Find a subset of an array in order. 107 | * @param {Array} a Array 108 | * @param {Array} b Subset of array 109 | * @param {Function} comparator A comparator function to compare a value in array a with a value in array b. 110 | * @returns {Number} 111 | */ 112 | export const contains = (a = [], b = [], comparator = defaultComparator) => { 113 | let m = b.length 114 | let n = a.length 115 | let i = 0 116 | while (i <= n - m) { 117 | let j = 0 118 | while (j < m) { 119 | if (!comparator(a[i], b[j])) { 120 | break 121 | } 122 | i++ 123 | j++ 124 | } 125 | if (j === 0) { 126 | i++ 127 | } else if (j === m) { 128 | return i - m 129 | } 130 | } 131 | 132 | return -1 133 | } 134 | 135 | /** 136 | * Returns whether an array a contains any element in array b before any element in array c. 137 | * @param {Array} a The array containing the elements. 138 | * @param {Array} b The array containing the elements that should be placed before c. 139 | * @param {Array} c The array containing the elements that should be placed after b. 
140 | * @param {Function} comparator A comparator function to compare a value in array a with a value in array b. 141 | * @returns {Boolean} 142 | */ 143 | export const ordered = (a = [], b = [], c = [], comparator = defaultComparator) => { 144 | return b.some((x, i) => { 145 | let idxB = a.findIndex((y) => comparator(x, y)) 146 | const any = c.some((y) => { 147 | let idxC = a.findIndex((z) => comparator(y, z)) 148 | return idxC >= idxB 149 | }) 150 | if (!any && i === b.length - 1) { 151 | return false 152 | } 153 | return any 154 | }) 155 | } 156 | 157 | /** 158 | * Returns whether an element in array b is not contained in array a. 159 | * @param {Array} a The array containing the elements. 160 | * @param {Array} b The array containing the elements which can not exist in a. 161 | * @param {Function} comparator A comparator function to compare a value in array a with a value in array b. 162 | * @returns {Boolean} 163 | */ 164 | export const notContains = (a = [], b = [], comparator = defaultComparator) => { 165 | return !b.some((x) => { 166 | let otherIdx = a.findIndex((y) => comparator(x, y)) 167 | if (otherIdx !== -1) { 168 | return true 169 | } 170 | }) 171 | } 172 | 173 | /** 174 | * Returns whether the elements of b in array a are separated by n length. 175 | * @param {Array} a The array containing the elements. 176 | * @param {Array} b The array containing the elements in a to compare. 177 | * @param {Number} n The total length that can exist between the elements in b. 178 | * @param {Function} comparator A comparator function to compare a value in array a with a value in array b. 
179 | * @returns {Boolean} 180 | */ 181 | export const proximity = (a = [], b = [], n, comparator = defaultComparator) => { 182 | const value = b 183 | .map((keyword) => a.findIndex((y) => comparator(keyword, y))) 184 | .filter((x) => x !== -1) 185 | .sort((x, y) => x - y) 186 | .reduce((agg, x, i, a) => { 187 | if (i === a.length - 1) { 188 | return agg 189 | } 190 | return agg + ((a[i + 1] - 1) - x) 191 | }, 0) 192 | return value < n 193 | } 194 | 195 | /** 196 | * Returns whether at least n elements in b exist in a. 197 | * @param {Array} a The array containing the elements. 198 | * @param {Array} b The array containing the elements to search. 199 | * @param {Number} n The total amount that must exist. 200 | * @param {Function} comparator A comparator function to compare a value in array a with a value in array b. 201 | * @returns {Boolean} 202 | */ 203 | export const quorom = (a = [], b = [], n, comparator = defaultComparator) => { 204 | let counter = 0 205 | return b.some((keyword) => { 206 | if (a.findIndex((y) => comparator(y, keyword)) !== -1) { 207 | counter++ 208 | } 209 | return counter >= n 210 | }) 211 | } 212 | 213 | /** 214 | * Convert an array to a gaps array 215 | * (Inline for performance) 216 | * @param {Array} arr 217 | * @returns {Array} 218 | */ 219 | export const getGapsArray = (arr = []) => { 220 | if (arr.length <= 1) { 221 | return arr 222 | } 223 | arr.sort((a, b) => a - b) 224 | let prev = arr[0] 225 | for (let i = 1; i < arr.length; ++i) { 226 | const value = arr[i] 227 | arr[i] = arr[i] - prev 228 | prev = value 229 | } 230 | return arr 231 | } 232 | 233 | /** 234 | * Convert a gaps array back to a plain array 235 | * (Inline for performance) 236 | * @param {Array} arr 237 | * @returns {Array} 238 | */ 239 | export const getArrayGaps = (arr = []) => { 240 | if (arr.length <= 1) { 241 | return arr 242 | } 243 | for (let i = 1; i < arr.length; ++i) { 244 | arr[i] = arr[i] + arr[i - 1] 245 | } 246 | return arr 247 | } 248 | 249 | /** 250 | * Insert an item into a 
gaps array. 251 | * @param {Array} array 252 | * @param {Number} id 253 | * @returns {Array|undefined} Returns undefined if the item already exists 254 | */ 255 | export const insertIntoGapsArray = (array = [], id) => { 256 | const len = array.length 257 | if (len === 0) { 258 | return [id] 259 | } 260 | 261 | let i = 0 262 | let currentValue = 0 263 | let prevValue = 0 264 | do { 265 | currentValue = prevValue + array[i] 266 | 267 | if (currentValue === id) { 268 | return 269 | } 270 | 271 | if (id < currentValue) { 272 | break 273 | } 274 | 275 | prevValue = currentValue 276 | 277 | i++ 278 | } while (i < len) 279 | 280 | if (i === 0) { 281 | array.unshift(id) 282 | array[1] = array[1] - id 283 | } else if (i === len) { 284 | array.push(id - prevValue) 285 | } else { 286 | array.splice(i, 0, id - prevValue) 287 | array[i + 1] = currentValue - id 288 | } 289 | 290 | return array 291 | } 292 | 293 | export const removeFromGapsArray = (array = [], id) => { 294 | const len = array.length 295 | if (len === 0) { 296 | return [] 297 | } 298 | 299 | let i = 0 300 | let currentValue = 0 301 | let prevValue = 0 302 | do { 303 | currentValue = prevValue + array[i] 304 | if (currentValue === id) { 305 | break 306 | } 307 | prevValue = currentValue 308 | i++ 309 | } while (i < len) 310 | 311 | if (i === len) { 312 | return 313 | } 314 | if (i === len - 1) { 315 | array.splice(i, 1) 316 | return array 317 | } 318 | if (i === 0) { 319 | array.splice(0, 1) 320 | array[0] = currentValue + array[0] 321 | return array 322 | } 323 | 324 | array.splice(i, 1) 325 | array[i] = (currentValue + array[i]) - prevValue 326 | 327 | return array 328 | } 329 | 330 | /** 331 | * Shuffle an array. 
332 | * @param {Array} result 333 | * @param {Number} i 334 | * @param {Number} j 335 | */ 336 | const swap = (result, i, j) => { 337 | const temp = result[i] 338 | result[i] = result[j] 339 | result[j] = temp 340 | } 341 | 342 | export const shuffle = (array) => { 343 | const result = array.slice() 344 | for (let i = result.length - 1; i > 0; i--) { 345 | const j = Math.floor(Math.random() * (i + 1)) 346 | swap(result, i, j) 347 | } 348 | return result 349 | } 350 | 351 | export const mutablyShuffleTwo = (arrayA, arrayB) => { 352 | const l = arrayA.length 353 | for (let i = l - 1; i > 0; i--) { 354 | const j = Math.floor(Math.random() * (i + 1)) 355 | swap(arrayA, i, j) 356 | swap(arrayB, i, j) 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/helper/array.spec.js: -------------------------------------------------------------------------------- 1 | import { 2 | unique, 3 | flatten, 4 | intersect, 5 | union, 6 | contains, 7 | proximity, 8 | ordered, 9 | quorom, 10 | insertIntoGapsArray, removeFromGapsArray, getGapsArray, getArrayGaps, minus 11 | } from './array' 12 | 13 | describe('array', () => { 14 | it('should flatten an array', () => { 15 | expect(flatten([[1, 2, 3], [4, 5, 6]])).toEqual([1, 2, 3, 4, 5, 6]) 16 | }) 17 | 18 | it('should only take unique items', () => { 19 | expect(unique([1, 1, 2, 2, 3, 4])).toEqual([1, 2, 3, 4]) 20 | }) 21 | 22 | it('should minus an array', () => { 23 | expect(minus([1,2,3,4], [3,1])).toEqual([2,4]) 24 | expect(minus([], [3,1])).toEqual([]) 25 | }) 26 | 27 | it('should not find a subarray', () => { 28 | expect(contains(['a', 'b', 'c'], ['b', 'c', 'd'])).toEqual(-1) 29 | expect(contains(['a'], ['b', 'c', 'd'])).toEqual(-1) 30 | expect(contains(['a'], [])).toEqual(-1) 31 | expect(contains(['a', 'b', 'a'], ['a', 'a'])).toEqual(-1) 32 | }) 33 | 34 | it('should find a subarray', () => { 35 | expect(contains(['a', 'b', 'c'], ['b', 'c'])).toEqual(1) 36 | expect(contains(['a', 
'a', 'a'], ['a', 'a'])).toEqual(0) 37 | }) 38 | 39 | it('should find a subarray with wildcards', () => { 40 | const cb = (a, b) => a === b || b === '*' 41 | expect(contains(['foo', 'a', 'b', 'c', 'bar'], ['*', 'c'], cb)).toEqual(2) 42 | }) 43 | 44 | it('should union two arrays uniquely', () => { 45 | const a = [{ id: 1 }, { id: 2 }] 46 | const b = [{ id: 1 }, { id: 3 }] 47 | const extractor = (a) => a.id 48 | expect(union(a, b, extractor)).toEqual([{ id: 1 }, { id: 2 }, { id: 3 }]) 49 | }) 50 | 51 | it('should intersect two arrays', () => { 52 | const a = [{ id: 1 }, { id: 2 }] 53 | const b = [{ id: 1 }, { id: 3 }] 54 | const extractor = (a) => a.id 55 | expect(intersect(a, b, extractor)).toEqual([{ id: 1 }]) 56 | }) 57 | 58 | it('should intersect two arrays uniquely', () => { 59 | const a = [{ id: 1 }, { id: 1 }, { id: 2 }, { id: 1 }] 60 | const b = [{ id: 1 }, { id: 3 }, { id: 2 }, { id: 1 }, { id: 2 }] 61 | const extractor = (a) => a.id 62 | expect(intersect(a, b, extractor)).toEqual([{ id: 1 }, { id: 2 }]) 63 | }) 64 | 65 | it('should intersect and transform', () => { 66 | const a = [{ id: 1, result: ['abc', 'def'] }, { id: 2 },] 67 | const b = [{ id: 1, result: ['def', 'fgh'] },] 68 | const extractor = (a) => a.id 69 | const transformer = (a, { result = [] }) => ({ 70 | ...a, 71 | result: a.result.concat(result) 72 | }) 73 | expect(intersect(a, b, extractor, transformer)) 74 | .toEqual([{ id: 1, result: ['abc', 'def', 'def', 'fgh'] }]) 75 | }) 76 | 77 | it('should union and transform', () => { 78 | const extractor = (a) => a.id 79 | const transformer = (a, { result = [] } = {}) => ({ 80 | ...a, 81 | result: a.result.concat(result) 82 | }) 83 | const a = [{ id: 1, result: ['abc', 'def'] }, { id: 2, result: ['123'] }] 84 | const b = [{ id: 1, result: ['cde', 'fgh'] }] 85 | expect(union(a, b, extractor, transformer)) 86 | .toEqual([{ id: 1, result: ['abc', 'def', 'cde', 'fgh'] }, { id: 2, result: ['123'] }]) 87 | 88 | }) 89 | 90 | it('should return true if one 
item is before', () => { 91 | expect(ordered(['aaa', 'bbb', 'ccc'], ['aaa'], ['bbb'])).toBeTruthy() 92 | }) 93 | 94 | it('should return true if one item is before and the other is not', () => { 95 | expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['ccc', 'aaa'], ['bbb'])).toBeTruthy() 96 | expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['aaa', 'ccc'], ['bbb'])).toBeTruthy() 97 | }) 98 | 99 | it('should return false if no item is before', () => { 100 | expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['ccc', 'eee'], ['aaa', 'bbb'])).toBeFalsy() 101 | expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['eee', 'ccc', 'bbb'], ['aaa'])).toBeFalsy() 102 | expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], [], ['aaa'])).toBeFalsy() 103 | expect(ordered(['aaa', 'bbb', 'ccc', 'eee'], ['aaa'], [])).toBeFalsy() 104 | }) 105 | 106 | it('should find proximity', () => { 107 | expect(proximity(['cat', 'aaa', 'dog', 'mouse'], ['mouse'], 1)).toBeTruthy() 108 | expect(proximity(['cat', 'aaa', 'dog', 'mouse'], ['mouse', 'cat', 'dog'], 2)).toBeTruthy() 109 | expect(proximity(['cat', 'aaa', 'dog', 'mouse', 'bbb'], ['cat', 'dog', 'mouse'], 2)).toBeTruthy() 110 | expect(proximity(['cat', 'aaa', 'bbb', 'ccc', 'dog', 'eee', 'fff', 'mouse'], ['cat', 'dog', 'mouse'], 6)).toBeTruthy() 111 | }) 112 | 113 | it('should not find proximity', () => { 114 | expect(proximity(['cat', 'aaa', 'dog', 'mouse'], ['cat', 'dog', 'mouse'], 1)).toBeFalsy() 115 | expect(proximity(['cat', 'aaa', 'bbb', 'ccc', 'dog', 'eee', 'fff', 'mouse'], ['cat', 'dog', 'mouse'], 5)).toBeFalsy() 116 | }) 117 | 118 | it('should find quorom', () => { 119 | expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog'], 1)).toBeTruthy() 120 | expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog'], 2)).toBeTruthy() 121 | }) 122 | 123 | it('should not find quorom', () => { 124 | expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog'], 3)).toBeFalsy() 125 | expect(quorom(['cat', 'dog', 'mouse'], ['cat', 'dog', 'aaa'], 3)).toBeFalsy() 126 | }) 127 | 128 | it('should 
get a gaps array', () => { 129 | expect(getGapsArray([1,2,3])).toEqual([1,1,1]) 130 | expect(getGapsArray([6,2,3])).toEqual([2,1,3]) 131 | expect(getGapsArray([10,5,1])).toEqual([1,4,5]) 132 | }) 133 | 134 | it('should get an array from gaps', () => { 135 | expect(getArrayGaps([1,1,1])).toEqual([1,2,3]) 136 | expect(getArrayGaps([2,1,3])).toEqual([2,3,6]) 137 | expect(getArrayGaps([1,4,5])).toEqual([1,5,10]) 138 | }) 139 | 140 | it('should insert into gaps array', () => { 141 | expect(insertIntoGapsArray([], 2)).toEqual([2]) 142 | expect(insertIntoGapsArray([2], 1)).toEqual([1, 1]) 143 | expect(insertIntoGapsArray([2], 3)).toEqual([2, 1]) 144 | expect(insertIntoGapsArray([2, 3], 3)).toEqual([2, 1, 2]) 145 | expect(insertIntoGapsArray([5, 5, 10, 10, 20], 25)).toEqual([5, 5, 10, 5, 5, 20]) 146 | expect(insertIntoGapsArray([5, 5, 10, 10, 20], 6)).toEqual([5, 1, 4, 10, 10, 20]) 147 | 148 | expect(insertIntoGapsArray([2, 3], 2)).toBeUndefined() 149 | expect(insertIntoGapsArray([2, 5], 2)).toBeUndefined() 150 | }) 151 | 152 | it('should remove from gaps array', () => { 153 | expect(removeFromGapsArray([2, 1, 1], 3)).toEqual([2, 2]) 154 | expect(removeFromGapsArray([2], 2)).toEqual([]) 155 | expect(removeFromGapsArray([2, 1], 3)).toEqual([2]) 156 | expect(removeFromGapsArray([2, 3, 1], 6)).toEqual([2, 3]) 157 | expect(removeFromGapsArray([5, 5, 10, 10, 20], 5)).toEqual([10, 10, 10, 20]) 158 | expect(removeFromGapsArray([2, 3], 3)).toBeUndefined() 159 | }) 160 | }) 161 | -------------------------------------------------------------------------------- /src/helper/idb.js: -------------------------------------------------------------------------------- 1 | export const READONLY = 'readonly' 2 | export const READWRITE = 'readwrite' 3 | 4 | /** 5 | * Open an indexedDB in a promisified way. 
/**
 * Open an indexedDB database, promisified.
 * @param {indexedDB} indexedDB The indexedDB factory (e.g. window.indexedDB)
 * @param {String} name Database name
 * @param {Number} version Schema version
 * @param {Function} upgrade Called as (db, oldVersion, transaction) on upgrade
 * @returns {Promise} Resolves with the open database handle
 */
export const open = (indexedDB, name, version, upgrade) => {
    return new Promise((resolve, reject) => {
        const req = indexedDB.open(name, version)
        req.onupgradeneeded = (event) => upgrade(req.result, event.oldVersion, req.transaction)
        req.onsuccess = () => resolve(req.result)
        req.onerror = () => reject(req.error)
    })
}

/**
 * Convert an idb transaction to a promise.
 * @param {IDBTransaction} tx
 * @returns {Promise} Resolves on complete, rejects with tx.error on error or abort
 */
export const transaction = (tx) => {
    return new Promise((resolve, reject) => {
        const fail = () => reject(tx.error)
        tx.oncomplete = () => resolve()
        tx.onerror = fail
        tx.onabort = fail
    })
}

/**
 * Convert an idb request to a promise.
 * @param {IDBRequest} request
 * @returns {Promise} Resolves with request.result
 */
export const request = (request) => {
    return new Promise((resolve, reject) => {
        request.onsuccess = () => resolve(request.result)
        request.onerror = () => reject(request.error)
    })
}

/**
 * Delete a database.
 * @param {indexedDB} indexedDB
 * @param {String} dbName
 * @returns {Promise} Resolves once deleted; rejects on error or when blocked
 */
export const deleteDb = async (indexedDB, dbName) => {
    const req = indexedDB.deleteDatabase(dbName)

    return new Promise((resolve, reject) => {
        req.onsuccess = resolve
        req.onerror = reject
        req.onblocked = reject
    })
}
/**
 * Open the database lazily and auto-close the connection after a period of
 * inactivity. Keeping a handle open constantly can block other tabs from
 * upgrading the database.
 * @param {Function} open Function that opens the database, returns a promise of the handle.
 * @param {Number} closeTimeout Idle time in ms after which to close the connection.
 * @returns {Object} { getTransaction, close }
 */
export const openWithClosure = (open, closeTimeout) => {
    let dbHandle
    let closeHandle
    let closed = false

    const cancelTimer = () => {
        if (!closeHandle) {
            return
        }
        clearTimeout(closeHandle)
        closeHandle = undefined
    }

    const releaseHandle = () => {
        if (!dbHandle) {
            return
        }
        dbHandle.close()
        dbHandle = undefined
    }

    const close = () => {
        cancelTimer()
        releaseHandle()
    }

    /**
     * Get a transaction, (re)opening the database when needed and
     * re-arming the idle-close timer.
     */
    const getTransaction = async (storeNames, mode = 'readonly') => {
        if (closed) {
            throw new Error('Database has been closed')
        }

        cancelTimer()
        closeHandle = setTimeout(close, closeTimeout)

        if (!dbHandle) {
            dbHandle = await open()
        }

        return dbHandle.transaction(storeNames, mode)
    }

    return {
        getTransaction,
        close: () => {
            closed = true
            close()
        }
    }
}
jasmine.createSpy('transaction'), 24 | close: jasmine.createSpy('close') 25 | } 26 | const setup = jasmine.createSpy('setup') 27 | .and 28 | .returnValue(mock) 29 | const { close, getTransaction } = openWithClosure(setup, 100) 30 | 31 | expect(setup) 32 | .toHaveBeenCalledTimes(0) 33 | await getTransaction('woot') 34 | expect(setup) 35 | .toHaveBeenCalledTimes(1) 36 | 37 | await delay(100) 38 | 39 | expect(mock.close) 40 | .toHaveBeenCalledTimes(1) 41 | 42 | await getTransaction('woot') 43 | 44 | await delay(100) 45 | 46 | expect(mock.close) 47 | .toHaveBeenCalledTimes(2) 48 | expect(setup) 49 | .toHaveBeenCalledTimes(2) 50 | 51 | close() 52 | 53 | await getTransaction('woot') 54 | .catch((e) => { 55 | expect(e.message) 56 | .toEqual('Database has been closed') 57 | }) 58 | }) 59 | 60 | it('should throw if it has been closed', async () => { 61 | const { close, getTransaction } = openWithClosure(setup, 100) 62 | 63 | await getTransaction(STORE_NAME) 64 | 65 | close() 66 | 67 | const error = await getTransaction(STORE_NAME) 68 | .catch((e) => e) 69 | expect(error.message) 70 | .toEqual('Database has been closed') 71 | }) 72 | 73 | it('should put and get a value promisified', async () => { 74 | const db = await setup() 75 | 76 | const tx = db.transaction(STORE_NAME, 'readwrite') 77 | 78 | tx.objectStore(STORE_NAME) 79 | .put('bar', 'foo') 80 | 81 | tx.objectStore(STORE_NAME) 82 | .put('bar2', 'foo2') 83 | 84 | await transaction(tx) 85 | 86 | expect(await request(db.transaction(STORE_NAME, 'readonly') 87 | .objectStore(STORE_NAME) 88 | .get('foo'))) 89 | .toEqual('bar') 90 | 91 | expect(await request(db.transaction(STORE_NAME, 'readonly') 92 | .objectStore(STORE_NAME) 93 | .get('foo2'))) 94 | .toEqual('bar2') 95 | 96 | db.close() 97 | }) 98 | }) 99 | -------------------------------------------------------------------------------- /src/helper/lru.js: -------------------------------------------------------------------------------- 1 | export default ({ max = 10000 } = 
{}) => { 2 | const map = new Map() 3 | let head 4 | let tail 5 | let length = 0 6 | 7 | const clear = () => { 8 | map.clear() 9 | head = tail = undefined 10 | length = 0 11 | } 12 | 13 | const unlink = function (key, prev, next) { 14 | length-- 15 | 16 | if (length === 0) { 17 | head = tail = undefined 18 | return 19 | } 20 | 21 | if (head === key) { 22 | head = prev 23 | map.get(head).next = undefined 24 | return 25 | } 26 | 27 | if (tail === key) { 28 | tail = next 29 | map.get(tail).prev = undefined 30 | return 31 | } 32 | 33 | if (prev) { 34 | map.get(prev).next = next 35 | } 36 | if (next) { 37 | map.get(next).prev = prev 38 | } 39 | } 40 | 41 | const remove = (key) => { 42 | if (!map.has(key)) { 43 | return 44 | } 45 | const element = map.get(key) 46 | map.delete(key) 47 | unlink(key, element.prev, element.next) 48 | } 49 | 50 | const get = (key) => { 51 | const element = map.get(key) 52 | if (!element) { 53 | return 54 | } 55 | return element.value 56 | } 57 | 58 | const set = (key, value) => { 59 | let element 60 | 61 | if (map.has(key)) { 62 | element = map.get(key) 63 | element.value = value 64 | 65 | if (key === head) { 66 | return value 67 | } 68 | 69 | unlink(key, element.prev, element.next) 70 | } else { 71 | element = { 72 | value, 73 | next: undefined, 74 | prev: head 75 | } 76 | map.set(key, element) 77 | 78 | if (length === max) { 79 | remove(tail) 80 | } 81 | } 82 | 83 | length++ 84 | element.next = undefined 85 | element.prev = head 86 | 87 | if (head) { 88 | map.get(head).next = key 89 | } 90 | 91 | head = key 92 | 93 | if (!tail) { 94 | tail = key 95 | } 96 | 97 | return value 98 | } 99 | 100 | return { set, get, remove, clear } 101 | } 102 | 103 | -------------------------------------------------------------------------------- /src/helper/lru.spec.js: -------------------------------------------------------------------------------- 1 | import createLru from './lru' 2 | 3 | describe('lru', () => { 4 | it('should set and get', () => { 5 | const 
lru = createLru() 6 | lru.set('a', 'woot') 7 | expect(lru.get('a')).toBe('woot') 8 | }) 9 | 10 | it('should evict', () => { 11 | const lru = createLru({ max: 2 }) 12 | lru.set('a', 'woot') 13 | lru.set('b', 'b') 14 | lru.set('c', 'c') 15 | expect(lru.get('a')).toBeUndefined() 16 | expect(lru.get('b')).toBe('b') 17 | expect(lru.get('c')).toBe('c') 18 | }) 19 | 20 | it('should set', () => { 21 | const lru = createLru({ max: 3 }) 22 | lru.set('a', 'a') 23 | lru.set('b', 'b') 24 | lru.set('b', 'b') 25 | lru.set('b', 'b') 26 | lru.set('b', 'b') 27 | lru.set('b', 'b') 28 | lru.set('b', 'b') 29 | lru.set('c', 'c') 30 | lru.set('b', 'b') 31 | lru.set('c', 'c') 32 | lru.set('b', 'b') 33 | lru.set('b', 'b') 34 | lru.set('d', 'd') 35 | lru.set('d', 'd') 36 | lru.set('d', 'd') 37 | lru.set('d', 'd') 38 | expect(lru.get('a')).toBeUndefined() 39 | expect(lru.get('b')).toBe('b') 40 | expect(lru.get('c')).toBe('c') 41 | expect(lru.get('d')).toBe('d') 42 | }) 43 | }) 44 | -------------------------------------------------------------------------------- /src/helper/scoring.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Calculate the idf score for a term. 3 | * @param {Number} N The number of documents in the corpus. 4 | * @param {Number} df The number of times this term occurs in the corpus. 5 | * @returns {number} 6 | */ 7 | const idf = (N, df) => (df > 0 ? Math.log10(N / df) : 0) 8 | 9 | /** 10 | * Calculate the weight of the term. 11 | * @param {Number} tf The number of times this term occurs in a single document. 12 | * @returns {Number} 13 | */ 14 | const wt = (tf) => (tf > 0 ? 1 + Math.log10(tf) : 0) 15 | 16 | /** 17 | * Generate a ranking based on cosine similarity scoring. 18 | * https://nlp.stanford.edu/IR-book/html/htmledition/computing-vector-scores-1.html 19 | * @param {Array} terms The terms in the query. 20 | * @param {Array} termsToIds Each keyword mapped to a list of document IDs. 
21 | * @param {Number} N The number of documents in this corpus. 22 | * @param {Object} idsToTerms Result id to list of terms. 23 | */ 24 | export default ({ terms = [], termsToIds = [], N = 0, idsToTerms = {} }) => { 25 | if (!Array.isArray(terms) || !Array.isArray(termsToIds) || terms.length !== termsToIds.length) { 26 | throw new Error('Keyword array exception') 27 | } 28 | if (termsToIds.some((keywordToIds = []) => !Array.isArray(keywordToIds))) { 29 | throw new Error('Keyword to IDs array exception') 30 | } 31 | if (Object.keys(idsToTerms).some((id) => !Array.isArray(idsToTerms[id] || []))) { 32 | throw new Error('IDs to terms array exception') 33 | } 34 | 35 | const result = {} 36 | 37 | terms.forEach((keyword, i) => { 38 | const keywordToIds = termsToIds[i] || [] 39 | const termFrequencyInCorpus = keywordToIds.length 40 | const inverseTermDocumentFrequency = idf(N, termFrequencyInCorpus) 41 | const queryTermWeight = (1 / terms.length) * inverseTermDocumentFrequency 42 | 43 | keywordToIds.forEach((id) => { 44 | const documentKeywords = idsToTerms[id] || [] 45 | 46 | const termFrequencyInDocument = documentKeywords 47 | .filter((documentKeyword) => documentKeyword === keyword) 48 | .length 49 | const documentTermWeight = wt(termFrequencyInDocument) * inverseTermDocumentFrequency 50 | const score = documentTermWeight * queryTermWeight 51 | 52 | result[id] = (result[id] || 0) + score 53 | }) 54 | }) 55 | 56 | // Normalize scores. 57 | Object.keys(result).forEach((id) => { 58 | const documentKeywords = idsToTerms[id] || [] 59 | const len = documentKeywords.length 60 | result[id] = len > 0 ? 
result[id] / len : 0 61 | }) 62 | 63 | return result 64 | } 65 | -------------------------------------------------------------------------------- /src/helper/scoring.spec.js: -------------------------------------------------------------------------------- 1 | import scoring from './scoring' 2 | 3 | describe('scoring', () => { 4 | it('should give score 0 to queries that match nothing', () => { 5 | const scores = scoring({ 6 | terms: ['abc'], 7 | termsToIds: [[123]], 8 | N: 1000, 9 | idsToTerms: { 10 | 123: ['foo'] 11 | } 12 | }) 13 | expect(scores[123]) 14 | .toEqual(0) 15 | }) 16 | 17 | it('should give score to queries that match exactly', () => { 18 | const scores = scoring({ 19 | terms: ['foo'], 20 | termsToIds: [[123]], 21 | N: 10, 22 | idsToTerms: { 23 | 123: ['foo'] 24 | } 25 | }) 26 | expect(scores[123]) 27 | .toEqual(1) 28 | }) 29 | 30 | it('should give the same score to documents that match exactly', () => { 31 | const scores = scoring({ 32 | terms: ['foo', 'bar'], 33 | termsToIds: [[123, 124], [123, 125]], 34 | N: 1000, 35 | idsToTerms: { 36 | 123: ['foo', 'bar'], 37 | 125: ['bar'], 38 | 124: ['foo'] 39 | } 40 | }) 41 | expect(scores[123] === scores[124] && scores[123] === scores[125]) 42 | .toBeTruthy() 43 | }) 44 | 45 | it('should give higher score to documents that contain both terms', () => { 46 | const scores = scoring({ 47 | terms: ['foo', 'bar'], 48 | termsToIds: [[123, 124], [123, 125]], 49 | N: 1000, 50 | idsToTerms: { 51 | 123: ['foo', 'bar'], 52 | 125: ['bar', 'the'], 53 | 124: ['foo', 'the'] 54 | } 55 | }) 56 | expect(scores[123] > scores[124] && scores[123] > scores[125]) 57 | .toBeTruthy() 58 | }) 59 | 60 | it('should give higher score to documents where the term is rare', () => { 61 | const scores = scoring({ 62 | terms: ['foo'], 63 | termsToIds: [[123, 124, 125]], 64 | N: 1000, 65 | idsToTerms: { 66 | 123: ['foo'], 67 | 124: ['foo', 'the'], 68 | 125: ['foo', 'the', 'an'] 69 | } 70 | }) 71 | expect(scores[123] > scores[124] && scores[124] > 
const typeSizes = {
    boolean: () => 4,
    number: () => 8,
    string: (item) => 2 * item.length
}

/**
 * Roughly estimate the memory footprint of a value, in bytes.
 * Strings count 2 bytes per UTF-16 code unit, numbers 8, booleans 4.
 * ArrayBuffers and typed arrays count their byteLength. Object keys are
 * counted as strings, and each object is visited at most once so cyclic
 * structures terminate.
 * @param {*} object
 * @returns {Number} Estimated size in bytes.
 */
const sizeof = (object) => {
    // Objects already visited, to avoid double counting and cycles.
    // A Set gives O(1) membership checks (the previous indexOf scan was O(n²)).
    const seen = new Set()
    const stack = [object]
    let bytes = 0

    while (stack.length) {
        const value = stack.pop()
        const type = typeof value

        // Skip only null/undefined. The previous `!value` check also skipped
        // 0, false, NaN and '', so e.g. sizeof(0) wrongly returned 0 instead of 8.
        if (value === null || value === undefined) {
            continue
        }

        if (value.byteLength) {
            bytes += value.byteLength
            continue
        }

        if (Array.isArray(value)) {
            value.forEach((v) => stack.push(v))
            continue
        }

        if (type === 'object' && !seen.has(value)) {
            seen.add(value)

            Object.keys(value).forEach((key) => {
                stack.push(key)
                stack.push(value[key])
            })
        }

        if (typeSizes[type]) {
            bytes += typeSizes[type](value)
        }
    }

    return bytes
}

export default sizeof
for array', () => { 13 | expect(sizeof(['str', 552])).toBe(8 + 6) 14 | }) 15 | 16 | it('should get sizeof for object', () => { 17 | expect(sizeof({ 'str': 552 })).toBe(8 + 6) 18 | }) 19 | 20 | it('should get sizeof for typed array', () => { 21 | expect(sizeof(new ArrayBuffer(32))).toBe(32) 22 | expect(sizeof(new Uint8Array(32))).toBe(32) 23 | expect(sizeof(new Int32Array(32))).toBe(128) 24 | }) 25 | }) 26 | -------------------------------------------------------------------------------- /src/helper/tokenize.js: -------------------------------------------------------------------------------- 1 | import latenize from 'latenize' 2 | 3 | /** 4 | * Transform a string into a token. 5 | * @param content 6 | * @returns {string} 7 | */ 8 | export const transform = (content = '') => 9 | latenize(content) 10 | .toLowerCase() 11 | .trim() 12 | 13 | /** 14 | * Turn a string of words into tokens. All characters in the tokens are lowercased and normalized in their latin form. 15 | * NOTE: string.normalize is not supported by IE https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize 16 | * so using a lookup table through latenize. 17 | * Transforms e.g. 'foo bàr' to ['foo', 'bar'] 18 | * @param {String} content 19 | * @param {Number} len 20 | * @param {Boolean} stripSpecial 21 | * @return {Array} 22 | */ 23 | export default (content = '', len = 2, stripSpecial = true) => 24 | (stripSpecial ? 
content.replace(/[!"#()%<>:;{}[\]/\\|?.,'`´*¨°^±≈§∞$£@©€™~–…›‹¸˛]/g, ' ') : content) 25 | .split(/[\s]+/) 26 | .map(transform) 27 | .filter((s) => s.length >= len) 28 | -------------------------------------------------------------------------------- /src/helper/tokenize.spec.js: -------------------------------------------------------------------------------- 1 | import tokenize from './tokenize' 2 | 3 | describe('tokenizer', () => { 4 | it('should should convert single-token strings', () => { 5 | expect(tokenize('word')).toEqual(['word']) 6 | }) 7 | 8 | it('should should convert multi-token words', () => { 9 | expect(tokenize('lorem ipsum')).toEqual(['lorem', 'ipsum']) 10 | }) 11 | 12 | it('should should remove one-letter tokens', () => { 13 | expect(tokenize('a word')).toEqual(['word']) 14 | }) 15 | 16 | it('should should remove punctuation', () => { 17 | expect(tokenize('hello, this is me.')).toEqual(['hello', 'this', 'is', 'me']) 18 | }) 19 | 20 | it('should convert cyrillic', () => { 21 | expect(tokenize('Артём Риженков')).toEqual(['artyom', 'rizhenkov']) 22 | }) 23 | 24 | it('should remove brackets', () => { 25 | expect(tokenize('the (color) red ({is}) [some] people say')).toEqual(['the', 'color', 'red', 'is', 'pretty', 'some', 'people', 'say']) 26 | }) 27 | 28 | it('should remove special characters', () => { 29 | expect(tokenize('the /color/ %red% \\is \'pretty\' ´some`? *people* say!')).toEqual(['the', 'color', 'red', 'is', 'pretty', 'some', 'people', 'say']) 30 | }) 31 | 32 | it('should latenise characters', () => { 33 | expect(tokenize('crème brulée is so good. åäöàüèé?')).toEqual(['creme', 'brulee', 'is', 'so', 'good', 'aaoauee']) 34 | }) 35 | }) 36 | -------------------------------------------------------------------------------- /src/helper/variableByteCodes.js: -------------------------------------------------------------------------------- 1 | // Without the sign bit to be able to use bitwise operators. 
const MAX_INT32 = 0b1111111111111111111111111111111

// High bit of a byte flags the final byte of an encoded number.
const ENCODING = 0b10000000
// The low 7 bits of every byte carry payload.
const MAX_BITS = 0b01111111

// Scratch buffer, reused across calls to avoid allocations.
const TMP = new Uint8Array(5)
const EMPTY = new Uint8Array(0)

const MAX_BYTES_PER_NUMBER = 8
const MAX_BYTES_PER_PART = 5

// Recombine a 31-bit low part and a high part into one JS number.
const unsignedToDouble = (high, low) => (high * (MAX_INT32 + 1)) + low

/**
 * Write a value of at most 31 bits into the buffer as big-endian 7-bit groups.
 * @param {Number} number Value to write.
 * @param {Uint8Array} buffer Target buffer.
 * @param {Number} offset Write position.
 * @param {Boolean} force When true, always write MAX_BYTES_PER_PART bytes (zero padded).
 * @returns {Number} Number of bytes written.
 */
const writeUint32 = (number, buffer, offset, force = false) => {
    let remaining = number
    let length = 1

    // Split into 7-bit groups, least significant first, remembering the
    // highest non-zero group to know the minimal byte length.
    for (let byteIndex = 0; byteIndex < MAX_BYTES_PER_PART; ++byteIndex) {
        TMP[byteIndex] = remaining & MAX_BITS
        remaining = remaining >> 7
        if (TMP[byteIndex] > 0) {
            length = byteIndex + 1
        }
    }

    // Emit the groups in big-endian order.
    const totalBytes = force ? MAX_BYTES_PER_PART : length
    for (let i = offset + totalBytes - 1, j = 0; i >= offset; --i, ++j) {
        buffer[i] = TMP[j]
    }

    return totalBytes
}

/**
 * Encode a number. Takes into account the low and high part of a number in
 * JavaScript. If the high part exists, the low part is padded with 0s for
 * 5 bytes. Otherwise the low part only uses as many bytes as it requires.
 * @param {Uint8Array} buffer
 * @param {Number} number
 * @param {Number} offset
 * @returns {Number} Number of bytes written.
 */
export const vbEncodeNumber = (buffer, number, offset) => {
    const low = number & MAX_INT32
    const high = (number > MAX_INT32) ? (number - low) / (MAX_INT32 + 1) : 0

    const lowLength = writeUint32(low, buffer, offset, high > 0)
    const highLength = high > 0 ? writeUint32(high, buffer, offset + lowLength) : 0

    return lowLength + highLength
}

/**
 * Encode an array of numbers in a variable byte-list encoding.
 * See {@link https://nlp.stanford.edu/IR-book/html/htmledition/variable-byte-codes-1.html}
 * @param {Array} numbers
 * @returns {Uint8Array}
 */
export const vbEncode = (numbers = []) => {
    if (numbers.length === 0) {
        return EMPTY
    }

    // Worst-case size; trimmed to the actual length at the end.
    const bytes = new Uint8Array(numbers.length * MAX_BYTES_PER_NUMBER)
    let totalLength = 0

    numbers.forEach((number) => {
        const length = vbEncodeNumber(bytes, number, totalLength)
        // Set the high bit on the last byte to denote the end of this number.
        bytes[totalLength + length - 1] |= ENCODING
        totalLength += length
    })

    return bytes.slice(0, totalLength)
}

/**
 * Decode a variable-byte encoded Uint8Array into an array of numbers.
 * @param {Uint8Array} bytes
 * @returns {Array}
 */
export const vbDecode = (bytes) => {
    if (!bytes || bytes.length === 0) {
        return []
    }

    const numbers = []
    let low = 0
    let offset = 1
    let part = 0
    let multi = false
    let bits = 0

    for (let i = 0; i < bytes.length; ++i) {
        const value = bytes[i]
        const byte = value & MAX_BITS

        const shift = bits === 31 ? 3 : 7

        part = (part << shift) | byte
        bits += shift
        offset++

        // The low part is full; any further bytes belong to the high part.
        if (offset === MAX_BYTES_PER_PART + 1) {
            low = part
            part = 0
            multi = true
            bits = 0
        }

        // The encoding bit marks the final byte of a number.
        if (value & ENCODING) {
            numbers.push(multi ? unsignedToDouble(part, low) : part)
            low = 0
            part = 0
            offset = 1
            multi = false
            bits = 0
        }
    }

    return numbers
}
unsignedToDouble(part, low) : part 117 | numbers.push(number) 118 | low = 0 119 | part = 0 120 | offset = 1 121 | multi = false 122 | bits = 0 123 | } 124 | } 125 | 126 | return numbers 127 | } 128 | -------------------------------------------------------------------------------- /src/helper/variableByteCodes.spec.js: -------------------------------------------------------------------------------- 1 | import { vbDecode, vbEncode } from './variableByteCodes' 2 | 3 | describe('vb', () => { 4 | it('should encode 1 bit number', () => { 5 | expect(vbEncode([1])) 6 | .toEqual(new Uint8Array([129])) 7 | }) 8 | 9 | it('should encode 32 bit number', () => { 10 | expect(vbEncode([0b11111111111111111111111111111111])) 11 | .toEqual(new Uint8Array([7, 127, 127, 127, 127, 129])) 12 | }) 13 | 14 | it('should encode 8 bit number', () => { 15 | expect(vbEncode([0b11111111])) 16 | .toEqual(new Uint8Array([1, 255])) 17 | }) 18 | 19 | it('should encode 7 bit number ', () => { 20 | expect(vbEncode([0b1111111])) 21 | .toEqual(new Uint8Array([255])) 22 | }) 23 | 24 | const expectDecode = (arr = []) => expect(vbDecode(vbEncode(arr))) 25 | .toEqual(arr) 26 | 27 | it('should decode and encode numbers correctly', () => { 28 | expectDecode([128]) 29 | expectDecode([1,128,2,128,3]) 30 | expectDecode([5000, 100, 0]) 31 | expectDecode([4294967295]) 32 | expectDecode([17,1,1,1,1,1,1]) 33 | }) 34 | 35 | it('should decode and encode all unsigned 2 exponent numbers up to 51 bits correctly', () => { 36 | const array = [] 37 | let onebit = 1 38 | let allbits = 1 39 | for (let i = 1; i <= 51; ++i) { 40 | onebit = onebit * 2 41 | allbits = onebit + (onebit - 1) 42 | array.push(onebit) 43 | array.push(allbits) 44 | } 45 | expectDecode(array) 46 | }) 47 | }) 48 | 49 | -------------------------------------------------------------------------------- /src/helper/wildcard.js: -------------------------------------------------------------------------------- 1 | export const MIN_WILDCARD_LEN = 2 2 | 3 | /** 4 | 
/**
 * Returns whether the string contains a wildcard query character (* or ?).
 * @param {String} string
 * @returns {Boolean}
 */
export const hasWildcard = (string = '') => {
    for (const c of string) {
        if (c === '*' || c === '?') {
            return true
        }
    }
    return false
}

/**
 * Split a string into n-grams.
 * @param {Number} n Size of each n-gram
 * @param {String} value
 * @returns {Array}
 */
export const ngram = (n, value) => {
    if (!value || !value.charAt) {
        return []
    }
    const total = value.length - n + 1
    if (total < 1) {
        return []
    }
    const result = []
    for (let i = 0; i < total; ++i) {
        result.push(value.substr(i, n))
    }
    return result
}

/**
 * Split a token into n-grams, padded with ^ and $ boundary markers.
 * @param {String} value
 * @returns {Array}
 */
export const splitTokenPadding = (value = '') => {
    if (value.length < MIN_WILDCARD_LEN) {
        return []
    }
    return ngram(MIN_WILDCARD_LEN + 1, `^${value}$`)
}


/**
 * Extract a wildcard key to query for: the first run of
 * MIN_WILDCARD_LEN + 1 consecutive non-wildcard characters.
 * @param {String} value
 * @returns {string}
 */
const extractQueryToken = (value = '') => {
    let runStart = -1
    let runLength = 0
    for (let i = 0; i < value.length; ++i) {
        const c = value[i]
        const isWildcard = c === '*' || c === '?'
        if (runStart === -1) {
            if (isWildcard) {
                continue
            }
            runStart = i
            runLength++
        } else if (!isWildcard) {
            runLength++
            if (runLength === MIN_WILDCARD_LEN + 1) {
                break
            }
        } else {
            // A wildcard breaks the literal run; start over.
            runStart = -1
            runLength = 0
        }
    }
    if (runLength !== MIN_WILDCARD_LEN + 1) {
        throw new Error('Could not parse wildcard query')
    }
    return value.substr(runStart, runLength)
}

export const extractQueryTokenPadding = (value = '') => extractQueryToken(`^${value}$`)
/**
 * Match a wildcard pattern against a string.
 * '*' matches any (possibly empty) sequence, '?' matches exactly one character.
 * @param {String} string
 * @param {String} pattern
 * @returns {Boolean}
 */
export const wildcardMatch = (string, pattern) => {
    // A lone '*' matches anything, including the empty string.
    if (pattern === '*') {
        return true
    }

    const m = string.length
    const n = pattern.length

    let stars = 0
    let singles = 0
    for (const character of pattern) {
        if (character === '*') {
            stars++
        } else if (character === '?') {
            singles++
        }
    }

    // Without '*' the lengths must line up exactly.
    if (stars === 0 && n !== m) {
        return false
    }
    // No wildcards at all: plain string comparison.
    if (stars === 0 && singles === 0) {
        return string === pattern
    }

    // row[j] is truthy iff pattern[0..j) matches the prefix of string consumed so far.
    let row = new Array(n + 1).fill(0)
    row[0] = 1
    for (let j = 1; j <= n; j++) {
        // Only a leading run of '*' can match the empty string.
        if (pattern[j - 1] === '*') {
            row[j] = row[j - 1]
        }
    }

    for (let i = 1; i <= m; i++) {
        const next = new Array(n + 1).fill(0)
        for (let j = 1; j <= n; j++) {
            const p = pattern[j - 1]
            if (p === '*') {
                // '*' either absorbs the current character or matches nothing.
                next[j] = next[j - 1] || row[j]
            } else if (p === '?' || p === string[i - 1]) {
                next[j] = row[j - 1]
            }
        }
        row = next
    }

    return !!row[n]
}
'*?')).toBeTruthy() 39 | expect(wildcardMatch('ab', '?*?')).toBeTruthy() 40 | expect(wildcardMatch('ab', '*?*?*')).toBeTruthy() 41 | expect(wildcardMatch('abcde', '?*b*?*d*?')).toBeTruthy() 42 | expect(wildcardMatch('relevance', 'r*v*n*ce')).toBeTruthy() 43 | expect(wildcardMatch('relelelel', 're*le*el')).toBeTruthy() 44 | expect(wildcardMatch('relevance', 're*')).toBeTruthy() 45 | expect(wildcardMatch('relevance', 'ae*')).toBeFalsy() 46 | expect(wildcardMatch('relevance', '*e')).toBeTruthy() 47 | expect(wildcardMatch('relevance', '*ce')).toBeTruthy() 48 | expect(wildcardMatch('relevance', '*ee')).toBeFalsy() 49 | expect(wildcardMatch('relevance', 'rel?vance')).toBeTruthy() 50 | expect(wildcardMatch('relevance', 'rele*vance')).toBeTruthy() 51 | expect(wildcardMatch('relevance', 'rele****vance')).toBeTruthy() 52 | expect(wildcardMatch('abcccd', '*ccd')).toBeTruthy() 53 | expect(wildcardMatch('mississipissippi', '*issip*ss*')).toBeTruthy() 54 | expect(wildcardMatch('xxxx*zzzzzzzzy*f', 'xxxx*zzy*fffff')).toBeFalsy() 55 | expect(wildcardMatch('xxxx*zzzzzzzzy*f', 'xxx*zzy*f')).toBeTruthy() 56 | expect(wildcardMatch('xxxxzzzzzzzzyf', 'xxxx*zzy*fffff')).toBeFalsy() 57 | expect(wildcardMatch('xxxxzzzzzzzzyf', 'xxxx*zzy*f')).toBeTruthy() 58 | expect(wildcardMatch('abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab', '*a*b*ba*ca*aaaa*fa*ga*ggg*b*')).toBeTruthy() 59 | expect(wildcardMatch('aaabbaabbaab', '*aabbaa*a*')).toBeTruthy() 60 | expect(wildcardMatch('a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*', 'a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*')).toBeTruthy() 61 | expect(wildcardMatch('aaaaaaaaaaaaaaaaa', '*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*')).toBeTruthy() 62 | expect(wildcardMatch('aaaaaaaaaaaaaaaa', '*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*')).toBeFalsy() 63 | expect(wildcardMatch('abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn', 
'abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*')).toBeFalsy() 64 | expect(wildcardMatch('abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn', 'abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*')).toBeTruthy() 65 | expect(wildcardMatch('abc*abcd*abcd*abc*abcd', 'abc*abc*abc*abc*abc')).toBeFalsy() 66 | expect(wildcardMatch('abc*abcd*abcd*abc*abcd*abcd*abc*abcd*abc*abc*abcd', 'abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd')).toBeTruthy() 67 | expect(wildcardMatch('abc', '********a********b********c********')).toBeTruthy() 68 | }) 69 | }) 70 | -------------------------------------------------------------------------------- /src/index.esm.js: -------------------------------------------------------------------------------- 1 | export { default as tokenize, transform } from './helper/tokenize' 2 | 3 | export { default as scoring } from './helper/scoring' 4 | 5 | export { 6 | default as query, 7 | AND, 8 | PHRASE, 9 | BEFORE, 10 | OR, 11 | KEYWORD, 12 | PHRASE_ALL, 13 | PHRASE_QUOROM, 14 | PHRASE_PROXIMITY 15 | } from './query/query' 16 | 17 | export { default as parse } from './query/parser' 18 | 19 | export { default as create, TABLES } from './master' 20 | -------------------------------------------------------------------------------- /src/index.spec.js: -------------------------------------------------------------------------------- 1 | import { open, transaction, request } from './helper/idb' 2 | import { create as createIndex, tokenize, parse, query } from './index.esm' 3 | import createEncryptionHelper from '../example/helper/encryptionHelper' 4 | import { vbDecode } from './helper/variableByteCodes' 5 | import { TABLES, DEFAULT_NAMES } from './master' 6 | import { getArrayGaps } from './helper/array' 7 | 8 | const MOCK = { 9 | ID: '123', 10 | TOKENS: tokenize('this is my body') 11 | } 12 | 13 | const DB_NAME = 'index' 14 | const DB_VERSION = 1 15 | 16 | const indexKey = new 
Uint8Array(32) 17 | const indexSalt = new Uint8Array(32) 18 | const transformers = createEncryptionHelper(indexKey, indexSalt) 19 | 20 | describe('index', () => { 21 | const getIndex = () => createIndex({ transformers }) 22 | 23 | const tableNameToId = Object.keys(TABLES).reduce((prev, cur) => { 24 | prev[DEFAULT_NAMES[TABLES[cur]]] = TABLES[cur] 25 | return prev 26 | }, {}) 27 | 28 | const removeValue = async (tableId, key) => { 29 | const tableName = DEFAULT_NAMES[tableId] 30 | const db = await open(indexedDB, DB_NAME, DB_VERSION) 31 | const tx = db.transaction(tableName, 'readwrite') 32 | const promise = transaction(tx) 33 | tx.objectStore(tableName).delete(transformers.property(tableNameToId[tableName], key)) 34 | await promise 35 | db.close() 36 | } 37 | 38 | const getDb = async (tableName) => { 39 | const db = await open(indexedDB, DB_NAME, DB_VERSION) 40 | const tx = db.transaction(tableName, 'readwrite') 41 | return { db, tx } 42 | } 43 | 44 | const getValue = async (tableId, key) => { 45 | const tableName = DEFAULT_NAMES[tableId] 46 | const { tx, db } = await getDb(tableName) 47 | const data = await request(tx.objectStore(tableName).get(transformers.property(tableNameToId[tableName], key))) 48 | db.close() 49 | 50 | const value = transformers.deserialize(tableId, key, data) 51 | 52 | if (tableId === TABLES.POSTINGS || tableId === TABLES.WILDCARDS) { 53 | if (!value) { 54 | return 55 | } 56 | return getArrayGaps(vbDecode(value)) 57 | } 58 | 59 | if (tableId === TABLES.POSITIONS) { 60 | if (!value) { 61 | return 62 | } 63 | return vbDecode(value) 64 | } 65 | 66 | return value 67 | } 68 | 69 | const getInternalDocId = (id) => getValue(TABLES.IDS_INVERSE, id) 70 | const getMultiple = (tableId) => async (keys) => { 71 | const tableName = DEFAULT_NAMES[tableId] 72 | const { tx, db } = await getDb(tableName) 73 | const data = await Promise.all(keys.map((term) => 74 | request(tx.objectStore(tableName).get(transformers.property(tableId, term)))) 75 | ) 76 | 
db.close() 77 | return data 78 | } 79 | 80 | const getInternalTermIds = getMultiple(TABLES.LEXICON_INVERSE) 81 | const getTerms = getMultiple(TABLES.LEXICON) 82 | 83 | describe('store', () => { 84 | let index 85 | let internalDocId 86 | let internalTermIds 87 | 88 | beforeAll(async () => { 89 | index = getIndex() 90 | await index.store(MOCK.ID, MOCK.TOKENS) 91 | internalDocId = await getInternalDocId(MOCK.ID) 92 | internalTermIds = await getInternalTermIds(MOCK.TOKENS) 93 | }) 94 | 95 | afterAll(async () => { 96 | await index.clear() 97 | index.close() 98 | }) 99 | 100 | it('should store a link between the keywords and data id', async () => { 101 | const value = await getValue(TABLES.POSTINGS, internalTermIds[0]) 102 | expect(value[0]) 103 | .toEqual(internalDocId) 104 | }) 105 | 106 | it('should store a link between the data and keywords', async () => { 107 | const value = await getValue(TABLES.POSITIONS, internalDocId) 108 | expect(value.sort()) 109 | .toEqual(internalTermIds.sort()) 110 | }) 111 | 112 | it('should store wildcard information', async () => { 113 | const values = await Promise.all([ 114 | getValue(TABLES.WILDCARDS, '^th').then(getTerms), 115 | getValue(TABLES.WILDCARDS, 'thi').then(getTerms), 116 | getValue(TABLES.WILDCARDS, 'his').then(getTerms), 117 | getValue(TABLES.WILDCARDS, 'is$').then(getTerms) 118 | ]) 119 | expect(values) 120 | .toEqual([['this'], ['this'], ['this'], ['this', 'is']]) 121 | }) 122 | }) 123 | 124 | describe('search', () => { 125 | let index 126 | 127 | const bodyA = 'hello this is a really long fluffy text abc' 128 | const bodyB = 'i just started using this secure email app this hello' 129 | const bodyC = 'hello this is a really good app abc' 130 | 131 | beforeAll(async () => { 132 | index = getIndex() 133 | await Promise.all([ 134 | index.store('123', tokenize(bodyA)), 135 | index.store('124', tokenize(bodyB)), 136 | index.store('125', tokenize(bodyC)), 137 | index.store('150', tokenize('random text')), 138 | 
index.store('160', tokenize('redemption rededicate')), 139 | index.store('161', tokenize('redundancy retired rediscover')) 140 | ]) 141 | }) 142 | 143 | afterAll(async () => { 144 | await index.clear() 145 | index.close() 146 | }) 147 | 148 | const mapIds = ({ result = [] }) => result.map(({ id }) => id) 149 | 150 | it('should not return any result for keywords that do not exist', async () => { 151 | expect(mapIds(await index.search(['foo']))) 152 | .toEqual([]) 153 | }) 154 | 155 | it('should return A, B and C', async () => { 156 | expect(mapIds(await index.search(tokenize('hello this')))) 157 | .toEqual(['123', '124', '125']) 158 | }) 159 | 160 | it('should return result for keywords that exist', async () => { 161 | const result = await index.search(['hello']) 162 | expect(mapIds(result)) 163 | .toEqual(['123', '124', '125']) 164 | }) 165 | 166 | it('should return A for the query fluffy', async () => { 167 | const result = await index.search(['fluffy']) 168 | expect(mapIds(result)) 169 | .toEqual(['123']) 170 | }) 171 | 172 | it('should return B and C for the query app', async () => { 173 | const result = await index.search(['app']) 174 | expect(mapIds(result)) 175 | .toEqual(['124', '125']) 176 | }) 177 | 178 | it('should return with an extra id key', async () => { 179 | const { result } = await index.search(['hello', 'secure']) 180 | expect(result.map((result) => result.id)) 181 | .toEqual(['123', '124', '125']) 182 | }) 183 | 184 | it('should return with an extra match key', async () => { 185 | const { result } = await index.search(['hello', 'secure']) 186 | expect(result.map((result) => result.match)) 187 | .toEqual([['hello'], ['hello', 'secure'], ['hello']]) 188 | }) 189 | 190 | it('should return unique keywords for the wildcard query', async () => { 191 | const result = await index.wildcard('re*') 192 | expect(result).toEqual(['really', 'redemption', 'rededicate', 'redundancy', 'retired', 'rediscover']) 193 | }) 194 | 195 | it('should return keywords that 
match', async () => { 196 | const result = await index.wildcard('red*') 197 | expect(result).toEqual(['redemption', 'rededicate', 'redundancy', 'rediscover']) 198 | }) 199 | 200 | it('should return keywords in the end', async () => { 201 | const result = await index.wildcard('*ed') 202 | expect(result).toEqual(['started', 'retired']) 203 | }) 204 | 205 | it('should return keywords', async () => { 206 | const result = await index.wildcard('*ndo*') 207 | expect(result).toEqual(['random']) 208 | }) 209 | 210 | it('should clean stale data', async () => { 211 | const id = '199' 212 | await index.store(id, tokenize('unicorn zebra')) 213 | const internalId = await getInternalDocId('199') 214 | const [unicornId, zebraId] = await getInternalTermIds(['unicorn', 'zebra']) 215 | 216 | expect(mapIds(await index.search(['unicorn', 'zebra']))).toEqual([id]) 217 | expect(await getValue(TABLES.POSTINGS, unicornId)).toEqual([internalId]) 218 | expect(await getValue(TABLES.POSTINGS, zebraId)).toEqual([internalId]) 219 | 220 | await removeValue(TABLES.POSITIONS, internalId) 221 | 222 | expect(mapIds(await index.search(['unicorn', 'zebra']))).toEqual([]) 223 | expect(await getValue(TABLES.POSTINGS, unicornId)).toBeUndefined() 224 | expect(await getValue(TABLES.POSTINGS, zebraId)).toBeUndefined() 225 | }) 226 | 227 | it('should search with query', async () => { 228 | const results = query(index.search, index.wildcard, parse('red*')) 229 | expect((await results).map(({ id }) => id)).toEqual(['160', '161']) 230 | }) 231 | }) 232 | 233 | describe('remove one', () => { 234 | let index 235 | let internalId 236 | let internalTermId 237 | 238 | beforeAll(async () => { 239 | index = getIndex() 240 | await index.store(MOCK.ID, MOCK.TOKENS) 241 | 242 | internalId = await getInternalDocId(MOCK.ID) 243 | internalTermId = await getInternalTermIds(MOCK.TOKENS) 244 | 245 | await index.remove(MOCK.ID) 246 | }) 247 | 248 | afterAll(async () => { 249 | await index.clear() 250 | index.close() 251 | }) 252 
| 253 | it('should remove data', async () => { 254 | const value = await getValue(TABLES.POSITIONS, internalId) 255 | expect(value) 256 | .toBeUndefined() 257 | }) 258 | 259 | it('should remove the link between the keywords and data', async () => { 260 | const value = await getValue(TABLES.POSTINGS, internalTermId[0]) 261 | expect(value) 262 | .toBeUndefined() 263 | }) 264 | }) 265 | 266 | describe('remove multiple', () => { 267 | let index 268 | let internalId 269 | let internalId2 270 | const ID2 = '321' 271 | const body2 = 'this is my body' 272 | 273 | beforeAll(async () => { 274 | index = getIndex() 275 | 276 | await index.store(MOCK.ID, MOCK.TOKENS.concat('removed')) 277 | await index.store(ID2, tokenize(body2)) 278 | 279 | internalId = await getInternalDocId(MOCK.ID) 280 | internalId2 = await getInternalDocId(ID2) 281 | 282 | await index.remove(MOCK.ID) 283 | }) 284 | 285 | afterAll(async () => { 286 | await index.clear() 287 | index.close() 288 | }) 289 | 290 | it('should remove the first instance', async () => { 291 | const value = await getValue(TABLES.POSITIONS, internalId) 292 | expect(value) 293 | .toBeUndefined() 294 | const value2 = await getValue(TABLES.POSITIONS, internalId2) 295 | expect(value2) 296 | .not.toBeUndefined() 297 | }) 298 | 299 | it('should remove the link between the keywords and the first message id', async () => { 300 | const terms = await getInternalTermIds(tokenize(body2)) 301 | await Promise.all(terms.map((async (token) => { 302 | const value = await getValue(TABLES.POSTINGS, token) 303 | expect(value) 304 | .toEqual([internalId2]) 305 | }))) 306 | }) 307 | 308 | it('should keep wildcard information', async () => { 309 | const values = await Promise.all([ 310 | getValue(TABLES.WILDCARDS, '^th').then(getTerms), 311 | getValue(TABLES.WILDCARDS, 'thi').then(getTerms), 312 | getValue(TABLES.WILDCARDS, 'his').then(getTerms), 313 | getValue(TABLES.WILDCARDS, 'is$').then(getTerms) 314 | ]) 315 | expect(values) 316 | .toEqual([['this'], 
['this'], ['this'], ['this', 'is']]) 317 | }) 318 | 319 | it('should remove unique wildcard information', async () => { 320 | const values = await Promise.all([ 321 | getValue(TABLES.WILDCARDS, '^re'), 322 | getValue(TABLES.WILDCARDS, 'rem'), 323 | getValue(TABLES.WILDCARDS, 'emo'), 324 | getValue(TABLES.WILDCARDS, 'mov'), 325 | getValue(TABLES.WILDCARDS, 'ove'), 326 | getValue(TABLES.WILDCARDS, 'ved'), 327 | getValue(TABLES.WILDCARDS, 'ed$') 328 | ]) 329 | expect(values) 330 | .toEqual([undefined, undefined, undefined, undefined, undefined, undefined, undefined]) 331 | }) 332 | }) 333 | 334 | describe('stats', () => { 335 | let index 336 | 337 | beforeAll(async () => { 338 | index = getIndex() 339 | await index.clear() 340 | await index.store(MOCK.ID, MOCK.TOKENS) 341 | }) 342 | 343 | afterAll(async () => { 344 | await index.clear() 345 | index.close() 346 | }) 347 | 348 | it('should get stats', async () => { 349 | const stats = await index.stats() 350 | expect(stats.total).toBe(28) 351 | }) 352 | 353 | it('should get number of terms indexed', async () => { 354 | expect(await index.numberOfTerms()).toBe(4) 355 | }) 356 | }) 357 | }) 358 | 359 | -------------------------------------------------------------------------------- /src/master.js: -------------------------------------------------------------------------------- 1 | import { open as openDb, transaction, READWRITE, openWithClosure } from './helper/idb' 2 | 3 | import createPostingsStore from './store/postingsStore' 4 | import createPositionsStore from './store/positionsStore' 5 | import createWildcardStore from './store/wildcardStore' 6 | import createTransposeStore from './store/transposeStore' 7 | import createKeyValueStore, { withTransformers } from './store/keyValueStore' 8 | 9 | import { flatten, mutablyShuffleTwo, unique } from './helper/array' 10 | import { wildcardMatch } from './helper/wildcard' 11 | 12 | const DB_VERSION = 1 13 | 14 | export const TABLES = { 15 | LEXICON: 1, 16 | LEXICON_INVERSE: 
2,

    IDS: 3,
    IDS_INVERSE: 4,

    POSTINGS: 5,
    POSITIONS: 6,
    WILDCARDS: 7
}

// Default object store names, keyed by the numeric table ids above.
// Callers may override these via the factory's `options.names`.
export const DEFAULT_NAMES = {
    db: 'index',
    [TABLES.LEXICON]: 'lexicon',
    [TABLES.LEXICON_INVERSE]: 'lexicon_inverse',
    [TABLES.IDS]: 'ids',
    [TABLES.IDS_INVERSE]: 'ids_inverse',
    [TABLES.POSTINGS]: 'postings',
    [TABLES.POSITIONS]: 'positions',
    [TABLES.WILDCARDS]: 'wildcards'
}

// Returns an IndexedDB upgrade callback that creates one object store per
// table when the database is first created (oldVersion 0).
const upgradeDb = (names) => (db, oldVersion) => {
    switch (oldVersion) {
        case 0: {
            [
                TABLES.LEXICON,
                TABLES.IDS,
                TABLES.LEXICON_INVERSE,
                TABLES.IDS_INVERSE,
                TABLES.POSTINGS,
                TABLES.POSITIONS,
                TABLES.WILDCARDS
            ].forEach((table) => db.createObjectStore(names[table]))
            break
        }
    }
}

// Validate a document id: must be a truthy string or number.
// NOTE(review): falsy ids (0, '') are rejected — confirm the numeric id 0
// is intentionally disallowed.
const assertId = (id) => {
    const type = typeof id
    return !(!id || (type !== 'string' && type !== 'number'))
}

// Identity transformers used when the caller supplies no encryption hooks
// (see options.transformers in the factory below).
const DEFAULT_TRANSFORMERS = {
    property: (id, key) => key,
    serialize: (id, key, value) => value,
    deserialize: (id, key, value) => value
}

/**
 * Create the encrypted search index.
 * @param {Object} options
 * @returns {Object}
 */
export default (options = {}) => {
    const names = { ...DEFAULT_NAMES, ...options.names, }
    const transformers = { ...DEFAULT_TRANSFORMERS, ...options.transformers }
    // Idle period before the database connection handle is closed (ms).
    const closeTimeout = options.closeTimeout || 15000

    const open = () => openDb(indexedDB, names.db, DB_VERSION, upgradeDb(names))

    // getTransaction opens transactions on demand; close releases the connection.
    const { getTransaction, close } = openWithClosure(open, closeTimeout)

    // Bidirectional mapping between terms and internal term ids
    // (LEXICON and LEXICON_INVERSE tables).
    const lexiconStore = createTransposeStore(
        withTransformers(
            TABLES.LEXICON,
            createKeyValueStore(names[TABLES.LEXICON]),
            transformers
        ),
        withTransformers(
            TABLES.LEXICON_INVERSE,
            createKeyValueStore(names[TABLES.LEXICON_INVERSE]),
            transformers
        ),
        getTransaction
    )

    // Bidirectional mapping between external document ids and internal ids.
    const idsStore = createTransposeStore(
        withTransformers(
            TABLES.IDS,
            createKeyValueStore(names[TABLES.IDS]),
            transformers
        ),
        withTransformers(
            TABLES.IDS_INVERSE,
            createKeyValueStore(names[TABLES.IDS_INVERSE]),
            transformers
        ),
        getTransaction
    )

    // Posting lists: internal term id -> internal document ids.
    const postingsStore = createPostingsStore(
        withTransformers(
            TABLES.POSTINGS,
            createKeyValueStore(names[TABLES.POSTINGS]),
            transformers,
        ),
        getTransaction
    )

    // Internal document id -> term ids the document contains.
    const positionsStore = createPositionsStore(
        withTransformers(
            TABLES.POSITIONS,
            createKeyValueStore(names[TABLES.POSITIONS]),
            transformers,
        ),
        getTransaction
    )

    // n-gram -> term ids, used to answer wildcard queries.
    const wildcardStore = createWildcardStore(
        withTransformers(
            TABLES.WILDCARDS,
            createKeyValueStore(names[TABLES.WILDCARDS]),
            transformers
        ),
        getTransaction
    )

    /**
     * Clean stale data from the postings table when performing a search.
     * It relies on the fact that a term returned an id which does not exist in the positions table.
137 | * @param {Array} positions 138 | * @param {Array} ids 139 | * @param {Array} terms 140 | */ 141 | const cleanStaleData = async (positions, ids, terms) => { 142 | const staleIds = positions.reduce((agg, terms, i) => { 143 | // Detecting stale data. 144 | if (terms.length === 0) { 145 | agg.push(ids[i]) 146 | } 147 | return agg 148 | }, []) 149 | 150 | if (!staleIds.length) { 151 | return 152 | } 153 | 154 | staleIds.forEach((id) => postingsStore.removeBulk(terms, id)) 155 | } 156 | 157 | /** 158 | * Find data based on the terms. 159 | * @param {Array} searchTerms Terms to search 160 | * @return {Promise} 161 | */ 162 | const search = async (searchTerms = []) => { 163 | if (!Array.isArray(searchTerms)) { 164 | throw new Error('Terms must be an array') 165 | } 166 | 167 | const uniqueSearchTerms = unique(searchTerms) 168 | const uniqueTransposedTerms = await lexiconStore.bulk(uniqueSearchTerms) 169 | const { idsToTerms, termsToIds, ids } = await postingsStore.getBulk(uniqueTransposedTerms) 170 | const positions = await positionsStore.getBulk(ids) 171 | const termIds = unique(flatten(positions)) 172 | 173 | cleanStaleData(positions, ids, uniqueTransposedTerms) 174 | 175 | const [idsTransposed, termsTransposed] = await Promise.all([ 176 | idsStore.from(ids), 177 | lexiconStore.from(termIds) 178 | ]) 179 | 180 | const termIdsToTerm = termIds.reduce((agg, cur, i) => { 181 | agg[cur] = termsTransposed[i] 182 | return agg 183 | }, {}) 184 | 185 | const result = positions 186 | .reduce((agg, terms, i) => { 187 | // Ignore stale data. 
188 | if (terms.length === 0) { 189 | return agg 190 | } 191 | 192 | const id = idsTransposed[i] 193 | const match = idsToTerms[i] 194 | const transposedTerms = terms.map((term) => termIdsToTerm[term]) 195 | const transposedMatch = match.map((term) => termIdsToTerm[term]) 196 | 197 | agg.push({ 198 | _id: ids[i], 199 | _terms: terms, 200 | _match: match, 201 | terms: transposedTerms, 202 | match: transposedMatch, 203 | id, 204 | }) 205 | 206 | return agg 207 | }, []) 208 | 209 | return { 210 | result, 211 | ids, 212 | termsToIds, 213 | idsToTerms 214 | } 215 | } 216 | 217 | /** 218 | * Store terms. 219 | * @param {String|Number} id 220 | * @param {Array} terms 221 | * @return {Promise} 222 | */ 223 | const store = async (id, terms) => { 224 | if (!assertId(id)) { 225 | throw new Error('ID required') 226 | } 227 | if (!Array.isArray(terms)) { 228 | throw new Error('Terms must be an array') 229 | } 230 | if (terms.length === 0) { 231 | return 232 | } 233 | 234 | const [[transposedId], transposedTerms] = await Promise.all([ 235 | idsStore.bulk([id]), 236 | lexiconStore.bulk(terms) 237 | ]) 238 | 239 | const uniqueTerms = unique(terms) 240 | const uniqueTransposedTerms = unique(transposedTerms) 241 | 242 | // Randomize the array to prevent row-lock contention 243 | mutablyShuffleTwo(uniqueTerms, uniqueTransposedTerms) 244 | 245 | return Promise.all([ 246 | postingsStore.insertBulk(uniqueTransposedTerms, transposedId), 247 | positionsStore.insert(transposedId, transposedTerms), 248 | wildcardStore.insertBulk(uniqueTerms, uniqueTransposedTerms) 249 | ]) 250 | } 251 | 252 | /** 253 | * Remove a document. Also deletes all terms related to the document as well. 254 | * Returns a promise that resolves to a list of terms that were fully deleted. 
     * @param {String|Number} id
     * @returns {Promise}
     */
    const remove = async (id) => {
        if (!assertId(id)) {
            throw new Error('ID required')
        }

        // Resolve the external id to its internal document id.
        const [transposedId] = await idsStore.bulk([id])

        // All internal term ids this document was indexed under.
        const terms = await positionsStore.get(transposedId)

        const uniqueTerms = unique(terms)
        // Drop the document from each term's posting list; per the contract above,
        // removals[i] marks the terms that became fully deleted in the process.
        const removals = await postingsStore.removeBulk(uniqueTerms, transposedId)
        const removedTerms = uniqueTerms.filter((term, i) => removals[i])

        return Promise.all([
            // Fully deleted terms also lose their wildcard n-gram entries.
            lexiconStore.from(removedTerms)
                .then((termsTransposed) => wildcardStore.removeBulk(termsTransposed, removedTerms)),
            positionsStore.remove(transposedId)
        ])
    }

    /**
     * Perform a wildcard query.
     * @param {String} query Wildcard query pattern.
     * @returns {Promise} Resolves to the matching terms.
     */
    const wildcard = async (query) => {
        // Candidate term ids sharing an n-gram with the pattern...
        const terms = await wildcardStore.get(query)
        const termsTransposed = await lexiconStore.from(terms)
        // ...narrowed down to the terms that match the full pattern.
        return termsTransposed
            .filter((token) => wildcardMatch(token, query))
    }

    /**
     * Clear all tables.
     * @return {Promise}
     */
    const clear = async () => {
        const stores = [postingsStore, positionsStore, wildcardStore, lexiconStore, idsStore]

        // One readwrite transaction spanning every store so the wipe is atomic.
        const tx = await getTransaction([
            ...flatten(stores.map((store) => store.name)),
        ], READWRITE)

        const promise = transaction(tx)
        stores.forEach((store) => store.clear(tx))
        return promise
    }

    /**
     * Returns stats on all tables.
308 | * @returns {Promise} 309 | */ 310 | const stats = async () => { 311 | 312 | const get = async (store) => { 313 | const tx = await getTransaction(store.name) 314 | return Promise.all([store.count(tx), store.size(tx)]) 315 | } 316 | 317 | const result = await Promise.all([postingsStore, positionsStore, wildcardStore, lexiconStore, idsStore].map(get)) 318 | 319 | const getStats = (total, size) => ({ total, size }) 320 | const getStatsResult = ([total, size]) => getStats(total, size) 321 | 322 | const postings = getStatsResult(result[0]) 323 | const positions = getStatsResult(result[1]) 324 | const wildcards = getStatsResult(result[2]) 325 | const lexicon = getStatsResult(result[3]) 326 | const ids = getStatsResult(result[4]) 327 | 328 | return { 329 | postings, 330 | positions, 331 | wildcards, 332 | lexicon, 333 | ids, 334 | ...getStats( 335 | result.reduce((prev, cur) => prev + cur[0], 0), 336 | result.reduce((prev, cur) => prev + cur[1], 0) 337 | ) 338 | } 339 | } 340 | 341 | /** 342 | * Return the number of terms currently indexed. 
343 | * @returns {Promise} 344 | */ 345 | const numberOfTerms = async () => { 346 | const tx = await getTransaction(postingsStore.name) 347 | return postingsStore.count(tx) 348 | } 349 | 350 | return { 351 | search, 352 | wildcard, 353 | store, 354 | remove, 355 | clear, 356 | numberOfTerms, 357 | stats, 358 | close 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /src/query/grammar.peg: -------------------------------------------------------------------------------- 1 | { 2 | function compareNot(c) { 3 | return c === '-' || c === '!'; 4 | } 5 | 6 | function notIdx(o) { 7 | if (typeof o !== 'string') { 8 | return false; 9 | } 10 | for (var i = 0; i < o.length; ++i) { 11 | if (compareNot(o.charAt(i))) { 12 | return i; 13 | } 14 | } 15 | return -1; 16 | } 17 | 18 | function checkNot(o) { 19 | if (typeof o !== 'string') { 20 | return false; 21 | } 22 | return compareNot(o.charAt(0)); 23 | } 24 | } 25 | 26 | start 27 | = And 28 | 29 | And 30 | = nl:NOT a:Order AND nr:NOT b:And { 31 | if (nl && nr) { 32 | error('Unexpected NOT in AND query'); 33 | } 34 | return ['&', a, b, nl ? true : false, nr ? true : false]; 35 | } 36 | / Order 37 | 38 | Order 39 | = a:Or ORDER b:Order { 40 | return ['<<', a, b]; 41 | } 42 | / Or 43 | 44 | Or 45 | = a:Atom OR b:Or { 46 | return ['|', a, b]; 47 | } 48 | / Atom 49 | 50 | Atom 51 | = Expression 52 | / Phrase 53 | / Keyword 54 | 55 | Expression 56 | = '(' _ expr:start _ ')' { return expr; } 57 | 58 | Keyword 59 | = _ a:KEYWORD_START_OPERATOR b:[^ \t)(<|&$]+ c:KEYWORD_END_OPERATOR { 60 | var value = b.join(''); 61 | if (notIdx(value) !== -1) { 62 | error('Unexpected NOT in WORD'); 63 | } 64 | if (value === '*') { 65 | error('Unexpected wildcard, only supported in PHRASE query') 66 | } 67 | return ['w', value, a ? a : '', c ? 
c : '']; 68 | } 69 | 70 | Phrase 71 | = '"' x:PHRASE_START_OPERATOR a:[^"$]+ y:PHRASE_END_OPERATOR '"' b:PHRASE_OPERATOR { 72 | var value = a.join(''); 73 | if (notIdx(value) !== -1) { 74 | error('Unexpected NOT in PHRASE query'); 75 | } 76 | var extra = b.length ? b[0] : '' 77 | var n = b.length ? parseInt(b[1].join(''), 10) : 0 78 | return ['"', value, (x ? x : '') + (y ? y : ''), extra, n]; 79 | } 80 | 81 | NOT 82 | = [!-]? 83 | 84 | OR 85 | = _ '|' _ 86 | 87 | ORDER 88 | = _ '<<' _ 89 | 90 | AND 91 | = _ '&' _ / [ \t]+ 92 | 93 | KEYWORD_START_OPERATOR 94 | = '^'? / '*'? 95 | 96 | KEYWORD_END_OPERATOR 97 | = '$'? / '*'? 98 | 99 | PHRASE_OPERATOR 100 | = '~' [0-9]+ / '/' [0-9]+ / '' 101 | 102 | PHRASE_START_OPERATOR 103 | = '^'? 104 | 105 | PHRASE_END_OPERATOR 106 | = '$'? 107 | 108 | _ "whitespace" 109 | = [ \t]* 110 | 111 | 112 | -------------------------------------------------------------------------------- /src/query/grammar.spec.js: -------------------------------------------------------------------------------- 1 | import { parse } from './grammar' 2 | 3 | const KEYWORD = (keyword) => ['w', keyword, '', ''] 4 | const AND = (expr1, expr2, not1 = false, not2 = false) => ['&', expr1, expr2, not1, not2] 5 | const OR = (expr1, expr2) => ['|', expr1, expr2] 6 | const BETWEEN = (expr1, expr2) => ['<<', expr1, expr2] 7 | const PHRASE = (phrase, extra = '', n = 0, modifier = '') => ['"', phrase, modifier, extra, n] 8 | 9 | describe('grammar', () => { 10 | it('should default to AND query', () => { 11 | expect(parse('cat mouse')).toEqual(AND(KEYWORD('cat'), KEYWORD('mouse'))) 12 | }) 13 | 14 | it('should parse AND queries', () => { 15 | expect(parse('cat & mouse')).toEqual(AND(KEYWORD('cat'), KEYWORD('mouse'))) 16 | }) 17 | 18 | it('should parse OR queries', () => { 19 | expect(parse('cat | mouse')).toEqual(OR(KEYWORD('cat'), KEYWORD('mouse'))) 20 | }) 21 | 22 | it('should parse ORDER queries', () => { 23 | expect(parse('cat << 
mouse')).toEqual(BETWEEN(KEYWORD('cat'), KEYWORD('mouse'))) 24 | }) 25 | 26 | it('should parse PHRASE queries', () => { 27 | expect(parse('hello "cat mouse"')).toEqual(AND(KEYWORD('hello'), PHRASE('cat mouse'))) 28 | }) 29 | 30 | it('should parse PHRASE/n queries', () => { 31 | expect(parse('"cat mouse"/10')).toEqual(PHRASE('cat mouse', '/', 10)) 32 | }) 33 | 34 | it('should parse PHRASE~n queries', () => { 35 | expect(parse('"cat mouse"~10')).toEqual(PHRASE('cat mouse', '~', 10)) 36 | }) 37 | 38 | it('should parse AND and OR queries', () => { 39 | expect(parse('looking for cat | mouse')).toEqual(AND(KEYWORD('looking'), AND(KEYWORD('for'), OR(KEYWORD('cat'), KEYWORD('mouse'))))) 40 | }) 41 | 42 | it('should parse ORDER and OR queries', () => { 43 | expect(parse('cat << mouse | dog')).toEqual(BETWEEN(KEYWORD('cat'), OR(KEYWORD('mouse'), KEYWORD('dog')))) 44 | }) 45 | 46 | it('should parse complex ORDER queries', () => { 47 | expect(parse('(bag of words) << "phrase here" << red|blue|green')).toEqual( 48 | BETWEEN( 49 | AND(KEYWORD('bag'), 50 | AND(KEYWORD('of'), 51 | KEYWORD('words')) 52 | ), 53 | BETWEEN( 54 | PHRASE('phrase here'), 55 | OR(KEYWORD('red'), 56 | OR(KEYWORD('blue'), 57 | KEYWORD('green') 58 | ) 59 | ) 60 | ) 61 | ) 62 | ) 63 | }) 64 | 65 | it('should parse queries in order', () => { 66 | expect(parse('partridge << turtle doves << French hens')).toEqual( 67 | AND( 68 | BETWEEN( 69 | KEYWORD('partridge'), 70 | KEYWORD('turtle') 71 | ), 72 | AND( 73 | BETWEEN( 74 | KEYWORD('doves'), 75 | KEYWORD('French'), 76 | ), 77 | KEYWORD('hens') 78 | ) 79 | ) 80 | ) 81 | }) 82 | 83 | it('should parse grouping queries', () => { 84 | expect(parse('(looking for) | (cat mouse)')).toEqual( 85 | OR( 86 | AND(KEYWORD('looking'), KEYWORD('for')), 87 | AND(KEYWORD('cat'), KEYWORD('mouse')) 88 | ) 89 | ) 90 | }) 91 | 92 | it('should parse grouping and OR queries', () => { 93 | expect(parse('(looking for) | cat')).toEqual( 94 | OR( 95 | AND( 96 | KEYWORD('looking'), 97 | 
KEYWORD('for') 98 | ), 99 | KEYWORD('cat') 100 | ) 101 | ) 102 | }) 103 | 104 | it('should parse NOT queries', () => { 105 | expect(parse('!hello world')).toEqual(AND(KEYWORD('hello'), KEYWORD('world'), true, false)) 106 | expect(parse('hello -world')).toEqual(AND(KEYWORD('hello'), KEYWORD('world'), false, true)) 107 | }) 108 | 109 | it('should parse complex NOT queries', () => { 110 | expect(parse('hello -(or | query)')).toEqual( 111 | AND( 112 | KEYWORD('hello'), 113 | OR( 114 | KEYWORD('or'), 115 | KEYWORD('query') 116 | ), 117 | false, 118 | true 119 | ) 120 | ) 121 | expect(parse('aaa -(bbb -(ccc ddd))')).toEqual(AND(KEYWORD('aaa'), AND(KEYWORD('bbb'), AND(KEYWORD('ccc'), KEYWORD('ddd')), false, true), false, true)) 122 | }) 123 | 124 | it('should throw when using illegal NOT', () => { 125 | expect(() => parse('-cat')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 126 | expect(() => parse('cat | -dog')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 127 | expect(() => parse('-cat -dog')).toThrow(new SyntaxError('Unexpected NOT in AND query')) 128 | expect(() => parse('!cat | dog')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 129 | expect(() => parse('!cat | !dog')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 130 | expect(() => parse('cat << -dog')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 131 | expect(() => parse('"cat !dog"')).toThrow(new SyntaxError('Unexpected NOT in PHRASE query')) 132 | expect(() => parse('"cat !dog"/~10')).toThrow(new SyntaxError('Unexpected NOT in PHRASE query')) 133 | expect(() => parse('c!at')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 134 | expect(() => parse('!c!at')).toThrow(new SyntaxError('Unexpected NOT in WORD')) 135 | }) 136 | 137 | it('should throw when using illegal WILDCARD', () => { 138 | expect(() => parse('hello *')).toThrow(new SyntaxError('Unexpected wildcard, only supported in PHRASE query')) 139 | }) 140 | }) 141 | 
import { parse as parseGrammar } from './grammar'
import { KEYWORD, PHRASE } from './query'
import defaultTokenize, { transform as defaultTransform } from '../helper/tokenize'

/**
 * Normalize a parsed query tree in place.
 * Keyword leaves get their term mapped through `transform`, phrase leaves
 * are split into token lists through `tokenize`; branch nodes are walked
 * recursively (sub-queries sit at indexes 1 and 2).
 * @param {Array} node - parse-tree node produced by the grammar
 * @param {Function} tokenize
 * @param {Function} transform
 */
const normalizeNode = (node, tokenize, transform) => {
    if (!node) {
        return
    }
    const operator = node[0]
    if (operator === KEYWORD) {
        node[1] = transform(node[1])
        return
    }
    if (operator === PHRASE) {
        node[1] = tokenize(node[1], 0, false)
        return
    }
    for (const i of [1, 2]) {
        if (Array.isArray(node[i])) {
            normalizeNode(node[i], tokenize, transform)
        }
    }
}

/**
 * Parse a query string into a normalized query tree.
 * @param {String} query
 * @param {Function} tokenize
 * @param {Function} transform
 * @returns {Array}
 */
export default (query = '', tokenize = defaultTokenize, transform = defaultTransform) => {
    const tree = parseGrammar(query)
    normalizeNode(tree, tokenize, transform)
    return tree
}
import { hasWildcard, wildcardMatch } from '../helper/wildcard'

export const AND = '&'
export const OR = '|'
export const BEFORE = '<<'
export const PHRASE = '"'
export const KEYWORD = 'w'

export const PHRASE_ALL = ''
export const PHRASE_PROXIMITY = '~'
export const PHRASE_QUOROM = '/'

/**
 * Validate a phrase operator and its numeric argument.
 * PROXIMITY and QUOROM require a non-negative number.
 * @param {String} type
 * @param {Number} n
 * @returns {Boolean}
 */
const validatePhraseOperator = (type, n) => {
    switch (type) {
        case PHRASE_ALL:
            return true
        case PHRASE_PROXIMITY:
        case PHRASE_QUOROM:
            return typeof n === 'number' && n >= 0
        default:
            return false
    }
}

/**
 * Validate a phrase ^/$ modifier combination.
 * @param {String} modifier
 * @returns {Boolean}
 */
const validatePhraseModifier = (modifier = '') => {
    switch (modifier) {
        case '':
        case '$':
        case '^':
        case '^$':
            return true
        default:
            return false
    }
}

/**
 * Validate a keyword start or end operator.
 * @param {String} type
 * @returns {Boolean}
 */
const validateKeywordOperator = (type) => {
    switch (type) {
        case '':
        case '^':
        case '$':
            return true
        default:
            return false
    }
}

/**
 * Validate a query tree operator.
 * @param {String} type
 * @returns {Boolean}
 */
const validateQueryOperator = (type) => {
    switch (type) {
        case AND:
        case OR:
        case BEFORE:
        case PHRASE:
        case KEYWORD:
            return true
        default:
            return false
    }
}

// Unwrap the result list from a search() return value.
const getResults = ({ result }) => result

/**
 * Expand a keyword into the list of keywords to search for.
 * Wildcard keywords are expanded through the wildcard index.
 * @param {Function} wildcard
 * @param {String} keyword
 * @returns {Promise}
 */
const getQueryKeywords = (wildcard, keyword) => {
    if (hasWildcard(keyword)) {
        return wildcard(keyword)
    }
    return Promise.resolve([keyword])
}

/**
 * Pick the keyword(s) to pre-search for a phrase.
 * Prefers the first non-wildcard keyword (cheapest lookup); when every
 * keyword contains a wildcard, expands the first one.
 * @param {Function} wildcard
 * @param {Array} keywords
 * @returns {Promise}
 */
const getPhraseQueryKeywords = (wildcard, keywords) => {
    const nonWildcardKeywordIndex = keywords.findIndex((keyword) => !hasWildcard(keyword))
    if (nonWildcardKeywordIndex === -1) {
        return wildcard(keywords[0])
    }
    return Promise.resolve([keywords[nonWildcardKeywordIndex]])
}

const equalityComparator = (a, b) => a === b

// Bind the right-hand operand of a two-argument comparator.
const curriedComparator = (comparator, b) => (a) => comparator(a, b)

const trueCb = () => true

// Results are keyed on their document id.
const resultExtractor = (a = {}) => a._id

// Merge the matched keywords of two results for the same document.
const resultTransformer = (a, { match = [] } = {}) => ({ ...a, match: unique(a.match.concat(match)) })

/**
 * Merge two results for a BEFORE (<<) query.
 * Returns undefined when the left-hand matches do not all precede the
 * right-hand matches in the document's term order, dropping the result.
 */
const beforeTransformer = (a, b) => {
    const { _terms: _termsLeft = [], _match: _matchLeft = [] } = a
    const { _match: _matchRight = [], match: matchRight = [] } = b
    if (!ordered(_termsLeft, _matchLeft, _matchRight)) {
        return undefined
    }
    return {
        ...a,
        _match: unique(a._match.concat(_matchRight)),
        match: unique(a.match.concat(matchRight))
    }
}

// AND NOT: keep results from A that do not appear in B.
const handleResultsAndNot = (resultsA, resultsB) => minus(resultsA, resultsB, resultExtractor)

const handleResultsAnd = (resultsA, resultsB) => intersect(resultsA, resultsB, resultExtractor, resultTransformer)

const handleResultsOr = (resultsA, resultsB) => union(resultsA, resultsB, resultExtractor, resultTransformer)

const handleResultsBefore = (resultsA, resultsB) => intersect(resultsA, resultsB, resultExtractor, beforeTransformer)

/**
 * Combine the results of two evaluated sub-queries.
 * @param {String} queryOperator - AND, OR or BEFORE
 * @param {Array} resultsA
 * @param {Array} resultsB
 * @param {Boolean} notA - negate the left operand (AND only)
 * @param {Boolean} notB - negate the right operand (AND only)
 * @returns {Array}
 */
const handleBranchResults = (queryOperator, resultsA, resultsB, notA, notB) => {
    switch (queryOperator) {
        case AND:
            if (notA || notB) {
                return handleResultsAndNot(notA ? resultsB : resultsA, notA ? resultsA : resultsB)
            }
            return handleResultsAnd(resultsA, resultsB)
        case OR:
            return handleResultsOr(resultsA, resultsB)
        case BEFORE:
            return handleResultsBefore(resultsA, resultsB)
        default:
            throw new Error(`Invalid operator ${queryOperator} in branch`)
    }
}

// Filters matching a document's first/last term against a phrase modifier.
const filterFirst = (comparator) => (result = []) =>
    result.length >= 1 && comparator(result[0])

const filterLast = (comparator) => (result = []) =>
    result.length >= 1 && comparator(result[result.length - 1])

const filterFirstLast = (comparatorFirst, comparatorLast) => (result = []) =>
    result.length >= 1 && comparatorFirst(result[0]) && comparatorLast(result[result.length - 1])

/**
 * Build the term filter for a phrase ^/$ modifier.
 * @param {Function} comparator
 * @param {String} modifier - '', '^', '$' or '^$'
 * @param {Array} keywords
 * @returns {Function} predicate over a document's term list
 */
const getPhraseFilter = (comparator, modifier, keywords = []) => {
    if (modifier === '' || keywords.length === 0) {
        return trueCb
    }
    switch (modifier) {
        case '$':
            return filterLast(
                curriedComparator(comparator, keywords[keywords.length - 1]))
        case '^':
            return filterFirst(
                curriedComparator(comparator, keywords[0]))
        case '^$':
            return filterFirstLast(
                curriedComparator(comparator, keywords[0]),
                curriedComparator(comparator, keywords[keywords.length - 1]))
    }
}

/**
 * Evaluate a PHRASE query.
 * Searches for one representative keyword, then filters the candidates by
 * the phrase operator (ALL / PROXIMITY ~n / QUOROM /n) and the ^/$ modifier.
 * @param {Function} search
 * @param {Function} wildcard
 * @param {Array} keywords - tokenized phrase
 * @param {String} phraseModifier - '', '^', '$' or '^$'
 * @param {String} phraseOperator - PHRASE_ALL, PHRASE_PROXIMITY or PHRASE_QUOROM
 * @param {Number} n - numeric argument for ~n / /n
 * @returns {Promise}
 * @throws {Error} on a malformed phrase query
 */
const handlePhrase = async (search, wildcard, keywords, phraseModifier, phraseOperator, n) => {
    if (!Array.isArray(keywords) || keywords.length === 0 || !validatePhraseOperator(phraseOperator, n) || !validatePhraseModifier(phraseModifier)) {
        throw new Error('Malformed phrase query')
    }

    const queryKeywords = await getPhraseQueryKeywords(wildcard, keywords)

    const results = getResults(await search(queryKeywords))
    const comparator = wildcardMatch
    const filter = getPhraseFilter(comparator, phraseModifier, keywords)

    switch (phraseOperator) {
        case PHRASE_QUOROM: {
            if (n === 1) {
                // A quorum of one is trivially satisfied by any result (each
                // already contains the searched keyword), so the quorom()
                // check can be skipped — but the ^/$ modifier filter must
                // still apply. Previously the raw results were returned
                // unfiltered, unlike every other phrase path.
                return results
                    .filter(({ terms: resultTerms = [] }) => filter(resultTerms))
            }
            return results
                .filter(({ terms: resultTerms = [] }) => filter(resultTerms) && quorom(resultTerms, keywords, n, comparator))
        }
        case PHRASE_PROXIMITY: {
            return results
                .filter(({ terms: resultTerms = [] }) => filter(resultTerms) && proximity(resultTerms, keywords, n, comparator))
        }
        case PHRASE_ALL: {
            return results
                .filter(({ terms: resultTerms = [] }) => filter(resultTerms) && contains(resultTerms, keywords, comparator) !== -1)
        }
    }
}

/**
 * Evaluate a KEYWORD query, optionally anchored to the document start (^)
 * or end ($).
 * @param {Function} search
 * @param {Function} wildcard
 * @param {String} keyword
 * @param {String} keywordStartOperator - '' or '^'
 * @param {String} keywordEndOperator - '' or '$'
 * @returns {Promise}
 * @throws {Error} on a malformed keyword
 */
const handleKeyword = async (search, wildcard, keyword, keywordStartOperator, keywordEndOperator) => {
    if (!keyword.charAt || !validateKeywordOperator(keywordStartOperator) || !validateKeywordOperator(keywordEndOperator) || keyword === '*') {
        throw new Error('Malformed keyword')
    }

    const keywords = await getQueryKeywords(wildcard, keyword)
    const results = getResults(await search(keywords))

    if (keywordStartOperator === '^' || keywordEndOperator === '$') {
        return results
            .filter(({ _terms: _terms = [], _match = [] }) => {
                const start = keywordStartOperator === '^' ?
                    curriedComparator(equalityComparator, _terms[0]) : trueCb

                const end = keywordEndOperator === '$' ?
                    curriedComparator(equalityComparator, _terms[_terms.length - 1]) : trueCb

                // At least one matched keyword must sit at the anchored
                // position(s) of the document.
                return _terms.length > 0 && _match.length > 0 &&
                    _match.some((matchedKeyword) =>
                        start(matchedKeyword) && end(matchedKeyword))
            })
    }

    return results
}

/**
 * Recursively evaluate a query tree branch.
 * @param {Function} search
 * @param {Function} wildcard
 * @param {String} queryOperator
 * @param {*} a - keyword / phrase payload, or left sub-query
 * @param {*} b - operator payload, or right sub-query
 * @param {*} c - operator payload, or left NOT flag
 * @param {*} d - operator payload, or right NOT flag
 * @returns {Promise}
 * @throws {Error} on an invalid operator or malformed branch
 */
const evaluateBranch = async (search, wildcard, queryOperator, a, b, c, d) => {
    if (!validateQueryOperator(queryOperator)) {
        throw new Error(`Invalid operator ${queryOperator} in branch`)
    }

    if (queryOperator === KEYWORD) {
        return handleKeyword(search, wildcard, a, b, c)
    }

    if (queryOperator === PHRASE) {
        return handlePhrase(search, wildcard, a, b, c, d)
    }

    // NOT flags are only meaningful on AND branches.
    if ((c || d) && queryOperator !== AND) {
        throw new Error('Invalid NOT in AND query')
    }

    if (Array.isArray(a) && Array.isArray(b)) {
        const resultsA = await evaluateBranch(search, wildcard, ...a)
        const resultsB = await evaluateBranch(search, wildcard, ...b)
        return handleBranchResults(queryOperator, resultsA, resultsB, c, d)
    }

    throw new Error('Unrecognized branch')
}

/**
 * Evaluate a parsed query tree against the index.
 * @param {Function} search
 * @param {Function} wildcard
 * @param {Array} query - tree produced by the parser
 * @returns {Promise}
 */
export default (search, wildcard, query) => {
    if (!Array.isArray(query)) {
        throw new Error('Invalid query')
    }
    return evaluateBranch(search, wildcard, ...query)
}
beforeAll(async () => { 15 | index = await getIndex() 16 | await index.clear() 17 | 18 | await index.store('123', tokenize('hello world!')) 19 | await index.store('124', tokenize('cat aaa bbb ccc mouse ddd dog')) 20 | await index.store('125', tokenize('cat aaa bbb mouse ccc dog')) 21 | await index.store('126', tokenize('cat aaa mouse bbb dog')) 22 | await index.store('127', tokenize('cat aaa mouse dog')) 23 | await index.store('128', tokenize('aaa mouse cat')) 24 | await index.store('129', tokenize('hello')) 25 | 26 | await index.store('200', tokenize('1 2 3 4 5 6 7', 1)) 27 | await index.store('201', tokenize('1 4 5 7', 1)) 28 | await index.store('202', tokenize('5 6 7 1 2 3 4', 1)) 29 | await index.store('203', tokenize('7 6 5 4 3 2 1', 1)) 30 | await index.store('204', tokenize('1 2 3 4 5 6 7 8 9 10', 1)) 31 | await index.store('205', tokenize('1 2 3 4 5 6 7 10 9 8', 1)) 32 | await index.store('206', tokenize('11 12 13 14 15 16', 1)) 33 | await index.store('207', tokenize('12 13 14 15 16', 1)) 34 | 35 | await index.store('300', tokenize('Achilles catches the tortoise', 1)) 36 | await index.store('301', tokenize('Tortoise caught by Achilles', 1)) 37 | await index.store('302', tokenize('Achilles caught the green tortoise', 1)) 38 | await index.store('303', tokenize('rock paper scissor', 1)) 39 | await index.store('304', tokenize('rock paper etc scissor', 1)) 40 | }) 41 | 42 | afterAll(async () => { 43 | await index.clear() 44 | index.close() 45 | }) 46 | 47 | const mapIds = (result = []) => result.map(({ id }) => id) 48 | const sort = (array = []) => { 49 | array.sort() 50 | return array 51 | } 52 | 53 | const search = async (string) => sort(mapIds(await query(index.search, index.wildcard, parse(string)))) 54 | 55 | it('should return results for a simple word', async () => { 56 | expect(await search('hello')) 57 | .toEqual(sort(['123', '129'])) 58 | }) 59 | 60 | it('should return results for a wildcard query', async () => { 61 | expect(await search('he*')) 62 | 
.toEqual(sort(['123', '129'])) 63 | }) 64 | 65 | it('should return results for a wildcard query with AND', async () => { 66 | expect(await search('ro* *sso*')) 67 | .toEqual(sort(['303', '304'])) 68 | }) 69 | 70 | it('should return results for a wildcard query with OR', async () => { 71 | expect(await search('to* | *ell*')) 72 | .toEqual(sort(['300', '301', '302', '123', '129'])) 73 | }) 74 | 75 | it('should return results for a AND query', async () => { 76 | expect(await search('hello world')) 77 | .toEqual(sort(['123'])) 78 | }) 79 | 80 | it('should return results for a AND NOT query', async () => { 81 | expect(await search('hello !world')) 82 | .toEqual(sort(['129'])) 83 | }) 84 | 85 | it('should return results for a AND NOT query with wildcard', async () => { 86 | expect(await search('he* !wor*')) 87 | .toEqual(sort(['129'])) 88 | }) 89 | 90 | it('should return results for keyword modifiers query', async () => { 91 | expect(await search('^hello$')) 92 | .toEqual(sort(['129'])) 93 | expect(await search('^aaa')) 94 | .toEqual(sort(['128'])) 95 | expect(await search('cat$')) 96 | .toEqual(sort(['128'])) 97 | }) 98 | 99 | it('should return results for a OR query', async () => { 100 | expect(await search('hello | cat')) 101 | .toEqual(sort(['123', '129', '124', '125', '126', '127', '128'])) 102 | }) 103 | 104 | it('should return results for a PHRASE query with single wildcard', async () => { 105 | expect(await search('"cat * mouse"')) 106 | .toEqual(sort(['126', '127'])) 107 | }) 108 | 109 | it('should return results for a PHRASE query with wildcard', async () => { 110 | expect(await search('"ca* * *mou*"')) 111 | .toEqual(sort(['126', '127'])) 112 | }) 113 | 114 | it('should return results for a PHRASE query with phrase modifier', async () => { 115 | expect(await search('"^he*$"')) 116 | .toEqual(sort(['129'])) 117 | expect(await search('"^hello$"')) 118 | .toEqual(sort(['129'])) 119 | expect(await search('"^5 6"')) 120 | .toEqual(sort(['202'])) 121 | expect(await 
search('"6 7$"')) 122 | .toEqual(sort(['200'])) 123 | expect(await search('"^12 13 14 15 16$"')) 124 | .toEqual(sort(['207'])) 125 | }) 126 | 127 | it('should return results for a PHRASE query', async () => { 128 | expect(await search('"cat aaa mouse"')) 129 | .toEqual(['126', '127']) 130 | }) 131 | 132 | it('should return results for a PROXIMITY query', async () => { 133 | expect(await search('"achilles tortoise"~3')) 134 | .toEqual(['300', '301']) 135 | expect(await search('"rock paper scissor"~1')) 136 | .toEqual(['303']) 137 | }) 138 | 139 | it('should return results for a QUOROM query', async () => { 140 | expect(await search('"achilles tortoise"/2')) 141 | .toEqual(sort(['300', '301', '302'])) 142 | }) 143 | 144 | it('should return results for a QUOROM query with wildcard', async () => { 145 | expect(await search('"ach* tortoise"/2')) 146 | .toEqual(sort(['300', '301', '302'])) 147 | }) 148 | 149 | it('should return empty results for a QUOROM query', async () => { 150 | expect(await search('"achilles tortoise rock"/3')) 151 | .toEqual([]) 152 | }) 153 | 154 | it('should return results for a BEFORE and PHRASE query', async () => { 155 | expect(await search('cat << "aaa mouse"')) 156 | .toEqual(['126', '127']) 157 | }) 158 | 159 | it('should return results for a BEFORE and PHRASE query with wildcard', async () => { 160 | expect(await search('ca* << "aa* *ous*"')) 161 | .toEqual(['126', '127']) 162 | }) 163 | 164 | it('should return results for a PHRASE and BEFORE query', async () => { 165 | expect(await search('"aaa mouse" << dog')) 166 | .toEqual(['126', '127']) 167 | }) 168 | 169 | it('should return results for a BEFORE query', async () => { 170 | expect(await search('mouse << bbb')) 171 | .toEqual(['126']) 172 | expect(await search('mouse << cat')) 173 | .toEqual(['128']) 174 | }) 175 | 176 | it('should return results for a complex BEFORE query', async () => { 177 | expect(await search('1 << 4 << 5 << (6 | 7)')) 178 | .toEqual(sort(['200', '201', '204', 
'205'])) 179 | }) 180 | 181 | it('should return results for a complex query', async () => { 182 | expect(await search('1 << ((4 << (5 << ((6 !10) | (7 !10) | "8 9 10"))))')) 183 | .toEqual(sort(['200', '201', '204'])) 184 | expect(await search('(1 << 4 << 5) << ((6 !10)| (7 !10) | "8 9 10")')) 185 | .toEqual(sort(['200', '201', '204'])) 186 | }) 187 | 188 | it('should return results for a OR, keyword and AND query', async () => { 189 | expect(await search('hello | (cat ddd)')) 190 | .toEqual(sort(['123', '129', '124'])) 191 | }) 192 | 193 | it('should return results for a OR, AND and AND query', async () => { 194 | expect(await search('(hello world) | (cat ddd)')) 195 | .toEqual(sort(['123', '124'])) 196 | }) 197 | 198 | it('should return empty results for a AND and AND query', async () => { 199 | expect(await search('hello world cat ddd')) 200 | .toEqual([]) 201 | }) 202 | 203 | it('should return results for a AND, AND and OR query', async () => { 204 | expect(await search('cat ddd (cat | ddd)')) 205 | .toEqual(['124']) 206 | }) 207 | }) 208 | -------------------------------------------------------------------------------- /src/store/keyValueStore.js: -------------------------------------------------------------------------------- 1 | import { request } from '../helper/idb' 2 | import sizeof from '../helper/sizeof' 3 | 4 | /** 5 | * Enhance a key-value store with transformer functions. 
 * Keys are mapped through `property` and values through `serialize` /
 * `deserialize` (e.g. encryption — the wrapped value is read back via
 * `deserialize`).
 * @param {Number} id - index instance id, passed to every transformer
 * @param {Object} store - underlying key-value store to wrap
 * @param {Object} transformers
 * @param {Function} transformers.property - (id, key) -> storage key
 * @param {Function} transformers.serialize - (id, key, value) -> stored value
 * @param {Function} transformers.deserialize - (id, key, storedValue) -> value
 * @returns {Object} store with the same interface, transformed I/O
 */
export const withTransformers = (id, store, { property, serialize, deserialize }) => {
    return {
        ...store,
        put: (tx, value, key) => {
            return store.put(tx, serialize(id, key, value), property(id, key))
        },
        get: async (tx, key) => {
            const encryptedValue = await store.get(tx, property(id, key))
            return deserialize(id, key, encryptedValue)
        },
        remove: (tx, key) => {
            return store.remove(tx, property(id, key))
        }
    }
}

/**
 * Create a idb key-value store with transaction support.
 * Every method takes an open IDBTransaction as first argument; no
 * transactions are created here.
 * @param {String} tableName
 * @returns {Object}
 */
export default (tableName = '') => {
    return {
        name: tableName,
        count: (tx) => {
            return request(tx.objectStore(tableName).count())
        },
        // Approximate byte size of the table, summed over all entries
        // with a cursor walk.
        size: (tx) => {
            let size = 0
            return new Promise((resolve, reject) => {
                const request = tx.objectStore(tableName).openCursor()
                request.onerror = () => reject(request.error)
                request.onsuccess = (event) => {
                    const cursor = event.target.result
                    if (!cursor) {
                        return resolve(size)
                    }
                    size += sizeof(cursor.value) + sizeof(cursor.key)
                    cursor.continue()
                }
            })
        },
        put: (tx, value, key) => {
            return tx.objectStore(tableName).put(value, key)
        },
        get: (tx, key) => {
            return request(tx.objectStore(tableName).get(key))
        },
        remove: (tx, key) => {
            return tx.objectStore(tableName).delete(key)
        },
        clear: (tx) => {
            return tx.objectStore(tableName).clear()
        }
    }
}
import { READWRITE, transaction } from '../helper/idb'

/**
 * Metadata database helper.
 * Simple key-value access plus an auto-incrementing id counter.
 * @param {Object} store - key-value store (interface: get(tx, key), put(tx, value, key), remove(tx, key))
 * @param {Function} getTransaction
 * @returns {Object}
 */
export default (store, getTransaction) => {
    const table = [store.name]

    /**
     * Get a key from the table.
     * @param {String} key
     * @returns {Promise}
     */
    const get = async (key) => {
        return store.get(await getTransaction(table), key)
    }

    /**
     * Set a key in the table.
     * Resolves when the transaction completes.
     * @param {String} key
     * @param {*} value
     * @returns {Promise}
     */
    const set = async (key, value) => {
        const tx = await getTransaction(table, READWRITE)
        const promise = transaction(tx)
        store.put(tx, value, key)
        return promise
    }

    /**
     * Get and set the next incrementing ID number.
     * Starts at 0 the first time a key is used.
     * @param {String} key
     * @returns {Promise}
     */
    const getAndSetId = async (key) => {
        const tx = await getTransaction(table, READWRITE)
        // Bug fix: the store interface is get(tx, key) — the arguments were
        // swapped here, so the previous counter value was never read and the
        // id restarted at 0 on every call.
        const value = await store.get(tx, key)
        const newValue = (value === undefined ? -1 : value) + 1
        store.put(tx, newValue, key)
        return newValue
    }

    /**
     * Remove a key from the table.
     * Resolves when the transaction completes.
     * @param {String} key
     * @returns {Promise}
     */
    const remove = async (key) => {
        const tx = await getTransaction(table, READWRITE)
        const promise = transaction(tx)
        store.remove(tx, key)
        return promise
    }

    return {
        get,
        set,
        getAndSetId,
        remove,
        name: store.name,
        clear: store.clear
    }
}
 * Handles all logic around storing keywords.
 * Term lists are persisted variable-byte encoded (see vbEncode/vbDecode).
 * @param {Object} store - key-value store for the positions table
 * @param {Function} getTransaction
 * @returns {Object}
 */
export default (store, getTransaction) => {
    const table = store.name

    /**
     * Get the terms list for an id using an existing transaction.
     * vbDecode restores the raw integer list from the stored bytes.
     * @param {Number} id
     * @param {IDBTransaction} tx
     * @returns {Promise<Array>}
     */
    const getList = async (id, tx) => {
        const result = await store.get(tx, id)
        return vbDecode(result)
    }

    /**
     * Get the terms list for an id (opens its own read transaction).
     * @param {Number} id
     * @returns {Promise<Array>}
     */
    const get = async (id) => {
        const tx = await getTransaction(table)
        return getList(id, tx)
    }

    /**
     * Get the terms lists for multiple ids in one transaction.
     * @param {Array} ids
     * @returns {Promise<Array[]>}
     */
    const getBulk = async (ids) => {
        const tx = await getTransaction(table)
        return Promise.all(ids.map((id) => getList(id, tx)))
    }

    /**
     * Set the terms list for an id.
47 | * @param {Number} id 48 | * @param {Array} terms 49 | * @return {Promise} 50 | */ 51 | const insert = async (id, terms) => { 52 | const tx = await getTransaction(table, READWRITE) 53 | return request(store.put(tx, vbEncode(terms), id)) 54 | } 55 | 56 | /** 57 | * 58 | * @param {Number} id 59 | * @return {Promise} 60 | */ 61 | const remove = async (id) => { 62 | const tx = await getTransaction(table, READWRITE) 63 | const promise = transaction(tx) 64 | store.remove(tx, id) 65 | return promise 66 | } 67 | 68 | return { 69 | insert, 70 | get, 71 | getBulk, 72 | remove, 73 | name: store.name, 74 | count: store.count, 75 | size: store.size, 76 | clear: store.clear 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/store/postingsStore.js: -------------------------------------------------------------------------------- 1 | import { insertIntoGapsArray, removeFromGapsArray } from '../helper/array' 2 | import { vbDecode, vbEncode } from '../helper/variableByteCodes' 3 | import { READWRITE, request, transaction } from '../helper/idb' 4 | 5 | /** 6 | * Postings database helper. 7 | * Handles all logic around storing keywords. 8 | * @param {Object} store 9 | * @param {Function} getTransaction 10 | * @returns {Object} 11 | */ 12 | export default (store, getTransaction) => { 13 | /** 14 | * Get the posting list for a term 15 | * @param {Number} term 16 | * @param {IDBTransaction} tx 17 | * @returns {Promise} 18 | */ 19 | const getList = async (term, tx) => { 20 | const result = await store.get(tx, term) 21 | return vbDecode(result) 22 | } 23 | 24 | /** 25 | * Set the posting list for a term. 26 | * @param {Number} term 27 | * @param {Array} list 28 | * @param {IDBTransaction} tx 29 | */ 30 | const setList = (term, list, tx) => { 31 | return store.put(tx, vbEncode(list), term) 32 | } 33 | 34 | /** 35 | * Insert an id to the postings list. 
36 | * @param {Number} term 37 | * @param {Number} id 38 | * @param {IDBTransaction} tx 39 | */ 40 | const insert = async (term, id, tx) => { 41 | const result = await getList(term, tx) 42 | const newValues = insertIntoGapsArray(result, id) 43 | 44 | // Only allow unique links 45 | if (!newValues) { 46 | return 47 | } 48 | 49 | setList(term, newValues, tx) 50 | } 51 | const insert2 = (term, id, result, tx) => { 52 | const newValues = insertIntoGapsArray(result, id) 53 | 54 | // Only allow unique links 55 | if (!newValues) { 56 | return 57 | } 58 | 59 | return setList(term, newValues, tx) 60 | } 61 | 62 | 63 | /** 64 | * Get the matching posting lists. 65 | * @param {Number} term 66 | * @returns {Promise} 67 | */ 68 | const get = async (term) => { 69 | const tx = await getTransaction(store.name) 70 | return getList(term, tx) 71 | } 72 | 73 | /** 74 | * Get the matching posting lists. 75 | * @param {Array} terms 76 | * @returns {Promise} 77 | */ 78 | const getBulk = async (terms) => { 79 | const tx = await getTransaction(store.name) 80 | const postingLists = await Promise.all(terms.map((term) => getList(term, tx))) 81 | 82 | return postingLists.reduce((acc, list, i) => { 83 | let id = 0 84 | 85 | for (let j = 0; j < list.length; ++j) { 86 | id += list[j] // Stored as gap array 87 | 88 | const idx = acc.ids.indexOf(id) 89 | const term = terms[i] 90 | 91 | if (idx === -1) { 92 | acc.ids.push(id) 93 | acc.idsToTerms.push([term]) 94 | } else { 95 | acc.idsToTerms[idx].push(term) 96 | } 97 | } 98 | 99 | return acc 100 | }, { ids: [], idsToTerms: [], termsToIds: postingLists }) 101 | } 102 | 103 | /** 104 | * Remove a keyword-id mapping. 105 | * If it was the only id, remove the keyword completely. 
106 | * @param {Number} term 107 | * @param {Number} id 108 | * @param {IDBTransaction} tx 109 | * @returns {Promise} 110 | */ 111 | const removeLink = async (term, id, tx) => { 112 | const oldValues = await getList(term, tx) 113 | 114 | if (oldValues.length === 0) { 115 | return true 116 | } 117 | 118 | const newValues = removeFromGapsArray(oldValues, id) 119 | if (!newValues) { 120 | return false 121 | } 122 | 123 | // If it's empty, remove the keyword. 124 | if (newValues.length === 0) { 125 | store.remove(tx, term) 126 | return true 127 | } 128 | 129 | setList(term, newValues, tx) 130 | return false 131 | } 132 | 133 | /** 134 | * Remove a list of keyword-id mapping 135 | * @param {Array} terms 136 | * @param {Number} id 137 | * @return {Promise} 138 | */ 139 | // eslint-disable-next-line no-unused-vars 140 | const removeBulk = async (terms, id) => { 141 | const tx = await getTransaction(store.name, READWRITE) 142 | const promise = transaction(tx) 143 | const result = [] 144 | terms.forEach(async (term, i) => 145 | removeLink(term, id, tx) 146 | .then((value) => result[i] = value) 147 | ) 148 | await promise 149 | return result 150 | } 151 | 152 | const removeBulk2 = async (terms, id) => { 153 | const tx = await getTransaction(store.name, READWRITE) 154 | const postingLists = await Promise.all(terms.map((term) => getList(term, tx))) 155 | const result = [] 156 | let req 157 | for (let i = 0; i < terms.length; ++i) { 158 | const term = terms[i] 159 | const oldValues = postingLists[i] 160 | 161 | const newValues = removeFromGapsArray(oldValues, id) 162 | if (!newValues) { 163 | result[i] = true 164 | continue 165 | } 166 | 167 | // If it's empty, remove the keyword. 
            if (newValues.length === 0) {
                result[i] = true
                req = store.remove(tx, term)
                continue
            }

            result[i] = false
            req = setList(term, newValues, tx)
        }

        // No requests were issued (every id was already absent).
        if (!req) {
            return result
        }

        // Requests issued on one IndexedDB transaction complete in order,
        // so waiting for the last one issued is enough.
        await request(req)
        return result
    }

    // eslint-disable-next-line no-unused-vars
    const insertBulk = async (terms, id) => {
        const tx = await getTransaction(store.name, READWRITE)
        const promise = transaction(tx)
        terms.forEach((term) => insert(term, id, tx))
        return promise
    }

    /**
     * Insert bulk, only waits for the last request rather than the transaction.
     * It's supposedly faster, but data consistency guarantees?
     * @param {Array} terms
     * @param {Number} id
     * @returns {Promise}
     */
    const insertBulk2 = async (terms, id) => {
        if (terms.length === 0) {
            return
        }

        const tx = await getTransaction(store.name, READWRITE)
        const postingLists = await Promise.all(terms.map((term) => getList(term, tx)))

        let req = undefined
        for (let i = 0; i < terms.length; ++i) {
            const insertRequest = insert2(terms[i], id, postingLists[i], tx)

            // insert2 returns undefined when the id was already linked.
            if (insertRequest) {
                req = insertRequest
            }
        }

        if (!req) {
            return
        }

        return request(req)
    }

    // The *2 variants are the ones actually exposed.
    return {
        get,
        insertBulk: insertBulk2,
        getBulk,
        removeBulk: removeBulk2,
        name: store.name,
        count: store.count,
        size: store.size,
        clear: store.clear
    }
}
--------------------------------------------------------------------------------
/src/store/postingsStore.spec.js:
--------------------------------------------------------------------------------
import { open } from '../helper/idb'
import createPostingsStore from './postingsStore'
import createKeyValueStore from './keyValueStore'
import {
getArrayGaps } from '../helper/array' 5 | 6 | const tableName = 'postings' 7 | 8 | const upgradeDb = (db, oldVersion) => { 9 | switch (oldVersion) { 10 | case 0: { 11 | db.createObjectStore(tableName) 12 | break 13 | } 14 | } 15 | } 16 | 17 | describe('postings', () => { 18 | let db 19 | let store 20 | let getTransaction 21 | 22 | const createGetTransaction = (db) => (tables, mode) => db.transaction(tables, mode) 23 | 24 | beforeAll(async () => { 25 | db = await open(indexedDB, 'postings', 1, upgradeDb) 26 | 27 | getTransaction = createGetTransaction(db) 28 | 29 | store = createPostingsStore( 30 | createKeyValueStore(tableName), 31 | getTransaction 32 | ) 33 | }) 34 | 35 | afterAll(async () => { 36 | const tx = getTransaction([tableName], 'readwrite') 37 | await store.clear(tx) 38 | db.close() 39 | }) 40 | 41 | it('should handle multiple calls with proper locking', async () => { 42 | const promises = [] 43 | const expectation = [] 44 | for (let i = 0; i < 1000; ++i) { 45 | promises.push(store.insertBulk([1,2,3], i)) 46 | expectation.push(i) 47 | } 48 | expect(getArrayGaps(await store.get(1))).toEqual(expectation) 49 | expect(getArrayGaps(await store.get(2))).toEqual(expectation) 50 | expect(getArrayGaps(await store.get(3))).toEqual(expectation) 51 | }) 52 | }) 53 | -------------------------------------------------------------------------------- /src/store/transposeStore.js: -------------------------------------------------------------------------------- 1 | import { READWRITE, request } from '../helper/idb' 2 | 3 | export default (aStore, bStore, getTransaction) => { 4 | const idKey = '*_=% id %=_*' 5 | const table = [aStore.name, bStore.name] 6 | 7 | const from = async (bs = []) => { 8 | const tx = await getTransaction(table) 9 | return Promise.all(bs.map((b) => aStore.get(tx, b))) 10 | } 11 | 12 | const bulk = async (as = []) => { 13 | const tx = await getTransaction(table, READWRITE) 14 | const initialId = (await bStore.get(tx, idKey)) || 1 15 | let id = 
initialId
        // Fetch any existing ids for the inputs in one batch.
        const result = await Promise.all(as.map((a) => bStore.get(tx, a)))
        const seen = new Map()
        let requestA
        let requestB

        for (let i = 0; i < result.length; ++i) {
            const iid = result[i]

            // Already has an id: keep it.
            if (iid) {
                continue
            }

            // Duplicates... a value seen earlier in this batch reuses the id
            // assigned on first sight.
            const a = as[i]
            if (seen.has(a)) {
                result[i] = seen.get(a)
                continue
            }

            const newId = id++

            seen.set(a, newId)
            result[i] = newId

            // Record the pair in both stores so it resolves either way.
            requestA = aStore.put(tx, a, newId)
            requestB = bStore.put(tx, newId, a)
        }

        // Persist the next free id whenever new ids were handed out.
        if (id !== initialId) {
            requestB = bStore.put(tx, id, idKey)
        }

        // Only wait when something was written; the last requests settle
        // after all earlier ones on the same transaction.
        if (requestA) {
            await Promise.all([request(requestA), request(requestB)])
        }

        return result
    }

    // Aggregate a stat ('count' or 'size') across both stores.
    const stat = (type = 'count') => async (tx) => {
        const result = await Promise.all([aStore[type](tx), bStore[type](tx)])
        return result.reduce((agg, cur) => agg + cur, 0)
    }

    return {
        name: table,
        bulk,
        from,
        count: stat('count'),
        size: stat('size'),
        clear: (tx) => {
            aStore.clear(tx)
            bStore.clear(tx)
        },
    }
}
--------------------------------------------------------------------------------
/src/store/transposeStore.spec.js:
--------------------------------------------------------------------------------
import { open } from '../helper/idb'
import createTransposeStore from './transposeStore'
import createKeyValueStore from './keyValueStore'

const tableName = 'lexicon'
const tableNameI = 'lexicon-i'

const upgradeDb = (db, oldVersion) => {
    switch (oldVersion) {
        case 0: {
            db.createObjectStore(tableName)
            db.createObjectStore(tableNameI)
            break
        }
    }
}

describe('transpose', () => {
    let db
    let store
    let getTransaction

    const createGetTransaction = (db) => (tables, mode) => db.transaction(tables, mode)

    beforeAll(async
() => { 26 | db = await open(indexedDB, 'transpose', 1, upgradeDb) 27 | 28 | getTransaction = createGetTransaction(db) 29 | 30 | store = createTransposeStore( 31 | createKeyValueStore(tableName), 32 | createKeyValueStore(tableNameI), 33 | getTransaction 34 | ) 35 | }) 36 | 37 | afterAll(async () => { 38 | const tx = getTransaction([tableName, tableNameI], 'readwrite') 39 | await store.clear(tx) 40 | db.close() 41 | }) 42 | 43 | it('should transpose', async () => { 44 | expect(await store.bulk(['123'])).toEqual([1]) 45 | expect(await store.bulk(['abc', '123', 'def'])).toEqual([2, 1, 3]) 46 | expect(await store.bulk(['xxx', 'xxx'])).toEqual([4, 4]) 47 | expect(await store.bulk(['123'])).toEqual([1]) 48 | }) 49 | }) 50 | -------------------------------------------------------------------------------- /src/store/wildcardStore.js: -------------------------------------------------------------------------------- 1 | import { extractQueryTokenPadding, splitTokenPadding } from '../helper/wildcard' 2 | import { getArrayGaps, getGapsArray, unique } from '../helper/array' 3 | import { READWRITE, request, transaction } from '../helper/idb' 4 | import { vbDecode, vbEncode } from '../helper/variableByteCodes' 5 | 6 | /** 7 | * Split a list of terms to a list of splitted tokens -> term id map. 8 | * @param {Array} stringTerms 9 | * @param {Array} terms 10 | * @returns {Object} 11 | */ 12 | const splitToMap = (stringTerms, terms) => { 13 | return stringTerms.reduce((acc, stringTerm, i) => { 14 | const tokens = splitTokenPadding(stringTerm) 15 | tokens.forEach((token) => { 16 | if (!acc[token]) { 17 | acc[token] = [] 18 | } 19 | acc[token].push(terms[i]) 20 | }) 21 | return acc 22 | }, {}) 23 | } 24 | 25 | /** 26 | * Wildcard database helper. 27 | * Handles all logic around storing and finding wildcards. 
 * @param {Object} store
 * @param {Function} getTransaction
 * @returns {Object}
 */
export default (store, getTransaction) => {
    const table = store.name

    /**
     * Read the term-id list stored for a token.
     * Lists are persisted as variable-byte-encoded gap arrays.
     * @param {String} token
     * @param {IDBTransaction} tx
     * @returns {Promise}
     */
    const getList = async (token, tx) => {
        const result = await store.get(tx, token)
        return getArrayGaps(vbDecode(result))
    }

    /**
     * Write a term-id list for a token (gap-encode, then vb-encode).
     * @param {String} token
     * @param {Array} list
     * @param {IDBTransaction} tx
     */
    const setList = (token, list, tx) => {
        return store.put(tx, vbEncode(getGapsArray(list)), token)
    }

    /**
     * Insert a token-keyword mapping.
     * @param {String} token
     * @param {Array} terms
     * @param {IDBTransaction} tx
     */
    const insertLink = async (token = '', terms = [], tx) => {
        const oldValues = await getList(token, tx)
        const newValues = unique(oldValues.concat(terms))
        setList(token, newValues, tx)
    }

    /**
     * Store wildcards <-> terms
     * @param {Array} stringTerms
     * @param {Array} terms
     * @returns {Promise}
     */
    // eslint-disable-next-line no-unused-vars
    const insertBulk = async (stringTerms, terms) => {
        const tx = await getTransaction(table, READWRITE)
        const map = splitToMap(stringTerms, terms)
        const promise = transaction(tx)
        Object.keys(map)
            .forEach((token) => insertLink(token, map[token], tx))
        return promise
    }

    // Variant of insertLink that takes the already-fetched list and returns
    // the put request so the caller can wait on it.
    const insertLink2 = (token = '', oldValues, terms = [], tx) => {
        const newValues = unique(oldValues.concat(terms))
        return setList(token, newValues, tx)
    }

    // Variant of insertBulk that pre-fetches all lists and waits only for
    // the last issued request rather than the whole transaction.
    const insertBulk2 = async (stringTerms, terms) => {
        const map = splitToMap(stringTerms, terms)
        const keys = Object.keys(map)
        if (!keys.length) {
            return
        }
        const tx = await getTransaction(table, READWRITE)
        const links = await Promise.all(keys.map((key) =>
            getList(key, tx)))

        let req

        for (let i = 0; i < links.length; ++i) {
            req = insertLink2(keys[i], links[i], map[keys[i]], tx)
        }

        // Defensive: keys is non-empty here, so req should always be set.
        if (!req) {
            return
        }

        // Waiting for the last request is enough; requests on one
        // transaction complete in order.
        return request(req)
    }

    /**
     * Get a list of term ids from a wildcard pattern.
     * @param {String} query Wildcard pattern
     * @returns {Promise>}
     */
    const get = async (query) => {
        const queryToken = extractQueryTokenPadding(query)
        const tx = await getTransaction(table)
        return getList(queryToken, tx)
    }

    /**
     * Remove a keyword-id mapping.
     * If it was the only id, remove the keyword completely.
     * @param {String} token
     * @param {Array} terms
     * @param {IDBTransaction} tx
     * @returns {Promise}
     */
    const removeLink = async (token = '', terms = [], tx) => {
        const oldValues = await getList(token, tx)
        const newValues = oldValues.filter((aTerm) => !terms.some((term) => term === aTerm))
        if (newValues.length === 0) {
            store.remove(tx, token)
            return
        }
        setList(token, newValues, tx)
    }

    /**
     * Remove a list of keyword-id mapping
     * @param {Array} stringTerms
     * @param {Array} terms
     * @returns {Promise}
     */
    // eslint-disable-next-line no-unused-vars
    const removeBulk = async (stringTerms = [], terms) => {
        const map = splitToMap(stringTerms, terms)
        const tx = await getTransaction(table, READWRITE)
        const promise = transaction(tx)
        Object.keys(map)
            .forEach((token) => removeLink(token, map[token], tx))
        return promise
    }

    // Variant of removeBulk that pre-fetches all lists and waits only for
    // the last issued request.
    // NOTE(review): the transaction is opened before the empty-keys check,
    // whereas insertBulk2 checks first -- harmless but inconsistent.
    const removeBulk2 = async (stringTerms = [], terms) => {
        const map = splitToMap(stringTerms, terms)
        const tx = await getTransaction(table, READWRITE)
        const keys = Object.keys(map)
        if (!keys.length) {
            return
        }
        const lists = await Promise.all(keys.map((key)
            => getList(key, tx)))
        let req
        for (let i = 0; i < keys.length; ++i) {
            const token = keys[i]
            const oldValues = lists[i]
            const tokenTerms = map[token]

            // Drop the removed term ids from this token's list.
            const newValues = oldValues.filter((aTerm) => !tokenTerms.some((term) => term === aTerm))
            // If nothing is left, remove the token completely.
            if (newValues.length === 0) {
                req = store.remove(tx, token)
                continue
            }
            req = setList(token, newValues, tx)
        }
        // keys is non-empty here, so the loop always issued at least one
        // request; waiting for the last one is enough.
        return request(req)
    }

    // The *2 variants are the ones actually exposed.
    return {
        insertBulk: insertBulk2,
        get,
        removeBulk: removeBulk2,
        name: store.name,
        count: store.count,
        size: store.size,
        clear: store.clear
    }
}
--------------------------------------------------------------------------------