├── .gitignore ├── filter.js ├── .travis.yml ├── lib ├── value-mapper │ └── default.js ├── util │ ├── values-or-keys.js │ ├── property-tree.js │ ├── statistics.js │ ├── gc.js │ ├── range.js │ └── query-plan.js ├── streams │ ├── map-filter.js │ ├── view.js │ ├── query-filter.js │ └── search.js └── index.js ├── appveyor.yml ├── select.js ├── test ├── util │ ├── tape-debug.js │ └── test.js ├── select.js ├── query-filter.js ├── indexing.js ├── gc.js └── search.js ├── search.js ├── package.json ├── index.js └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | /todo* 3 | -------------------------------------------------------------------------------- /filter.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./lib/streams/query-filter') 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '0.10' 4 | - 'iojs' 5 | -------------------------------------------------------------------------------- /lib/value-mapper/default.js: -------------------------------------------------------------------------------- 1 | var deep = require('deep-dot') 2 | 3 | module.exports = function defaultMap(key, value, props) { 4 | return props.map(deep.bind(null, value)) 5 | } 6 | -------------------------------------------------------------------------------- /lib/util/values-or-keys.js: -------------------------------------------------------------------------------- 1 | var through2 = require('through2').obj 2 | 3 | module.exports = function (stream, opts) { 4 | var values = !opts || opts.values!==false 5 | , keys = !opts || opts.keys!==false 6 | 7 | if (values && keys) return stream 8 | 9 | return stream.pipe(through2(function(kv, _, next){ 10 | next(null, values ? 
kv.value : kv.key) 11 | })) 12 | } 13 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: "{build}" 2 | skip_tags: true 3 | shallow_clone: true 4 | build: off 5 | 6 | environment: 7 | matrix: 8 | - nodejs_version: "0.10" 9 | - nodejs_version: "1" 10 | 11 | install: 12 | - ps: Install-Product node $env:nodejs_version 13 | - set CI=true 14 | - npm i tape -g 15 | - npm install 16 | 17 | test_script: 18 | - node --version 19 | - npm --version 20 | - npm test 21 | -------------------------------------------------------------------------------- /lib/streams/map-filter.js: -------------------------------------------------------------------------------- 1 | var range = require('../util/range') 2 | , through2 = require('through2').obj 3 | 4 | module.exports = function (mapFn, keyRange, subset){ 5 | var match = range.matcher(range.normalize(subset.length, keyRange)) 6 | 7 | return through2(function(kv, _, next){ 8 | var indexKey = mapFn(kv.key, kv.value, subset) 9 | 10 | if (indexKey != null && match(indexKey)) next(null, kv) 11 | else next() 12 | }) 13 | } 14 | -------------------------------------------------------------------------------- /lib/streams/view.js: -------------------------------------------------------------------------------- 1 | var through2 = require('through2').obj 2 | 3 | module.exports = function(db, opts) { 4 | var values = opts && (opts.values && !opts.keys) 5 | , keys = opts && (opts.keys && !opts.values) 6 | 7 | return through2(function(key, _, next){ 8 | db.get(key, function(err, entity){ 9 | if (err) return next() 10 | 11 | if (values) return next(null, entity) 12 | if (keys) return next(null, key) 13 | 14 | next(null, {key: key, value: entity}) 15 | }) 16 | }) 17 | } 18 | -------------------------------------------------------------------------------- /lib/util/property-tree.js: 
-------------------------------------------------------------------------------- 1 | exports.add = function(tree, index) { 2 | var props = index.properties, node = tree 3 | 4 | for(var i=0, l=props.length; i 0 && ops.slice(-added)) 43 | } 44 | 45 | // A preAll() hook would be nice 46 | gc.clear().forEach(add) 47 | }) 48 | 49 | db.post(function(op){ 50 | if (op.type != 'del') return 51 | 52 | var add = gc.start(op.key) 53 | , done = after(Object.keys(db.indexes).length, function(){ 54 | gc.end(op.key) 55 | db.emit('gc', op.key) 56 | }) 57 | 58 | for(var k in db.indexes) db.indexes[k].deleted(op.key, add, done) 59 | }) 60 | } 61 | -------------------------------------------------------------------------------- /lib/util/statistics.js: -------------------------------------------------------------------------------- 1 | var schedule = require('level-schedule') 2 | , through2 = require('through2').obj 3 | , sigmund = require('sigmund') 4 | , debug = require('debug')('level-scout') 5 | , HyperLogLog = require('hyperloglog32') 6 | , bytespace = require('bytespace') 7 | 8 | // Update statistics one index at a time 9 | module.exports = function(db, indexes) { 10 | var interval = 300 // seconds 11 | 12 | // TODO: key encoding should just be utf8? 
13 | var sub = bytespace(db, 'scout-stat-jobs', { 14 | valueEncoding: 'utf8', 15 | keyEncoding: bytespace.bytewise 16 | }) 17 | 18 | var jobs = schedule(sub) 19 | 20 | jobs.job('cardinality', function (payload, done) { 21 | var names = Object.keys(indexes), job = this 22 | 23 | function next() { 24 | if (!names.length) return end() 25 | 26 | var name = names.pop() 27 | if (!indexes[name]) return next() 28 | 29 | updateIndex(indexes[name], next) 30 | } 31 | 32 | function end() { 33 | job.run('cardinality', Date.now() + interval) 34 | done() 35 | } 36 | 37 | next() 38 | }) 39 | 40 | jobs.run('cardinality', Date.now() + interval); 41 | } 42 | 43 | function updateIndex(index, done) { 44 | var h = HyperLogLog(12) // Standard error is 1.625% 45 | var length = 0 // Approximate number of total rows 46 | 47 | function end(err) { 48 | if (err) debug(err) 49 | done && done() 50 | done = null 51 | } 52 | 53 | index.createKeyStream().pipe(through2(function(key, _, next){ 54 | h.add(sigmund(key.slice(0,-1))) 55 | length++ 56 | next() 57 | })).on('end', function() { 58 | 59 | // TODO: maybe save all stats in one place 60 | index.saveStatistics({ 61 | cardinality: h.count(), 62 | length: length 63 | }, end) 64 | 65 | }).on('error', end) 66 | } 67 | -------------------------------------------------------------------------------- /test/util/test.js: -------------------------------------------------------------------------------- 1 | var index = require('../../index') 2 | , search = require('../../search') 3 | , bytespace = require('bytespace') 4 | , xtend = require('xtend') 5 | , tape = require('tape') 6 | , levelup = require('levelup') 7 | , memdown = require('memdown') 8 | , rimraf = require('rimraf') 9 | , mkdirp = require('mkdirp') 10 | , tmpdir = require('osenv').tmpdir() 11 | , path = require('path') 12 | 13 | require('./tape-debug') 14 | 15 | var defs = { valueEncoding: 'json' } 16 | , num = 0 17 | , down = disk('level-scout') 18 | , mem = levelup('mem', { db: memdown }) 19 | 
20 | module.exports = test.bind(null, tape) 21 | module.exports.skip = test.bind(null, tape.skip) 22 | module.exports.only = test.bind(null, tape.only) 23 | 24 | function test(method, name, opts, body) { 25 | if (typeof opts == 'function') body = opts, opts = {} 26 | 27 | // Test without database 28 | if (opts === false) return method(name, body) 29 | 30 | method(name, function(t){ 31 | t.test('[memdown] '+name, function(t){ 32 | body(t, create(mem, opts)) 33 | }) 34 | 35 | down && t.test('[leveldown] '+name, function(t){ 36 | body(t, create(down, opts)) 37 | }) 38 | }) 39 | } 40 | 41 | function create(db, opts) { 42 | var sdb = bytespace(db, String(++num), xtend(defs, opts)) 43 | 44 | if (opts.index) index.install(sdb) 45 | if (opts.search) search.install(sdb) 46 | 47 | return sdb 48 | } 49 | 50 | function disk (name, opts, cb) { 51 | opts || (opts = {}) 52 | 53 | try { 54 | var leveldown = require('leveldown') 55 | } catch (err) { 56 | console.error('Could not load leveldown, skipping tests') 57 | return false 58 | } 59 | 60 | mkdirp.sync(tmpdir) 61 | var dir = path.join(tmpdir, name) 62 | 63 | if (opts.clean !== false) rimraf.sync(dir) 64 | opts.db = leveldown 65 | 66 | return levelup(dir, opts, cb) 67 | } 68 | -------------------------------------------------------------------------------- /test/query-filter.js: -------------------------------------------------------------------------------- 1 | var test = require('./util/test') 2 | , filter = require('../filter') 3 | , concat = require('concat-stream') 4 | 5 | test('query filter (does not use indexes)', function(t, db) { 6 | t.plan(19) 7 | 8 | db.batch([ 9 | { key: 'readme', value: {type: 'text', size: 30 }}, 10 | { key: 'logo', value: {type: 'png', size: 1000, author: { name: 'mary'} }}, 11 | { key: 'license', value: {type: 'text', size: 10 }} 12 | ], function(err) { 13 | q({size: 10}, ['license'], 'default is eq') 14 | q({size: 30}, ['readme'], 'default is eq') 15 | 16 | q({size: {eq: 30}}, ['readme'], 'eq') 
17 | q({size: {gte: 30}}, ['logo', 'readme'], 'gte') 18 | q({size: {gt: 30}}, ['logo'], 'gt') 19 | q({size: {lt: 10}}, [], 'lt') 20 | q({size: {lte: 10}}, ['license'], 'lte') 21 | 22 | q({size: {eq: 10, gt: 10}}, ['license'], 'eq takes precedence') 23 | q({size: {gt: 10, gte: 10}}, ['logo', 'readme'], 'gt takes precedence over gte') 24 | q({size: {lt: 1000, lte: 1000}}, ['license', 'readme'], 'lt takes precedence over lte') 25 | 26 | q({size: {lte: 1000, gt: 10}}, ['logo', 'readme'], 'lte + gt') 27 | q({size: {lte: 1000, gt: 10, gte: 3000}}, ['logo', 'readme'], 'lte + gt + gte = lte + gt') 28 | q({size: {lte: 1000, lt: 1000, gt: 10, gte: 3000}}, ['readme'], 'lte + lt + gte + gt = lt + gt') 29 | 30 | q({size: {gt: 0}}, ['license', 'logo', 'readme'], 'gt 0') 31 | q({size: {gt: null}}, ['license', 'logo', 'readme'], 'gt null') 32 | 33 | q({type: 'text'}, ['license', 'readme']) 34 | q({type: 'text', size: 30}, ['readme'], 'multiple props') 35 | q({type: 'text', size: {gte: 30}}, ['readme'], 'multiple props') 36 | 37 | q({'author.name': 'mary'}, ['logo'], 'nested property') 38 | }) 39 | 40 | function q(query, expectedKeys, msg) { 41 | // TODO: { keys: true, values: false } opts doesn't work. 
42 | db.createReadStream().pipe(filter(query)).pipe(concat(function(items){ 43 | var keys = items.map(function(item){ 44 | return item.key 45 | }) 46 | keys.sort() 47 | t.deepEqual(keys, expectedKeys, msg) 48 | })) 49 | } 50 | }) 51 | -------------------------------------------------------------------------------- /test/indexing.js: -------------------------------------------------------------------------------- 1 | var test = require('./util/test') 2 | , concat = require('concat-stream') 3 | 4 | test('indexes', {index: true}, function(t, db) { 5 | var xy = db.index(['x', 'y']) 6 | 7 | t.test('direct access', function(t){ 8 | t.plan(1) 9 | db.put('bar', {x: 5, y: 20}, function(){ 10 | xy.get([5, 20, 'bar'], function(err, value){ 11 | t.equal(value, 'bar') 12 | }) 13 | }) 14 | }) 15 | 16 | t.test('rebuild', function(t){ 17 | var y = db.index('y') 18 | t.plan(2) 19 | db.put('foo', {x: 10, y: 6}, function(){ 20 | y.rebuild(function() { 21 | y.get([6, 'foo'], function(err, value){ 22 | t.equal(value, 'foo') 23 | }) 24 | 25 | y.get([20, 'bar'], function(err, value){ 26 | t.equal(value, 'bar') 27 | }) 28 | }) 29 | }) 30 | }) 31 | 32 | t.test('view stream', function(t){ 33 | t.plan(2) 34 | db.batch([ 35 | {key: 'a', value: {x: 100, y: 200}}, 36 | {key: 'b', value: {x: 200, y: 300}}, 37 | {key: 'c', value: {x: 300, y: 400}} 38 | ], function(){ 39 | xy.createViewStream({gt: 200}).on('data', function(data){ 40 | t.equal(data.key, 'c', 'normalizes range') 41 | }) 42 | xy.createViewStream({gte: [200, 300]}).once('data', function(data){ 43 | t.equal(data.key, 'b', 'sorted') 44 | }) 45 | }) 46 | }) 47 | }) 48 | 49 | test('property tree', {index: true}, function(t, db){ 50 | var ab = db.index(['a', 'b']) 51 | t.deepEqual(db.propertyTree, { a: { b: { __index: ab } } }) 52 | 53 | var a = db.index(['a']) 54 | t.deepEqual(db.propertyTree, { a: { __index: a, b: { __index: ab } } }) 55 | 56 | var abcd = db.index(['a', 'b', 'c', 'd']) 57 | var ba = db.index(['b', 'a']) 58 | var beep = 
var sigmund = require('sigmund')

// Optimistic batch queue for garbage collection
// ops. Operations can be canceled by entity key.

// Creates a queue that collects batch operations per entity and writes
// them with `db.batch()`.
//
// db   - object with a `batch(ops, callback)` method
// opts - optional:
//        size  - number of queued ops that triggers an immediate flush
//                (default 50)
//        delay - milliseconds to wait after the last addition before
//                flushing (default 30000)
//        Both use `||`, so 0 falls back to the default.
//
// Returns an object with start, clear, end, cancel and flush.
module.exports = function(db, opts) {
  opts || (opts = {})

  var size = opts.size || 50
    , queue = Object.create(null)  // hash of entity key -> queued ops (null once canceled)
    , numQueued = 0                // total number of ops in `queue`
    , active = Object.create(null) // hash of entity key -> still accepting ops?
    , numActive = 0
    , writing = null               // the queue map currently being written, if any
    , rewrites = []                // additions postponed until the current write is done
    , gc = {}
    , timeout
    , delay = opts.delay || 1000 * 30

  // Mark `entityKey` as active and return a function that queues ops for
  // it. That function returns false (and queues nothing) once the entity
  // is no longer active. Note that numActive is incremented on every
  // call, also when the key is already active.
  gc.start = function(entityKey) {
    var h = sigmund(entityKey)

    active[h] = true
    numActive++

    return function(ops) {
      if (active[h]) add(entityKey, ops, h)
      else return false
    }
  }

  // Append `ops` (one op or an array) to the entity's queue. Flushes
  // right away when `size` is reached, otherwise restarts the delay timer.
  function add(entityKey, ops, _hash) {
    var h = _hash || sigmund(entityKey)

    queue[h] = (queue[h] || []).concat(ops)
    numQueued+= Array.isArray(ops) ? ops.length : 1

    if (numQueued >= size) gc.flush()
    else {
      clearTimeout(timeout)
      timeout = setTimeout(gc.flush, delay)
      // unref() is only called when the timer object has it; in Node it
      // keeps a pending timer from holding the process open
      timeout.unref && timeout.unref()
    }
  }

  // Stop the timer, empty the queue and return everything that was queued
  // as one flat array (canceled entries, set to null, are skipped).
  gc.clear = function() {
    var ops = []
    clearTimeout(timeout)

    if (numQueued) {
      for(var k in queue) if (queue[k]) ops = ops.concat(queue[k])
      queue = Object.create(null)
      numQueued = 0
    }

    return ops
  }

  // Don't accept any more additions
  gc.end = function(entityKey, _hash) {
    var h = _hash || sigmund(entityKey)

    if (active[h]) {
      active[h] = false
      // Drop the whole map once nothing is active anymore
      if (--numActive === 0) active = Object.create(null)
    }
  }

  // Stop accepting ops for `entityKey` and discard the ones still queued.
  // If nothing is queued but ops for this entity are part of the batch
  // being written, `ops` (when given) is queued again after that write.
  // Relies on `this` being the gc object.
  gc.cancel = function(entityKey, ops) {
    var h = sigmund(entityKey)

    this.end(entityKey, h)

    if (queue[h] != null) {
      numQueued-= queue[h].length
      queue[h] = null
    } else if (ops && writing && writing[h]) {
      // Schedule a "rewrite" for the new ops
      rewrites.push(add.bind(null, entityKey, ops, h))
    }
  }

  // Write all queued ops in one batch. When there is nothing queued, or a
  // write is already in progress, only `cb` (if any) is scheduled with
  // setImmediate. The batch callback takes no arguments, so a failed
  // write is not reported to `cb`.
  gc.flush = function(cb) {
    if (!numQueued || writing) return cb && setImmediate(cb);

    clearTimeout(timeout)
    // gc.clear() below replaces `queue` with a fresh map, so `writing`
    // keeps pointing at the entries that are being written
    writing = queue

    db.batch(gc.clear(), function(){
      writing = null

      var a = rewrites
      rewrites = []

      cb && cb()

      // Re-add postponed ops, or flush again if the queue filled up
      // in the meantime
      if (a.length) a.forEach(function(cb){ cb() })
      else if (numQueued >= size) gc.flush()
    })
  }

  return gc
}
compare.bind(null, expr.gte), min = 0 13 | 14 | if ('lt' in expr) 15 | var lt = compare.bind(null, expr.lt), max = 1 16 | else if ('lte' in expr) 17 | lt = compare.bind(null, expr.lte), max = 0 18 | 19 | return function(b) { 20 | return (!gt || gt(b) <= min) && (!lt || lt(b) >= max) 21 | } 22 | } 23 | 24 | function compare (a, b) { 25 | if (a === lowerBound && b !== lowerBound) return -1 26 | if (a === upperBound && b !== upperBound) return 1 27 | if (b === lowerBound && a !== lowerBound) return 1 28 | if (b === upperBound && a !== upperBound) return -1 29 | if (a === lowerBound || a === upperBound) return 0 30 | 31 | if(isArrayLike(a) && isArrayLike(b)) { 32 | var la = a.length, lb = b.length, c 33 | 34 | for(var i=0; i b ? 1 : 0 42 | } 43 | 44 | function isArrayLike (a) { 45 | return Array.isArray(a) || Buffer.isBuffer(a) 46 | } 47 | 48 | exports.exprToRange = function (expressions, meta) { 49 | var range = {} 50 | , key = [], equirange = true 51 | 52 | expressions.forEach(function(expr){ 53 | if ('eq' in expr) return key.push(expr.eq) 54 | 55 | if (!equirange) 56 | throw new Error('Supports only one non-equi predicate') 57 | 58 | equirange = false 59 | 60 | if ('lt' in expr) range.lt = key.concat(expr.lt) 61 | else if ('lte' in expr && !('lt' in range)) range.lte = key.concat(expr.lte) 62 | 63 | if ('gt' in expr) range.gt = key.concat(expr.gt) 64 | else if ('gte' in expr && !('gt' in range)) range.gte = key.concat(expr.gte) 65 | }) 66 | 67 | if (equirange) range.eq = key 68 | else if (key.length) { // non-equi needs counterpart 69 | for(var op in range) { 70 | var oppo = (op[0] == 'l' ? 'g' : 'l') + op.slice(1) 71 | , alt = op[2] == 'e' ? oppo.slice(0,2) : oppo + 'e' 72 | 73 | if (!(oppo in range) && !(alt in range)) 74 | range[oppo] = key.slice() 75 | } 76 | } 77 | 78 | return meta ? 
[range, equirange] : range 79 | } 80 | 81 | exports.normalize = function normalize(length, opts) { 82 | var normal = {} 83 | 84 | if ('eq' in opts) { 85 | // Fix `eq` if it is an incomplete range, to gte+lte 86 | var key = opts.eq 87 | if (!Array.isArray(key)) key = [key] 88 | 89 | if (key.length!==length) { 90 | return normalize(length, { gte: key.slice(), lte: key.slice() }) 91 | } else { 92 | normal.eq = key 93 | return normal 94 | } 95 | } 96 | 97 | function fill(val, fill) { 98 | var a = Array.isArray(val) ? val.slice() : [val] 99 | while(a.length < length) a.push(fill) 100 | return a 101 | } 102 | 103 | if ('gt' in opts) normal.gt = fill(opts.gt, upperBound) 104 | else if ('gte' in opts) normal.gte = fill(opts.gte, lowerBound) 105 | 106 | if ('lt' in opts) normal.lt = fill(opts.lt, lowerBound) 107 | else if ('lte' in opts) normal.lte = fill(opts.lte, upperBound) 108 | 109 | return normal 110 | } 111 | -------------------------------------------------------------------------------- /lib/streams/search.js: -------------------------------------------------------------------------------- 1 | var queryPlan = require('../util/query-plan') 2 | , queryFilter = require('./query-filter') 3 | , viewStream = require('./view') 4 | , debug = require('debug')('level-scout') 5 | , util = require('util') 6 | , valuesOrKeys = require('../util/values-or-keys') 7 | , mapFilter = require('./map-filter') 8 | , intersect = require('sorted-intersect-stream') 9 | 10 | // Search selects the most optimal index(es) and adds filters if necessary. 
11 | module.exports = function (db, query, opts) { 12 | query = normalizeQuery(query) 13 | 14 | if (opts && opts.indexes) { 15 | var indexes = opts.indexes 16 | 17 | if (Array.isArray(indexes)) { 18 | indexes = Object.create(null) 19 | opts.indexes.forEach(function(index){ 20 | indexes[index.name] = index 21 | }) 22 | } 23 | } else { 24 | indexes = db.indexes || Object.create(null) 25 | var tree = db.propertyTree 26 | } 27 | 28 | // TODO: Maybe use lazypipe to construct plan + streams 29 | // TODO: Don't decode keys, use raw keyEncoding, decode after filtering 30 | // TODO: In viewStream, if entity does not exist, notify gc 31 | // TODO: Compute overhead (old index keys not deleted due to a process crash) 32 | // while filtering and notify gc 33 | 34 | var plan = new queryPlan(query, indexes, tree) 35 | , paths = plan.accessPaths 36 | , debugPlan = [] 37 | 38 | // TODO: Move this to queryPlan and expose as accesspath 39 | // options (later, add the ability to choose and analyze paths). 40 | if (!paths.length) { 41 | var stream = db.createReadStream() 42 | debugPlan.push('Table scan') 43 | } else { 44 | // We can do an index intersect when we have multiple full-range 45 | // equality predicates (because then, streams are key-ordered). 46 | if (plan.equiPaths.length > 1) { 47 | stream = plan.equiPaths.reduce(function(acc, path){ 48 | return acc ? intersect(acc, keyStream(path), intersectKey) : keyStream(path) 49 | }, null) 50 | 51 | debugPlan.push({intersect: plan.equiPaths}) 52 | } else { 53 | // First index should be most optimal. 
54 | stream = keyStream(paths[0]) 55 | debugPlan.push(paths[0]) 56 | } 57 | 58 | stream = stream.pipe(viewStream(db)) 59 | } 60 | 61 | // Filter by indexed predicates (also to catch old indexKeys) 62 | plan.filters.forEach(function(filter){ 63 | stream = stream.pipe(mapFilter(filter.map, filter.range, filter.match)) 64 | }) 65 | 66 | // Filter by unindexed predicates 67 | if (plan.extraneous.length) { 68 | var restQuery = Object.create(null) 69 | 70 | plan.extraneous.forEach(function(property){ 71 | restQuery[property] = query[property] 72 | }) 73 | 74 | debugPlan.push({filter: restQuery}) 75 | stream = stream.pipe(queryFilter(restQuery)) 76 | } 77 | 78 | stream = valuesOrKeys(stream, opts) 79 | stream.plan = debugPlan 80 | 81 | debug(util.inspect({plan: debugPlan}, {depth: 4})) 82 | return stream 83 | } 84 | 85 | function keyStream(path) { 86 | return path.index.createValueStream(path.range) 87 | } 88 | 89 | function intersectKey(key) { 90 | return key 91 | } 92 | 93 | function normalizeQuery(query) { 94 | var normal = {} 95 | 96 | Object.keys(query).forEach(function(property){ 97 | var predicate = query[property] 98 | 99 | if (typeof predicate !== 'object') { 100 | normal[property] = { eq: predicate } 101 | } else if (('lt' in predicate) || ('lte' in predicate) || // todo: Object.keys() 102 | ('gt' in predicate) || ('gte' in predicate) || 103 | ('eq' in predicate) ){ 104 | normal[property] = predicate 105 | } 106 | }) 107 | 108 | return normal 109 | } 110 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # level-scout 2 | 3 | **ltgt syntax + bytewise encoded indexes + stream filters + query planner = pretty awesome search capabilities. 
A search will use a range query on the most optimal index, even intersect indexes if possible, or do a full scan.** 4 | 5 | [![npm status](http://img.shields.io/npm/v/level-scout.svg?style=flat-square)](https://www.npmjs.org/package/level-scout) [![Travis build status](https://img.shields.io/travis/vweevers/level-scout.svg?style=flat-square&label=travis)](http://travis-ci.org/vweevers/level-scout) [![AppVeyor build status](https://img.shields.io/appveyor/ci/vweevers/level-scout.svg?style=flat-square&label=appveyor)](https://ci.appveyor.com/project/vweevers/level-scout) [![Dependency status](https://img.shields.io/david/vweevers/level-scout.svg?style=flat-square)](https://david-dm.org/vweevers/level-scout) 6 | 7 | As an example, suppose you have a compound index on the `x` and `y` properties of your entities, resulting in index keys in the form of `[x, y, entity key]`. If you search for `x: 20, y: { gte: 5 }`, scout combines those predicates to a key range like `gte: [20, 5], lte: [20, undefined]`. But if you search for `x: { gte: 5 }, y: 20`, scout produces a ranged stream for `x` and filters that by `y`. Basically, scout can combine zero or more equality predicates with zero or one non-equality predicates, in the order of the index properties (so a compound "x, y" index is not the same as a "y, x" index). And maybe more in the future, if something like a "skip scan" is implemented. 8 | 9 | Note: the API and dependencies are unstable, documentation is missing, terminology possibly garbled. Requires sublevel and leveldown (there are some unresolved issues with other backends like memdown). **Requires `leveldown >= 1.0.0` or `level/memdown#v1.0.2`, and JSON value encoding. Incompatible with `level-sublevel`.** 10 | 11 | ## Quick overview 12 | 13 | ```js 14 | var index = require('level-scout/index') 15 | , search = require('level-scout/search') 16 | , select = require('level-scout/select') 17 | , filter = require('level-scout/filter') 18 | 19 | var db = ..
20 | 21 | index(db, 'age') // Single property index 22 | index(db, 'owner.lastname') // Nested property 23 | index(db, ['a', 'b', 'c']) // Compound index 24 | 25 | // Compound index with custom mapper. You 26 | // can now search for `sum` even though it's 27 | // not a property of the entity. Function 28 | // is used for both indexing and filtering. 29 | index(db, ['a', 'sum'], function(key, entity){ 30 | return [entity.a, entity.a + entity.b] 31 | }) 32 | 33 | // Insert some data 34 | db.batch(..) 35 | 36 | // Would select the "a, sum" index as access 37 | // path, because those combined predicates are 38 | // more selective than "age" - and "color" is not 39 | // indexed. 40 | var stream = search(db, { 41 | a: 45, 42 | sum: { gte: 45, lt: 60 }, 43 | color: 'red', 44 | age: 300 45 | }) 46 | 47 | // Get a subset of each entity 48 | .pipe(select({the_age: 'age', color: true})) 49 | 50 | // Filter some more (would yield no results) 51 | .pipe(filter({ the_age: { lt: 100 } })) 52 | ``` 53 | 54 | Search with a callback: 55 | 56 | ```js 57 | search(db, { year: 1988 }, function(err, results, plan){ 58 | // `plan` contains debug info about the selected 59 | // access path and filters 60 | }) 61 | ``` 62 | 63 | ## Setup 64 | 65 | ```js 66 | var levelup = require('levelup') 67 | , index = require('level-scout/index') 68 | , search = require('level-scout/search') 69 | 70 | var db = levelup('./db', { valueEncoding: 'json' }) 71 | 72 | index(db, ..) 73 | search(db, ..) 74 | ``` 75 | 76 | Or attach the methods to your database: 77 | 78 | ```js 79 | index.install(db) 80 | search.install(db) 81 | 82 | db.index('x') 83 | 84 | db.put('key', {x: 10 }, function(){ 85 | db.search({x: 10}, function(err, results){ 86 | // .. 
87 | }) 88 | }) 89 | ``` 90 | -------------------------------------------------------------------------------- /test/gc.js: -------------------------------------------------------------------------------- 1 | var test = require('./util/test') 2 | 3 | test('gc queue is injected into batch', {index: true}, function(t, db){ 4 | t.plan(2) 5 | 6 | var color = db.index('color') 7 | 8 | db.put('G', {color: 'green'}, function(){ 9 | color.get(['green', 'G'], function(err, key){ 10 | t.equal(key, 'G', 'has index key') 11 | 12 | db.del('G', function(){}) 13 | db.once('gc', function(){ 14 | db.put('P', {color: 'pink'}, function(){ 15 | color.get(['green', 'G'], function(err, key){ 16 | t.ok(err, 'index key removed') 17 | }) 18 | }) 19 | }) 20 | }) 21 | }) 22 | }) 23 | 24 | test('gc queue is flushed after delay', {index: true}, function(t, db){ 25 | t.plan(2) 26 | 27 | var color = db.index('color', { gc: { delay: 10 }}) 28 | 29 | db.batch([ 30 | {key: 'R', value: {color: 'red'}}, 31 | {key: 'G', value: {color: 'green'}}, 32 | {key: 'B', value: {color: 'blue'}} 33 | ], function(){ 34 | color.get(['green', 'G'], function(err, key){ 35 | t.equal(key, 'G', 'has index key') 36 | 37 | db.del('G', function(){}) 38 | db.once('gc', function(){ 39 | setTimeout(function(){ 40 | color.get(['green', 'G'], function(err, key){ 41 | t.ok(err, 'index key removed') 42 | }) 43 | }, 300) 44 | }) 45 | }) 46 | }) 47 | }) 48 | 49 | test('gc queue is flushed if full', {index: true}, function(t, db){ 50 | t.plan(2) 51 | 52 | var color = db.index('color', { gc: { size: 1, delay: 60000 }}) 53 | 54 | db.batch([ 55 | {key: 'R', value: {color: 'red'}}, 56 | {key: 'G', value: {color: 'green'}}, 57 | {key: 'B', value: {color: 'blue'}} 58 | ], function(){ 59 | color.get(['green', 'G'], function(err, key){ 60 | t.equal(key, 'G', 'has index key') 61 | 62 | db.del('G', function(){}) 63 | db.once('gc', function(){ 64 | setTimeout(function(){ 65 | color.get(['green', 'G'], function(err, key){ 66 | t.ok(err, 'index 
key removed') 67 | }) 68 | }, 300) 69 | }) 70 | }) 71 | }) 72 | }) 73 | 74 | test('queued gc op is canceled by new write', {index: true}, function(t, db){ 75 | t.plan(3) 76 | 77 | var color = db.index('color') 78 | 79 | db.put('G', {color: 'green'}, function(){ 80 | color.get(['green', 'G'], function(err, key){ 81 | t.equal(key, 'G', 'has index key') 82 | 83 | db.del('G', function(){}) 84 | db.once('gc', function(){ 85 | db.put('G', {color: 'pink'}, function(){ 86 | color.get(['green', 'G'], function(err, key){ 87 | t.notOk(err, 'old index key not removed') 88 | }) 89 | color.get(['pink', 'G'], function(err, key){ 90 | t.notOk(err, 'has new index key') 91 | }) 92 | }) 93 | }) 94 | }) 95 | }) 96 | }) 97 | 98 | test('a new write is requeued if gc is writing', {index: true}, function(t, db){ 99 | t.plan(4) 100 | 101 | var color = db.index('color') 102 | 103 | var batch = db.batch 104 | var delayedBatch = function() { 105 | var args = [].slice.apply(arguments) 106 | setTimeout(batch.apply.bind(batch, db, args), 300) 107 | } 108 | 109 | db.put('G', {color: 'green'}, function(){ 110 | color.get(['green', 'G'], function(err, key){ 111 | t.equal(key, 'G', 'has key') 112 | 113 | db.del('G', function(){}) 114 | db.once('gc', function(){ 115 | // Simulate delayed flush 116 | db.batch = delayedBatch 117 | 118 | db.gc.flush(function(){ 119 | color.get(['green', 'G'], function(err, key){ 120 | t.ok(err, 'old key removed') 121 | }) 122 | 123 | color.get(['pink', 'G'], function(err, key){ 124 | t.ok(err, 'new key not yet flushed') 125 | db.gc.flush(function(){ 126 | color.get(['pink', 'G'], function(err, key){ 127 | t.notOk(err, 'new key flushed') 128 | }) 129 | }) 130 | }) 131 | }) 132 | 133 | db.batch = batch 134 | 135 | // Write new value before flush is finished 136 | db.gc.cancel('G', [ 137 | { prefix: color.db, key: ['pink', 'G'], value: null } 138 | ]) 139 | }) 140 | }) 141 | }) 142 | }) 143 | -------------------------------------------------------------------------------- 
/lib/util/query-plan.js: -------------------------------------------------------------------------------- 1 | var debug = require('debug')('level-scout') 2 | , exprToRange = require('../util/range').exprToRange 3 | , createTree = require('./property-tree').create 4 | 5 | module.exports = QueryPlan 6 | 7 | function QueryPlan (query, indexes, tree) { 8 | this.query = query 9 | this.indexes = indexes 10 | this.indexed = [] 11 | this.tree = tree || createTree(indexes) 12 | 13 | // Indexed properties grouped by operator: eq or other 14 | this.equi = [] 15 | this.nonEqui = [] 16 | 17 | var properties = Object.keys(query) 18 | 19 | for(var name in indexes) { 20 | var p = indexes[name].properties 21 | 22 | for(var j=0, jl=p.length; j=0 && this.indexed.indexOf(p[j])<0) { 24 | this.indexed.push(p[j]) 25 | ;('eq' in this.query[p[j]] ? this.equi : this.nonEqui).push(p[j]) 26 | } 27 | } 28 | } 29 | 30 | this.extraneous = properties.filter(notContainedIn, this.indexed) 31 | 32 | this.selectAccessPaths() 33 | this.selectFilters() 34 | } 35 | 36 | // Select indexes for scanning 37 | QueryPlan.prototype.selectAccessPaths = function () { 38 | this.equiPaths = [] 39 | this.accessPaths = this.selectCandidates(true, this.equiPaths) 40 | 41 | debug({query: this.query}) 42 | debug({winners: this.accessPaths}) 43 | } 44 | 45 | // Select indexes for filtering 46 | QueryPlan.prototype.selectFilters = function () { 47 | this.filters = this.selectCandidates(false) 48 | } 49 | 50 | // Select indexes until all predicates are represented 51 | // TODO: for scanning, we'll likely use only one index. 52 | // Once I have that part figured out, skip this and just 53 | // use the first candidate. 
54 | QueryPlan.prototype.selectCandidates = function (contiguous, accEquality) { 55 | var remaining = this.indexed.slice() 56 | , paths = [], had = Object.create(null) 57 | , candidates = [] 58 | 59 | // Find candidates 60 | this.traverse(contiguous, this.tree, candidates) 61 | 62 | // Sort so that the first candidate has the most 63 | // matched and least unmatched properties 64 | // TODO: include cardinality. 65 | candidates.sort(optimalFirst) 66 | 67 | candidateLoop: 68 | for(var i=0, l=candidates.length; i