├── .gitignore ├── LICENSE ├── README.md ├── example.js ├── index.js ├── package.json └── write-stream.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | *-db 3 | db.json 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Mathias Buus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jsonkv 2 | 3 | Single file write-once database that is valid JSON with efficient random access on bigger datasets 4 | 5 | ``` 6 | npm install jsonkv 7 | ``` 8 | 9 | ## Usage 10 | 11 | ``` js 12 | const jsonkv = require('jsonkv') 13 | 14 | // First create a database (all data will be stored in ./db.json as valid JSON) 15 | const ws = jsonkv.createWriteStream('db.json') 16 | 17 | // Write a ton of data to it 18 | for (var i = 0; i < 10000; i++) { 19 | ws.write({ 20 | key: i, 21 | value: `this is a value: ${i}` 22 | }) 23 | } 24 | 25 | ws.end(function () { 26 | // our jsonkv is now fully written and cannot be updated again. 27 | // to query it make an instance 28 | const db = jsonkv('db.json') 29 | 30 | db.get(42, function (err, doc) { 31 | console.log(doc) // prints {key: 42, value: 'this is a value: 42'} 32 | }) 33 | }) 34 | ``` 35 | 36 | ## API 37 | 38 | #### `ws = jsonkv.createWriteStream(filename, [opts])` 39 | 40 | Create a new database by writing data to the input stream. 41 | 42 | All data should be objects and include a sortable primary key. By default the property `key` is used. If you want to use another property, pass your own sort function as the second argument. 43 | 44 | ``` js 45 | const ws = jsonkv.createWriteStream('db.json') 46 | 47 | ws.write({ 48 | key: 'hello', // per default key is used as the primary key 49 | world: true 50 | }) 51 | ``` 52 | 53 | The data will be stored temporarily as `{filename}.tmp` and will then be indexed and stored in `filename` as a valid JSON file where all the data is stored sorted in a `values` array with some whitespace padding to make lookups efficient. 54 | 55 | The indexing procedure is memory efficient, so it should be able to handle large datasets as input. 56 | 57 | When the stream emits `finish`, the database is safe to use. 
58 | 59 | #### `db = jsonkv(filename, [opts])` 60 | 61 | After writing data to a database file, you can query it by making a database instance. 62 | 63 | If you used an optional sort function when writing your data, you should pass that here as well. 64 | 65 | #### `db.get(key, callback)` 66 | 67 | Look up a key. The callback is called with the value if found and `null` otherwise. 68 | 69 | #### `rs = db.createReadStream([opts])` 70 | 71 | Make a readable stream that traverses the database in sorted order. 72 | 73 | Options include: 74 | 75 | ``` js 76 | { 77 | gt: key, // only keys > key 78 | gte: key, // only keys >= key 79 | lt: key, // only keys < key 80 | lte: key // only keys <= key 81 | } 82 | ``` 83 | 84 | #### `ite = db.iterate([opts])` 85 | 86 | Same as above but returns a [nanoiterator](https://github.com/mafintosh/nanoiterator) instance instead of a stream. 87 | 88 | ## License 89 | 90 | MIT 91 | -------------------------------------------------------------------------------- /example.js: -------------------------------------------------------------------------------- 1 | const jsonkv = require('./') 2 | const from = require('from2') 3 | 4 | const db = jsonkv('db.json') 5 | 6 | if (process.argv[2] === 'write') write() 7 | else get() 8 | 9 | function get () { 10 | db.get('hello', function (err, doc) { 11 | if (err) throw err 12 | console.log(doc) 13 | }) 14 | } 15 | 16 | function write () { 17 | var len = 100000000 18 | const target = Math.floor(Math.random() * len) 19 | const stream = jsonkv.createWriteStream('db.json') 20 | 21 | const rs = from.obj(function (size, cb) { 22 | if (!len) return cb(null, null) 23 | 24 | const data = { 25 | key: len === target 26 | ? 
'hello' 27 | : Math.random().toString(16).slice(2), 28 | value: len-- 29 | } 30 | 31 | cb(null, data) 32 | }) 33 | 34 | rs.pipe(stream) 35 | } 36 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const raf = require('random-access-file') 2 | const nanoiterator = require('nanoiterator') 3 | const toStream = require('nanoiterator/to-stream') 4 | 5 | jsonkv.createWriteStream = require('./write-stream') 6 | module.exports = jsonkv 7 | 8 | function jsonkv (filename, opts) { 9 | return new DB(filename, opts) 10 | } 11 | 12 | class DB { 13 | constructor (filename, opts) { 14 | if (typeof opts === 'function') opts = {sort: opts} 15 | if (!opts) opts = {} 16 | this.sort = opts.sort || sortByKey 17 | this.storage = typeof filename === 'string' ? raf(filename) : filename 18 | this.valueSize = 0 19 | this.pageSize = 0 20 | this.iteratorSize = 0 21 | this.length = 0 22 | this.offset = 0 23 | this.opened = false 24 | } 25 | 26 | open (cb) { 27 | if (!cb) cb = noop 28 | if (this.opened) return cb(null) 29 | 30 | const self = this 31 | 32 | this.storage.stat(function (err, st) { 33 | if (err) return cb(err) 34 | 35 | const headerSize = Math.min(st.size, 128) 36 | if (!headerSize) return cb(new Error('Database file should not empty')) 37 | 38 | self.storage.read(0, headerSize, function (err, buf) { 39 | if (err) return cb(err) 40 | 41 | // 91 is [ 42 | const idx = buf.lastIndexOf 43 | ? buf.lastIndexOf(91) 44 | : buf.indexOf(91) 45 | 46 | const nl = buf.indexOf(10) // \n 47 | const cr = nl < 1 || buf[nl - 1] === 13 // \r\n 48 | 49 | const header = decode(buf.toString('utf-8', 0, idx + 1) + ']}') 50 | if (!header) return cb(new Error('Database has an invalid header')) 51 | 52 | self.valueSize = header.valueSize 53 | self.pageSize = 4 + self.valueSize + 1 + (cr ? 
2 : 1) 54 | self.iteratorSize = Math.max(16, Math.floor(65536 / self.pageSize)) 55 | self.length = header.length 56 | self.offset = idx + 2 + 1 57 | self.opened = true 58 | 59 | cb(null) 60 | }) 61 | }) 62 | } 63 | 64 | createReadStream (opts) { 65 | return toStream(this.iterator(opts)) 66 | } 67 | 68 | iterate (opts) { 69 | return this.iterator(opts) // backwards compat 70 | } 71 | 72 | iterator (opts) { 73 | if (!opts) opts = {} 74 | 75 | const self = this 76 | const g = opts.gte || opts.gt 77 | const l = opts.lte || opts.lt 78 | const start = g && (typeof g === 'object' ? g : {key: g}) 79 | const end = l && (typeof l === 'object' ? l : {key: l}) 80 | 81 | var offset = 0 82 | var limit = 0 83 | var block = Buffer.alloc(0) 84 | var ptr = 0 85 | 86 | return nanoiterator({open, next}) 87 | 88 | function open (cb) { 89 | if (!start) return self.open(cb) 90 | self.get(start, {closest: true}, onstart) 91 | 92 | function onstart (err, val, seq) { 93 | if (err) return cb(err) 94 | 95 | offset = self.offset + seq * self.pageSize 96 | limit = val ? 
self.length - seq : 0 97 | 98 | if (limit <= 0) return cb(null) 99 | 100 | const cmp = self.sort(val, start) 101 | if (opts.gte && cmp >= 0) return cb(null) 102 | if (opts.gt && cmp > 0) return cb(null) 103 | 104 | getValue(self, seq + 1, (err, val) => onstart(err, val, seq + 1)) 105 | } 106 | } 107 | 108 | function next (cb) { 109 | if (!offset) { 110 | offset = self.offset 111 | limit = self.length 112 | } 113 | 114 | if (!limit) return cb(null, null) 115 | if (ptr < block.length) return onblock(null, block) 116 | 117 | ptr = 0 118 | const blockSize = Math.min(limit, self.iteratorSize) * self.pageSize 119 | self.storage.read(offset, blockSize, onblock) 120 | 121 | function onblock (err, buf) { 122 | if (err) return cb(err) 123 | 124 | block = buf 125 | offset += self.pageSize 126 | limit-- 127 | 128 | const val = decodeValue(buf.toString('utf-8', ptr, ptr += self.pageSize)) 129 | if (!val) return cb(new Error('Invalid database entry')) 130 | 131 | if (end) { 132 | const cmp = self.sort(val, end) 133 | if (opts.lte && cmp > 0) return cb(null, null) 134 | if (opts.lt && cmp >= 0) return cb(null, null) 135 | } 136 | 137 | cb(null, val) 138 | } 139 | } 140 | } 141 | 142 | getByIndex (index, cb) { 143 | getValue(this, index, cb) 144 | } 145 | 146 | get (key, opts, cb) { 147 | if (typeof opts === 'function') return this.get(key, null, opts) 148 | if (!this.opened) return openAndGet(this, key, opts, cb) 149 | 150 | const self = this 151 | const target = typeof key === 'object' ? key : {key} 152 | const closest = !!(opts && opts.closest) 153 | 154 | var midpoint = (opts && opts.midpoint) || defaultMidpoint 155 | var top = this.length 156 | var btm = 0 157 | var mid = midpoint(btm, top) 158 | 159 | getValue(this, mid, function loop (err, val) { 160 | if (err) return cb(err) 161 | 162 | const cmp = self.sort(target, val) 163 | 164 | if (!cmp) return cb(null, val, mid) 165 | if (top - btm <= 1) return cb(null, closest ? 
val : null, mid) 166 | 167 | if (cmp < 0) top = mid 168 | else btm = mid 169 | 170 | const nextMid = midpoint(btm, top) 171 | if (nextMid === mid) return cb(null, closest ? val : null, mid) 172 | mid = nextMid 173 | 174 | getValue(self, mid, loop) 175 | }) 176 | } 177 | 178 | destroy (cb) { 179 | this.storage.destroy(cb) 180 | } 181 | } 182 | 183 | function defaultMidpoint (btm, top) { 184 | return Math.floor((top + btm) / 2) 185 | } 186 | 187 | function getValue (db, idx, cb) { 188 | const offset = db.offset + idx * db.pageSize 189 | 190 | db.storage.read(offset, db.pageSize, function (err, buf) { 191 | if (err) return cb(err) 192 | const val = decodeValue(buf.toString()) 193 | if (!val) return cb(new Error('Invalid database entry')) 194 | cb(null, val) 195 | }) 196 | } 197 | 198 | function decodeValue (str) { 199 | const val = decode('[' + str + 'null]') 200 | if (val) return val[0] 201 | return decode(str) 202 | } 203 | 204 | function openAndGet (db, key, opts, cb) { 205 | db.open(function (err) { 206 | if (err) return cb(err) 207 | db.get(key, opts, cb) 208 | }) 209 | } 210 | 211 | function decode (str) { 212 | try { 213 | return JSON.parse(str) 214 | } catch (err) { 215 | return null 216 | } 217 | } 218 | 219 | function noop () {} 220 | 221 | function sortByKey (a, b) { 222 | if (a.key === b.key) return 0 223 | return a.key < b.key ? 
-1 : 1 224 | } 225 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jsonkv", 3 | "version": "1.4.1", 4 | "description": "Single file write-once database that is valid JSON with efficient random access on bigger datasets", 5 | "main": "index.js", 6 | "dependencies": { 7 | "bulk-write-stream": "^1.1.4", 8 | "nanoiterator": "^1.1.0", 9 | "random-access-file": "^2.0.1" 10 | }, 11 | "devDependencies": { 12 | "from2": "^2.3.0", 13 | "standard": "^11.0.1" 14 | }, 15 | "scripts": { 16 | "test": "standard" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "https://github.com/mafintosh/jsonkv.git" 21 | }, 22 | "author": "Mathias Buus (@mafintosh)", 23 | "license": "MIT", 24 | "bugs": { 25 | "url": "https://github.com/mafintosh/jsonkv/issues" 26 | }, 27 | "homepage": "https://github.com/mafintosh/jsonkv" 28 | } 29 | -------------------------------------------------------------------------------- /write-stream.js: -------------------------------------------------------------------------------- 1 | const nanoiterator = require('nanoiterator') 2 | const raf = require('random-access-file') 3 | const bulk = require('bulk-write-stream') 4 | 5 | const BUCKET_SIZE = 65536 6 | const READ_BUFFER = 64 7 | const WRITE_BUFFER = 256 8 | 9 | module.exports = createWriteStream 10 | 11 | function createWriteStream (filename, sort) { 12 | if (!sort) sort = sortByKey 13 | 14 | const tmp = raf(filename + '.tmp', {truncate: true}) 15 | const buckets = [] 16 | var next = createBucket() 17 | 18 | return bulk.obj({highWaterMark: BUCKET_SIZE}, batch, flush) 19 | 20 | function batch (data, cb) { 21 | for (var i = 0; i < data.length; i++) next.push(data[i]) 22 | if (next.size < BUCKET_SIZE) return cb(null) 23 | next.flush(cb) // always nextticked 24 | next = createBucket() 25 | } 26 | 27 | function flush (cb) { 28 | next.flush(function (err) { 
29 | if (err) return cb(err) 30 | mergeSort(cb) 31 | }) 32 | } 33 | 34 | function mergeSort (cb) { 35 | const iterators = buckets.map(toIterator) 36 | var missing = iterators.length 37 | var error = null 38 | 39 | for (var i = 0; i < iterators.length; i++) iterators[i].next(ondone) 40 | 41 | function ondone (err) { 42 | if (err) error = err 43 | if (--missing) return 44 | if (error) return cb(error) 45 | 46 | const iterator = reduceIterators(iterators, sort) 47 | const valueSize = buckets.map(a => a.valueSize).reduce((a, b) => Math.max(a, b)) 48 | const storage = raf(filename, {truncate: true}) 49 | 50 | var length = buckets.map(a => a.size).reduce((a, b) => a + b) 51 | var offset = 0 // after the header 52 | var block = Buffer.allocUnsafe(WRITE_BUFFER * (4 + valueSize + 1 + 2)) 53 | var ptr = 0 54 | 55 | const header = Buffer.from( 56 | `{\r\n "valueSize": ${valueSize},\r\n "length": ${length},\r\n "values": [\r\n` 57 | ) 58 | 59 | offset = header.length 60 | storage.write(0, header, loop) 61 | 62 | function loop (err) { 63 | if (err) return flush(err) 64 | if (!iterator.value) return flush(null) 65 | 66 | length-- 67 | const sep = length ? 
',' : ' ' 68 | const wrote = block.write(' ' + JSON.stringify(iterator.value) + sep, ptr) 69 | const end = ptr + 4 + valueSize + 1 70 | 71 | block.fill(32, ptr + wrote, end) 72 | block.write('\r\n', end) 73 | ptr = end + 2 74 | 75 | if (ptr === block.length) { 76 | ptr = 0 77 | storage.write(offset, block, nextAndLoop) 78 | offset += block.length 79 | return 80 | } 81 | 82 | iterator.next(loop) 83 | } 84 | 85 | function nextAndLoop (err) { 86 | if (err) return flush(err) 87 | iterator.next(loop) 88 | } 89 | 90 | function flush (err) { 91 | if (err) return afterWrite(err) 92 | const end = Buffer.from(' ]\r\n}\r\n') 93 | const buf = Buffer.concat([block.slice(0, ptr), end]) 94 | storage.write(offset, buf, afterWrite) 95 | } 96 | 97 | function afterWrite (err) { 98 | tmp.destroy(function () { 99 | if (err) return storage.destroy(_ => cb(err)) 100 | storage.close(cb) 101 | }) 102 | } 103 | } 104 | } 105 | 106 | function createBucket () { 107 | const start = buckets.length ? buckets[buckets.length - 1].end : 0 108 | const bucket = new Bucket(tmp, start, sort) 109 | buckets.push(bucket) 110 | return bucket 111 | } 112 | } 113 | 114 | function emptyIterator () { 115 | const ite = nanoiterator({ next: cb => cb(null, null) }) 116 | ite.value = null 117 | return ite 118 | } 119 | 120 | function reduceIterators (iterators, sort) { 121 | while (iterators.length > 1) { 122 | const tmp = [] 123 | 124 | for (var i = 0; i < iterators.length; i += 2) { 125 | const left = iterators[i] 126 | const right = i + 1 < iterators.length ? 
iterators[i + 1] : emptyIterator() 127 | const ite = nanoiterator({ 128 | next (cb) { 129 | if (!ite.value) return cb(null, null) 130 | if (ite.value === left.value) return left.next(done) 131 | right.next(done) 132 | 133 | function done (err) { 134 | if (err) return cb(err) 135 | updateValue(ite, left, right, sort) 136 | cb(null, ite.value) 137 | } 138 | } 139 | }) 140 | 141 | updateValue(ite, left, right, sort) 142 | tmp.push(ite) 143 | } 144 | 145 | iterators = tmp 146 | } 147 | 148 | return iterators[0] 149 | } 150 | 151 | function updateValue (ite, left, right, sort) { 152 | if (!left.value && !right.value) ite.value = null 153 | else if (!left.value) ite.value = right.value 154 | else if (!right.value) ite.value = left.value 155 | else if (sort(left.value, right.value) < 0) ite.value = left.value 156 | else ite.value = right.value 157 | } 158 | 159 | function toIterator (bucket) { 160 | return bucket.iterate() 161 | } 162 | 163 | class Bucket { 164 | constructor (storage, start, sort) { 165 | this.storage = storage 166 | this.values = [] 167 | this.start = start 168 | this.end = start 169 | this.valueSize = 0 170 | this.size = 0 171 | this.sort = sort 172 | } 173 | 174 | iterate () { 175 | if (this.values) throw new Error('Flush the bucket first') 176 | 177 | var offset = this.start 178 | var block = Buffer.alloc(0) 179 | var ptr = 0 180 | 181 | const end = this.end 182 | const storage = this.storage 183 | const valueSize = this.valueSize 184 | const blockSize = valueSize * READ_BUFFER 185 | 186 | const ite = nanoiterator({ 187 | next (cb) { 188 | if (offset < end) { 189 | if (ptr < block.length) { 190 | push() 191 | cb(null, ite.value) 192 | } else { 193 | storage.read(offset, Math.min(blockSize, end - offset), onblock) 194 | } 195 | } else { 196 | ite.value = null 197 | cb(null, null) 198 | } 199 | 200 | function onblock (err, buf) { 201 | if (err) return cb(err) 202 | block = buf 203 | ptr = 0 204 | push() 205 | cb(null, ite.value) 206 | } 207 | } 208 | }) 
209 | 210 | ite.value = null 211 | return ite 212 | 213 | function push () { 214 | const str = block.toString('utf-8', ptr, ptr += valueSize) 215 | ite.value = JSON.parse(str) 216 | offset += valueSize 217 | } 218 | } 219 | 220 | push (val) { 221 | this.size++ 222 | this.values.push(val) 223 | } 224 | 225 | flush (cb) { 226 | this.values.sort(this.sort) 227 | 228 | var i 229 | var maxSize = 0 230 | const encoded = [] 231 | 232 | for (i = 0; i < this.size; i++) { 233 | const enc = JSON.stringify(this.values[i]) 234 | const len = Buffer.byteLength(enc) 235 | 236 | if (len > maxSize) maxSize = len 237 | encoded.push(enc) 238 | } 239 | 240 | this.valueSize = maxSize 241 | this.values = null 242 | 243 | const buf = allocSpaces(maxSize * this.size) 244 | 245 | for (i = 0; i < this.size; i++) { 246 | buf.write(encoded[i], maxSize * i) 247 | } 248 | 249 | this.end += maxSize * this.size 250 | this.storage.write(this.start, buf, cb) 251 | } 252 | } 253 | 254 | function sortByKey (a, b) { 255 | if (a.key === b.key) return 0 256 | return a.key < b.key ? -1 : 1 257 | } 258 | 259 | function allocSpaces (n) { 260 | const buf = Buffer.allocUnsafe(n) 261 | buf.fill(32) // spaces 262 | return buf 263 | } 264 | --------------------------------------------------------------------------------