├── .travis.yml ├── LICENSE ├── README.md ├── index.js ├── notify.js ├── package.json ├── test └── counter.js ├── update.js └── validate.js /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 0.6 4 | - 0.8 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Dominic Tarr 2 | 3 | Permission is hereby granted, free of charge, 4 | to any person obtaining a copy of this software and 5 | associated documentation files (the "Software"), to 6 | deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, 8 | merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom 10 | the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 18 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 20 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # streamview 2 | 3 | a database view that consumes a streamable (write ahead) log. 4 | 5 | ## motivation 6 | 7 | Previously, when I wanted a index or a view on to a leveldb, 8 | I used [level-sublevel](https://github.com/dominictarr/level-sublevel). 9 | sublevel worked by dividing the database into nested sections, 10 | and since it's all one database, a write could be atomic across 11 | multiple sections. So, to create an index, detect when a write is 12 | about to occur (via `db.pre(hook)`) and insert the index also. 13 | 14 | This worked pretty well for a time, but some problems started to arise 15 | 16 | * a view/index cannot be async 17 | * aggregate views, even a simple count was not possible. 18 | * but worse: difficult to migrate a view 19 | 20 | The last one is the biggest problem. If you where actually using this 21 | to build software, you had to run some special batch-mode script to 22 | regenerate the index, if you, say, fixed a bug in that index or added 23 | a feature to it. 24 | 25 | A `streamview` is a very different pattern to sublevel, but can also 26 | be used to create views or indexes, and solves the above problems. 27 | A `streamview` stores the point it was currently up to in the main log, 28 | so that if the database crashes, it can recover without rereading the whole 29 | database, on startup, it reads that value, then restreams the log from that point. 30 | This async separation has several new benefits. 31 | 32 | * since the view is rebuilt, updating it is just starting over. 33 | * since it's separate, the view doesn't even need to be persisted in leveldb. 34 | room opens to experiment with different, simpler persistence mechanisms (i.e. memory or files ;) 35 | * since the view is rebuilt, and the log provides durability, the view doesn't 36 | need to worry about durability, and can instead worry about performance. 37 | you only need to write out the view occasionally, so rebuilding isn't too expensive, 38 | but not on every write. 39 | * the view could even be implemented in another process in another language or on another machine. 40 | 41 | But this does create one new difficulty: 42 | 43 | * to perform a write to the main log, and then read a view consistently with that write, you have to wait until the view has processed that write. 44 | 45 | So, whatever module implements a streamview needs to provide notifications about where it is currently up to. 46 | 47 | ## Api 48 | 49 | Streamviews are very generic, so they don't really need to inherit from a common module, 50 | just implement a common pattern. 51 | 52 | ### init (cb(err, since)) 53 | 54 | Initialize the streamview, reading where the current view is currently up to. 55 | If the view code has changed, this should return to zero, so a rebuild occurs. 56 | 57 | ### write(cb) 58 | 59 | Create a sink pull-stream (aka, writable) that puts data into this view. 60 | 61 | to get a streamview running, initialize it, then connect it to the log. 62 | 63 | ``` js 64 | var sv = YourStreamview(args...) 65 | 66 | sv.init(function (err, since) { 67 | if(err) //disaster, fs is borked 68 | pull( 69 | mainlog.read({gt: since, live: true}), 70 | sv.write(function (err) { 71 | //the mainlog ended for some reason, and all of the view's 72 | //writes are flushed. 73 | }) 74 | ) 75 | }) 76 | ``` 77 | 78 | Normally, in production, you'd connect the view to the log as a live stream, 79 | it's always going, waiting for the next item. However, in testing it's 80 | useful to have a log stream that ends, with the view consistent, so that 81 | you can test it. 82 | 83 | ## listen (onWrite(since)) => rm() 84 | 85 | call `listen` with an `onWrite` function, and `onWrite` 86 | will be called whenever a write has been processed, with the currently 87 | most up to date sequence number. The streamview does not need to 88 | callback for _every_ sequence number (although it may) but each 89 | sequence _must_ be monotonically increasing. 90 | 91 | listen also returns a function `rm`, which removes that listener. 92 | 93 | ## read 94 | 95 | What sort of read interface the streamview creates is totally an implementation detail. 96 | 97 | ## Example 98 | 99 | This module is a very simple demonstration of the streamview idea, 100 | and provides a steamview made from a reduce function. 101 | 102 | you must provide a `get` and `set` function. Here is a simple example 103 | that stores the current state in a file. Naturally since in this example, 104 | the entire file is rewritten each time, it would not be suitable for when 105 | the result of the reduce function grows large, but would be fine when it 106 | stays approximately the same, as in a count or a sum. 107 | 108 | ``` js 109 | var Reduce = require('streamview') 110 | var fs = require('fs') 111 | var filename = '/tmp/streamview-example' 112 | //the simplest reduce is counter. 113 | var sv = Reduce(function (a, b) { 114 | //be sure to handle the initial case. 115 | if(!a) a = 0 116 | return a + 1 117 | }, 118 | //get current state 119 | function (cb) { 120 | fs.readFile(filename, 'utf8', function (err, data) { 121 | if(err) cb(err) 122 | try { 123 | data = JSON.parse(data) 124 | } catch (err) { 125 | return cb(err) 126 | } 127 | return cb(null, data) 128 | }) 129 | }, 130 | //set current state 131 | function (data, cb) { 132 | fs.writeFile(filename, JSON.stringify(data), cb) 133 | }) 134 | 135 | ``` 136 | 137 | ## Links 138 | 139 | * [my initial idea for this architecture](https://gist.github.com/dominictarr/2934a6aa17061a67d012) 140 | * I got the name "stream view" from [the Octopus DB paper](https://infosys.uni-saarland.de/publications/DJ11.pdf) 141 | * This is also known as [kappa architecture](http://milinda.pathirage.org/kappa-architecture.com/) especially when used in the context of "Big Data" (although in my opinion that is a really terrible name because naming it after a random greek letter does give any hints about what it might be) 142 | * [streamview-links](https://github.com/dominictarr/streamview-links) is a far more interesting streamview. 143 | 144 | ## License 145 | 146 | MIT 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | //a simple stream view that reduces all values into a single 2 | //value. suitable for counts, sums, that produce a single value. 3 | 4 | var Update = require('./update') 5 | var Notify = require('./notify') 6 | var pull = require('pull-stream') 7 | 8 | module.exports = function ReduceStreamView (reduce, get, set) { 9 | var since = null, value = null, last = Date.now() 10 | var update = Update(set) 11 | var notify = Notify() 12 | return { 13 | init: function (cb) { 14 | get(function (err, data) { 15 | if(err) return cb(err) 16 | if(data) { 17 | value = data.value 18 | since = data.key 19 | } 20 | cb(null, since) 21 | }) 22 | }, 23 | write: function (cb) { 24 | var self = this 25 | return pull.drain(function (data) { 26 | if(data.sync) return 27 | since = data.key 28 | value = reduce(value, data.value, data.key) 29 | notify(data.key) 30 | update({key: data.key, value: value}) 31 | }, function (err) { 32 | update.flush(cb) 33 | }) 34 | }, 35 | listen: notify.listen, 36 | manifest: 'sync', 37 | api: function () { 38 | return value 39 | } 40 | } 41 | } 42 | 43 | /* 44 | { 45 | init: (cb), 46 | // latest: (listener) 47 | onLatest: listener, //set a listener 48 | write: Sink, 49 | manifest: ''||{}, //just one string incase it's just one method. 50 | api: () || {...} 51 | } 52 | 53 | var sv = StreamView( 54 | function set (value, cb) { db.put('count', value, cb) }, 55 | function get (cb) { db.get('count', cb) } 56 | ) 57 | 58 | sv.init(function (err, since) { 59 | pull( 60 | pl.read(db, {gte: since || null, values: false, keys: true, live: true}), 61 | sv.write() 62 | ) 63 | }) 64 | 65 | api = delayApi(sv.api, sv.mainfest, sv.latest) 66 | */ 67 | 68 | // stream-views are async, so just because the write callbacked doesn't mean 69 | // that the view is consistent. so, we can either delay the write callback 70 | // until all the views are ready, or we can delay our read from a specific 71 | // view until that view is up to date. Sometimes it might not matter, 72 | // and a stale view is fine. 73 | // 74 | // it would be pretty simple to delay any view api... if we have a manifest 75 | // then we can defer the streams. Also, that means we get clients for free! 76 | // that means we just need an api to track where a given stream-view is consistent to. 77 | 78 | -------------------------------------------------------------------------------- /notify.js: -------------------------------------------------------------------------------- 1 | module.exports = function Notify () { 2 | var listeners = [] 3 | function notify (value) { 4 | listeners.forEach(function (fn) { fn(value) }) 5 | } 6 | notify.listen = function (fn) { 7 | listeners.push(fn) 8 | //returns function to remove this listener 9 | return function () { 10 | var i = listeners.indexOf(fn) 11 | if(~i) listeners.splice(i, 1) 12 | } 13 | } 14 | return notify 15 | } 16 | 17 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "streamview", 3 | "description": "streaming database views", 4 | "version": "1.0.1", 5 | "homepage": "https://github.com/dominictarr/streamview", 6 | "repository": { 7 | "type": "git", 8 | "url": "git://github.com/dominictarr/streamview.git" 9 | }, 10 | "dependencies": { 11 | "pull-stream": "^3.1.0", 12 | "pull-write": "^1.0.0" 13 | }, 14 | "devDependencies": { 15 | "interleavings": "^0.3.0", 16 | "osenv": "^0.1.3", 17 | "pull-level": "^1.4.1", 18 | "rimraf": "^2.3.4", 19 | "tape": "^4.4.0", 20 | "tar-stream": "^1.3.1" 21 | }, 22 | "scripts": { 23 | "test": "set -e; for t in test/*.js; do node $t; done" 24 | }, 25 | "author": "Dominic Tarr (http://dominictarr.com)", 26 | "license": "MIT" 27 | } 28 | -------------------------------------------------------------------------------- /test/counter.js: -------------------------------------------------------------------------------- 1 | 2 | var tape = require('tape') 3 | 4 | var Reduce = require('../') 5 | var Validate = require('../validate') 6 | var pull = require('pull-stream') 7 | 8 | function Range (start, end) { 9 | if(null == end) end = Infinity 10 | if(null == start) start = 0 11 | return function (abort, cb) { 12 | if(abort) cb(abort) 13 | else if (start > end) cb(true) 14 | else cb(null, start++) 15 | } 16 | } 17 | 18 | function toKV () { 19 | return pull.map(function (n) { 20 | return {key: n, value: 1} 21 | }) 22 | } 23 | 24 | tape('ReduceStreamView works as counter (from zero)', function (t) { 25 | 26 | var written = false 27 | 28 | var sv = Reduce(function (acc, _) { 29 | return (acc || 0) + 1 30 | }, function (cb) { 31 | cb(null, {key: 0, value: 0}) 32 | }, function (data, cb) { 33 | //ignore writes 34 | written = data.key 35 | cb() 36 | }) 37 | 38 | sv.listen(function (key) { 39 | latest = key 40 | }) 41 | 42 | sv.init(function (err, since) { 43 | if(err) throw err 44 | t.equal(since, 0) 45 | 46 | pull( 47 | Range(1, 100), toKV(), 48 | sv.write(function (err) { 49 | if(err) throw err 50 | t.equal(sv.api(), 100) 51 | t.ok(written) 52 | t.end() 53 | }) 54 | ) 55 | }) 56 | 57 | }) 58 | 59 | 60 | tape('ReduceStreamView works as counter (from 100)', function (t) { 61 | 62 | var sv = Reduce(function (acc, _) { 63 | return (acc || 0) + 1 64 | }, function (cb) { 65 | cb(null, {key: 100, value: 100}) 66 | }, function (value, cb) { 67 | //ignore writes 68 | cb() 69 | }) 70 | 71 | sv.listen(function (key) { 72 | latest = key 73 | }) 74 | 75 | sv.init(function (err, since) { 76 | if(err) throw err 77 | t.equal(since, 100) 78 | 79 | pull( 80 | Range(since+1, since+100), toKV(), 81 | sv.write(function (err) { 82 | if(err) throw err 83 | t.equal(sv.api(), 200) 84 | t.end() 85 | }) 86 | ) 87 | }) 88 | 89 | }) 90 | 91 | tape('ReduceStreamView works as counter (start and recover)', function (t) { 92 | var value 93 | function get (cb) { 94 | cb(null, value) 95 | } 96 | function set (_value, cb) { 97 | value = _value 98 | cb() 99 | } 100 | var sv = Reduce(function (acc, _) { 101 | return (acc || 0) + 1 102 | }, get, set) 103 | 104 | var sv2 = Reduce(function (acc, _) { 105 | return (acc || 0) + 1 106 | }, get, set) 107 | 108 | sv.listen(function (key) { 109 | latest = key 110 | }) 111 | 112 | sv.init(function (err, since) { 113 | if(err) throw err 114 | t.equal(since, null) 115 | 116 | pull( 117 | Range((since||0)+1, (since||0)+100), toKV(), 118 | sv.write(function (err) { 119 | if(err) throw err 120 | t.equal(sv.api(), 100) 121 | 122 | sv2.init(function (err, since) { 123 | if(err) throw err 124 | t.equal(since, 100, 'since: 100') 125 | pull( 126 | Range((since||0)+1, (since||0)+100), toKV(), 127 | sv2.write(function (err) { 128 | if(err) throw err 129 | t.equal(sv2.api(), 200, 'api(): 200') 130 | t.end() 131 | }) 132 | ) 133 | }) 134 | 135 | }) 136 | ) 137 | }) 138 | 139 | }) 140 | 141 | -------------------------------------------------------------------------------- /update.js: -------------------------------------------------------------------------------- 1 | 2 | // update a view of a single value. 3 | // might be to a file or a single db record. 4 | 5 | function isFunction (f) { 6 | return 'function' === typeof f 7 | } 8 | 9 | 10 | function andor (a, b, and) { 11 | return and ? a&&b : a||b 12 | } 13 | 14 | function UpdateByCountAndOrDelay (opts, write) { 15 | if(isFunction(opts)) write = opts, opts = {} 16 | var delay = opts.delay || 60*1000 //1 minute. 17 | var count = opts.count || 100, value 18 | 19 | var busy = false, c = 0, _c = 0, last = Date.now() 20 | var cb 21 | function drain (_value) { 22 | busy = true 23 | if(!_value) throw new Error('null write') 24 | console.log('write', _value) 25 | write(_value, function (err) { 26 | busy = false 27 | if(!err) { 28 | _c = c; last = Date.now() 29 | } 30 | if(cb) cb(err, _value) 31 | }) 32 | } 33 | function update (_value) { 34 | value = _value 35 | if(!busy && (c++ > _c + count || last + delay < Date.now())) 36 | drain(value) 37 | } 38 | 39 | update.flush = function (_cb) { 40 | cb = _cb 41 | if(!value) cb() 42 | else if(!busy) drain(value) 43 | else { 44 | cb = function (err) { 45 | cb = _cb 46 | if(c !== _c) drain(value) 47 | else _cb() 48 | } 49 | } 50 | } 51 | 52 | return update 53 | 54 | } 55 | 56 | var Stats = require('statistics') 57 | 58 | function UpdateByDutyCycle (duty, write) { 59 | var busy = false, writeTime, stats = Stats() 60 | return function (value) { 61 | if(busy) return 62 | var start = Date.now() 63 | write(value, function (_) { 64 | stats.value(Date.now() - start) 65 | busy = false 66 | }) 67 | } 68 | } 69 | 70 | module.exports = UpdateByCountAndOrDelay 71 | 72 | -------------------------------------------------------------------------------- /validate.js: -------------------------------------------------------------------------------- 1 | 2 | 3 | module.exports = function (sv) { 4 | var ready = false, expected = [] 5 | sv.listen(function (key) { 6 | while(expected[0] <= key) expected.shift() 7 | }) 8 | 9 | validateManifest(vs) 10 | 11 | return { 12 | init: function (cb) { 13 | sv.init(function (err) { 14 | if(!err) ready = true 15 | cb(err) 16 | }) 17 | }, 18 | write: function (cb) { 19 | //if init was not called, write should throw. 20 | if(!ready) { 21 | try { 22 | sv.write(cb) 23 | } catch (err) { 24 | throw err 25 | } 26 | throw new Error('init did not callback yet, so write *must* thow') 27 | } 28 | //wrap to check that each write is eventually emited as latest 29 | return pull( 30 | pull.through(function (e) { 31 | expected.push(e.key) 32 | }), 33 | sv.write(cb) 34 | ) 35 | }, 36 | listen: function (listener) { 37 | sv.listen(listener) 38 | }, 39 | //properties that are only for the validator. 40 | expected: expected, 41 | validate: function () { 42 | if(expected.length) 43 | throw new Error('some expected values not acknowledged:'+JSON.stringify(expected)) 44 | } 45 | } 46 | 47 | } 48 | --------------------------------------------------------------------------------