├── .gitignore
├── .npmignore
├── .travis.yml
├── README.md
├── example.js
├── index.js
├── intro.md
├── package.json
└── test
    └── basic.js

/.gitignore:
--------------------------------------------------------------------------------
node_modules
package-lock.json
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
design.md
todo
test/
example.js
.travis.yml
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: node_js
node_js:
  - '10'
  - '12'
  - '14'
os:
  - windows
  - osx
  - linux
notifications:
  email: false
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# kappa-core

> kappa-core is a minimal peer-to-peer database, based on append-only logs and materialized views.

New to kappa architecture? There is [a short introduction](./intro.md).

## Example

This example sets up an on-disk log store and an in-memory view store. The view
tallies the sum of all of the numbers in the logs, and provides an API for
getting that sum.

```js
var kappa = require('kappa-core')
var view = require('kappa-view')
var memdb = require('memdb')

// Store logs in a directory called "log". Store views in memory.
var core = kappa('./log', { valueEncoding: 'json' })
var store = memdb()

// View definition
var sumview = view(store, function (db) {
  return {
    // Called with a batch of log entries to be processed by the view.
    // No further entries are processed by this view until 'next()' is called.
    map: function (entries, next) {
      db.get('sum', function (err, value) {
        var sum
        if (err && err.notFound) sum = 0
        else if (err) return next(err)
        else sum = value
        entries.forEach(function (entry) {
          if (typeof entry.value === 'number') sum += entry.value
        })
        db.put('sum', sum, next)
      })
    },

    // Whatever is defined in the "api" object is publicly accessible
    api: {
      get: function (core, cb) {
        this.ready(function () { // wait for the view to catch up
          db.get('sum', cb)
        })
      }
    }
  }
})

// the api will be mounted at core.api.sum
core.use('sum', 1, sumview) // name the view 'sum' and consider the 'sumview' logic as version 1

core.writer('default', function (err, writer) {
  writer.append(1, function (err) {
    core.api.sum.get(function (err, value) {
      console.log(value) // 1
    })
  })
})
```

## API

```js
var kappa = require('kappa-core')
```

### var core = kappa(storage, opts)

Create a new kappa-core database.

- `storage` is a [random-access-storage](https://github.com/random-access-storage) function, or a string. If a string is given, [random-access-file](https://github.com/random-access-storage/random-access-file) is used with that string as the filename.
- Valid `opts` include:
  - `valueEncoding`: a string describing how the data will be encoded (e.g. `'json'`, `'utf-8'`, or `'binary'`).
  - `multifeed`: A preconfigured instance of [multifeed](https://github.com/kappa-db/multifeed)

### core.writer(name, cb)

Get or create a local writable log called `name`. If it already exists, it is
returned, otherwise it is created. A writer is an instance of
[hypercore](https://github.com/mafintosh/hypercore).

### var feed = core.feed(key)

Fetch a log / feed by its **public key** (a `Buffer` or hex string).

### var feeds = core.feeds()

An array of all hypercores in the kappa-core instance. Check a feed's `key` to
find the one you want, or check its `writable` / `readable` properties.

Only populated once `core.ready(fn)` has fired.

### core.use(name[, version], view)

Install a view called `name` on the kappa-core instance. A view is an object of
the form

```js
// All are optional except "map"
{

  // Process each batch of entries
  map: function (entries, next) {
    entries.forEach(function (entry) {
      // ...
    })
    next()
  },

  // Your useful functions for users of this view to call
  api: {
    someSyncFunction: function (core) { return ... },
    someAsyncFunction: function (core, cb) { process.nextTick(cb, ...) }
  },

  // Save progress state so processing can resume on later runs of the program.
  // Not required if you're using the "kappa-view" module, which handles this for you.
  fetchState: function (cb) { ... },
  storeState: function (state, cb) { ... },
  clearState: function (cb) { ... },

  // Runs after each batch of entries is done processing and progress is persisted
  indexed: function (entries) { ... },

  // Number of entries to process in a batch
  maxBatch: 100,
}
```

**NOTE**: The kappa-core instance `core` is always passed as the first parameter
in all of the `api` functions you define.

`version` is an integer that represents what version you want to consider the
view logic as. Whenever you change it (generally by incrementing it by 1), the
underlying data generated by the view will be wiped, and the view will be
regenerated again from scratch. This provides a means to change the logic or
data structure of a view over time in a way that is future-compatible.

The `fetchState`, `storeState`, and `clearState` functions are optional: they
tell the view where to store its state information about which log entries have
been indexed thus far. If they are not passed in, state is kept in memory (i.e.
the logs are reprocessed on each fresh run of the program). You can use any
backend you want (like leveldb) to store the `Buffer` object `state`. If you use
a module like [kappa-view](https://github.com/kappa-db/kappa-view), it will
handle state management on your behalf. A minimal sketch of these hooks appears
after the `core.ready` section below.

`indexed` is an optional function to run whenever a new batch of entries has
been indexed and written to storage. It receives an array of entries.

### core.ready(viewNames, cb)

Wait until all views named by `viewNames` are caught up. E.g.

```
// one
core.ready('sum', function () { ... })

// or several
core.ready(['kv', 'refs', 'spatial'], function () { ... })
```

If `viewNames` is `[]` or not included, all views will be waited on.
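As a reference for the optional state hooks described under `core.use`, here is a
minimal sketch that simply keeps the indexer's state `Buffer` in memory. The view
name `'tally'` and the empty `map` body are illustrative only; in a real program
you would persist the state to disk or a leveldb so indexing can resume across
restarts.

```js
var state = null // opaque Buffer handed to us by the indexer

core.use('tally', 1, {
  map: function (entries, next) {
    // ... process entries ...
    next()
  },
  // Hand back whatever was stored last (null on a fresh run).
  fetchState: function (cb) { cb(null, state) },
  // Persist the latest state Buffer.
  storeState: function (s, cb) { state = s; cb() },
  // Called when the view version changes and the view must be rebuilt.
  clearState: function (cb) { state = null; cb() }
})
```

Modules like [kappa-view](https://github.com/kappa-db/kappa-view) implement these
hooks for you on top of the storage you give them.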

### core.pause([viewNames], [cb])

Pause some or all of the views' indexing process. If no `viewNames` are given,
they will all be paused. `cb` is called once the views finish up any entries
they're in the middle of processing and are fully stopped.

### core.resume([viewNames])

Resume some or all paused views. If no `viewNames` are given, all views are
resumed.

### core.replicate(isInitiator, [opts])

Create a duplex replication stream. `opts` are passed in to
[multifeed](https://github.com/kappa-db/multifeed)'s API of the same name.

Ensure that `isInitiator` is set to `true` on one side and `false` on the other.
This is necessary for setting up the encryption mechanism. A short example of
wiring two databases together appears at the end of the "Why?" section below.

### core.on('error', function (err) {})

Event emitted when an error within kappa-core has occurred. This is very
important to listen on, lest things suddenly seem to break and it's not
immediately clear why.

## Install

With [npm](https://npmjs.org/) installed, run

```
$ npm install kappa-core
```

## Useful helper modules

Here are some useful modules that play well with kappa-core for building
materialized views:

- [unordered-materialized-bkd](https://github.com/digidem/unordered-materialized-bkd): spatial index
- [unordered-materialized-kv](https://github.com/digidem/unordered-materialized-kv): key/value store
- [unordered-materialized-backrefs](https://github.com/digidem/unordered-materialized-backrefs): back-references

## Why?

kappa-core is built atop two major building blocks:

1. [hypercore][hypercore], which is used for (append-only) log storage
2. materialized views, which are built by traversing logs in potentially out-of-order sequence

hypercore provides some very useful superpowers:

1. all data is cryptographically associated with a writer's public key
2. partial replication: parts of logs can be selectively sync'd between peers,
instead of all-or-nothing, without loss of cryptographic integrity

Building views in arbitrary sequence is more challenging than when order is
known to be topological or sorted in some way, but confers some benefits:

1. most programs are only interested in the latest values of data; the long tail
of history can be traversed asynchronously at leisure after the tips of the
logs are processed
2. the views are tolerant of partially available data. Many of the modules
listed in the section below depend on *topological completeness*: all entries
referenced by an entry **must** be present for indexes to function. This makes
possible the equivalent of a *shallow clone* (think [git][git-shallow]), where a
small subset of the full dataset can be used and built on without breaking
anything.
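To make the replication flow concrete, here is a minimal sketch of syncing two
kappa-core databases with the `core.replicate` API documented above. It uses
in-memory storage, omits error handling and readiness checks, and the variable
names are illustrative only:

```js
var kappa = require('kappa-core')
var ram = require('random-access-memory')

var alice = kappa(ram, { valueEncoding: 'json' })
var bob = kappa(ram, { valueEncoding: 'json' })

// One side initiates, the other responds; wire the two duplex streams together.
// Any binary stream transport (TCP, a Unix pipe, etc.) works the same way.
var s1 = alice.replicate(true)
var s2 = bob.replicate(false)
s1.pipe(s2).pipe(s1)
```

Once replication finishes, both databases hold the same set of log entries and
their materialized views catch up on the new entries automatically.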

## Acknowledgments

kappa-core is built atop ideas from a huge body of others' work:

- [flumedb][flumedb]
- [secure scuttlebutt](http://scuttlebutt.nz)
- [hypercore][hypercore]
- [hyperdb](https://github.com/mafintosh/hyperdb)
- [forkdb](https://github.com/substack/forkdb)
- [hyperlog](https://github.com/mafintosh/hyperlog)
- a harmonious meshing of ideas with [@substack](https://github.com/substack) in the south of Spain

## Further Reading

- [kappa architecture](http://kappa-architecture.com)

## License

ISC

[hypercore]: https://github.com/mafintosh/hypercore
[flumedb]: https://github.com/flumedb/flumedb
[git-shallow]: https://www.git-scm.com/docs/git-clone#git-clone---depthltdepthgt
[kappa]: http://kappa-architecture.com
--------------------------------------------------------------------------------
/example.js:
--------------------------------------------------------------------------------
var kappa = require('.')

var core = kappa('./log', { valueEncoding: 'json' })

var sum = 0

var sumview = {
  api: {
    get: function (core, cb) {
      this.ready(function () {
        cb(null, sum)
      })
    }
  },
  map: function (msgs, next) {
    msgs.forEach(function (msg) {
      if (typeof msg.value === 'number') sum += msg.value
    })
    next()
  }
}

// the api will be mounted at core.api.sum
core.use('sum', sumview)

core.writer('default', function (err, feed) {
  feed.append(1, function (err) {
    core.api.sum.get(function (err, value) {
      console.log(value) // 1
    })
  })
})
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
var inherits = require('inherits')
var EventEmitter = require('events').EventEmitter
var multifeed = require('multifeed')
var indexer = require('multifeed-index')

module.exports = Kappa

function Kappa (storage, opts) {
  if (!(this instanceof Kappa)) return new Kappa(storage, opts)
  if (!opts) opts = {}

  this._logs = opts.multifeed || multifeed(storage, opts)
  this._indexes = {}

  this.api = {}
}

inherits(Kappa, EventEmitter)

Kappa.prototype.use = function (name, version, view) {
  var self = this
  if (typeof version !== 'number') {
    view = version
    version = undefined
  }
  var idx = indexer(Object.assign({}, view, {
    log: this._logs,
    version: version,
    maxBatch: view.maxBatch || 10,
    batch: view.map
  }))
  idx.on('error', function (err) {
    self.emit('error', err)
  })
  if (view.indexed) idx.on('indexed', view.indexed)
  this._indexes[name] = idx
  this.api[name] = {}
  this.api[name].ready = idx.ready.bind(idx)
  for (var key in view.api) {
    if (typeof view.api[key] === 'function') this.api[name][key] = view.api[key].bind(idx, this)
    else this.api[name][key] = view.api[key]
  }
}

Kappa.prototype.feeds = function () {
  return this._logs.feeds()
}

Kappa.prototype.ready = function (viewNames, cb) {
  if (typeof viewNames === 'function') {
    cb = viewNames
    viewNames = []
  }

  if (typeof viewNames === 'string') viewNames = [viewNames]
  if (viewNames.length === 0) {
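    // No view names given: wait on every installed view.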
    viewNames = Object.keys(this._indexes)
  }

  var pending = viewNames.length + 1
  var self = this
  this._logs.ready(function () {
    for (var i = 0; i < viewNames.length; i++) {
      self._indexes[viewNames[i]].ready(done)
    }
    done()
  })

  function done () {
    if (!--pending) cb()
  }
}

Kappa.prototype.pause = function (viewNames, cb) {
  if (typeof viewNames === 'function') {
    cb = viewNames
    viewNames = []
  }
  cb = cb || noop

  if (!viewNames) viewNames = []
  if (typeof viewNames === 'string') viewNames = [viewNames]
  if (viewNames.length === 0) {
    viewNames = Object.keys(this._indexes)
  }

  var pending = viewNames.length + 1
  var self = this
  this._logs.ready(function () {
    for (var i = 0; i < viewNames.length; i++) {
      self._indexes[viewNames[i]].pause(done)
    }
    done()
  })

  function done () {
    if (!--pending) cb()
  }
}

Kappa.prototype.resume = function (viewNames) {
  if (!viewNames) viewNames = []
  if (typeof viewNames === 'string') viewNames = [viewNames]
  if (viewNames.length === 0) {
    viewNames = Object.keys(this._indexes)
  }

  var self = this
  this._logs.ready(function () {
    for (var i = 0; i < viewNames.length; i++) {
      self._indexes[viewNames[i]].resume()
    }
  })
}

Kappa.prototype.writer = function (name, cb) {
  this._logs.writer(name, cb)
}

Kappa.prototype.feed = function (key) {
  return this._logs.feed(key)
}

Kappa.prototype.replicate = function (isInitiator, opts) {
  return this._logs.replicate(isInitiator, opts)
}

function noop () {}
--------------------------------------------------------------------------------
/intro.md:
--------------------------------------------------------------------------------
# Introduction

kappa-core is built on an abstraction called a [kappa architecture](http://kappa-architecture.com), or
"event sourcing". This differs from the traditional approach to databases, which
is centered on storing the latest value for each key in the database. You might
have a *table* like this:

|id|key|value|
|--|--|--|
|51387|soup|cold|
|82303|sandwich|warm|
|23092|berries|room temp|

If you wanted to change the value of `soup` to `warm`, you would *modify* the
entry with `id=51387` so that the table was now

|id|key|value|
|--|--|--|
|51387|soup|warm|
|82303|sandwich|warm|
|23092|berries|room temp|

This table now, once again, represents the current state of the data.

There are some consequences to this style of data representation:
1. historic data is lost
2. there is exactly one global truth for any datum
3. no verifiable authorship information
4. data is represented in a fixed way (changing this requires "table migrations")

In contrast, kappa architecture centers on a primitive called the "append-only
log" as its single source of truth.

An append-only log is a data structure that can only be added to. Each entry in
a log is addressable by its "sequence number" (starting at 0, then 1, 2, 3,
...). In the case of kappa-core, which uses [hypercore](https://github.com/hypercore-protocol/hypercore) underneath,
each log is also identified by a cryptographic *public key*, which allows each
log entry to be digitally signed with that log's *private key*, certifying that
each entry in the log was indeed authored by the same person or device. A
single kappa-core database can have one, ten, or hundreds of append-only logs
comprising it.

kappa-core still uses tables like the above, though. However, instead of being
the source of truth, these tables are generated (or *materialized*) from the
log data, providing a *view* of the log data in a new or optimized context.
These are called *materialized views*.

The twin concepts of *append-only logs* and *materialized views* are the key
concepts of kappa-core. Any kappa-core database does only a few things:

1. define various materialized views that it finds useful
2. write data to append-only logs
3. query those views to retrieve useful information

Let's look at an example of how the traditional table from the beginning of
this section could be represented as a kappa architecture. The three initial
rows would begin as log entries first:

```
[
  {
    id: 51387,
    key: 'soup',
    value: 'cold'
  },
  {
    id: 82303,
    key: 'sandwich',
    value: 'warm'
  },
  {
    id: 23092,
    key: 'berries',
    value: 'room temp'
  }
]
```

These might be written to one log, or perhaps spread across several. They all
get fed into materialized views in a nondeterministic order anyway, so it
doesn't matter.

To produce a look-up table like before, a view might be defined like this:

```
when new log entry E:
  table.put(E.key, E.value)
```

This would map each `key` from the full set of log entries to its `value`,
producing this table:

|key|value|
|--|--|
|soup|cold|
|sandwich|warm|
|berries|room temp|

Notice `id` isn't present. We didn't need it, so we didn't bother writing it to
the view. It's still stored in each log entry it came from though.

Now let's say an entry like `{ id: 51387, key: 'soup', value: 'warm' }` is
written to a log. The view logic above dictates that the `key` is mapped to the
`value` for this view, so the following table would be produced:

|key|value|
|--|--|
|soup|warm|
|sandwich|warm|
|berries|room temp|

Like the traditional database, the table is mutated in-place to produce the new
current state. The difference is that this table was *derived* from immutable
log data, instead of being the truth source itself.

This is all very useful:
1. log entries are way easier to replicate over a network or USB keys than tables
2. the log entries are immutable, so they can be cached indefinitely
3. the log entries are digitally signed, so their authenticity can be trusted
4. views are derived, so they can be regenerated

\#4 is really powerful and worth examination: *views can be regenerated*. In
kappa-core, views are *versioned*: the view we just generated was version 1,
and was defined by the logic

```
when new log entry E:
  table.put(E.key, E.value)
```

What if we wanted to change this view at some point, to instead map the entry's
`id` to its `value`? Maybe like this:

```
when new log entry E:
  table.put(E.id, E.value)
```

With kappa-core, this would mean bumping the view's *version* to `2`.
kappa-core will purge the existing table, and regenerate it from scratch by
processing all of the entries in all of the logs all over again. This makes
views cheap, and also means *no table migrations*! Your data structures can
evolve as your program evolves, and peers won't need to worry about migrating
to new formats.

Lastly, a kappa-core database is able to *replicate* itself to another
kappa-core database. The `replicate` API (documented in the README) returns a
Node `Duplex` stream. This stream can operate over any stream-compatible
transport medium, such as TCP, UTP, Bluetooth, a Unix pipe, or even audio waves
sent over the air! When two kappa-core databases replicate, they exchange the
logs and the entries in the logs, so that both sides end up with the same full
set of log entries. This will trigger your database's materialized views to
process these new entries and update themselves to reflect the latest state.

Because this is all built on [hypercore](https://github.com/mafintosh/hypercore),
replication can be done over an encrypted channel.

Thanks for reading! You can also try the [kappa-core
workshop](https://github.com/kappa-db/workshop) to use kappa-core yourself, or
get support and/or chat about development on

- IRC: #kappa-core on Freenode
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "kappa-core",
  "description": "Minimal peer-to-peer database, based on kappa architecture.",
  "author": "Stephen Whitmore",
  "version": "7.0.0",
  "repository": {
    "url": "git://github.com/noffle/kappa-core.git"
  },
  "homepage": "https://github.com/noffle/kappa-core",
  "bugs": "https://github.com/noffle/kappa-core/issues",
  "main": "index.js",
  "scripts": {
    "test": "tape test/*.js",
    "lint": "standard"
  },
  "keywords": [],
  "dependencies": {
    "inherits": "^2.0.4",
    "multifeed": "^6.0.0",
    "multifeed-index": "^3.3.2"
  },
  "devDependencies": {
    "random-access-memory": "^3.1.1",
    "standard": "~12.0.1",
    "tape": "^4.11.0"
  },
  "license": "ISC"
}
--------------------------------------------------------------------------------
/test/basic.js:
--------------------------------------------------------------------------------
var test = require('tape')
var ram = require('random-access-memory')
var kappa = require('..')

test('simple view', function (t) {
  var core = kappa(ram, { valueEncoding: 'json' })

  var sum = 0

  var sumview = {
    api: {
      get: function (core, cb) {
        this.ready(function () {
          cb(null, sum)
        })
      }
    },
    map: function (msgs, next) {
      msgs.forEach(function (msg) {
        if (typeof msg.value === 'number') sum += msg.value
      })
      next()
    }
  }

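  // Install the view under the name 'sum'; its api will be exposed at core.api.sum.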
  core.use('sum', sumview)

  core.writer('default', function (err, feed) {
    feed.append(1, function (err) {
      core.api.sum.get(function (err, value) {
        t.equals(1, value)
        t.end()
      })
    })
  })
})

test('REGRESSION: views still work while indexing is paused', function (t) {
  t.plan(1)

  var core = kappa(ram, { valueEncoding: 'json' })

  var sum = 0

  var sumview = {
    api: {
      get: function (core, cb) {
        this.ready(function () {
          cb(null, sum)
        })
      }
    },
    map: function (msgs, next) {
      msgs.forEach(function (msg) {
        if (typeof msg.value === 'number') sum += msg.value
      })
      next()
    }
  }

  core.use('sum', sumview)

  core.writer('default', function (err, feed) {
    feed.append(1, function (err) {
      core.pause(function () {
        core.api.sum.get(function (err, value) {
          t.equals(1, value)
        })
      })
    })
  })
})
--------------------------------------------------------------------------------