├── examples
│   ├── importer
│   │   ├── fastify.js
│   │   ├── .gitignore
│   │   ├── webpage
│   │   │   ├── html-to-markdown.js
│   │   │   └── index.js
│   │   ├── package.json
│   │   ├── views
│   │   │   └── sonar.js
│   │   ├── serve.js
│   │   ├── importer.js
│   │   └── cli.js
│   ├── movies
│   │   ├── package.json
│   │   └── cli.js
│   ├── basic.js
│   └── dateview.js
├── .gitignore
├── messages.proto
├── lib
│   ├── constants.js
│   ├── util.js
│   ├── hyperdrive-index.js
│   ├── multidrive-index.js
│   ├── kappa.js
│   ├── messages.js
│   └── multidrive.js
├── test
│   ├── lib
│   │   └── util.js
│   ├── schema.js
│   ├── views.js
│   ├── conflict.js
│   ├── records.js
│   └── replication.js
├── package.json
├── views
│   ├── content.js
│   ├── entities.js
│   └── schema-aware.js
├── README.md
└── index.js

/examples/importer/fastify.js:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | package-lock.json
3 | .data*
4 | examples/movies/movies.txt*
5 | _old
--------------------------------------------------------------------------------
/examples/importer/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | package-lock.json
3 | .tantivy
4 | .data*
5 | .copy*
6 | _old
--------------------------------------------------------------------------------
/messages.proto:
--------------------------------------------------------------------------------
1 | message State {
2 |   message FeedState {
3 |     required string key = 1;
4 |     required bytes state = 2;
5 |   }
6 |   repeated FeedState states = 1;
7 | }
8 | 
--------------------------------------------------------------------------------
/lib/constants.js:
--------------------------------------------------------------------------------
1 | exports.P_DATA = '.data'
2 | exports.P_SCHEMA = '.schema'
3 | exports.P_SOURCES = '.sources'
4 | 
5 | exports.CHAR_END = '\uffff'
6 | exports.CHAR_SPLIT = '\u0000'
7 | exports.CHAR_START = '\u0001'
--------------------------------------------------------------------------------
/examples/movies/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "movies",
3 |   "version": "1.0.0",
4 |   "description": "",
5 |   "main": "cli.js",
6 |   "scripts": {
7 |     "test": "echo \"Error: no test specified\" && exit 1"
8 |   },
9 |   "author": "",
10 |   "license": "GPL-3.0",
11 |   "dependencies": {
12 |     "nanobench": "^2.1.1"
13 |   }
14 | }
15 | 
--------------------------------------------------------------------------------
/lib/util.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   promiseCallback
3 | }
4 | 
5 | // Returns a [callback, promise] pair: if a callback is passed in it is used
6 | // as-is (promise is undefined), otherwise the returned callback settles the
7 | // returned promise.
8 | function promiseCallback (cb) {
9 |   if (cb) return [cb, undefined]
10 |   let _resolve, _reject
11 |   const promise = new Promise((resolve, reject) => {
12 |     _resolve = resolve
13 |     _reject = reject
14 |   })
15 |   cb = (err, result) => {
16 |     if (err) return _reject(err)
17 |     _resolve(result)
18 |   }
19 |   return [cb, promise]
20 | }
--------------------------------------------------------------------------------
/examples/importer/webpage/html-to-markdown.js:
--------------------------------------------------------------------------------
1 | var unified = require('unified')
2 | // var createStream = require('unified-stream')
3 | var parse = require('rehype-parse')
4 | var rehype2remark = require('rehype-remark')
5 | var stringify = require('remark-stringify')
6 | 
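// Usage sketch — e.g. htmlToMarkdown('<h1>Hi</h1><p>there</p>') should
// return markdown along the lines of '# Hi\n\nthere\n' (exact output
// depends on remark-stringify defaults).
7 | 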
module.exports = function htmlToMarkdown (html) { 8 | const processor = unified() 9 | .use(parse) 10 | .use(rehype2remark) 11 | .use(stringify) 12 | return processor.processSync(html).toString() 13 | } 14 | -------------------------------------------------------------------------------- /test/lib/util.js: -------------------------------------------------------------------------------- 1 | module.exports = { stepper, once } 2 | 3 | function stepper (cb) { 4 | const steps = [] 5 | return function step (name, fn) { 6 | if (!fn) return step(null, name) 7 | if (!name) name = steps.length 8 | steps.push({ fn, name }) 9 | if (steps.length === 1) process.nextTick(run) 10 | } 11 | function run (lastResult) { 12 | const { fn, name } = steps.shift() 13 | console.log(`> step ${name}`) 14 | fn(done, lastResult) 15 | } 16 | function done (err, result) { 17 | if (err) return cb(err) 18 | if (steps.length) process.nextTick(run, result) 19 | else cb(null, result) 20 | } 21 | } 22 | 23 | function once (fn) { 24 | let didrun = false 25 | return (...args) => { 26 | if (didrun) return 27 | didrun = true 28 | return fn(...args) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hyper-content-db", 3 | "version": "0.1.6", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "tape test/*.js" 8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "license": "GPL-3.0", 12 | "dependencies": { 13 | "collect-stream": "^1.2.1", 14 | "corestore": "^2.0.0", 15 | "duplexify": "^4.1.1", 16 | "end-of-stream": "^1.4.1", 17 | "hypercore-crypto": "^1.0.0", 18 | "hyperdrive": "^10.3", 19 | "hyperdrive-schemas": "^0.12", 20 | "hypertrie-index": "^1.0.5", 21 | "inspect-custom-symbol": "^1.1.0", 22 | "kappa-view": "^3.0.0", 23 | "lodash": "^4.17.15", 24 | "lru-cache": "^5.1.1", 25 | "memdb": "^1.3.1", 26 | "mutexify": "^1.2.0", 27 | "pump": "^3.0.0", 28 | "pumpify": "^2.0.0", 29 | "random-access-file": "^2.1.3", 30 | "shortid": "^2.2.14", 31 | "standard": "^14.1.0", 32 | "subleveldown": "^4.1.1", 33 | "through2": "^3.0.1", 34 | "thunky": "^1.0.3" 35 | }, 36 | "devDependencies": { 37 | "corestore-swarm-networking": "^1.0.4", 38 | "random-access-memory": "^3.1.1", 39 | "tape": "^4.11.0" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/importer/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@archipel/importer", 3 | "version": "1.0.0", 4 | "description": "Importer example for content-store", 5 | "main": "importer.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "GPL-3.0", 11 | "dependencies": { 12 | "corestore-swarm-networking": "^1.0.4", 13 | "express": "^4.17.1", 14 | "freeze-dry": "github:Frando/freeze-dry#node-compat-dev", 15 | "jsdom": "^11.11.0", 16 | "ky": "^0.9.0", 17 | "ky-universal": "^0.1.0", 18 | "level": "^5.0.1", 19 | "metascraper": "^4.10.1", 20 | "metascraper-author": "^4.10.1", 21 | "metascraper-clearbit-logo": "^4.10.1", 22 | "metascraper-date": "^4.10.1", 23 | "metascraper-description": "^4.10.1", 24 | "metascraper-image": "^4.10.1", 25 | "metascraper-logo": "^4.10.1", 26 | "metascraper-publisher": "^4.10.1", 27 | "metascraper-title": "^4.10.1", 28 | "metascraper-url": "^4.10.1", 29 | "minimist": "^1.2.0", 30 | "mirror-folder": "^3.0.0", 31 
| "mkdirp": "^0.5.1", 32 | "pretty-bytes": "^5.2.0", 33 | "readability": "github:mozilla/readability#master", 34 | "rehype-parse": "6.0.0", 35 | "rehype-remark": "5.0.1", 36 | "remark-stringify": "6.0.4", 37 | "sonar": "github:Frando/sonar#master", 38 | "unified": "7.1.0" 39 | }, 40 | "devDependencies": { 41 | "@hyperswarm/replicator": "^1.1.0" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /views/content.js: -------------------------------------------------------------------------------- 1 | module.exports = contentView 2 | 3 | function contentView (opts) { 4 | const view = { 5 | ...opts, 6 | prefix: '.data/', 7 | transformNodes: true, 8 | readFile: true, 9 | map (msgs, next) { 10 | // console.log('contentView MSGS', msgs) 11 | // let ops = [] 12 | // let pending = 0 13 | msgs = msgs.map(msg => { 14 | if (msg.value.isDirectory()) return 15 | const id = msg.keySplit.pop().replace(/\.json$/, '') 16 | const schema = msg.keySplit.slice(1).join('/') 17 | let value 18 | try { 19 | value = JSON.parse(msg.fileContent.toString()) 20 | } catch (err) { 21 | // TODO: What to do with this error? 22 | value = msg.fileContent 23 | } 24 | 25 | msg = { 26 | id, 27 | schema, 28 | delete: msg.delete, 29 | stat: msg.value, 30 | value, 31 | source: msg.source.toString('hex'), 32 | seq: msg.seq 33 | } 34 | 35 | return msg 36 | }).filter(m => m) 37 | 38 | if (msgs.length) opts.map(msgs, finish) 39 | else finish() 40 | 41 | function finish (res) { 42 | next() 43 | // if (res && Array.isArray(res)) { 44 | // ops.push.apply(ops, res) 45 | // } else if (typeof res === 'object') { 46 | // ops.push(res) 47 | // } 48 | // if (--pending <= 0) { 49 | // next(null, ops) 50 | // } 51 | } 52 | } 53 | } 54 | return view 55 | } 56 | -------------------------------------------------------------------------------- /examples/basic.js: -------------------------------------------------------------------------------- 1 | const hypercontent = require('..') 2 | const ram = require('random-access-memory') 3 | const db = hypercontent(ram) 4 | 5 | // Let's put a basic schema first. 6 | db.putSchema('event', { 7 | properties: { 8 | title: { 9 | type: 'string', 10 | index: true 11 | }, 12 | date: { 13 | type: 'date', 14 | index: true 15 | } 16 | } 17 | }) 18 | 19 | // Now add some records. 20 | db.batch([ 21 | { schema: 'event', value: { title: 'Workshop', date: new Date(2019, 10, 10) } }, 22 | { schema: 'event', value: { title: 'Reading', date: new Date(2019, 8, 2) } } 23 | ]) 24 | 25 | // When all indexing is done, query and log results. 26 | db.on('indexed-all', query) 27 | 28 | db.ready(() => { 29 | // Create a second database. Set the first database as primary key. 30 | // This will make db2 a "fork" or "extension" of the first. 31 | const db2 = hypercontent(ram, db.key) 32 | db2.ready(() => { 33 | // Add the second database as a source for the first. 34 | db.addSource(db2.localKey) 35 | 36 | // Connect the two databases. 37 | replicate(db, db2) 38 | 39 | // Add content to the second database. 
40 |     db2.batch([
41 |       { schema: 'event', value: { title: 'Dinner', date: new Date(2019, 9, 22) } }
42 |     ])
43 |   })
44 | })
45 | 
46 | function query () {
47 |   const eventsSortedByDate = db.api.indexes.query({ schema: 'event', prop: 'date' }).pipe(db.createGetStream())
48 |   eventsSortedByDate.on('data', row => console.log(row.value.date, row.value.title))
49 | }
50 | 
51 | function replicate (a, b) {
52 |   const stream = a.replicate()
53 |   const stream2 = b.replicate()
54 |   stream.pipe(stream2).pipe(stream)
55 | }
--------------------------------------------------------------------------------
/examples/importer/views/sonar.js:
--------------------------------------------------------------------------------
1 | const Sonar = require('sonar')
2 | const sonarSchema = require('sonar/node/fixtures').schema
3 | 
4 | module.exports = sonarView
5 | 
6 | function sonarView (db, cstore, opts) {
7 |   const catalog = new Sonar(opts.storage || '.tantivy')
8 | 
9 |   const getIndex = catalog.openOrCreate('default', sonarSchema)
10 | 
11 |   return {
12 |     batch: true,
13 |     map (msgs, next) {
14 |       _map(msgs)
15 |         .then(res => next(null, res))
16 |         .catch(err => next(err))
17 |     },
18 |     api: {
19 |       query (kcore, string, cb) {
20 |         _query(string)
21 |           .then(res => cb(null, res))
22 |           .catch(err => cb(err))
23 |       }
24 |     }
25 |   }
26 | 
27 |   async function _query (string) {
28 |     const index = await getIndex
29 |     const results = await index.query(string)
30 |     return results
31 |   }
32 | 
33 |   async function _map (msgs) {
34 |     const index = await getIndex
35 |     const docs = []
36 |     console.log('sonar map', msgs.map(msg => ({ id: msg.id, schema: msg.schema, seq: msg.seq })))
37 |     msgs.forEach(msg => {
38 |       let { schema, id, value } = msg
39 |       schema = schema.split('/')[1]
40 |       const doc = { id }
41 |       if (schema === 'metascrape') {
42 |         doc.title = value.title || ''
43 |         doc.body = value.description || ''
44 |       }
45 |       if (schema === 'readable') {
46 |         doc.body = value.content || ''
47 |         doc.title = value.title || ''
48 |       }
49 |       console.log('DOC', doc)
50 | 
51 |       if (doc.body || doc.title) {
52 |         docs.push(doc)
53 |       }
54 |     })
55 |     console.log('AFTER MAP', docs)
56 |     // console.log('add docs', docs.map(d => ({
57 |     //   title: d.title.length + ' > ' + d.title.substring(0, 10),
58 |     //   body: d.body.length + ' > ' + d.body.substring(0, 10)
59 |     // })))
60 |     try {
61 |       await index.addDocuments(docs)
62 |     } catch (e) {
63 |       console.log('ERROR', e)
64 |     }
65 |     console.log('added')
66 |   }
67 | }
--------------------------------------------------------------------------------
/examples/dateview.js:
--------------------------------------------------------------------------------
1 | const through = require('through2')
2 | const barco = require('..')
3 | const db = barco('/tmp/testdb')
4 | 
5 | function dateView (lvl, db) {
6 |   return {
7 |     map (records, next) {
8 |       let ops = []
9 |       console.log('MAP', records.map(r => ({ id: r.id, title: r.value.title })))
10 |       for (let record of records) {
11 |         if (!record.value.date) continue
12 |         ops.push({
13 |           type: 'put',
14 |           key: `${record.value.date}!${record.id}!${record.schema}!${record.source}`,
15 |           value: record.seq
16 |         })
17 |       }
18 |       // console.log('ops', ops)
19 |       lvl.batch(ops, next)
20 |     },
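    // Each record with a date becomes one index entry; e.g. (hypothetical
    // values) { id: 'a1', schema: 'event', source: 'f00d', seq: 4,
    // value: { date: '2019-12-02T00:00:00.000Z' } } is stored as
    //   '2019-12-02T00:00:00.000Z!a1!event!f00d' → 4
    // Dates arrive as ISO strings (records are JSON-serialized on write), so
    // lexicographic key order is chronological and range() below can scan it.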
21 |     api: {
22 |       range (kcore, from, to) {
23 |         return lvl.createReadStream({
24 |           gte: from.toJSON(),
25 |           lte: to.toJSON()
26 |         }).pipe(through.obj(function (row, enc, next) {
27 |           // console.log('row', row)
28 |           const [date, id, schema, source] = row.key.split('!')
29 |           this.push({ id, schema, source })
30 |           next()
31 |         }))
32 |       }
33 |     }
34 |   }
35 | }
36 | 
37 | db.useRecordView('dates', dateView)
38 | 
39 | const records = [
40 |   { title: 'Party', date: new Date(2019, 11, 2) },
41 |   { title: 'Demonstration', date: new Date(2020, 1, 10) },
42 |   { title: 'Reading circle', date: new Date(2019, 8, 7) },
43 |   { title: 'Workshop', date: new Date(2019, 12, 5) }
44 | ]
45 | 
46 | const ops = records.map(value => ({ op: 'put', schema: 'event', value }))
47 | 
48 | db.batch(ops, (err, ids) => {
49 |   if (err) return console.error(err)
50 |   else console.log('put', ids)
51 | })
52 | 
53 | db.on('indexed-all', () => {
54 |   const queryStream = db.api.dates.range(
55 |     new Date(2019, 9),
56 |     new Date(2019, 12, 31)
57 |   )
58 |   // queryStream.on('data', console.log)
59 | 
60 |   const getStream = db.createGetStream()
61 |   const resultStream = queryStream.pipe(getStream)
62 |   resultStream.on('data', record => console.log(record.value.title))
63 | })
64 | 
65 | setTimeout(() => {}, 1000)
--------------------------------------------------------------------------------
/test/schema.js:
--------------------------------------------------------------------------------
1 | const tape = require('tape')
2 | const cstore = require('..')
3 | const ram = require('random-access-memory')
4 | const view = require('../views/schema-aware.js')
5 | 
6 | tape('schema-aware view', t => {
7 |   const store1 = cstore(ram)
8 | 
9 |   const schema = 'post'
10 | 
11 |   store1.putSchema(schema, {
12 |     properties: {
13 |       title: {
14 |         type: 'string',
15 |         index: true
16 |       },
17 |       date: {
18 |         type: 'string',
19 |         index: true
20 |       }
21 |     }
22 |   })
23 | 
24 |   store1.useRecordView('idx', view)
25 | 
26 |   let rows = [
27 |     { title: 'abcd', date: '2019-11' },
28 |     { title: 'abc', date: '2019-12' },
29 |     { title: 'old', date: '2018-07' },
30 |     { title: 'future', date: '2020-01' }
31 |   ]
32 |   let batch = rows.map(value => ({
33 |     op: 'put',
34 |     id: cstore.id(),
35 |     value,
36 |     schema
37 |   }))
38 | 
39 |   let _run = false
40 | 
41 |   store1.batch(batch, (err, ids) => {
42 |     t.error(err, 'batch')
43 |     store1.on('indexed', (name) => {
44 |       if (name === 'idx') query()
45 |     })
46 |   })
47 | 
48 |   function query () {
49 |     if (_run) return
50 |     _run = true
51 | 
52 |     const queries = [
53 |       {
54 |         name: 'date all',
55 |         q: { schema, prop: 'date' },
56 |         v: ['2018-07', '2019-11', '2019-12', '2020-01']
57 |       },
58 |       {
59 |         name: 'title all',
60 |         q: { schema, prop: 'title' },
61 |         v: ['abc', 'abcd', 'future', 'old']
62 |       },
63 |       {
64 |         name: 'title gte lt',
65 |         q: { schema, prop: 'title', gt: 'abcd', lt: 'h' },
66 |         v: ['abcd', 'future']
67 |       }
68 |     ]
69 |     testQueries(queries, (err) => {
70 |       t.error(err)
71 |       t.end()
72 |     })
73 |   }
74 | 
75 |   function testQueries (queries, cb) {
76 |     testQuery(queries.shift())
77 | 
78 |     function testQuery (query) {
79 |       const { name, q, v } = query
80 |       let rs = store1.api.idx.query(q)
81 |       let rows = []
82 |       rs.on('data', d => rows.push(d))
83 |       rs.on('error', err => cb(err))
84 |       rs.on('end', () => {
85 |         t.deepEqual(
86 |           rows.map(r => r.value),
87 |           v,
88 |           name + ': results match'
89 |         )
90 |         if (queries.length) {
91 |           process.nextTick(testQuery, queries.shift())
92 |         } else cb()
93 |       })
94 |     }
95 |   }
96 | })
--------------------------------------------------------------------------------
/views/entities.js:
--------------------------------------------------------------------------------
1 | const { CHAR_END } = require('../lib/constants')
2 | const through = require('through2')
3 | 
4 | module.exports = entityView
5 | 
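// Key layout written by map() below (one entry per record per index):
//   'is|<id>|<schema>|<source>' → '<source>@<seq>'   (lookups by id)
//   'si|<schema>|<id>|<source>' → '<source>@<seq>'   (lookups by schema)
6 | 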
function entityView (db) { 7 | // const idSchema = sub(db, 'is') 8 | // const schemaId = sub(db, 'si') 9 | return { 10 | map (msgs, next) { 11 | // console.log('ldb MSGS', msgs) 12 | const ops = [] 13 | msgs.forEach(msg => { 14 | const { id, schema, seq, source } = msg 15 | const value = `${source}@${seq}` 16 | const type = 'put' 17 | ops.push({ 18 | type, 19 | key: `is|${id}|${schema}|${source}`, 20 | value 21 | }) 22 | ops.push({ 23 | type, 24 | key: `si|${schema}|${id}|${source}`, 25 | value 26 | }) 27 | }) 28 | // console.log('ldb BATCH', ops) 29 | db.batch(ops, next) 30 | }, 31 | api: { 32 | all (kcore) { 33 | const rs = db.createReadStream({ 34 | gt: 'is|', 35 | lt: 'is|' + CHAR_END 36 | }) 37 | 38 | return rs.pipe(through.obj(function (row, enc, next) { 39 | // console.log('ldb GET', row) 40 | let [id, schema] = row.key.split('|').slice(1) 41 | let [source, seq] = row.value.split('@') 42 | this.push({ id, schema, source, seq }) 43 | next() 44 | })) 45 | }, 46 | get (kcore, opts) { 47 | const { schema, id } = opts 48 | }, 49 | allWithSchema (kcore, opts) { 50 | const schema = opts.schema 51 | let rs = db.createReadStream({ 52 | gt: `si|${schema}|`, 53 | lt: `si|${schema}|` + CHAR_END 54 | }) 55 | return rs.pipe(through.obj(function (row, enc, next) { 56 | let [schema, id] = row.key.split('|').slice(1) 57 | let [source, seq] = row.value.split('@') 58 | this.push({ id, schema, source, seq }) 59 | next() 60 | })) 61 | }, 62 | allWithId (kcore, opts) { 63 | const id = opts.id 64 | let rs = db.createReadStream({ 65 | gt: `is|${id}|`, 66 | lt: `is|${id}|` + CHAR_END 67 | }) 68 | return rs.pipe(through.obj(function (row, enc, next) { 69 | let [id, schema] = row.key.split('|').slice(1) 70 | let [source, seq] = row.value.split('@') 71 | this.push({ id, schema, source, seq }) 72 | next() 73 | })) 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /lib/hyperdrive-index.js: -------------------------------------------------------------------------------- 1 | const hypertrieIndex = require('hypertrie-index') 2 | const inspect = require('inspect-custom-symbol') 3 | const StatEncoder = require('hyperdrive-schemas/lib/stat') 4 | 5 | module.exports = hyperdriveIndex 6 | 7 | function hyperdriveIndex (drive, opts) { 8 | const htiOpts = { 9 | map, 10 | batchSize: opts.batchSize, 11 | prefix: opts.prefix, 12 | storeState: opts.storeState, 13 | fetchState: opts.fetchState, 14 | // This should not really be needed, but the hypertrie 15 | // logic does not directly comply to the interface expected 16 | // by the codecs module. TODO: PR to hyperdrive. 17 | valueEncoding: { 18 | encode: stat => stat.encode(), 19 | decode: StatEncoder.decode 20 | }, 21 | transformNode: true 22 | } 23 | 24 | const index = hypertrieIndex(drive._db.trie, htiOpts) 25 | 26 | return index 27 | 28 | function map (msgs, done) { 29 | asyncFilterMap({ 30 | data: msgs, 31 | filter: msg => msg.value.isFile(), 32 | map: _map, 33 | done: _done 34 | }) 35 | 36 | function _map (msg, next) { 37 | msg.source = drive.key 38 | overrideInspect(msg) 39 | 40 | if (!opts.readFile) return next(null, msg) 41 | 42 | // const checkout = drive.checkout(msg.seq) 43 | drive.readFile(msg.key, (err, data) => { 44 | msg.fileContent = data 45 | next(err, msg) 46 | }) 47 | } 48 | 49 | function _done (err, msgs) { 50 | // todo: handle err better? 
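      // (For now the error — an array of errors collected by asyncFilterMap —
      // is surfaced on the index emitter, and the messages that did map are
      // still passed on.)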
51 | if (err) index.emit('error', err) 52 | if (msgs.length) opts.map(msgs, done) 53 | else done() 54 | } 55 | } 56 | } 57 | 58 | function asyncFilterMap (opts) { 59 | const { data, filter, map, done } = opts 60 | 61 | let pending = data.length 62 | let nextMsgs = [] 63 | let errors = [] 64 | 65 | if (!pending) return done(null, data) 66 | data.forEach((msg, i) => { 67 | if (!filter(msg)) return finish(null, msg) 68 | map(msg, finish) 69 | }) 70 | 71 | function finish (err, msg) { 72 | if (err) errors.push(err) 73 | if (typeof msg !== 'undefined') nextMsgs.push(msg) 74 | if (--pending === 0) done(errors.length ? errors : null, nextMsgs) 75 | } 76 | } 77 | 78 | function overrideInspect (msg) { 79 | const keys = ['seq', 'key', 'value', 'source', 'fileContent'] 80 | msg[inspect] = function (depth, opts) { 81 | return keys.reduce((agg, key) => { 82 | agg[key] = msg[key] 83 | return agg 84 | }, {}) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /test/views.js: -------------------------------------------------------------------------------- 1 | const tape = require('tape') 2 | const cstore = require('..') 3 | const ram = require('random-access-memory') 4 | const { stepper } = require('./lib/util') 5 | 6 | // function collect (stream, cb) { 7 | // let buf = [] 8 | // stream.on('data', d => buf.push(d)) 9 | // stream.on('end', () => cb(null, buf)) 10 | // stream.on('error', err => cb(err)) 11 | // } 12 | 13 | function collectById (stream, cb) { 14 | let data = {} 15 | stream.on('data', row => { 16 | let { id } = row 17 | data[id] = data[id] || [] 18 | data[id].push(row) 19 | }) 20 | stream.on('end', () => cb(null, data)) 21 | stream.on('error', err => cb(err)) 22 | } 23 | 24 | tape('entities', t => { 25 | const store1 = cstore(ram) 26 | const schema = 'arso.xyz/Entity' 27 | const schema2 = 'arso.xyz/Resource' 28 | 29 | let ids = [cstore.id(), cstore.id(), cstore.id()] 30 | 31 | const records = [ 32 | { op: 'put', id: ids[0], schema, value: { title: 'hello' } }, 33 | { op: 'put', id: ids[1], schema, value: { title: 'hello' } }, 34 | { op: 'put', id: ids[1], schema: schema2, value: { link: 'foo' } }, 35 | { op: 'put', id: ids[2], schema, value: { title: 'moon' } } 36 | ] 37 | 38 | store1.batch(records, (err, ids) => { 39 | t.error(err, 'batch succeeded') 40 | }) 41 | 42 | const step = stepper(err => { 43 | t.error(err) 44 | t.end() 45 | }) 46 | 47 | store1.on('indexed-all', () => { 48 | const ev = store1.api.entities 49 | step((done) => { 50 | const rs = ev.all() 51 | collectById(rs, (err, rows) => { 52 | t.error(err) 53 | t.equal(Object.keys(rows).length, 3, 'row count matches') 54 | t.equal(rows[ids[1]].length, 2, 'two records for two schemas') 55 | t.deepEqual( 56 | rows[ids[1]].map(r => r.schema).sort(), 57 | [schema, schema2], 58 | 'schemas match' 59 | ) 60 | done() 61 | }) 62 | }) 63 | 64 | step((done) => { 65 | const rs = ev.allWithSchema({ schema: schema2 }) 66 | collectById(rs, (err, rows) => { 67 | // console.log(rows) 68 | t.error(err) 69 | t.equal(Object.keys(rows).length, 1, 'count for schema2 matches') 70 | t.equal(rows[ids[1]][0].schema, schema2, 'schema matches') 71 | done() 72 | }) 73 | }) 74 | 75 | step((done) => { 76 | const rs = ev.allWithSchema({ schema }) 77 | collectById(rs, (err, rows) => { 78 | t.error(err) 79 | t.equal(Object.keys(rows).length, 3, 'count for schema1 matches') 80 | t.deepEqual(Object.keys(rows).sort(), ids.sort(), 'ids match') 81 | done() 82 | }) 83 | }) 84 | }) 85 | }) 86 | 
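// Sketch (not asserted above): ev.allWithId({ id: ids[1] }) streams the rows
// for a single id, so collectById(ev.allWithId({ id: ids[1] }), cb) should
// yield the same two records checked in the first step.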
-------------------------------------------------------------------------------- /test/conflict.js: -------------------------------------------------------------------------------- 1 | const tape = require('tape') 2 | const cstore = require('..') 3 | const ram = require('random-access-memory') 4 | const collect = require('collect-stream') 5 | const L = require('lodash') 6 | 7 | const { stepper, once } = require('./lib/util') 8 | 9 | tape('conflict', t => { 10 | const step = stepper(err => { 11 | t.error(err) 12 | t.end() 13 | }) 14 | 15 | const schema = 'event' 16 | 17 | let store1, store2 18 | 19 | let ids 20 | 21 | step(cb => { 22 | store1 = cstore(ram) 23 | ids = [store1.id(), store1.id()] 24 | store1.ready(cb) 25 | }) 26 | step(cb => { 27 | store2 = cstore(ram, store1.key) 28 | store2.ready(cb) 29 | }) 30 | // step(cb => { 31 | // let opts = { live: true } 32 | // let stream = store1.replicate(opts) 33 | // stream.pipe(store2.replicate(opts)).pipe(store1) 34 | // }) 35 | step(cb => { 36 | store1.batch([ 37 | { schema, id: ids[0], value: { title: 'first!', slug: 'first' } }, 38 | { schema, id: ids[1], value: { title: 'second!', slug: 'second' } } 39 | ], cb) 40 | }) 41 | step((cb, ids1) => { 42 | t.equal(ids1.length, 2, 'ids1 len 2') 43 | store2.batch([ 44 | { schema, id: ids[0], value: { title: 'other first', slug: 'first' } }, 45 | { schema, id: ids[1], value: { title: 'other second', slug: 'second' } }, 46 | { schema, value: { title: 'third', slug: 'third' } } 47 | ], (err, ids2) => cb(err, [ids1, ids2])) 48 | }) 49 | step('replicate', (cb, [ids1, ids2]) => { 50 | t.equal(ids2.length, 3, 'ids2 len 3') 51 | // console.log({ ids1, ids2 }) 52 | // t.deepEqual(ids2.slice(0, 2), ids1) 53 | replicate(store1, store2, cb) 54 | }) 55 | step('add source', cb => { 56 | store1.addSource(store2.localKey, cb) 57 | }) 58 | step('replicate', cb => replicate(store1, store2, cb)) 59 | step('list', cb => { 60 | store1.list(schema, (err, list1) => { 61 | t.error(err) 62 | store2.list(schema, (err, list2) => cb(err, [list1, list2])) 63 | }) 64 | }) 65 | step((cb, [list1, list2]) => { 66 | // console.log('done!') 67 | t.deepEqual(list1.sort(), list2.sort()) 68 | t.equal(list1.length, 3) 69 | let rs = store1.createGetStream({ reduce: true }) 70 | list1.forEach(id => rs.write({ id, schema })) 71 | rs.end(null) 72 | collect(rs, (err, data) => { 73 | t.error(err) 74 | // console.log('RESULT', data) 75 | t.equal(data.length, 3) 76 | t.deepEqual(data.map(d => d.value.title).sort(), ['other first', 'other second', 'third']) 77 | cb() 78 | }) 79 | }) 80 | }) 81 | 82 | function replicate (a, b, cb) { 83 | cb = once(cb) 84 | var stream = a.replicate() 85 | stream.pipe(b.replicate()).pipe(stream).on('end', cb) 86 | setTimeout(() => cb(), 100) 87 | } 88 | -------------------------------------------------------------------------------- /examples/importer/serve.js: -------------------------------------------------------------------------------- 1 | const express = require('express') 2 | const p = require('path') 3 | const Swarm = require('corestore-swarm-networking') 4 | 5 | module.exports = serve 6 | 7 | function logErrors (err, req, res, next) { 8 | console.error(err.stack) 9 | res.status(500) 10 | next(err) 11 | } 12 | 13 | function share (cstore) { 14 | // const swarm = new Swarm(cstore) 15 | // swarm.listen() 16 | // const core = swarm.default() 17 | // core.ready(() => { 18 | // swarm.seed(core.key) 19 | // console.log('Seeding ' + core.key.toString('hex')) 20 | // }) 21 | } 22 | 23 | function serve (cstore) { 24 
|   // share(cstore)
25 |   const port = 8080
26 |   const host = 'localhost'
27 |   const app = express()
28 |   app.get('/*', onPath)
29 |   app.get('/', onPath)
30 |   app.use(logErrors)
31 | 
32 |   app.listen(port, host, () => console.log(`Server listening on http://${host}:${port}`))
33 | 
34 |   function onPath (req, res, next) {
35 |     let path = req.params['0'] || ''
36 |     path = p.join('/', path)
37 |     console.log('GET', path)
38 |     cstore.writer((err, drive) => {
39 |       if (err) return next(err)
40 |       drive.stat(path, (err, stat) => {
41 |         console.log(err)
42 |         // TODO
43 |         if (err && err.code === 'ENOENT') return res.status(404).send('File not found.')
44 |         if (err) return next(err)
45 |         console.log('got stat', stat)
46 | 
47 |         if (stat.isDirectory()) {
48 |           console.log('DIR!')
49 |           drive.readdir(path, (err, list) => {
50 |             if (err) return next(err)
51 |             list = list.sort().filter(a => a)
52 |             // render a simple directory listing
53 |             let content = `<ul>${list.map(name =>
54 |               `<li><a href="${p.join(path, name)}">${name}</a></li>`
55 |             ).join('')}</ul>`
56 |             let buf = Buffer.from(content)
57 |             res.setHeader('Content-Type', 'text/html; charset=UTF-8')
58 |             res.setHeader('Content-Length', Buffer.byteLength(buf))
59 |             res.send(buf)
60 |           })
61 |         } else if (stat.isFile()) {
62 |           console.log('FILE!')
63 |           drive.readFile(path, (err, buf) => {
64 |             if (err) return next(err)
65 |             let type
66 |             if (stat.metadata && stat.metadata.headers) {
67 |               console.log('META', stat.metadata)
68 |               const headers = JSON.parse(stat.metadata.headers.toString())
69 |               console.log('HEADERS', headers)
70 |               if (headers['content-type']) {
71 |                 console.log('SET TYPE', headers['content-type'])
72 |                 type = headers['content-type']
73 |               }
74 |               // Object.entries(headers).forEach((header, value) => {
75 |               //   res.setHeader(header + ': ' + value)
76 |               // })
77 |             }
78 | 
79 |             if (!type) type = 'text/html; charset=UTF-8'
80 | 
81 |             res.setHeader('Content-Type', type)
82 |             res.setHeader('Content-Length', Buffer.byteLength(buf))
83 |             res.status(200)
84 |             res.send(buf)
85 |           })
86 |         } else {
87 |           next(new Error('Invalid stat.'))
88 |         }
89 |       })
90 |     })
91 |   }
92 | }
93 | 
--------------------------------------------------------------------------------
/examples/movies/cli.js:
--------------------------------------------------------------------------------
1 | const cstore = require('../..')
2 | const bench = require('nanobench')
3 | const fs = require('fs')
4 | const ram = require('random-access-memory')
5 | 
6 | const store = cstore('.data')
7 | // const store = cstore(ram)
8 | run()
9 | 
10 | function run () {
11 |   const s = stepper()
12 |   s.step('readFile', readFile)
13 |   s.step('parse', parse)
14 |   s.step('prepare', prepare)
15 |   s.step('insert', insert)
16 |   s.final((err, result) => {
17 |     console.log('DONE', err)
18 |   })
19 |   s.run()
20 | }
21 | 
22 | // TODO: Implement putSchema
23 | function init () {
24 |   store.writer((err, writer) => {
25 |     if (err) throw err
26 |     writer.putSchema('movie', {
27 |       label: 'Movies',
28 |       properties: {
29 |         title: {
30 |           type: 'string',
31 |           index: true
32 |         },
33 |         body: {
34 |           type: 'string'
35 |         }
36 |       }
37 |     })
38 |   })
39 | }
40 | 
41 | function readFile (_, cb) {
42 |   fs.readFile('./movies.txt', cb)
43 | }
44 | 
45 | function parse (buf, cb) {
46 |   let lines = buf.toString().split('\n')
47 |   let movies = []
48 |   lines.forEach(line => {
49 |     let parts = line.split('\t')
50 |     movies.push({ title: parts[0], body: parts[1] })
51 |   })
52 |   console.log('rows', movies.length)
53 |   cb(null, movies)
54 | }
55 | 
56 | function prepare (movies, cb) {
57 |   const batchSize = 1000
58 |   let pos = 0
59 |   const batches = []
60 |   const schema = 'movie'
61 | 
62 |   while (pos < movies.length) {
63 |     let rows = movies.slice(pos, pos + batchSize)
64 |     pos = pos + batchSize
65 |     let batch = rows.map(value => ({
66 |       op: 'put',
67 |       id: cstore.id(),
68 |       value,
69 |       schema
70 |     }))
71 |     batches.push(batch)
72 |   }
73 |   console.log('batches', batches.length)
74 |   cb(null, batches)
75 | }
76 | 
77 | function insert (batches, cb) {
78 |   let ids = []
79 |   let i = 0
80 |   workBench()
81 |   function workBench () {
82 |     // bench('batch insert no.' + i, b => {
83 |     //   b.start()
84 |     let batch = batches.shift()
85 |     store.batch(batch, (err, newIds) => {
86 |       // b.end()
87 |       i++
88 |       if (err) return cb(err)
89 |       ids = ids.concat(newIds)
90 |       if (batches.length) process.nextTick(workBench)
91 |       else cb(null, ids)
92 |     })
93 |     // })
94 |   }
95 | }
96 | 
97 | function stepper () {
98 |   let _steps = []
99 |   let _result
100 |   let _final
101 |   let _step
102 | 
103 |   return { step, run, final }
104 | 
105 |   function step (name, cb) {
106 |     _steps.push({ name, cb })
107 |   }
108 | 
109 |   function final (cb) {
110 |     _final = cb
111 |   }
112 | 
113 |   function run () {
114 |     _step = _steps.shift()
115 |     bench(_step.name, b => {
116 |       b.start()
117 |       _step.cb(_result, (err, data) => {
118 |         b.end()
119 |         if (err) return finish(err)
120 |         _result = data
121 |         if (_steps.length) process.nextTick(run)
122 |         else finish(err)
123 |       })
124 |     })
125 |   }
126 | 
127 |   function finish (err) {
128 |     if (err && typeof err === 'object') err._step = _step.name
129 |     if (_final) {
130 |       process.nextTick(_final, err, _result)
131 |     } else if (err) {
132 |       throw err
133 |     }
134 |   }
135 | }
--------------------------------------------------------------------------------
/examples/importer/importer.js:
--------------------------------------------------------------------------------
1 | const cstore = require('../..')
2 | const thunky = require('thunky')
3 | const sonarView = require('./views/sonar')
4 | const leveldb = require('level')
5 | const p = require('path')
6 | const mkdirp = require('mkdirp')
7 | const util = require('util')
8 | 
9 | const replicate = require('@hyperswarm/replicator')
10 | 
11 | module.exports = (...args) => new Importer(...args)
12 | 
13 | class Importer {
14 |   constructor (opts) {
15 |     this._opts = opts
16 |     this.ready = thunky(this._ready.bind(this))
17 |     this.workers = []
18 |   }
19 | 
20 |   _ready (cb) {
21 |     const basePath = this._opts.storage
22 |     const paths = {
23 |       level: p.join(basePath, 'level'),
24 |       corestore: p.join(basePath, 'corestore'),
25 |       sonar: p.join(basePath, 'sonar')
26 |     }
27 |     Object.values(paths).forEach(p => mkdirp.sync(p))
28 | 
29 |     this.level = leveldb(paths.level, 'level')
30 |     this.cstore = cstore(paths.corestore, this._opts.key, { level: this.level, sparse: false })
31 |     this.cstore.useRecordView('sonar', sonarView, { storage: paths.sonar })
32 | 
33 |     this.swarm = replicate(this.cstore, {
34 |       live: true,
35 |       announce: true,
36 |       lookup: true
37 |     })
38 | 
39 |     this.swarm.on('join', dkey => console.log('Joining swarm for %s', dkey.toString('hex')))
40 | 
41 |     console.log('here')
42 | 
43 |     logEvents(this.swarm, 'swarm')
44 | 
45 |     this.cstore.writer((err, drive) => {
46 |       const key = hex(this.cstore.key)
47 |       const localKey = hex(drive.key)
48 |       console.log('Importer ready.')
49 |       console.log(`Primary key: ${key}`)
50 |       console.log(`Local key: ${localKey}`)
51 | 
52 |       this.workers.push(
53 |         require('./webpage')(this.cstore)
54 |       )
55 | 
56 |       cb(err)
57 |     })
58 |   }
59 | 
60 |   add (url, cb) {
61 |     const self = this
62 |     this.ready(() => {
63 |       let idx = 0
64 |       const
handlers = [] 65 | next(idx) 66 | 67 | function next (idx) { 68 | let worker = self.workers[idx] 69 | if (!worker) return done() 70 | worker.input(url, (handle) => { 71 | if (handle) handlers.push(worker) 72 | next(++idx) 73 | }) 74 | } 75 | 76 | function done () { 77 | if (!handlers.length) return cb(new Error('No handler found for input: ' + url)) 78 | if (handlers.length > 1) return cb(new Error('Conflicting handlers found: ' + handlers.map(h => h.label))) 79 | handle(handlers[0]) 80 | } 81 | 82 | function handle (handler) { 83 | const msg = { 84 | id: cstore.id(), 85 | url 86 | } 87 | handler.handle(msg, (err, statusStream) => { 88 | if (err) return cb(err) 89 | statusStream.on('data', msg => console.log('MSG', msg)) 90 | statusStream.on('end', () => cb()) 91 | }) 92 | } 93 | }) 94 | } 95 | } 96 | 97 | function hex (key) { 98 | return Buffer.isBuffer(key) ? key.toString('hex') : key 99 | } 100 | 101 | function logEvents (emitter, name) { 102 | let emit = emitter.emit 103 | emitter.emit = (...args) => { 104 | // const params = args.slice(1).map(arg => { 105 | // util.inspect(arg, { depth: 0 }) 106 | // }) 107 | const params = util.inspect(args.slice(1), { depth: 0 }) 108 | console.log('(%s) %s %o', name, args[0], params) 109 | emit.apply(emitter, args) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /test/records.js: -------------------------------------------------------------------------------- 1 | const tape = require('tape') 2 | const cstore = require('..') 3 | const ram = require('random-access-memory') 4 | const collect = require('collect-stream') 5 | const L = require('lodash') 6 | 7 | tape('prefix', t => { 8 | const store1 = cstore(ram) 9 | const schema = 'arso.xyz/Entity' 10 | const record1 = { title: 'world', tags: ['foo', 'bar'] } 11 | let results = [] 12 | store1.useRecordView('view', () => ({ 13 | map (msgs, next) { 14 | results = [...results, ...msgs] 15 | next() 16 | }, 17 | indexed () { 18 | t.equal(results.length, 1, 'one result') 19 | t.equal(results[0].value.title, 'world', 'value matches') 20 | t.end() 21 | } 22 | })) 23 | 24 | store1.ready(() => { 25 | store1.writer((err, drive) => { 26 | t.error(err, 'noerr writer') 27 | drive.writeFile('foo', 'bar', (err) => { 28 | t.error(err, 'noerr writeFile') 29 | store1.put({ schema, value: record1 }, (err, id) => { 30 | t.error(err, 'noerr put', id) 31 | }) 32 | }) 33 | }) 34 | }) 35 | }) 36 | 37 | tape('batch', t => { 38 | const store1 = cstore(ram) 39 | const schema = 'foo/bar' 40 | const records = [ 41 | { op: 'put', id: cstore.id(), schema, value: { title: 'hello' } }, 42 | { op: 'put', id: cstore.id(), schema, value: { title: 'world' } }, 43 | { op: 'put', id: cstore.id(), schema, value: { title: 'moon' } } 44 | ] 45 | store1.batch(records, (err, ids) => { 46 | t.error(err) 47 | t.equal(ids.length, 3) 48 | store1.list(schema, (err, ids) => { 49 | t.error(err) 50 | t.equal(ids.length, 3) 51 | let data = [] 52 | ids.forEach(id => store1.get({ schema, id }, collect)) 53 | function collect (err, records) { 54 | if (err) t.error(err) 55 | data = [...data, ...records] 56 | if (data.length === ids.length) finish(data) 57 | } 58 | }) 59 | }) 60 | 61 | function finish (data) { 62 | const results = data.map(d => d.value.title).sort() 63 | const sources = records.map(r => r.value.title).sort() 64 | t.deepEqual(results, sources, 'results match') 65 | t.end() 66 | } 67 | }) 68 | 69 | tape('batch and get stream', t => { 70 | const store = cstore(ram) 71 | 72 | const records = [ 
73 | { 74 | op: 'put', 75 | schema: 'event', 76 | value: { 77 | date: new Date(2019, 12, 10), 78 | title: 'Release' 79 | } 80 | }, 81 | { 82 | op: 'put', 83 | schema: 'event', 84 | value: { 85 | date: new Date(2019, 9, 2), 86 | title: 'Party' 87 | } 88 | } 89 | ] 90 | 91 | const stream = store.createBatchStream() 92 | stream.write(records) 93 | stream.end() 94 | collect(stream, (err, ids) => { 95 | t.error(err) 96 | t.equal(ids.length, 2, 'got two ids back') 97 | for (let id of ids) { 98 | t.equal(typeof id, 'string') 99 | } 100 | }) 101 | 102 | // stream.on('data', data => console.log('batch result', data)) 103 | 104 | store.on('indexed-all', query) 105 | 106 | stream.on('error', err => t.error(err)) 107 | 108 | function query () { 109 | const queryStream = store.api.entities.all() 110 | // queryStream.on('data', d => console.log('QUERYRES', d)) 111 | const getTransform = store.createGetStream() 112 | const resultStream = queryStream.pipe(getTransform) 113 | // resultStream.on('data', d => console.log('GETREC', d)) 114 | collect(resultStream, (err, data) => { 115 | t.error(err) 116 | // console.log('DATA', data) 117 | data = L.orderBy(data, r => r.value.title) 118 | t.equal(data.length, 2) 119 | t.equal(data[0].value.title, 'Party') 120 | t.equal(data[1].value.title, 'Release') 121 | t.end() 122 | }) 123 | } 124 | }) 125 | -------------------------------------------------------------------------------- /lib/multidrive-index.js: -------------------------------------------------------------------------------- 1 | const thunky = require('thunky') 2 | const { EventEmitter } = require('events') 3 | // const debug = require('debug')('multidrive-index') 4 | 5 | const hyperdriveIndex = require('./hyperdrive-index') 6 | const { State } = require('./messages') 7 | 8 | module.exports = (...args) => new MultidriveIndex(...args) 9 | 10 | let cnt = 0 11 | 12 | class MultidriveIndex extends EventEmitter { 13 | constructor (opts) { 14 | super() 15 | this.multidrive = opts.multidrive 16 | this.name = opts.name || 'index' + cnt++ 17 | 18 | ensureStateHandlers(opts) 19 | 20 | this._opts = opts 21 | this._storeState = opts.storeState 22 | this._fetchState = opts.fetchState 23 | this._clearIndex = opts.clearIndex || null 24 | 25 | this._states = {} 26 | this._indexes = new Map() 27 | this._running = new Set() 28 | 29 | this.ready = thunky(this._ready.bind(this)) 30 | this.ready() 31 | } 32 | 33 | _ready (cb) { 34 | this.multidrive.sources(sources => { 35 | // this._label = this.multidrive.key.toString('hex').substring(0, 4) + ':' + (this.multidrive.primaryDrive.writable ? 
'w' : 'r') + ':' + this.name
36 |       sources.forEach(source => this._onsource(source))
37 |       this.multidrive.on('source', this._onsource.bind(this))
38 |     })
39 |   }
40 | 
41 |   pause (cb) {
42 |     for (let idx of this._indexes.values()) {
43 |       idx.pause()
44 |     }
45 |     if (cb) cb()
46 |   }
47 | 
48 |   resume (cb) {
49 |     for (let idx of this._indexes.values()) {
50 |       idx.resume()
51 |     }
52 |     if (cb) cb()
53 |   }
54 | 
55 |   _onsource (drive) {
56 |     const key = drive.key.toString('hex')
57 |     if (this._indexes.has(key)) return
58 | 
59 |     const opts = {
60 |       map: this._opts.map,
61 |       batchSize: this._opts.batchSize,
62 |       prefix: this._opts.prefix,
63 |       readFile: this._opts.readFile,
64 |       storeState: (state, cb) => this._storeDriveState(key, state, cb),
65 |       fetchState: (cb) => this._fetchDriveState(key, cb)
66 |     }
67 | 
68 |     const index = hyperdriveIndex(drive, opts)
69 |     this._indexes.set(key, index)
70 | 
71 |     index.on('start', () => {
72 |       // debug(this._label, 'start', key.substring(0, 4))
73 |       if (!this._running.size) this.emit('start')
74 |       this._running.add(key)
75 |     })
76 | 
77 |     index.on('indexed', (nodes, complete) => {
78 |       // debug(this._label, 'indexed', key.substring(0, 4))
79 |       if (!complete) return
80 |       this.emit('indexed', drive.key, nodes)
81 |       this._running.delete(key)
82 |       if (!this._running.size) this.emit('indexed-all')
83 |     })
84 |   }
85 | 
86 |   _storeDriveState (key, state, cb) {
87 |     this._states[key] = state
88 |     let buf = encodeStates(this._states)
89 |     this._storeState(buf, cb)
90 |   }
91 | 
92 |   _fetchDriveState (key, cb) {
93 |     this._fetchState((err, buf) => {
94 |       if (err) return cb(err)
95 |       this._states = decodeStates(buf)
96 |       cb(null, this._states[key])
97 |     })
98 |   }
99 | }
100 | 
101 | function encodeStates (states) {
102 |   const statesArray = []
103 |   for (let [key, state] of Object.entries(states)) {
104 |     statesArray.push({ key, state })
105 |   }
106 |   return State.encode({ states: statesArray })
107 | }
108 | 
109 | function decodeStates (buf) {
110 |   if (!buf) return {}
111 |   const value = State.decode(buf)
112 |   const states = {}
113 |   value.states.forEach(({ key, state }) => {
114 |     states[key] = state
115 |   })
116 |   return states
117 | }
118 | 
119 | function ensureStateHandlers (opts) {
120 |   if (!opts.storeState && !opts.fetchState && !opts.clearIndex) {
121 |     // In-memory storage implementation
122 |     let state
123 |     opts.storeState = function (buf, cb) {
124 |       state = buf
125 |       process.nextTick(cb)
126 |     }
127 |     opts.fetchState = function (cb) {
128 |       process.nextTick(cb, null, state)
129 |     }
130 |     opts.clearIndex = function (cb) {
131 |       state = null
132 |       process.nextTick(cb)
133 |     }
134 |   }
135 | }
--------------------------------------------------------------------------------
/views/schema-aware.js:
--------------------------------------------------------------------------------
1 | const { Transform } = require('stream')
2 | 
3 | const { CHAR_END, CHAR_SPLIT, CHAR_START } = require('../lib/constants')
4 | 
5 | module.exports = schemaView
6 | 
7 | function schemaView (db, cstore) {
8 |   async function map (msgs, next) {
9 |     const bins = {}
10 |     const ops = []
11 |     const proms = []
12 | 
13 |     for (const msg of msgs) {
14 |       const { schema: name } = msg
15 |       if (!bins[name]) {
16 |         bins[name] = { msgs: [] }
17 |         proms.push(new Promise((resolve, reject) => {
18 |           cstore.getSchema(name, (err, schema) => {
19 |             if (err) return reject(err)
20 |             bins[name].schema = schema
21 |             resolve()
22 |           })
23 |         }))
24 |       }
25 |       bins[name].msgs.push(msg)
26 |     }
27 | 
28 |     // Wait until all schemas are loaded.
29 |     try {
30 |       await Promise.all(proms)
31 |     } catch (err) {
32 |       return next(err)
33 |     }
34 | 
35 |     Object.values(bins).forEach(bin => {
36 |       // Filter out messages without a schema.
37 |       if (!bin.schema) return
38 |       bin.msgs.forEach(msg => mapMessage(bin.schema, msg))
39 |     })
40 | 
41 |     db.batch(ops, () => {
42 |       next()
43 |     })
44 | 
45 |     function mapMessage (schema, msg) {
46 |       const { id, source, seq, schema: schemaName, value } = msg
47 |       for (let [name, def] of Object.entries(schema.properties)) {
48 |         if (!def.index) continue
49 |         if (typeof value[name] === 'undefined') continue
50 | 
51 |         const ikey = `${schemaName}|${name}|${value[name]}` +
52 |           CHAR_SPLIT +
53 |           `${id}|${source}`
54 | 
55 |         const ivalue = seq
56 | 
57 |         ops.push({
58 |           type: 'put',
59 |           key: ikey,
60 |           value: ivalue
61 |         })
62 |       }
63 |     }
64 |   }
65 | 
66 |   const api = {
67 |     query (kcore, opts, cb) {
68 |       // const example = {
69 |       //   schema: 'arso.xyz/Book',
70 |       //   prop: 'publicatenDate',
71 |       //   value: '2018-11-12--......',
72 |       //   gte: '2018-11-12',
73 |       //   lte: '2019-01-01',
74 |       //   reverse: true,
75 |       //   limit: 10
76 |       // }
77 | 
78 |       const proxy = new Transform({
79 |         objectMode: true,
80 |         transform (row, enc, next) {
81 |           this.push(decodeNode(row))
82 |           next()
83 |         }
84 |       })
85 | 
86 |       process.nextTick(init)
87 | 
88 |       return proxy
89 | 
90 |       function init () {
91 |         if (!opts.schema || !opts.prop) return proxy.destroy(new Error('schema and prop are required.'))
92 |         cstore.expandSchemaName(opts.schema, (err, name) => {
93 |           if (err) return proxy.destroy(new Error('Invalid schema name.'))
94 |           opts.schema = name
95 |           run()
96 |         })
97 |       }
98 | 
99 |       // const { schema, prop, value, gt, lt, gte, lte, reverse, limit } = opts
100 | 
101 |       function run () {
102 |         const lvlopts = {
103 |           reverse: opts.reverse,
104 |           limit: opts.limit
105 |         }
106 |         const key = `${opts.schema}|${opts.prop}|`
107 |         lvlopts.gt = key + CHAR_SPLIT
108 |         lvlopts.lt = key + CHAR_END
109 |         if (opts.value) {
110 |           lvlopts.gt = key + opts.value + CHAR_SPLIT
111 |           lvlopts.lt = key + opts.value + CHAR_SPLIT + CHAR_END
112 |         } else if (opts.gt) {
113 |           lvlopts.gt = key + opts.gt + CHAR_SPLIT
114 |         } else if (opts.gte) {
115 |           lvlopts.gte = key + opts.gte + CHAR_SPLIT
116 |           lvlopts.gt = undefined
117 |         }
118 |         if (opts.lt) {
119 |           lvlopts.lt = key + opts.lt + CHAR_START
120 |         } else if (opts.lte) {
121 |           lvlopts.lt = undefined
122 |           lvlopts.lte = key + opts.lte + CHAR_END
123 |         }
124 | 
125 |         const rs = db.createReadStream(lvlopts)
126 | 
127 |         rs.pipe(proxy)
128 |       }
129 | 
130 |       // TODO: continue...
131 |     }
132 |   }
133 | 
134 |   return {
135 |     map,
136 |     api
137 |   }
138 | }
139 | 
140 | function decodeNode (node) {
141 |   let { key, value: seq } = node
142 |   let [path, rec] = key.split(CHAR_SPLIT)
143 |   let [schema, prop, value] = path.split('|')
144 |   let [id, source] = rec.split('|')
145 |   return { schema, prop, value, id, source, seq }
146 | }
--------------------------------------------------------------------------------
/lib/kappa.js:
--------------------------------------------------------------------------------
1 | // NOTE: This is copy-pasted from
2 | // https://github.com/kappa-db/kappa-core/blob/master/index.js
3 | // and adapted to work with multidrive-index instead of
4 | // multifeed-index. Once things become a bit more stable,
5 | // likely we'd want to work on a PR to kappa-core to allow
6 | // swapping out the indexing backend.
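// Judging from how `use()` reads its argument below, a view object is
// expected to look roughly like this (all fields except `map` optional):
//   {
//     prefix: '.data/',            // only index keys under this prefix
//     readFile: true,              // load file contents before mapping
//     batchSize: 100,
//     map (msgs, next) { ... },
//     indexed (batch, driveKey) { ... },
//     api: { myQuery (kcore, ...args) { ... } }
//   }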
7 | 8 | var inherits = require('inherits') 9 | var EventEmitter = require('events').EventEmitter 10 | var indexer = require('./multidrive-index') 11 | // const debug = require('debug')('kappa') 12 | 13 | module.exports = Kappa 14 | 15 | function Kappa (opts) { 16 | if (!(this instanceof Kappa)) return new Kappa(opts) 17 | if (!opts) opts = {} 18 | 19 | this._logs = opts.multidrive 20 | this._indexes = {} 21 | this._running = new Set() 22 | 23 | this._viewContext = opts.viewContext || this 24 | 25 | this.api = {} 26 | } 27 | 28 | inherits(Kappa, EventEmitter) 29 | 30 | Kappa.prototype.use = function (name, version, view) { 31 | if (typeof version !== 'number') { 32 | view = version 33 | version = undefined 34 | } 35 | var idx = indexer({ 36 | multidrive: this._logs, 37 | 38 | name, 39 | 40 | prefix: view.prefix, 41 | map: view.map, 42 | readFile: view.readFile, 43 | fetchState: view.fetchState, 44 | storeState: view.storeState, 45 | batchSize: view.batchSize || 100 46 | 47 | // NOTE: kappa-core also has these. 48 | // version: version, 49 | // maxBatch: view.maxBatch || 10, 50 | // batch: view.map, 51 | }) 52 | idx.name = name 53 | 54 | // TODO: Rethink event names. 55 | idx.on('indexed', (driveKey, batch) => { 56 | this.emit('indexed', name, batch, driveKey) 57 | if (view.indexed) view.indexed(batch, driveKey) 58 | }) 59 | 60 | idx.on('indexed-all', () => { 61 | this._running.delete(name) 62 | if (!this._running.size) this.emit('indexed-all') 63 | }) 64 | 65 | idx.on('start', () => { 66 | if (!this._running.size) this.emit('start') 67 | this._running.add(name) 68 | }) 69 | 70 | // idx.on('error', function (err) { 71 | // self.emit('error', err) 72 | // }) 73 | // if (view.indexed) idx.on('indexed', view.indexed) 74 | const context = this._viewContext 75 | this._indexes[name] = idx 76 | this.api[name] = {} 77 | this.api[name].ready = idx.ready.bind(idx) 78 | for (var key in view.api) { 79 | if (typeof view.api[key] === 'function') this.api[name][key] = view.api[key].bind(view.api, context) 80 | else this.api[name][key] = view.api[key] 81 | } 82 | } 83 | 84 | // Kappa.prototype.feeds = function () { 85 | // return this._logs.feeds() 86 | // } 87 | 88 | Kappa.prototype.ready = function (viewNames, cb) { 89 | if (typeof viewNames === 'function') { 90 | cb = viewNames 91 | viewNames = [] 92 | } 93 | 94 | if (typeof viewNames === 'string') viewNames = [viewNames] 95 | if (viewNames.length === 0) { 96 | viewNames = Object.keys(this._indexes) 97 | } 98 | 99 | var pending = viewNames.length 100 | var self = this 101 | this._logs.ready(function () { 102 | for (var i = 0; i < viewNames.length; i++) { 103 | self._indexes[viewNames[i]].ready(done) 104 | } 105 | }) 106 | 107 | function done () { 108 | if (!--pending) cb() 109 | } 110 | } 111 | 112 | Kappa.prototype.pause = function (viewNames, cb) { 113 | if (typeof viewNames === 'function') { 114 | cb = viewNames 115 | viewNames = [] 116 | } 117 | cb = cb || noop 118 | 119 | if (!viewNames) viewNames = [] 120 | if (typeof viewNames === 'string') viewNames = [viewNames] 121 | if (viewNames.length === 0) { 122 | viewNames = Object.keys(this._indexes) 123 | } 124 | 125 | var pending = viewNames.length 126 | var self = this 127 | this._logs.ready(function () { 128 | for (var i = 0; i < viewNames.length; i++) { 129 | self._indexes[viewNames[i]].pause(done) 130 | } 131 | }) 132 | 133 | function done () { 134 | if (!--pending) cb() 135 | } 136 | } 137 | 138 | Kappa.prototype.resume = function (viewNames) { 139 | if (!viewNames) viewNames = [] 140 | if (typeof 
viewNames === 'string') viewNames = [viewNames] 141 | if (viewNames.length === 0) { 142 | viewNames = Object.keys(this._indexes) 143 | } 144 | 145 | var self = this 146 | this._logs.ready(function () { 147 | for (var i = 0; i < viewNames.length; i++) { 148 | self._indexes[viewNames[i]].resume() 149 | } 150 | }) 151 | } 152 | 153 | Kappa.prototype.writer = function (name, cb) { 154 | this._logs.writer(name, cb) 155 | } 156 | 157 | // Kappa.prototype.feed = function (key) { 158 | // return this._logs.feed(key) 159 | // } 160 | 161 | Kappa.prototype.replicate = function (opts) { 162 | return this._logs.replicate(opts) 163 | } 164 | 165 | function noop () {} 166 | -------------------------------------------------------------------------------- /test/replication.js: -------------------------------------------------------------------------------- 1 | const tape = require('tape') 2 | const cstore = require('..') 3 | const ram = require('random-access-memory') 4 | 5 | tape('replication and sources', async t => { 6 | const store1 = cstore(ram) 7 | var store2, id, store2localWriterKey 8 | 9 | const schema = 'arso.xyz/Entity' 10 | const record1 = { title: 'world', tags: ['foo', 'bar'] } 11 | const record2 = { title: 'moon', tags: ['bar', 'baz'] } 12 | 13 | await runAll([ 14 | cb => store1.ready(cb), 15 | cb => { 16 | store2 = cstore(ram, store1.key) 17 | store2.ready(cb) 18 | }, 19 | cb => store1.put({ schema, value: record1 }, (err, _id) => { 20 | id = _id 21 | cb(err) 22 | }), 23 | cb => store2.put({ schema, id, value: record2 }, cb), 24 | 25 | cb => store2.writer((err, drive) => { 26 | store2localWriterKey = drive.key 27 | cb(err) 28 | }), 29 | 30 | // First replication. Note that this will keep running. 31 | cb => replicate(store1, store2, cb), 32 | cb => { 33 | store2.get({ schema, id }, (err, records) => { 34 | t.error(err, 'no err') 35 | t.equal(records.length, 2) 36 | t.equal(records[0].id, id) 37 | t.equal(records[1].id, id) 38 | t.equal(records[0].value.title, 'world') 39 | t.equal(records[1].value.title, 'moon') 40 | cb() 41 | }) 42 | }, 43 | 44 | // the primary source has not added store2's local writer 45 | cb => store1.get({ schema, id }, (err, records) => { 46 | t.error(err) 47 | t.equal(records.length, 1) 48 | cb() 49 | }), 50 | 51 | cb => store1.addSource(store2localWriterKey, cb), 52 | 53 | cb => setTimeout(cb, 100), 54 | 55 | cb => store1.get({ schema, id }, (err, records) => { 56 | t.error(err) 57 | t.equal(records.length, 2) 58 | cb() 59 | }), 60 | 61 | cb => { 62 | store1.sources(drives => { 63 | cb() 64 | }) 65 | } 66 | ]) 67 | 68 | t.end() 69 | }) 70 | 71 | function replicate (a, b, cb) { 72 | cb = once(cb) 73 | var stream = a.replicate({ live: true }) 74 | stream.pipe(b.replicate()).pipe(stream).on('end', cb) 75 | setTimeout(() => cb(), 100) 76 | } 77 | 78 | function once (fn) { 79 | let didrun = false 80 | return (...args) => { 81 | if (didrun) return 82 | didrun = true 83 | return fn(...args) 84 | } 85 | } 86 | 87 | function runAll (ops) { 88 | return new Promise((resolve, reject) => { 89 | runNext(ops.shift()) 90 | function runNext (op, previousResult) { 91 | op((err, result) => { 92 | if (err) return reject(err) 93 | let next = ops.shift() 94 | if (!next) return resolve() 95 | return runNext(next, result) 96 | }, previousResult) 97 | } 98 | }) 99 | } 100 | 101 | // function validateCore(t, core, values) { 102 | // const ops = values.map((v, idx) => cb => { 103 | // core.get(idx, (err, value) => { 104 | // t.error(err, 'no error') 105 | // t.same(value, values[idx]) 106 | 
// return cb(null) 107 | // }) 108 | // }) 109 | // return runAll(ops) 110 | // } 111 | 112 | function key (k) { 113 | return k.toString('hex').slice(0, 2) 114 | } 115 | 116 | function contentFeed (drive) { 117 | const contentState = drive._contentStates.get(drive._db) 118 | if (!contentState) return false 119 | return contentState.feed 120 | } 121 | 122 | function logDrive (drive, name) { 123 | const cf = contentFeed(drive) 124 | name = name || 'Hyperdrive' 125 | console.log(`%s: 126 | key %s 127 | ckey %s 128 | writable %s 129 | version %s 130 | contentLength %s`, name, key(drive.key), cf && key(cf.key), drive.writable, drive.version, cf && cf.length) 131 | } 132 | 133 | // function repl (s1, s2, cb) { 134 | // // const opts = { live: false } 135 | // const opts = {} 136 | // // const stream = s1.replicate(opts) 137 | // // stream.pipe(s2.replicate(opts)).pipe(stream) 138 | // // stream.on('end', cb) 139 | // // stream.on('error', err => console.error(err)) 140 | // const dr1 = s1.multidrive.primaryDrive 141 | // const dr2 = s2.multidrive.primaryDrive 142 | // logDrive(dr1, 'drive1') 143 | // logDrive(dr2, 'drive2') 144 | // s2.writer((err, drive) => logDrive(drive, 'drive2.writer')) 145 | // const str1 = dr1.replicate() 146 | // const str2 = dr2.replicate() 147 | // console.log('stream1', key(str1.id)) 148 | // console.log('stream2', key(str2.id)) 149 | // pump(str1, str2, str1) 150 | // // str1.on('data', d => console.log('d1', d)) 151 | // // str2.on('data', d => console.log('d2', d)) 152 | // str1.on('end', cb) 153 | // setTimeout(() => { 154 | // logDrive(dr1, 'drive1') 155 | // logDrive(dr2, 'drive2') 156 | // // console.log(str1) 157 | // // console.log(str2) 158 | // cb() 159 | // }, 200) 160 | // } 161 | -------------------------------------------------------------------------------- /lib/messages.js: -------------------------------------------------------------------------------- 1 | // This file is auto generated by the protocol-buffers compiler 2 | 3 | /* eslint-disable quotes */ 4 | /* eslint-disable indent */ 5 | /* eslint-disable no-redeclare */ 6 | /* eslint-disable camelcase */ 7 | 8 | // Remember to `npm install --save protocol-buffers-encodings` 9 | var encodings = require('protocol-buffers-encodings') 10 | var varint = encodings.varint 11 | var skip = encodings.skip 12 | 13 | var State = exports.State = { 14 | buffer: true, 15 | encodingLength: null, 16 | encode: null, 17 | decode: null 18 | } 19 | 20 | defineState() 21 | 22 | function defineState () { 23 | var FeedState = State.FeedState = { 24 | buffer: true, 25 | encodingLength: null, 26 | encode: null, 27 | decode: null 28 | } 29 | 30 | defineFeedState() 31 | 32 | function defineFeedState () { 33 | var enc = [ 34 | encodings.string, 35 | encodings.bytes 36 | ] 37 | 38 | FeedState.encodingLength = encodingLength 39 | FeedState.encode = encode 40 | FeedState.decode = decode 41 | 42 | function encodingLength (obj) { 43 | var length = 0 44 | if (!defined(obj.key)) throw new Error("key is required") 45 | var len = enc[0].encodingLength(obj.key) 46 | length += 1 + len 47 | if (!defined(obj.state)) throw new Error("state is required") 48 | var len = enc[1].encodingLength(obj.state) 49 | length += 1 + len 50 | return length 51 | } 52 | 53 | function encode (obj, buf, offset) { 54 | if (!offset) offset = 0 55 | if (!buf) buf = Buffer.allocUnsafe(encodingLength(obj)) 56 | var oldOffset = offset 57 | if (!defined(obj.key)) throw new Error("key is required") 58 | buf[offset++] = 10 59 | enc[0].encode(obj.key, buf, offset) 60 
| offset += enc[0].encode.bytes 61 | if (!defined(obj.state)) throw new Error("state is required") 62 | buf[offset++] = 18 63 | enc[1].encode(obj.state, buf, offset) 64 | offset += enc[1].encode.bytes 65 | encode.bytes = offset - oldOffset 66 | return buf 67 | } 68 | 69 | function decode (buf, offset, end) { 70 | if (!offset) offset = 0 71 | if (!end) end = buf.length 72 | if (!(end <= buf.length && offset <= buf.length)) throw new Error("Decoded message is not valid") 73 | var oldOffset = offset 74 | var obj = { 75 | key: "", 76 | state: null 77 | } 78 | var found0 = false 79 | var found1 = false 80 | while (true) { 81 | if (end <= offset) { 82 | if (!found0 || !found1) throw new Error("Decoded message is not valid") 83 | decode.bytes = offset - oldOffset 84 | return obj 85 | } 86 | var prefix = varint.decode(buf, offset) 87 | offset += varint.decode.bytes 88 | var tag = prefix >> 3 89 | switch (tag) { 90 | case 1: 91 | obj.key = enc[0].decode(buf, offset) 92 | offset += enc[0].decode.bytes 93 | found0 = true 94 | break 95 | case 2: 96 | obj.state = enc[1].decode(buf, offset) 97 | offset += enc[1].decode.bytes 98 | found1 = true 99 | break 100 | default: 101 | offset = skip(prefix & 7, buf, offset) 102 | } 103 | } 104 | } 105 | } 106 | 107 | var enc = [ 108 | FeedState 109 | ] 110 | 111 | State.encodingLength = encodingLength 112 | State.encode = encode 113 | State.decode = decode 114 | 115 | function encodingLength (obj) { 116 | var length = 0 117 | if (defined(obj.states)) { 118 | for (var i = 0; i < obj.states.length; i++) { 119 | if (!defined(obj.states[i])) continue 120 | var len = enc[0].encodingLength(obj.states[i]) 121 | length += varint.encodingLength(len) 122 | length += 1 + len 123 | } 124 | } 125 | return length 126 | } 127 | 128 | function encode (obj, buf, offset) { 129 | if (!offset) offset = 0 130 | if (!buf) buf = Buffer.allocUnsafe(encodingLength(obj)) 131 | var oldOffset = offset 132 | if (defined(obj.states)) { 133 | for (var i = 0; i < obj.states.length; i++) { 134 | if (!defined(obj.states[i])) continue 135 | buf[offset++] = 10 136 | varint.encode(enc[0].encodingLength(obj.states[i]), buf, offset) 137 | offset += varint.encode.bytes 138 | enc[0].encode(obj.states[i], buf, offset) 139 | offset += enc[0].encode.bytes 140 | } 141 | } 142 | encode.bytes = offset - oldOffset 143 | return buf 144 | } 145 | 146 | function decode (buf, offset, end) { 147 | if (!offset) offset = 0 148 | if (!end) end = buf.length 149 | if (!(end <= buf.length && offset <= buf.length)) throw new Error("Decoded message is not valid") 150 | var oldOffset = offset 151 | var obj = { 152 | states: [] 153 | } 154 | while (true) { 155 | if (end <= offset) { 156 | decode.bytes = offset - oldOffset 157 | return obj 158 | } 159 | var prefix = varint.decode(buf, offset) 160 | offset += varint.decode.bytes 161 | var tag = prefix >> 3 162 | switch (tag) { 163 | case 1: 164 | var len = varint.decode(buf, offset) 165 | offset += varint.decode.bytes 166 | obj.states.push(enc[0].decode(buf, offset, offset + len)) 167 | offset += enc[0].decode.bytes 168 | break 169 | default: 170 | offset = skip(prefix & 7, buf, offset) 171 | } 172 | } 173 | } 174 | } 175 | 176 | function defined (val) { 177 | return val !== null && val !== undefined && (typeof val !== 'number' || !isNaN(val)) 178 | } 179 | -------------------------------------------------------------------------------- /lib/multidrive.js: -------------------------------------------------------------------------------- 1 | const hyperdrive = 
require('hyperdrive') 2 | const raf = require('random-access-file') 3 | const crypto = require('hypercore-crypto') 4 | const thunky = require('thunky') 5 | const { EventEmitter } = require('events') 6 | const p = require('path') 7 | const corestore = require('corestore') 8 | 9 | module.exports = (...args) => new Multidrive(...args) 10 | 11 | const { P_SOURCES } = require('./constants') 12 | 13 | class Multidrive extends EventEmitter { 14 | constructor (storage, key, opts = {}) { 15 | super() 16 | this._opts = opts 17 | 18 | // this.storage = typeof storage === 'string' ? () => raf(storage) : storage 19 | if (typeof storage === 'function') { 20 | var factory = path => storage(path) 21 | } else if (typeof storage === 'string') { 22 | factory = path => raf(storage + '/' + path) 23 | } 24 | this.factory = factory 25 | 26 | this.corestore = opts.corestore || corestore(factory) 27 | 28 | this.primaryDrive = hyperdrive(this.corestore, key, { 29 | sparse: opts.sparse, 30 | secretKey: opts.secretKey, 31 | keyPair: opts.keyPair 32 | }) 33 | 34 | this.ready = thunky(this._ready.bind(this)) 35 | 36 | this._sources = new Map() 37 | } 38 | 39 | _ready (cb) { 40 | this._pushSource(this.primaryDrive, cb) 41 | } 42 | 43 | get key () { 44 | return this.primaryDrive.key 45 | } 46 | 47 | get discoveryKey () { 48 | return this.primaryDrive.discoveryKey 49 | } 50 | 51 | get localKey () { 52 | if (!this._localWriter || !this._localWriter.key) return undefined 53 | return this._localWriter.key 54 | } 55 | 56 | _pushSource (drive, cb) { 57 | cb = cb || noop 58 | drive.ready(err => { 59 | if (err) return cb(err) 60 | // console.log(drive.key.toString('hex').substring(0, 4), 'pushSource', drive.key.toString('hex')) 61 | 62 | this._sources.set(hex(drive.key), drive) 63 | this.emit('source', drive) 64 | 65 | drive.readdir(P_SOURCES, (err, list) => { 66 | if (err || !list.length) return cb(err, drive) 67 | let pending = list.length 68 | for (let key of list) { 69 | this._addSource(key, finish) 70 | } 71 | function finish (err) { 72 | if (err) return cb(err, drive) 73 | if (--pending === 0) cb(null, drive) 74 | } 75 | }) 76 | }) 77 | } 78 | 79 | _addSource (key, opts, cb) { 80 | if (typeof opts === 'function') return this._addSource(key, {}, opts) 81 | opts = { ...this._opts, ...opts || {} } 82 | key = hex(key) 83 | const drive = hyperdrive(this.corestore, Buffer.from(key, 'hex'), opts) 84 | this._pushSource(drive, cb) 85 | } 86 | 87 | _writeSource (key, cb) { 88 | key = hex(key) 89 | this.writer((err, drive) => { 90 | if (err) return cb(err) 91 | // drive.writeFile(p.join(P_SOURCES, hex(key)), Buffer.alloc(0), cb) 92 | drive.mount(p.join(P_SOURCES, key), Buffer.from(key, 'hex'), cb) 93 | }) 94 | } 95 | 96 | addSource (key, cb) { 97 | key = hex(key) 98 | this.ready(() => { 99 | // console.log(this.key.toString('hex').substring(0, 4), 'addSource', key.toString('hex')) 100 | if (this._sources.has(hex(key))) return cb(null, this._sources.get(key)) 101 | this._addSource(key, cb) 102 | }) 103 | } 104 | 105 | hasSource (key) { 106 | key = hex(key) 107 | return this._sources.has(key) 108 | } 109 | 110 | saveSource (key, cb) { 111 | if (!key) return cb(new Error('Key is required.')) 112 | key = hex(key) 113 | this.addSource(key, err => { 114 | if (err) return cb(err) 115 | this._writeSource(key, cb) 116 | }) 117 | } 118 | 119 | sources (fn) { 120 | this.ready(() => { 121 | fn([...this._sources.values()]) 122 | }) 123 | } 124 | 125 | source (key, cb) { 126 | this.ready(() => { 127 | if (this._sources.has(hex(key))) return 
cb(this._sources.get(hex(key))) 128 | else cb() 129 | }) 130 | } 131 | 132 | writer (cb) { 133 | if (this._localWriter) cb(null, this._localWriter) 134 | else this._initWriter(cb) 135 | } 136 | 137 | _initWriter (cb) { 138 | const self = this 139 | if (!this._loadLocalWriter) this._loadLocalWriter = thunky(loadWriter) 140 | this._loadLocalWriter(err => cb(err, self._localWriter)) // hand the loaded writer drive to the callback 141 | 142 | function loadWriter (cb) { 143 | self.ready(err => { 144 | if (err) return cb(err) 145 | if (self.primaryDrive.writable) { 146 | finish(null, self.primaryDrive) 147 | } else { 148 | readKey() 149 | } 150 | }) 151 | 152 | function readKey () { 153 | if (self._localWriter) return finish(null, self._localWriter) 154 | let keystore = self.factory('localwriter') 155 | keystore.stat((err, stat) => { 156 | if (err || !stat || !stat.size) return createWriter(keystore) 157 | keystore.read(0, 64, (err, hexKey) => { 158 | if (err) return finish(err) 159 | const key = Buffer.from(hexKey.toString(), 'hex') 160 | openWriter(key) 161 | }) 162 | }) 163 | } 164 | 165 | function createWriter (keystore) { 166 | const { publicKey, secretKey } = crypto.keyPair() 167 | const hexKey = Buffer.from(publicKey.toString('hex')) 168 | keystore.write(0, hexKey, err => { 169 | if (err) return cb(err) 170 | openWriter(publicKey, { secretKey }) 171 | }) 172 | } 173 | 174 | function openWriter (key, opts) { 175 | self._addSource(key, opts, finish) 176 | } 177 | 178 | function finish (err, drive) { 179 | if (err) return cb(err) 180 | self._localWriter = drive 181 | cb() 182 | } 183 | } 184 | } 185 | 186 | replicate (opts) { 187 | return this.primaryDrive.replicate(opts) 188 | } 189 | } 190 | 191 | function hex (key) { 192 | return Buffer.isBuffer(key) ? key.toString('hex') : key 193 | } 194 | 195 | function noop () {} 196 | 197 | // function nestStorage (storage, prefix) { 198 | // prefix = prefix || '' 199 | // return function (name, opts) { 200 | // let path = p.join(prefix, name) 201 | // return storage(path, opts) 202 | // } 203 | // } 204 | -------------------------------------------------------------------------------- /examples/importer/cli.js: -------------------------------------------------------------------------------- 1 | const minimist = require('minimist') 2 | const Importer = require('./importer') 3 | const p = require('path') 4 | const pretty = require('pretty-bytes') 5 | const mirrorFolder = require('mirror-folder') 6 | 7 | const argv = minimist(process.argv.slice(2), { 8 | alias: { key: 'k', storage: 'd' }, 9 | default: { storage: './.data' } 10 | }) 11 | 12 | try { 13 | run(argv, close) 14 | } catch (err) { 15 | close(err) 16 | } 17 | 18 | function close (err, msg) { 19 | if (err) { 20 | console.error(err) 21 | process.exit(1) 22 | } 23 | if (msg) console.log(msg) 24 | process.exit(0) 25 | } 26 | 27 | function run (argv, cb) { 28 | const [cmd, ...args] = argv._ 29 | 30 | // const cb = err => console.log('done', err) 31 | 32 | switch (cmd) { 33 | case 'add': return add(args, argv, cb) 34 | case 'mirror': return mirror(args, argv, cb) 35 | // case 'work': return work() 36 | // case 'status': return status() 37 | case 'show': return show(args, argv, cb) 38 | case 'search': return search(args, argv, cb) 39 | case 'serve': return serve(args, argv, cb) 40 | default: return usage(args, argv, cb) 41 | } 42 | } 43 | 44 | function usage (args, argv, cb) { 45 | let msg = `archipel-import [-k key] command [arguments] 46 | 47 | Commands: 48 | 49 | add URL 50 | search 51 | status 52 | show 53 | serve 54 | 55 | Options: 56 | -k, --key Content store 
key 57 | -d, --data Directory to store data at 58 | Default: ./.data 59 | ` 60 | cb(null, msg) 61 | } 62 | 63 | function open (opts) { 64 | const importer = Importer({ key: opts.key, storage: opts.storage }) 65 | return importer 66 | } 67 | 68 | function add (args, opts, cb) { 69 | const importer = open(opts) 70 | const url = args[0] 71 | if (!url) return cb(new Error('URL is required.')) 72 | console.log('add', url) 73 | importer.add(url, cb) 74 | } 75 | 76 | function serve (args, opts, cb) { 77 | const importer = open(opts) 78 | importer.ready(() => { 79 | const cstore = importer.cstore 80 | require('./serve')(cstore) 81 | }) 82 | } 83 | 84 | function mirror (args, opts, cb) { 85 | console.log(opts) 86 | const importer = open(opts) 87 | importer.ready(() => { 88 | const store = importer.cstore 89 | store.writer((err, drive) => { 90 | // console.log('go', drive) 91 | let target = p.resolve(args[0]) 92 | console.log('TARGET', target) 93 | const equals = function (src, dst, cb) { 94 | cb(null, false) 95 | } 96 | let prog = mirrorFolder({ name: '/', fs: drive }, p.resolve(args[0]), { equals }, (err, res) => { 97 | console.log('done', err, res) 98 | }) 99 | }) 100 | }) 101 | } 102 | 103 | function search (args, opts, cb) { 104 | const importer = open(opts) 105 | importer.ready(() => { 106 | const store = importer.cstore 107 | store.api.sonar.query(args.join(' '), (err, results) => { 108 | if (!results) return console.log('no results', err) 109 | console.log('RESULTS', err, results.results.map(r => { 110 | return { score: r.score, title: r.doc.title } 111 | })) 112 | }) 113 | }) 114 | } 115 | 116 | function show (args, opts, cb) { 117 | const importer = open(opts) 118 | importer.ready(() => { 119 | const store = importer.cstore 120 | store.on('indexed', (key) => { 121 | let records = [] 122 | store.api.entities.all((err, list) => { 123 | let missing = 0 124 | for (let [id, rows] of Object.entries(list)) { 125 | for (let row of rows) { 126 | missing++ 127 | store.get({ schema: row.schema, id }, (err, record) => { 128 | records.push(record) 129 | if (--missing === 0) { 130 | let flat = records.reduce((agg, rows) => ([...agg, ...rows]), []) 131 | let mapped = flat.map(simplify) 132 | console.log(mapped) 133 | cb() 134 | } 135 | }) 136 | } 137 | } 138 | }) 139 | }) 140 | }) 141 | 142 | function simplify (rec) { 143 | return { 144 | id: rec.id, 145 | schema: rec.schema, 146 | value: rec.value 147 | } 148 | } 149 | } 150 | 151 | // function show (args, opts, cb) { 152 | // const importer = open(opts) 153 | // const store = importer.cstore 154 | // store.writer((err, drive) => { 155 | // if (err) return cb(err) 156 | // // iterate(drive, args[0] || '/', cb) 157 | // // statRecursive(drive, args[0] || '/', 100, (err, stat) => { 158 | // // console.log(err, stat) 159 | // // print([stat]) 160 | // // }) 161 | // console.log('drive', drive.readdir) 162 | // walk(drive, '/', (err, stats) => { 163 | // console.log(err, stats) 164 | // }) 165 | // }) 166 | 167 | // function print (stats, indent) { 168 | // indent = indent || 0 169 | // stats.forEach(stat => { 170 | // console.log(stat.path, pretty(stat.size)) 171 | // if (stat.children && stat.children.length) print(stat.children, indent + 2) 172 | // }) 173 | // } 174 | 175 | // function iterate (drive, cur, cb) { 176 | // let missing = 0 177 | // const dirs = [] 178 | // drive.readdir(cur, (err, list) => { 179 | // if (err) return cb(err) 180 | // if (!list.length) return done() 181 | // list.forEach(name => { 182 | // let path = p.join(cur, name) 183 | 
// missing++ 184 | // drive.stat(path, onstat.bind(onstat, path, name)) 185 | // }) 186 | // }) 187 | // function onstat (path, name, err, stat) { 188 | // if (stat.isDirectory()) { 189 | // done(null, path) 190 | // } else if (stat.isFile()) { 191 | // drive.readFile(path, (err, data) => { 192 | // console.log(path, pretty(stat.size)) 193 | // console.log() 194 | // done() 195 | // }) 196 | // } 197 | // } 198 | 199 | // function done (err, path) { 200 | // if (path) dirs.push(path) 201 | // if (--missing <= 0) handleDirs(err, dirs) 202 | // } 203 | 204 | // function handleDirs (err, dirs) { 205 | // let missing = 0 206 | // if (!dirs.length || err) return cb(err) 207 | // dirs.forEach(dir => { 208 | // missing++ 209 | // iterate(drive, dir, (err) => { 210 | // if (err) return cb(err) 211 | // if (--missing === 0) return cb() 212 | // }) 213 | // }) 214 | // } 215 | // } 216 | // } 217 | 218 | // function statRecursive (drive, path, depth, cb) { 219 | // console.log('GO', path) 220 | // depth = depth || 0 221 | // statPath(path, 0, cb) 222 | 223 | // function statPath (path, currentDepth, cb) { 224 | // console.log('go', path) 225 | // drive.stat(path, (err, stat) => { 226 | // // console.log('path', path, depth, currentDepth, stat) 227 | // // console.log('stat', path, stat.isDirectory()) 228 | // if (err) return cb(err) 229 | // stat.path = path 230 | // if (stat.isDirectory() && currentDepth < depth) { 231 | // // console.log('DIR!', path) 232 | // statChildren(path, currentDepth + 1, (err, children) => { 233 | // console.log('CHILDREN', path, err, children) 234 | // if (err) return cb(err) 235 | // stat.children = children 236 | // cb(null, stat) 237 | // }) 238 | // } else { 239 | // cb(null, stat) 240 | // } 241 | // }) 242 | // } 243 | 244 | // function statChildren (path, currentDepth, cb) { 245 | // drive.readdir(path, (err, children) => { 246 | // // console.log('READDIR', path, children) 247 | // if (err) return cb(err) 248 | // children = children.filter(c => c) 249 | // // console.log('CHILDRENa', children) 250 | // if (!children.length) return cb(null, []) 251 | // let stats = [] 252 | // let missing = children.length + 1 253 | 254 | // for (let child of children) { 255 | // statPath(joinPath(path, child), currentDepth, (err, stat) => { 256 | // // console.log('ret from path', path, child, stat.children) 257 | // if (err) return cb(err) 258 | // stats.push(stat) 259 | // if (--missing === 0) cb(null, stats) 260 | // }) 261 | // } 262 | // }) 263 | // } 264 | // } 265 | 266 | // function walk (fs, dir, done) { 267 | // let results = [] 268 | // fs.readdir(dir, function (err, list) { 269 | // if (err) return done(err) 270 | // let pending = list.length 271 | // if (!pending) return done(null, results) 272 | // list.forEach(function (file) { 273 | // // file = [dir, file].join('/') 274 | // file = joinPath(dir, file) 275 | // fs.stat(file, function (err, stat) { 276 | // if (err) done(err) 277 | // console.log(file) 278 | // if (stat && stat.isDirectory()) { 279 | // walk(fs, file, function (err, res) { 280 | // if (err) done(err) 281 | // results = results.concat(res) 282 | // if (!--pending) done(null, results) 283 | // }) 284 | // } else { 285 | // results.push(file) 286 | // if (!--pending) done(null, results) 287 | // } 288 | // }) 289 | // }) 290 | // }) 291 | // }; 292 | 293 | // function joinPath (prefix, suffix) { 294 | // if (prefix.slice(-1) === '/') prefix = prefix.substring(0, prefix.length - 1) 295 | // if (suffix[0] === '/') suffix = suffix.substring(1) 296 | // 
return prefix + '/' + suffix 297 | // } 298 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hyper-content-db 2 | 3 | A [Kappa-style](http://kappa-architecture.com) peer-to-peer content database on top of hyperdrives. 4 | 5 | **Note: I'm not using this anymore. Try out [kappa-record-db](https://github.com/arso-project/kappa-record-db) instead.** 6 | 7 | ## Installation 8 | 9 | #### `npm install hyper-content-db` 10 | 11 | ## Example 12 | 13 | ```javascript 14 | const hypercontent = require('hyper-content-db') 15 | const db = hypercontent('./data/db1') 16 | 17 | // Let's put a basic schema first. 18 | db.putSchema('event', { 19 | properties: { 20 | title: { 21 | type: 'string', 22 | index: true 23 | }, 24 | date: { 25 | type: 'date', 26 | index: true 27 | } 28 | } 29 | }) 30 | 31 | // Now add some records. 32 | db.batch([ 33 | { schema: 'event', value: { title: 'Workshop', date: new Date(2019, 10, 10) } }, 34 | { schema: 'event', value: { title: 'Reading', date: new Date(2019, 8, 2) } } 35 | ]) 36 | 37 | // When all indexing is done, query and log results. 38 | db.on('indexed-all', query) 39 | 40 | db.ready(() => { 41 | // Create a second database. Set the first database as primary key. 42 | // This will make db2 a "fork" or "extension" of the first. 43 | const db2 = hypercontent('./data/db2', db.key) 44 | db2.ready(() => { 45 | // Add the second database as a source for the first. 46 | db.addSource(db2.localKey) 47 | 48 | // Connect the two databases. 49 | replicate(db, db2) 50 | 51 | // Add content to the second database. 52 | db2.batch([ 53 | { schema: 'event', value: { title: 'Dinner', date: new Date(2019, 9, 22) } } 54 | ]) 55 | }) 56 | }) 57 | 58 | function query () { 59 | const eventsSortedByDate = db.api.indexes.query({ schema: 'event', prop: 'date' }).pipe(db.createGetStream()) 60 | eventsSortedByDate.on('data', row => console.log(row.value.date, row.value.title)) 61 | } 62 | 63 | function replicate (a, b) { 64 | const stream = a.replicate() 65 | const stream2 = b.replicate() 66 | stream.pipe(stream2).pipe(stream) 67 | } 68 | 69 | ``` 70 | 71 | ## API 72 | 73 | `const hypercontent = require('hyper-content-db')` 74 | 75 | #### `const db = hypercontent(storage, key, opts)` 76 | 77 | `storage` is either a string to a file system path or a [random-access-storage](https://github.com/random-access-storage/) instance. 78 | 79 | `key` is a `Buffer` containing a key to the primary drive. If omitted, it will be loaded from storage. If no key exists, a new keypair will be generated. 80 | 81 | #### `db.ready(cb)` 82 | 83 | `cb` is called after the database is fully initialized. 84 | 85 | #### `db.replicate(opts)` 86 | 87 | Create a hypercore-protocol replication stream. See [hyperdrive](https://github.com/andrewosh/hyperdrive) for details. 88 | 89 | #### `db.addSource(key, cb)` 90 | 91 | Add an additional source hyperdrive. `key` is the key of a hypercontent hyperdrive. hyper-content-db treats all sources equally. 92 | 93 | > TODO: Document how records from different sources relate to each other. 94 | 95 | #### `db.put(record, cb)` 96 | 97 | Put a record into the database. 98 | 99 | `record` is a plain JS object: 100 | ```javascript 101 | { 102 | id: 'string', 103 | schema: 'string', 104 | value: someObject 105 | } 106 | ``` 107 | 108 | * `schema` is required. All records have a schema name. Schemas are identified by strings. Schemas can either be local or well-defined. Local schemas are identifiers that should be unique in the context of the database; their names may not contain slashes (`/`). Well-defined schemas are identified by a domain, followed by an identifier (e.g. `arso.xyz/event`). They have to include exactly one slash. By this convention, schema names are compatible with the [unwalled.garden](https://unwalled.garden) spec. Usually, you will want to put the schema's declaration into the database (see below), but this is not required. 109 | 110 | * `id` identifies a record uniquely within the database. When creating new records, leave `id` undefined. When updating existing records, `id` is required. 111 | 112 | * `value` is the record's value. It has to be a JavaScript object that is serializable to JSON. If the record's schema has its definition stored in the database, the value has to conform to the schema. 113 | 114 | > TODO: Validating records against their schemas is not yet implemented. 115 | 116 | * `cb` is a callback that will be called with `(err, id)`. 117 | 118 | The records will be saved as files within the database according to a fixed naming scheme: `/.data/schema-domain/schema-name/id.json`. For local schemas, the `schema-domain` is the key of the database.
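For illustration, a minimal sketch of creating and then updating a record with `db.put` (the `event` schema from the example above is assumed to exist):

```javascript
db.put({ schema: 'event', value: { title: 'Workshop' } }, (err, id) => {
  if (err) throw err
  // Passing the returned id updates the existing record instead of creating a new one.
  db.put({ schema: 'event', id, value: { title: 'Workshop (rescheduled)' } }, (err, id) => {
    if (err) throw err
    console.log('updated record', id)
  })
})
```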
119 | 120 | #### `db.get(req, [opts], cb)` 121 | 122 | Get a record from the database. `req` should look like this: 123 | 124 | ```javascript 125 | { 126 | id: 'string', // required
 127 | schema: 'string', // required
 128 | source: 'string', // optional
 129 | seq: int // optional
 130 | } 131 | ``` 132 | `id` and `schema` are required. If `source` is set to a source key (hex string), the record will only be looked up in that source. If `source` is omitted, all sources will be checked. 133 | 134 | `cb` is a callback and will be called with `(err, record)` if `source` is set and with `(err, records)` if `source` is omitted. 135 | 136 | If `opts.reduce` is true, conflicting records will be reduced by modification timestamp, and the callback will be called with `(err, record)` even if `source` is not set. Set `opts.reduce` to a reduce function to change the reduce logic. The reduce function will be called with `(recordA, recordB)` and should return the preferred record.
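As a sketch of the two modes described above (`someId` stands in for an id returned by a previous `put`):

```javascript
// Without source and without reduce: one record per source that has the record.
db.get({ schema: 'event', id: someId }, (err, records) => {
  if (err) throw err
  records.forEach(record => console.log(record.source, record.value))
})

// With reduce: conflicting versions are collapsed into the latest one by mtime.
db.get({ schema: 'event', id: someId }, { reduce: true }, (err, record) => {
  if (err) throw err
  console.log(record.value)
})
```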
137 | 138 | #### `db.putSchema(name, schema, cb)` 139 | 140 | Save a schema into the database. The schema declaration follows the [JSON Schema](https://json-schema.org) spec, with some additional properties. 141 | 142 | ```javascript 143 | const schema = { 144 | properties: { 145 | title: { 146 | type: 'string', 147 | index: true 148 | }, 149 | date: { 150 | type: 'string', 151 | index: true 152 | } 153 | } 154 | } 155 | db.putSchema('event', schema, (err) => { 156 | if (!err) console.log('schema saved.') 157 | }) 158 | ``` 159 | 160 | Supported properties in addition to the JSON Schema spec are: 161 | 162 | * `index`: Set on a top-level simple field to index values of that field in the database. 163 | 164 | The top-level JSON schema declaration can be omitted and will be filled in automatically. 165 | 166 | > TODO: Also support putting full JSON schemas (including the outer section) 167 | 168 | #### `db.getSchema(name, [opts], cb)` 169 | 170 | Load a schema declaration from the database. 171 | 172 | #### `db.batch(ops, cb)` 173 | 174 | Execute multiple operations. `ops` looks like this: 175 | ```javascript 176 | const ops = [ 177 | { 178 | op: 'put' | 'del', 179 | schema, 180 | id, 181 | value 182 | } 183 | ] 184 | ``` 185 | 186 | If `op` is omitted, it is set to `put`.
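A short usage sketch; `oldId` is assumed to be the id of a previously written record:

```javascript
const ops = [
  { schema: 'event', value: { title: 'Concert' } }, // op defaults to 'put'
  { op: 'del', schema: 'event', id: oldId }
]

db.batch(ops, (err, ids) => {
  if (err) return console.error(err)
  console.log('written:', ids)
})
```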
187 | 188 | #### `const batchStream = db.createBatchStream()` 189 | 190 | Returns a duplex stream. The writable side expects to be written to with `op` objects as in `db.batch()`. The readable side emits arrays of the ids of the records that were written; failures are emitted as error events on the stream. 191 | 192 | #### `const getStream = db.createGetStream()` 193 | 194 | Returns a transform stream that turns get requests into records. 195 | 196 | ```javascript 197 | const getStream = db.createGetStream() 198 | getStream.push({ id, schema }) 199 | getStream.on('data', (record) => { 200 | console.log(record.value) 201 | }) 202 | ``` 203 | 204 | #### `db.list(schema, cb)` 205 | 206 | Get a list of all IDs for a schema. 207 | 208 | > TODO: This should be a stream instead that can be piped into `createGetStream()`. 209 | 210 | #### `db.useRecordView(name, makeView, [opts])` 211 | 212 | Register a new database view. Views are functions that will be called whenever records are being put or deleted. The database maintains the state of each view so that views catch up on updates automatically. See [kappa-core](https://github.com/kappa-db/kappa-core) for a good introduction on how to work with kappa views. 213 | 214 | `name` is the name of the view. It has to be unique per database. 215 | 216 | `makeView` is a constructor function. It will be called with `(level, db, opts)`: 217 | 218 | * `level`: a [LevelUp](https://github.com/Level/levelup)-compatible LevelDB instance for this view 219 | * `db`: the hypercontent db 220 | * `opts`: optional opts passed into `useRecordView` 221 | 222 | The constructor function should return a view object with the following keys: 223 | 224 | * `map: function (records, next) {}` 225 | This function will be called with a batch of records. Process the entries (e.g. by inserting rows into the leveldb). Call `next()` when done. 226 | * `api`: An object of query functions that this view exposes to the outside world. They should be safe to call (they must not modify data) as they may be called from the client side. 227 | * TODO: Document more props. 228 | * TODO: Add support for `filter()` and `reduce()` 229 | 230 | ##### Example 231 | 232 | ```javascript 233 | const through = require('through2') 234 | const hypercontent = require('hyper-content-db') 235 | const db = hypercontent('/tmp/testdb') 236 | 237 | function dateView (lvl, db) { 238 | return { 239 | map (records, next) { 240 | let ops = [] 241 | for (let record of records) { 242 | if (!record.value.date) continue 243 | ops.push({ 244 | type: 'put', 245 | key: `${record.value.date}!${record.id}!${record.schema}!${record.source}`, 246 | value: record.seq 247 | }) 248 | } 249 | lvl.batch(ops, next) 250 | }, 251 | api: { 252 | range (from, to) { 253 | from = from.toJSON() 254 | to = to.toJSON() 255 | return lvl.createReadStream({ 256 | gte: from, 257 | lte: to 258 | }).pipe(through.obj(function (row, enc, next) { 259 | const [date, id, schema, source] = row.key.split('!') 260 | next(null, { id, schema, source }) 261 | })) 262 | } 263 | } 264 | } 265 | } 266 | 267 | db.useRecordView('dates', dateView) 268 | 269 | const records = [ 270 | { title: 'Party', date: new Date(2019, 11, 2) }, 271 | { title: 'Demonstration', date: new Date(2020, 1, 10) }, 272 | { title: 'Reading circle', date: new Date(2019, 8, 7) }, 273 | { title: 'Workshop', date: new Date(2019, 12, 5) } 274 | ] 275 | 276 | const ops = records.map(value => ({ op: 'put', schema: 'event', value })) 277 | 278 | db.batch(ops, (err, ids) => { 279 | if (err) return console.error(err) 280 | 281 | const queryStream = db.api.dates.range(new Date(2019, 9), new Date(2019, 12, 31)) 282 | const resultStream = queryStream.pipe(db.createGetStream()) 283 | resultStream.on('data', record => console.log(record)) 284 | }) 285 | ``` 286 | 287 | #### `db.api` 288 | 289 | This is where query functions from views are exposed. 290 | 291 | #### `db.on('indexed', cb)` 292 | 293 | Emitted whenever a view has finished an indexing batch. `cb` is called with `(viewName, sourceKey, batch)` where `batch` is an array of the processed records. 294 | 295 | #### `db.on('indexed-all', cb)` 296 | 297 | Emitted whenever all views have finished processing. 298 | 299 | #### `db.on('start', cb)` 300 | 301 | Emitted when a new indexing round is started after `indexed-all` has been emitted.
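To tie the events together, a small sketch that logs indexing progress and queries only once all views have caught up (the `entities` view is one of the default views, enabled unless `opts.defaultViews` is `false`):

```javascript
db.on('indexed', (viewName, sourceKey, batch) => {
  console.log(`view ${viewName} indexed ${batch.length} records`)
})

db.on('indexed-all', () => {
  // All views are up to date; safe to query now.
  db.api.entities.all((err, list) => {
    if (!err) console.log('entity ids:', Object.keys(list))
  })
})
```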
302 | 303 | -------------------------------------------------------------------------------- /examples/importer/webpage/index.js: -------------------------------------------------------------------------------- 1 | const { Readable } = require('stream') 2 | const freezeDry = require('freeze-dry').default 3 | const ky = require('ky-universal') 4 | const u = require('url') 5 | const p = require('path') 6 | const jsdom = require('jsdom') 7 | const blake2b = require('blake2b') 8 | const mkdirp = require('mkdirp') 9 | const htmlToMd = require('./html-to-markdown.js') 10 | const crypto = require('hypercore-crypto') 11 | // const got = require('got') 12 | // 13 | const debug = require('debug')('import') 14 | 15 | // const { makeId } = require('../common') 16 | // 17 | // fullState (id) { 18 | // if (!jobs[id]) throw new Error('Invalid job id.') 19 | // return jobs[id].serialize() 20 | // } 21 | 22 | const metascraper = require('metascraper')([ 23 | require('metascraper-author')(), 24 | require('metascraper-date')(), 25 | require('metascraper-description')(), 26 | require('metascraper-image')(), 27 | require('metascraper-logo')(), 28 | require('metascraper-clearbit-logo')(), 29 | require('metascraper-publisher')(), 30 | require('metascraper-title')(), 31 | require('metascraper-url')() 32 | ]) 33 | 34 | const Readability = require('readability') 35 | 36 | module.exports = importer 37 | 38 | function importer (cstore) { 39 | const jobs = {} 40 | return { 41 | label: 'Web page importer', 42 | input (string, next) { 43 | if (string.match(/^https?:\/\//)) next(true) 44 | else next() 45 | }, 46 | handle (msg, done) { 47 | const { url, id } = msg 48 | cstore.writer((err, drive) => { 49 | if (err) done(err) 50 | const job = new Importer(cstore, id, url) 51 | jobs[job.id] = job 52 | job.setPipeline([ 53 | download, 54 | metascrape, 55 | readable, 56 | freeze, 57 | saveFiles 58 | ]) 59 | job.start() 60 | done(null, job.statusStream) 61 | }) 62 | } 63 | } 64 | } 65 | 66 | class Importer { 67 | constructor (cstore, id, url) { 68 | this.id = id || cstore.id() 69 | this.url = url 70 | this.cstore = cstore 71 | 72 | this.state = {} 73 | 74 | this.resources = {} 75 | this.files = {} 76 | this.derivedFiles = {} 77 | this.records = [] 78 | 79 | this.pipeline = [] 80 | 81 | this.currentStep = -1 82 | 83 | this.statusStream = new Readable({ 84 | objectMode: true, 85 | read () {} 86 | }) 87 | } 88 | 89 | start () { 90 | const self = this 91 | this.setState({ status: 'work' }, 'start!') 92 | 93 | this.executeNextStep(finish) 94 | 95 | function finish (error) { 96 | if (error) this.setState({ status: 'error', error }, 'error!') 97 | self.setState({ status: 'done' }, 'finished!') 98 | self.statusStream.push(null) 99 | } 100 | } 101 | 102 | executeNextStep (done) { 103 | const self = this 104 | this.currentStep++ 105 | if (!this.pipeline[this.currentStep]) { 106 | return done() 107 | } 108 | let worker = this.pipeline[this.currentStep] 109 | this.log('starting step: ' + worker.name) 110 | 111 | process.nextTick(() => { 112 | try { 113 | worker(this, (err) => { 114 | if (err) return error(err) 115 | this.executeNextStep(done) 116 | }) 117 | // if (typeof w === 'object' && w.then) { 118 | // w.catch(err => error(err)).then(() => this.executeNextStep(done)) 119 | // } 120 | } catch (err) { 121 | return error(err) 122 | } 123 | }) 124 | 125 | function error (err) { 126 | self.error('error in step: ' + worker.name, err) 127 | self.executeNextStep(done) 128 | } 129 | } 130 | 131 | setState (newState, message) { 132 | if (typeof 
newState === 'function') this.state = newState(this.state) 133 | else this.state = { ...this.state, ...newState } 134 | this.statusStream.push({ state: this.state, message }) 135 | } 136 | 137 | getState (cb) { 138 | if (cb) cb(this.state) 139 | return this.state 140 | } 141 | 142 | log (message) { 143 | debug(message) 144 | this.statusStream.push({ message }) 145 | } 146 | 147 | error (message, ...args) { 148 | debug('error', message, ...args) 149 | this.statusStream.push({ error: { message, args } }) 150 | } 151 | 152 | setPipeline (steps) { 153 | this.pipeline = steps 154 | } 155 | 156 | addResource (id, resource) { 157 | this.resources[id] = resource 158 | } 159 | 160 | getResource (id) { 161 | return this.resources[id] 162 | } 163 | 164 | addFile (path, value, metadata) { 165 | this.files[path] = { value, metadata } 166 | } 167 | 168 | // addDerivedFile (path, body) { 169 | // this.derivedFiles[path] = body 170 | // } 171 | 172 | addRecord (schema, value) { 173 | this.records.push({ schema, value }) 174 | } 175 | 176 | serialize () { 177 | return { 178 | state: this.state, 179 | files: this.files, 180 | resources: this.resources 181 | } 182 | } 183 | } 184 | 185 | function urlToFilename (url, opts) { 186 | opts = opts || {} 187 | let parsed = u.parse(url) 188 | let PREFIX = '/_import' 189 | // todo: how to handle GET params? 190 | let prefix = opts.prefix || 'source' 191 | let pathname = parsed.pathname 192 | if (opts.hash && pathname.length > 30) { 193 | const ext = p.extname(pathname) 194 | pathname = hash(pathname).toString('hex') + ext 195 | } 196 | let path = p.join(PREFIX, prefix, parsed.hostname, pathname) 197 | return path 198 | } 199 | 200 | function urlToHtmlFile (url, opts) { 201 | let filepath = urlToFilename(url, opts) 202 | if (filepath.substring(-1).charAt(0) === '/') { 203 | filepath = p.join(filepath, 'index.html') 204 | } 205 | return filepath 206 | } 207 | 208 | function blobToFilename (buf, url) { 209 | let parsed = u.parse(url) 210 | let PREFIX = '/_blobs' 211 | let name = hash(buf).toString('hex') 212 | let ext = p.extname(parsed.pathname) 213 | let path = p.join(PREFIX, parsed.hostname, name + ext) 214 | return path 215 | } 216 | 217 | async function download (job, next) { 218 | let url = job.url 219 | // let drive = job.api.hyperdrive 220 | 221 | let filepath = urlToHtmlFile(job.url) 222 | 223 | debug('fetch', url) 224 | let response = await ky(url) 225 | let html = await response.text() 226 | 227 | let headers = {} 228 | for (let [key, value] of response.headers) { 229 | headers[key] = value 230 | } 231 | 232 | const metadata = { headers } 233 | 234 | // drive.writeFile(filepath, text) 235 | job.addResource('html', html) 236 | 237 | // job.addFile(filepath, text) 238 | job.addFile(filepath, html, metadata) 239 | job.addRecord('file', { 240 | path: filepath, 241 | mimetype: 'text/html', 242 | type: 'source', 243 | origin: { 244 | type: 'http', 245 | headers, 246 | url 247 | } 248 | }) 249 | 250 | const dom = new jsdom.JSDOM(html, { url }) 251 | job.addResource('dom', dom) 252 | next() 253 | } 254 | 255 | async function freeze (job, next) { 256 | const dom = job.getResource('dom') 257 | let html = job.getResource('html') 258 | if (!dom) return next() 259 | 260 | try { 261 | html = await freezeDry(dom.window.document, { 262 | docUrl: job.url, 263 | fetchResource, 264 | blobToURL, 265 | getCsp 266 | }) 267 | } catch (err) { 268 | job.error('Cannot freeze-dry', dom.window.location, err) 269 | } 270 | 271 | // job.addResource('html-clean', html) 272 | let filepath = 
urlToHtmlFile(job.url, { prefix: 'freeze-dry' }) 273 | 274 | job.addFile(filepath, html) 275 | job.addRecord('file', { path: filepath, mimetype: 'text/html', type: 'freeze-dry', dangerous: false }) 276 | 277 | job.baseFilePath = filepath 278 | 279 | next() 280 | 281 | async function fetchResource (url, opts) { 282 | if (url.startsWith('data:')) return url 283 | // TODO: Fetch locally.. 284 | // const filename = urlToFilename(url, opts) 285 | try { 286 | const response = await ky(url, opts) 287 | job.log(`Fetched ${url}: ${response.status} ${response.statusText}`) 288 | return response 289 | } catch (err) { 290 | let response = err.reponse 291 | job.error(`Could not fetch ${url}: ${response.status} ${response.statusText}`) 292 | return url 293 | } 294 | // return ky(...args) 295 | // return got(...args) 296 | } 297 | 298 | async function blobToURL (blob, link, resource) { 299 | // const name = hash(blob) 300 | // console.log('make url: res', resource) 301 | if (!blob) return null 302 | let metadata = {} 303 | if (blob.type) { 304 | metadata.headers = { 'content-type': blob.type } 305 | } 306 | const buf = blob.toBuffer() 307 | const filename = blobToFilename(buf, link.resource.url) 308 | job.addFile(filename, blob.toBuffer(), metadata) 309 | return filename 310 | // const url = '/' + filename 311 | // return url 312 | 313 | // const filename = urlToFilename(link.resource.url, { hash: false }) 314 | // const parent = urlToFilename(resource.url) 315 | // const relative = p.relative(parent, filename) 316 | // job.addFile(filename, blob.toBuffer()) 317 | // return relative 318 | // return '/' + filename 319 | } 320 | 321 | function getCsp (resource) { 322 | const csp = [ 323 | "default-src 'none'", // By default, block all connectivity and scripts. 324 | "img-src 'self' data:", // Allow inlined images. 325 | "media-src 'self' data:", // Allow inlined audio/video. 326 | "style-src 'self' data: 'unsafe-inline'", // Allow inlined styles. 327 | "font-src 'self' data:", // Allow inlined fonts. 328 | 'frame-src data:' // Allow inlined iframes. 
329 | ].join('; ') 330 | 331 | return csp 332 | } 333 | // setTimeout(() => next(), 1000) 334 | } 335 | 336 | freeze.name = 'freeze-dry' 337 | 338 | async function metascrape (job, next) { 339 | const html = job.getResource('html') 340 | const url = job.url 341 | const metadata = await metascraper({ html, url }) 342 | job.addRecord('metascrape', metadata) 343 | // job.addDerivedFile('meta.json', Buffer.from(JSON.stringify(metadata))) 344 | next() 345 | } 346 | 347 | metascrape.name = 'metascrape' 348 | 349 | function readable (job, next) { 350 | // const html = job.getResource('html') 351 | // if (!html) return next() 352 | const dom = job.getResource('dom') 353 | if (!dom) return next() 354 | 355 | const article = new Readability(dom.window.document).parse() 356 | // const readable = readability(html, { href: job.url }) 357 | // job.addResource('readable', readable) 358 | const md = htmlToMd(article.content) 359 | const content = `# ${article.title}\n\n${md}` 360 | // job.addDerivedFile('readable.md', content) 361 | const record = { ...article, content } 362 | job.addRecord('readable', record) 363 | next() 364 | } 365 | 366 | readable.name = 'readability' 367 | 368 | function saveFiles (job, next) { 369 | job.cstore.writer((err, writer) => { 370 | if (err) return next(err) 371 | _saveFiles(job, writer, () => { 372 | _saveRecords(job, next) 373 | }) 374 | }) 375 | } 376 | 377 | function _saveRecords (job, next) { 378 | const cstore = job.cstore 379 | const batch = [] 380 | job.records.forEach(record => { 381 | batch.push({ 382 | op: 'put', 383 | id: job.id, 384 | schema: record.schema, 385 | value: record.value 386 | }) 387 | }) 388 | cstore.batch(batch, next) 389 | } 390 | 391 | function _saveFiles (job, drive, next) { 392 | // const basename = job.baseFilePath || urlToFilename(job.url) 393 | 394 | let missing = 0 395 | 396 | for (let [filename, file] of Object.entries(job.files)) { 397 | let { metadata = {}, value } = file 398 | if (typeof value === 'string') value = Buffer.from(value, 'utf8') 399 | if (!value || !Buffer.isBuffer(value)) { 400 | job.error('Invalid file content', filename) 401 | console.error('Invalid file content', value) 402 | continue 403 | } 404 | 405 | metadata.id = job.id 406 | 407 | for (let key of Object.keys(metadata)) { 408 | metadata[key] = Buffer.from(JSON.stringify(metadata[key])) 409 | } 410 | 411 | if (!filename.startsWith('/')) filename = '/' + filename 412 | 413 | missing++ 414 | 415 | mkdirp(p.dirname(filename), { fs: drive }, (err, cb) => { 416 | if (err && err.code !== 'EEXIST') return cb(err) 417 | drive.writeFile(filename, value, { metadata }, err => { 418 | if (err) console.error('ERROR WRITING', filename, value, metadata) 419 | let msg = 'Written file: ' + filename 420 | done(err, msg) 421 | }) 422 | }) 423 | } 424 | 425 | // for (let [filename, content] of Object.entries(job.derivedFiles)) { 426 | // // if (typeof content === 'string') content = Buffer.from(content, 'utf8') 427 | // if (typeof content === 'string') { 428 | // content = Buffer.from(content) 429 | // } 430 | // if (!content) { 431 | // job.error('No content set for file', filename) 432 | // continue 433 | // } 434 | // let path = p.join('/_import/DERIVED', filename) 435 | 436 | // if (!path.startsWith('/')) filename = '/' + filename 437 | 438 | // missing++ 439 | // mkdirp(p.dirname(path), { fs: drive }, (err, cb) => { 440 | // if (err && err.code !== 'EEXIST') return cb(err) 441 | // drive.writeFile(path, content, (err) => { 442 | // done(err, 'Written derived file: ' + path) 443 | 
// }) 444 | // }) 445 | // } 446 | 447 | if (!missing) done() 448 | 449 | function done (err, msg) { 450 | if (err && msg) job.error(msg, err) 451 | else if (msg) job.log(msg) 452 | if (--missing <= 0) next() 453 | } 454 | } 455 | 456 | function hash (blob) { 457 | let input = Buffer.from(blob) 458 | // let output = Buffer.alloc(128) 459 | // let hash = blake2b(output.length).update(input).digest('hex') 460 | // return hash 461 | return crypto.data(input) 462 | } 463 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const thunky = require('thunky') 2 | const p = require('path') 3 | const { EventEmitter } = require('events') 4 | const through = require('through2') 5 | const LRU = require('lru-cache') 6 | // const hyperid = require('hyperid') 7 | const shortid = require('shortid') 8 | const memdb = require('memdb') 9 | const sub = require('subleveldown') 10 | const levelBaseView = require('kappa-view') 11 | 12 | const multidrive = require('./lib/multidrive') 13 | const kappa = require('./lib/kappa') 14 | 15 | const entitiesView = require('./views/entities') 16 | const contentView = require('./views/content') 17 | const schemaAwareView = require('./views/schema-aware') 18 | 19 | const { P_DATA, P_SCHEMA, P_SOURCES } = require('./lib/constants') 20 | 21 | // const JSON_STRING = Symbol('json-buffer') 22 | 23 | module.exports = (...args) => new HyperContentDB(...args) 24 | module.exports.id = () => HyperContentDB.id() 25 | // module.exports.JSON_STRING = JSON_STRING 26 | 27 | class HyperContentDB extends EventEmitter { 28 | constructor (storage, key, opts) { 29 | super() 30 | opts = opts || {} 31 | 32 | this.multidrive = multidrive(storage, key, opts) 33 | 34 | this.kcore = kappa({ 35 | multidrive: this.multidrive, 36 | viewContext: this 37 | }) 38 | 39 | this.recordCache = new LRU({ 40 | max: opts.cacheSize || 16777216, // 16M 41 | length: record => (record.stat && record.stat.size) || 256 42 | }) 43 | 44 | this.level = opts.level || memdb() 45 | 46 | this.api = {} 47 | 48 | this.kcore.on('indexed', (...args) => this.emit('indexed', ...args)) 49 | this.kcore.on('indexed-all', (...args) => this.emit('indexed-all', ...args)) 50 | 51 | this.id = HyperContentDB.id 52 | 53 | if (opts.defaultViews !== false) { 54 | this.useRecordView('entities', entitiesView) 55 | this.useRecordView('indexes', schemaAwareView) 56 | } 57 | 58 | this.ready = thunky(this._ready.bind(this)) 59 | } 60 | 61 | useRecordView (name, makeView, opts = {}) { 62 | const db = sub(this.level, 'view.' + name) 63 | // levelBaseView takes care of the state handling 64 | // and passes on a subdb, and expects regular 65 | // kappa view opts (i.e., map). 66 | const view = levelBaseView(db, (db) => { 67 | // contentView wraps the inner view, taking care of 68 | // adding a .data prefix and optionally loading 69 | // record contents. 70 | return contentView(makeView(db, this, opts)) 71 | }) 72 | 73 | this.kcore.use(name, view) 74 | this.api[name] = this.kcore.api[name] 75 | } 76 | 77 | useFileView (name, makeView, opts = {}) { 78 | const db = sub(this.level, 'view.' + name) 79 | // levelBaseView takes care of the state handling 80 | // and passes on a subdb, and expects regular 81 | // kappa view opts (i.e., map). 82 | const view = levelBaseView(db, (db) => { 83 | // contentView wraps the inner view, taking care of 84 | // adding a .data prefix and optionally loading 85 | // record contents. 
86 | return { 87 | transformNodes: true, 88 | prefix: opts.prefix || undefined, 89 | ...makeView(db, this, opts) 90 | } 91 | }) 92 | 93 | this.kcore.use(name, view) 94 | this.api[name] = this.kcore.api[name] 95 | } 96 | 97 | _ready (cb) { 98 | this.multidrive.ready(err => { 99 | if (err) return cb(err) 100 | // TODO: Always wait for a writer? 101 | this.multidrive.writer((err) => cb(err)) 102 | }) 103 | } 104 | 105 | get key () { 106 | return this.multidrive.key 107 | } 108 | 109 | get discoveryKey () { 110 | return this.multidrive.discoveryKey 111 | } 112 | 113 | close () { 114 | this.emit('close') 115 | } 116 | 117 | _initWriter (cb) { 118 | this._writerReady = true 119 | this.multidrive.writer((err, drive) => { 120 | if (err) return cb(err) 121 | // TODO: Don't do this on every start? 122 | let dirs = [P_DATA, P_SCHEMA, P_SOURCES] 123 | let pending = dirs.length 124 | for (let dir of dirs) { 125 | drive.mkdir(dir, done) 126 | } 127 | function done (err) { 128 | if (err && err.code !== 'EEXIST') return cb(err) 129 | if (--pending === 0) { 130 | cb(null, drive) 131 | } 132 | } 133 | }) 134 | } 135 | 136 | use (view, opts) { 137 | this.kcore.use(view, opts) 138 | } 139 | 140 | writer (cb) { 141 | this.ready(err => { 142 | if (err) return cb(err) 143 | if (!this._writerReady) this._initWriter(cb) 144 | else this.multidrive.writer(cb) 145 | }) 146 | } 147 | 148 | get localKey () { 149 | return this.multidrive.localKey 150 | } 151 | 152 | replicate (opts) { 153 | return this.multidrive.replicate(opts) 154 | } 155 | 156 | addSource (key, cb) { 157 | cb = cb || noop 158 | this.multidrive.saveSource(key, cb) 159 | } 160 | 161 | hasSource (key) { 162 | return this.multidrive.hasSource(key) 163 | } 164 | 165 | sources (cb) { 166 | this.multidrive.sources(cb) 167 | } 168 | 169 | source (key, cb) { 170 | this.multidrive.source(key, cb) 171 | } 172 | 173 | batch (msgs, cb) { 174 | cb = cb || noop 175 | const results = [] 176 | const errors = [] 177 | 178 | let pending = msgs.length 179 | 180 | for (let msg of msgs) { 181 | const { op = 'put', schema, id, value } = msg 182 | 183 | if (op === 'put') this.put({ schema, id, value }, finish) 184 | else if (op === 'del') this.del(schema, id, finish) 185 | else if (op === 'source') this.addSource(value) 186 | else if (op === 'schema') this.putSchema(schema, value, finish) 187 | // NOTE: Without process.nextTick this would break because 188 | // pending would not fullyincrease before finishing. 189 | else process.nextTick(finish) 190 | } 191 | 192 | function finish (err, result) { 193 | if (err) errors.push(err) 194 | if (result) results.push(result) 195 | if (--pending === 0) cb(errors.length && errors, results) 196 | } 197 | } 198 | 199 | /** 200 | * Create a batch stream. 201 | * 202 | * The returned stream is a transform stream. Write batch ops 203 | * to it, read results and erros. 204 | * 205 | * Wants either array of ops or a single op, where op is 206 | * { 207 | * op: 'put' | 'del' | 'schema', 208 | * id, 209 | * schema, 210 | * value 211 | * } 212 | * 213 | * For details see example in tests. 214 | */ 215 | createBatchStream () { 216 | const self = this 217 | 218 | const batchStream = through.obj(function (msg, encoding, next) { 219 | msg = Array.isArray(msg) ? msg : [msg] 220 | self.batch(msg, (err, ids) => { 221 | if (err) this.emit('error', err) 222 | else this.push(ids) 223 | next(err) 224 | }) 225 | }) 226 | 227 | return batchStream 228 | } 229 | 230 | /** 231 | * Create a get stream. 232 | * 233 | * The returned stream is a transform stream. 
Write get requests 234 | * to it, read results and erros. 235 | * 236 | * Wants messages that look like 237 | * { id, schema, source } 238 | * 239 | * Emits messages that look like 240 | * { id, schema, source, value, stat } 241 | * 242 | * TODO: Support no source. 243 | * TODO: Support seq. 244 | * 245 | * For details see example in tests. 246 | */ 247 | createGetStream (opts) { 248 | const self = this 249 | return through.obj(function (msg, enc, next) { 250 | self.get(msg, opts, (err, record) => { 251 | if (err) { 252 | this.emit('error', err) 253 | } else if (record) { 254 | if (Array.isArray(record)) { 255 | record.forEach(record => this.push(record)) 256 | } else { 257 | this.push(record) 258 | } 259 | } 260 | next() 261 | }) 262 | }) 263 | } 264 | 265 | put (req, cb) { 266 | let { schema, id, value } = req 267 | if (!id) id = this.id() 268 | 269 | this.expandSchemaName(schema, (err, schema) => { 270 | if (err) return cb(err) 271 | this.writer((err, drive) => { 272 | if (err) return cb(err) 273 | const dir = p.join(P_DATA, schema) 274 | drive.mkdir(dir, (err) => { 275 | if (err && err.code !== 'EEXIST') return cb(err) 276 | const path = makePath(schema, id) 277 | const buf = Buffer.from(JSON.stringify(value)) 278 | drive.writeFile(path, buf, (err) => { 279 | if (err) return cb(err) 280 | cb(null, id) 281 | }) 282 | }) 283 | }) 284 | }) 285 | } 286 | 287 | get (req, opts, cb) { 288 | if (typeof opts === 'function') return this.get(req, null, opts) 289 | const self = this 290 | cb = once(cb) 291 | opts = opts || {} 292 | 293 | const { id, schema, source, seq } = req 294 | 295 | if (seq && !source) return cb(new Error('Invalid request: seq without source')) 296 | 297 | if (opts.reduce === true) opts.reduce = defaultReduce 298 | 299 | this.expandSchemaName(schema, (err, schema) => { 300 | if (err) return cb(err) 301 | let pending 302 | let records = [] 303 | 304 | if (source) { 305 | pending = 1 306 | this.source(source, drive => load(drive, onrecord)) 307 | } else { 308 | this.sources(drives => { 309 | pending = drives.length 310 | drives.forEach(drive => load(drive, onrecord)) 311 | }) 312 | } 313 | 314 | function onrecord (err, record) { 315 | // Skip not found errors. 316 | if (err && err.code !== 'ENOENT') return cb(err) 317 | if (record) records.push(record) 318 | if (--pending === 0) finish() 319 | } 320 | 321 | function finish () { 322 | // If reduce is false, return all records. 323 | if (!opts.reduce) return cb(null, records) 324 | 325 | if (!records.length) return cb(null, null) 326 | if (records.length === 1) return cb(null, records[0]) 327 | 328 | const result = records.reduce((result, record) => { 329 | if (!result) return record 330 | else return opts.reduce(result, record) 331 | }, null) 332 | if (result) result.alternatives = records.filter(r => r.source !== result.source) 333 | cb(null, result) 334 | } 335 | 336 | function load (drive, cb) { 337 | if (!drive) return cb() 338 | 339 | const path = makePath(schema, id) 340 | const source = hex(drive.key) 341 | const cacheKey = `${source}@${seq || drive.version}/${path}` 342 | 343 | const cachedRecord = self.recordCache.get(cacheKey) 344 | if (cachedRecord) return cb(null, cachedRecord) 345 | 346 | const record = { source, id, schema } 347 | 348 | // TODO: Find out why seq has to be incremented by one. 349 | // If doing drive.checkout(seq), the files are not found. 
350 | if (seq) drive = drive.checkout(Math.min(seq + 1, drive.version)) 351 | 352 | drive.stat(path, (err, stat, trie) => { 353 | if (err || !stat.isFile()) return cb(err, null) 354 | 355 | if (opts.fullStat) record.stat = stat 356 | 357 | record.meta = cleanStat(stat) 358 | 359 | drive.readFile(path, (err, buf) => { 360 | if (err) return cb(err) 361 | try { 362 | record.value = JSON.parse(buf.toString()) 363 | self.recordCache.set(cacheKey, record) 364 | cb(null, record) 365 | } catch (err) { 366 | cb(err) 367 | } 368 | }) 369 | }) 370 | } 371 | }) 372 | 373 | function defaultReduce (a, b) { 374 | return a.meta.mtime > b.meta.mtime ? a : b 375 | } 376 | } 377 | 378 | // TODO: This should likely be streaming. 379 | list (schema, cb) { 380 | this.expandSchemaName(schema, (err, schema) => { 381 | if (err) return cb(err) 382 | let ids = new Set() 383 | let pending 384 | this.sources(drives => { 385 | pending = drives.length 386 | drives.forEach(drive => { 387 | let path = p.join(P_DATA, schema) 388 | drive.readdir(path, (err, list) => { 389 | if (err) return finish(err) 390 | if (!list.length) return finish() 391 | list = list.map(id => id.replace(/\.json$/, '')) 392 | finish(null, list) 393 | }) 394 | }) 395 | }) 396 | 397 | function finish (err, list) { 398 | if (!err && list) { 399 | list.forEach(id => ids.add(id)) 400 | } 401 | if (--pending === 0) cb(null, Array.from(ids)) 402 | } 403 | }) 404 | } 405 | 406 | expandSchemaName (name, cb) { 407 | this.ready(() => { 408 | if (!validSchemaName(name)) return cb(new InvalidSchemaName(name)) 409 | if (name.indexOf('/') === -1) { 410 | let expanded = hex(this.key) + '/' + name 411 | cb(null, expanded) 412 | // this.writer((err, drive) => { 413 | // if (err) return cb(err) 414 | // let expanded = hex(drive.key) + '/' + name 415 | // cb(null, expanded) 416 | // }) 417 | } else { 418 | cb(null, name) 419 | } 420 | }) 421 | } 422 | 423 | putSchema (name, schema, cb = noop) { 424 | this.expandSchemaName(name, (err, name) => { 425 | if (err) return cb(err) 426 | const id = schemaId(name) 427 | const value = this._encodeSchema(schema, name, id) 428 | this.put({ schema: 'core/schema', id, value }, cb) 429 | }) 430 | } 431 | 432 | getSchema (name, opts, cb) { 433 | if (typeof opts === 'function') return this.getSchema(name, {}, opts) 434 | opts = opts || {} 435 | this.expandSchemaName(name, (err, name) => { 436 | if (err) return cb(err) 437 | const id = schemaId(name) 438 | this.get({ schema: 'core/schema', id }, { reduce }, (err, record) => { 439 | if (err) return cb(err) 440 | if (!record) return cb(null, null) 441 | return cb(null, record.value) 442 | }) 443 | }) 444 | 445 | function reduce (a, b) { 446 | if (opts.reduce) return opts.reduce(a, b) 447 | if (a.version && b.version) return a.version > b.version ? 
a : b 448 | if (a.version) return a 449 | if (b.version) return b 450 | return a 451 | } 452 | } 453 | 454 | _encodeSchema (schema, name, id) { 455 | const $id = `dat://${hex(this.key)}/${makePath('core/schema', id)}` 456 | const defaults = { 457 | '$schema': 'http://json-schema.org/draft-07/schema#', 458 | '$id': $id, 459 | type: 'object', 460 | title: name 461 | } 462 | return Object.assign({}, defaults, schema) 463 | } 464 | } 465 | 466 | function schemaId (name) { 467 | return name.replace('/', '__') 468 | } 469 | 470 | class InvalidSchemaName extends Error { 471 | constructor (name) { 472 | super() 473 | this.message = `Invalid schema name: ${name}` 474 | } 475 | } 476 | 477 | // HyperContentDB.id = hyperid({ fixedLength: true, urlSafe: true }) 478 | HyperContentDB.id = () => shortid.generate() 479 | 480 | function makePath (schema, id) { 481 | return p.join(P_DATA, schema, id + '.json') 482 | } 483 | 484 | function validSchemaName (schema) { 485 | if (!schema || typeof schema !== 'string') return false 486 | return schema.match(/^[a-zA-Z0-9_\-./]*$/) 487 | // return schema.split('/').length === 2 488 | } 489 | 490 | function hex (key) { 491 | return Buffer.isBuffer(key) ? key.toString('hex') : key 492 | } 493 | 494 | function cleanStat (stat) { 495 | return { 496 | ctime: stat.ctime, 497 | mtime: stat.mtime, 498 | size: stat.size, 499 | seq: stat.seq 500 | } 501 | } 502 | 503 | // Wrap fn so that only the first invocation has an effect. 504 | function once (fn) { 505 | let called = false 506 | return (...args) => { 507 | if (called) return 508 | called = true 509 | fn(...args) 510 | } 511 | } 512 | 513 | function noop () {} 514 | --------------------------------------------------------------------------------