├── .gitignore
├── .datignore
├── dat.json
├── test
│   ├── lib
│   │   └── util.js
│   ├── compound-keys.js
│   └── view.js
├── lib
│   ├── errors.js
│   ├── view-def.js
│   ├── util.js
│   ├── util-level.js
│   ├── view.js
│   └── indexer.js
├── LICENSE
├── package.json
├── index.js
├── .eslintrc.json
└── README.md
/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | .DS_Store -------------------------------------------------------------------------------- /.datignore: -------------------------------------------------------------------------------- 1 | .git 2 | .dat 3 | node_modules 4 | *.log 5 | **/.DS_Store 6 | Thumbs.db 7 | -------------------------------------------------------------------------------- /dat.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "dat-archive-map-reduce.js", 3 | "description": "Index files in Dat archives to create queryable data views.", 4 | "type": [ 5 | "" 6 | ] 7 | } -------------------------------------------------------------------------------- /test/lib/util.js: -------------------------------------------------------------------------------- 1 | const tempy = require('tempy') 2 | const DatArchive = require('node-dat-archive') 3 | const DAMR = require('../../index') 4 | const {debug, veryDebug} = require('../../lib/util') 5 | 6 | var __counter = 0 7 | exports.newDB = function () { 8 | const name = 'test' + (++__counter) 9 | debug('\n##', name, '\n') 10 | var dir = tempy.directory() 11 | veryDebug('DB dir:', dir) 12 | return new DAMR(dir, {DatArchive}) 13 | } 14 | 15 | exports.reopenDB = function (db) { 16 | return new DAMR(db.name, {DatArchive}) 17 | } 18 | 19 | var lastTs = 0 20 | exports.ts = function () { 21 | var ts = Date.now() 22 | while (ts <= lastTs) { 23 | ts++ // cheat to avoid a collision 24 | } 25 | lastTs = ts 26 | return ts 27 | } 28 | -------------------------------------------------------------------------------- /lib/errors.js: -------------------------------------------------------------------------------- 1 | class ExtendableError extends Error { 2 | constructor (msg) { 3 | super(msg) 4 | this.name = this.constructor.name 5 | this.message = msg 6 | if (typeof Error.captureStackTrace === 'function') { 7 | Error.captureStackTrace(this, this.constructor) 8 | } else { 9 | this.stack = (new Error(msg)).stack 10 | } 11 | } 12 | } 13 | 14 | exports.SchemaError = class SchemaError extends ExtendableError { 15 | constructor (msg) { 16 | super(msg || 'Schema error') 17 | this.schemaError = true 18 | } 19 | } 20 | 21 | exports.ParameterError = class ParameterError extends ExtendableError { 22 | constructor (msg) { 23 | super(msg || 'Invalid parameter') 24 | this.parameterError = true 25 | } 26 | } 27 | 28 | exports.QueryError = class QueryError extends ExtendableError { 29 | constructor (msg) { 30 | super(msg || 'Query is malformed') 31 | this.queryError = true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /lib/view-def.js: -------------------------------------------------------------------------------- 1 | const {assert} = require('./util') 2 | const {SchemaError} = require('./errors') 3 | 4 | exports.validateAndSanitize = function (definition) { 5 | // validate and sanitize 6 | assert(definition && typeof definition === 'object', SchemaError, `Must pass a definition object to db.define(), got ${definition}`) 7 | assert(definition.path && isStringOrArrayOfStrings(definition.path), SchemaError, `The .path field 
must be a string or array of strings`) 8 | assert(definition.map && typeof definition.map === 'function', SchemaError, `The .map field must be a function, got ${typeof definition.map}`) 9 | assert(!definition.reduce || typeof definition.reduce === 'function', SchemaError, `The .reduce field must be a function, got ${typeof definition.reduce}`) 10 | } 11 | 12 | // helpers 13 | // = 14 | 15 | function isStringOrArrayOfStrings (v) { 16 | if (typeof v === 'string') return true 17 | if (Array.isArray(v)) { 18 | return v.every(item => typeof item === 'string') 19 | } 20 | return false 21 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Blue Link Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@beaker/dat-archive-map-reduce", 3 | "version": "0.0.0", 4 | "description": "Index files in Dat archives to create queryable data views.", 5 | "main": "index.js", 6 | "directories": { 7 | "lib": "lib", 8 | "test": "test" 9 | }, 10 | "dependencies": { 11 | "anymatch": "^1.3.2", 12 | "await-lock": "^1.1.2", 13 | "concat-stream": "^2.0.0", 14 | "level-browserify": "^1.1.2", 15 | "lodash.debounce": "^4.0.8", 16 | "subleveldown": "^2.1.0", 17 | "through2": "^2.0.3", 18 | "url-parse": "^1.2.0" 19 | }, 20 | "devDependencies": { 21 | "ava": "^2.2.0", 22 | "eslint": "^6.1.0", 23 | "node-dat-archive": "^1.6.1", 24 | "tempy": "^0.1.0" 25 | }, 26 | "scripts": { 27 | "lint": "eslint . 
--ignore-pattern 'node_modules/*' --fix", 28 | "test": "ava -s test/*.js -T 30000 -v" 29 | }, 30 | "repository": { 31 | "type": "git", 32 | "url": "git+https://github.com/beakerbrowser/dat-archive-map-reduce.git" 33 | }, 34 | "keywords": [ 35 | "beaker", 36 | "dat", 37 | "p2p" 38 | ], 39 | "author": "Paul Frazee ", 40 | "license": "MIT", 41 | "bugs": { 42 | "url": "https://github.com/beakerbrowser/dat-archive-map-reduce/issues" 43 | }, 44 | "homepage": "https://github.com/beakerbrowser/dat-archive-map-reduce#readme" 45 | } 46 | -------------------------------------------------------------------------------- /lib/util.js: -------------------------------------------------------------------------------- 1 | const AwaitLock = require('await-lock') 2 | const URL = (typeof window === 'undefined') ? require('url-parse') : window.URL 3 | exports.URL = URL 4 | 5 | // read log level from the environment 6 | const LOG_LEVEL = (typeof window === 'undefined' 7 | ? +process.env.LOG_LEVEL 8 | : +window.localStorage.LOG_LEVEL) || 0 9 | const LOG_LEVEL_DEBUG = 1 10 | const LOG_LEVEL_VERYDEBUG = 2 11 | 12 | // debug logging 13 | function noop () {} 14 | exports.debug = (LOG_LEVEL >= LOG_LEVEL_DEBUG) ? console.log : noop 15 | exports.veryDebug = (LOG_LEVEL >= LOG_LEVEL_VERYDEBUG) ? console.log : noop 16 | 17 | // assert helper 18 | exports.assert = function (cond, ErrorConstructor = Error, msg) { 19 | if (!cond) { 20 | throw new ErrorConstructor(msg) 21 | } 22 | } 23 | 24 | // provide a diff of 2 arrays 25 | // eg diffArrays([1,2], [2,3]) => {add: [3], remove: [1]} 26 | // if no difference, returns false 27 | exports.diffArrays = function (left, right) { 28 | var diff = {add: [], remove: []} 29 | 30 | // iterate all values in the arrays 31 | var union = new Set(left.concat(right)) 32 | for (let index of union) { 33 | // push to add/remove based on left/right membership 34 | var leftHas = left.indexOf(index) !== -1 35 | var rightHas = right.indexOf(index) !== -1 36 | if (leftHas && !rightHas) { 37 | diff.remove.push(index) 38 | } else if (!leftHas && rightHas) { 39 | diff.add.push(index) 40 | } 41 | } 42 | 43 | if (diff.add.length === 0 && diff.remove.length === 0) { 44 | return false 45 | } 46 | return diff 47 | } 48 | 49 | exports.deepClone = function (v) { 50 | return JSON.parse(JSON.stringify(v)) 51 | } 52 | 53 | exports.toArchiveUrl = function (v) { 54 | if (v) { 55 | if (typeof v.url === 'string') { 56 | v = v.url 57 | } 58 | const urlp = new URL(v) 59 | return urlp.protocol + '//' + urlp.hostname 60 | } 61 | throw new Error('Not a valid archive') 62 | } 63 | 64 | // wraps await-lock in a simpler interface, with many possible locks 65 | // usage: 66 | /* 67 | async function foo () { 68 | var release = await lock('bar') 69 | // ... 
70 | release() 71 | } 72 | */ 73 | var locks = {} 74 | exports.lock = async function (key) { 75 | if (!(key in locks)) locks[key] = new AwaitLock() 76 | 77 | var lock = locks[key] 78 | await lock.acquireAsync() 79 | return lock.release.bind(lock) 80 | } 81 | -------------------------------------------------------------------------------- /lib/util-level.js: -------------------------------------------------------------------------------- 1 | const through2 = require('through2') 2 | const concat = require('concat-stream') 3 | const {assert, debug, veryDebug} = require('./util') 4 | 5 | exports.push = async function (db, key, value) { 6 | try { 7 | var list = await get(db, key) 8 | } catch (e) {} 9 | list = list || [] 10 | list.push(value) 11 | await put(db, key, list) 12 | } 13 | 14 | exports.update = async function (db, key, updates) { 15 | assert(updates && typeof updates === 'object') 16 | try { 17 | var record = await get(db, key) 18 | } catch (e) {} 19 | record = record || {} 20 | for (var k in updates) { 21 | record[k] = updates[k] 22 | } 23 | await put(db, key, record) 24 | } 25 | 26 | exports.clear = async function (db) { 27 | return new Promise((resolve, reject) => { 28 | var stream = db.createKeyStream() 29 | stream 30 | .pipe(through2.obj((key, enc, cb) => db.del(key).then(cb, cb))) 31 | .on('error', reject) 32 | .on('end', () => resolve()) 33 | stream.resume() 34 | }) 35 | } 36 | 37 | const get = 38 | exports.get = async function (db, key) { 39 | return new Promise((resolve, reject) => { 40 | db.get(key, (err, value) => { 41 | if (err) { 42 | if (err.notFound) resolve(undefined) 43 | else reject(err) 44 | } else { 45 | resolve(value) 46 | } 47 | }) 48 | }) 49 | } 50 | 51 | const put = 52 | exports.put = async function (db, key, value) { 53 | return new Promise((resolve, reject) => { 54 | db.put(key, value, (err, value) => { 55 | if (err) { 56 | reject(err) 57 | } else { 58 | resolve(value) 59 | } 60 | }) 61 | }) 62 | } 63 | 64 | exports.del = async function (db, key) { 65 | return new Promise((resolve, reject) => { 66 | db.del(key, (err, value) => { 67 | if (err) { 68 | reject(err) 69 | } else { 70 | resolve(value) 71 | } 72 | }) 73 | }) 74 | } 75 | 76 | exports.list = async function (db, opts) { 77 | return new Promise((resolve, reject) => { 78 | var stream = db.createReadStream(opts) 79 | stream 80 | .on('error', reject) 81 | .pipe(concat(resolve)) 82 | stream.resume() 83 | }) 84 | } 85 | 86 | exports.each = async function (db, fn) { 87 | return new Promise((resolve, reject) => { 88 | var stream = db.createValueStream() 89 | stream.on('data', fn) 90 | stream.on('error', reject) 91 | stream.on('end', resolve) 92 | stream.resume() 93 | }) 94 | } 95 | -------------------------------------------------------------------------------- /lib/view.js: -------------------------------------------------------------------------------- 1 | const EventEmitter = require('events') 2 | const sublevel = require('subleveldown') 3 | const LevelUtil = require('./util-level') 4 | const {debug, veryDebug} = require('./util') 5 | 6 | // typedefs 7 | // = 8 | 9 | /** 10 | * @typedef {Object} InternalEntryValue 11 | * @prop {string} fileUrl 12 | * @prop {any} value 13 | * 14 | * @typedef {Object} InternalEntry 15 | * @prop {any} key 16 | * @prop {InternalEntryValue[]} value 17 | * 18 | * @typedef {Object} Entry 19 | * @prop {any} key 20 | * @prop {any} value 21 | */ 22 | 23 | // exported api 24 | // = 25 | 26 | class View extends EventEmitter { 27 | constructor (db, name, definition) { 28 | super() 29 | 
veryDebug('View', name, definition) 30 | this.db = db 31 | this.name = name 32 | this.filePattern = definition.path 33 | this.map = definition.map 34 | this.reduce = definition.reduce 35 | 36 | // construct db objects 37 | const levelOpts = {keyEncoding: 'json', valueEncoding: 'json'} 38 | this.level = sublevel(db.level, name, levelOpts) 39 | this.archiveVersionLevel = sublevel(this.level, 'av', levelOpts) 40 | this.entriesByFileLevel = sublevel(this.level, 'ebf', levelOpts) 41 | this.entriesLevel = sublevel(this.level, 'e', levelOpts) 42 | if (this.reduce) { 43 | this.reducesLevel = sublevel(this.level, 'r', levelOpts) 44 | } 45 | } 46 | 47 | /** 48 | * @param {any} key 49 | * @returns {Promise} 50 | */ 51 | async get (key) { 52 | if (this.reducesLevel) { 53 | let v = await LevelUtil.get(this.reducesLevel, key) 54 | if (!v) return v 55 | return {key, value: v} 56 | } else { 57 | let v = await LevelUtil.get(this.entriesLevel, key) 58 | if (!v) return v 59 | return {key, value: v.map(({value}) => value)} 60 | } 61 | } 62 | 63 | /** 64 | * @param {Object} opts 65 | * @returns {Promise} 66 | */ 67 | async list (opts) { 68 | if (this.reducesLevel) { 69 | // simple case- list the items directly 70 | return LevelUtil.list(this.reducesLevel, opts) 71 | } 72 | 73 | // mapped entries can have multiple values per key 74 | // run the list() query and then flatten the results into a single array 75 | var entries = [] 76 | var items = await LevelUtil.list(this.entriesLevel, opts) 77 | for (let item of items) { 78 | for (let v of item.value) { 79 | entries.push({key: item.key, value: v.value}) 80 | } 81 | } 82 | return entries 83 | } 84 | 85 | /** 86 | * @param {string} fileUrl 87 | * @param {Entry[]} entries 88 | */ 89 | async addEntries (fileUrl, entries) { 90 | veryDebug('addEntries()', this.name, fileUrl, entries) 91 | 92 | // store in the db: 93 | // - (data) the values are added to the array of values at the given key 94 | // - (meta) the keys related to this file are stored as an array 95 | var ps = [] 96 | var keys = [] 97 | for (let entry of entries) { 98 | keys.push(entry.key) 99 | ps.push(LevelUtil.push(this.entriesLevel, entry.key, {fileUrl, value: entry.value})) 100 | } 101 | ps.push(LevelUtil.put(this.entriesByFileLevel, fileUrl, keys)) 102 | await Promise.all(ps) 103 | } 104 | 105 | /** 106 | * 107 | * @param {string} key 108 | * @returns {Promise} 109 | */ 110 | async getEntries (key) { 111 | return LevelUtil.get(this.entriesLevel, key) 112 | } 113 | 114 | /** 115 | * @param {string} fileUrl 116 | * @returns {Promise} 117 | */ 118 | async getEntryKeysByFile (fileUrl) { 119 | return LevelUtil.get(this.entriesByFileLevel, fileUrl) 120 | } 121 | 122 | /** 123 | * @param {string} fileUrl 124 | * @returns {Promise} 125 | */ 126 | async clearEntriesByFile (fileUrl) { 127 | veryDebug('clearEntriesByFile()', this.name, fileUrl) 128 | 129 | // fetch keys of entries generated by the file 130 | var keys = await LevelUtil.get(this.entriesByFileLevel, fileUrl) 131 | if (!keys || !keys.length) return 132 | 133 | for (let key of keys) { 134 | // get the entries for the key 135 | var entries = await LevelUtil.get(this.entriesLevel, key) 136 | 137 | // remove any entry generated by the file 138 | entries = entries.filter(entry => entry.fileUrl !== fileUrl) 139 | if (!entries.length) { 140 | await LevelUtil.del(this.entriesLevel, key) 141 | } else { 142 | await LevelUtil.put(this.entriesLevel, key, entries) 143 | } 144 | } 145 | 146 | // remove the pointer as all related data has been removed 147 | await 
LevelUtil.del(this.entriesByFileLevel, fileUrl) 148 | } 149 | 150 | /** 151 | * @param {any} key 152 | * @param {any} acc 153 | * @returns {Promise} 154 | */ 155 | async putReducedValue (key, acc) { 156 | veryDebug('putReducedValue()', this.name, key, acc) 157 | if (typeof acc === 'undefined') { 158 | await LevelUtil.del(this.reducesLevel, key) 159 | } else { 160 | await LevelUtil.put(this.reducesLevel, key, acc) 161 | } 162 | } 163 | 164 | /** 165 | * @returns {Promise} 166 | */ 167 | async clearData () { 168 | veryDebug('clearData()', this.name) 169 | await LevelUtil.clear(this.level) 170 | } 171 | } 172 | 173 | module.exports = View 174 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const EventEmitter = require('events') 2 | const level = require('level-browserify') 3 | const {debug, veryDebug, assert, URL} = require('./lib/util') 4 | const {SchemaError} = require('./lib/errors') 5 | const ViewDef = require('./lib/view-def') 6 | const Indexer = require('./lib/indexer') 7 | const View = require('./lib/view') 8 | 9 | class MapReduce extends EventEmitter { 10 | /** 11 | * @param {string} [name] 12 | * @param {Object} [opts] 13 | * @param {Object} [opts.DatArchive] 14 | */ 15 | constructor (name = 'views', opts = {}) { 16 | super() 17 | if (typeof window === 'undefined' && !opts.DatArchive) { 18 | throw new Error('Must provide {DatArchive} opt when using MapReduce outside the browser.') 19 | } 20 | this.level = false 21 | this.name = name 22 | this.isBeingOpened = false 23 | this.isOpen = false 24 | this.DatArchive = opts.DatArchive || window.DatArchive 25 | this.views = {} 26 | this._archives = {} 27 | this._viewFilePatterns = [] 28 | this._dbReadyPromise = new Promise((resolve, reject) => { 29 | this.once('open', () => resolve(this)) 30 | this.once('open-failed', reject) 31 | }) 32 | } 33 | 34 | async open () { 35 | // guard against duplicate opens 36 | if (this.isBeingOpened) { 37 | veryDebug('duplicate open, returning ready promise') 38 | return this._dbReadyPromise 39 | } 40 | if (this.isOpen) { 41 | return 42 | } 43 | this.isBeingOpened = true 44 | 45 | // open the db 46 | debug('opening') 47 | try { 48 | this.level = level(this.name, {valueEncoding: 'json'}) 49 | 50 | debug('opened') 51 | this.isBeingOpened = false 52 | this.isOpen = true 53 | this.emit('open') 54 | } catch (e) { 55 | console.error('Open has failed', e) 56 | this.isBeingOpened = false 57 | this.emit('open-failed', e) 58 | throw e 59 | } 60 | } 61 | 62 | async close () { 63 | if (!this.isOpen) return 64 | debug('closing') 65 | this.isOpen = false 66 | if (this.level) { 67 | Object.values(this._archives).forEach(archive => Indexer.unwatchArchive(this, archive)) 68 | this._archives = {} 69 | await new Promise(resolve => this.level.close(resolve)) 70 | this.level = null 71 | veryDebug('db .level closed') 72 | } else { 73 | veryDebug('db .level didnt yet exist') 74 | } 75 | } 76 | 77 | async destroy () { 78 | if (this.isOpen) { 79 | await this.close() 80 | } 81 | 82 | if (typeof level.destroy !== 'function') { 83 | // TODO add support for node? 84 | throw new Error('Cannot .destroy() databases outside of the browser environment. 
You should just delete the files manually.') 85 | } 86 | 87 | // delete the database from indexeddb 88 | return new Promise((resolve, reject) => { 89 | level.destroy(this.name, err => { 90 | if (err) reject(err) 91 | else resolve() 92 | }) 93 | }) 94 | } 95 | 96 | async define (viewName, definition) { 97 | if (viewName in this.views) { 98 | throw new SchemaError(`${viewName} has already been defined`) 99 | } 100 | await this.open() 101 | ViewDef.validateAndSanitize(definition) 102 | this.views[viewName] = new View(this, viewName, definition) 103 | 104 | if (Array.isArray(definition.path)) { 105 | this._viewFilePatterns = this._viewFilePatterns.concat(definition.path) 106 | } else { 107 | this._viewFilePatterns.push(definition.path) 108 | } 109 | } 110 | 111 | async reset (viewName) { 112 | await this.open() 113 | await Indexer.resetIndex(this, viewName) 114 | this.emit('view-reset', {view: viewName}) 115 | } 116 | 117 | async get (viewName, key) { 118 | await this.open() 119 | return this.views[viewName].get(key) 120 | } 121 | 122 | async list (viewName, opts={}) { 123 | await this.open() 124 | return this.views[viewName].list(opts) 125 | } 126 | 127 | async index (archive, opts = {watch: false}) { 128 | await this.open() 129 | opts.watch = (typeof opts.watch === 'boolean') ? opts.watch : true 130 | 131 | // create our own new DatArchive instance 132 | archive = typeof archive === 'string' ? new (this.DatArchive)(archive) : archive 133 | debug('MapReduce.index', archive.url) 134 | if (!(archive.url in this._archives)) { 135 | // store and process 136 | this._archives[archive.url] = archive 137 | await Indexer.addArchive(this, archive, opts) 138 | } else { 139 | await Indexer.indexArchive(this, archive) 140 | } 141 | } 142 | 143 | async unindex (archive) { 144 | await this.open() 145 | archive = typeof archive === 'string' ? new (this.DatArchive)(archive) : archive 146 | if (archive.url in this._archives) { 147 | debug('MapReduce.unindex', archive.url) 148 | delete this._archives[archive.url] 149 | await Indexer.removeArchive(this, archive) 150 | } 151 | } 152 | 153 | async indexFile (archive, filepath) { 154 | await this.open() 155 | if (typeof archive === 'string') { 156 | const urlp = new URL(archive) 157 | archive = new (this.DatArchive)(urlp.protocol + '//' + urlp.hostname) 158 | return this.indexFile(archive, urlp.pathname) 159 | } 160 | for (let name in this.views) { 161 | await Indexer.readAndIndexFile(this, this.views[name], archive, filepath) 162 | } 163 | } 164 | 165 | async unindexFile (archive, filepath) { 166 | await this.open() 167 | if (typeof archive === 'string') { 168 | const urlp = new URL(archive) 169 | archive = new (this.DatArchive)(urlp.protocol + '//' + urlp.hostname) 170 | return this.unindexFile(archive, urlp.pathname) 171 | } 172 | for (let name in this.views) { 173 | await Indexer.unindexFile(this, this.views[name], archive, filepath) 174 | } 175 | } 176 | 177 | listIndexed () { 178 | // TODO pull from DB? 179 | return Object.keys(this._archives) 180 | } 181 | 182 | isIndexed (url) { 183 | // TODO pull from DB? 
184 | if (!url) return false 185 | if (url.url) url = url.url // an archive 186 | return (url in this._archives) 187 | } 188 | } 189 | module.exports = MapReduce 190 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parserOptions": { 3 | "ecmaVersion": 9, 4 | "ecmaFeatures": { 5 | "jsx": true 6 | }, 7 | "sourceType": "module" 8 | }, 9 | 10 | "env": { 11 | "es6": true, 12 | "node": true 13 | }, 14 | 15 | "plugins": [ 16 | ], 17 | 18 | "globals": { 19 | "document": false, 20 | "navigator": false, 21 | "window": false 22 | }, 23 | 24 | "rules": { 25 | "accessor-pairs": "error", 26 | "arrow-spacing": ["error", { "before": true, "after": true }], 27 | "block-spacing": ["error", "always"], 28 | "brace-style": ["off", "1tbs", { "allowSingleLine": true }], 29 | "camelcase": "off", 30 | "comma-dangle": "off", 31 | "comma-spacing": ["error", { "before": false, "after": true }], 32 | "comma-style": ["error", "last"], 33 | "constructor-super": "error", 34 | "curly": ["error", "multi-line"], 35 | "dot-location": ["error", "property"], 36 | "eol-last": "off", 37 | "eqeqeq": "off", 38 | "func-call-spacing": ["error", "never"], 39 | "generator-star-spacing": ["error", { "before": true, "after": true }], 40 | "handle-callback-err": "off", 41 | "indent": "off", 42 | "key-spacing": ["error", { "beforeColon": false, "afterColon": true }], 43 | "keyword-spacing": ["error", { "before": true, "after": true }], 44 | "new-cap": ["error", { "newIsCap": true, "capIsNew": false }], 45 | "new-parens": "error", 46 | "no-array-constructor": "error", 47 | "no-caller": "error", 48 | "no-class-assign": "error", 49 | "no-compare-neg-zero": "error", 50 | "no-cond-assign": "error", 51 | "no-const-assign": "error", 52 | "no-constant-condition": ["error", { "checkLoops": false }], 53 | "no-control-regex": "off", 54 | "no-debugger": "error", 55 | "no-delete-var": "error", 56 | "no-dupe-args": "error", 57 | "no-dupe-class-members": "error", 58 | "no-dupe-keys": "error", 59 | "no-duplicate-case": "error", 60 | "no-empty-character-class": "error", 61 | "no-empty-pattern": "error", 62 | "no-eval": "error", 63 | "no-ex-assign": "error", 64 | "no-extend-native": "error", 65 | "no-extra-bind": "error", 66 | "no-extra-boolean-cast": "error", 67 | "no-extra-parens": ["error", "functions"], 68 | "no-fallthrough": "error", 69 | "no-floating-decimal": "error", 70 | "no-func-assign": "error", 71 | "no-global-assign": "error", 72 | "no-implied-eval": "error", 73 | "no-inner-declarations": "off", 74 | "no-invalid-regexp": "error", 75 | "no-irregular-whitespace": "error", 76 | "no-iterator": "error", 77 | "no-label-var": "error", 78 | "no-labels": ["error", { "allowLoop": false, "allowSwitch": false }], 79 | "no-lone-blocks": "error", 80 | "no-mixed-operators": ["error", { 81 | "groups": [ 82 | ["==", "!=", "===", "!==", ">", ">=", "<", "<="], 83 | ["&&", "||"], 84 | ["in", "instanceof"] 85 | ], 86 | "allowSamePrecedence": true 87 | }], 88 | "no-mixed-spaces-and-tabs": "error", 89 | "no-multi-spaces": "off", 90 | "no-multi-str": "error", 91 | "no-multiple-empty-lines": ["error", { "max": 1, "maxEOF": 0 }], 92 | "no-negated-in-lhs": "error", 93 | "no-new": "off", 94 | "no-new-func": "error", 95 | "no-new-object": "error", 96 | "no-new-require": "error", 97 | "no-new-symbol": "error", 98 | "no-new-wrappers": "error", 99 | "no-obj-calls": "error", 100 | "no-octal": "error", 101 | "no-octal-escape": "error", 
102 | "no-path-concat": "error", 103 | "no-proto": "error", 104 | "no-redeclare": "error", 105 | "no-regex-spaces": "error", 106 | "no-return-assign": ["error", "except-parens"], 107 | "no-return-await": "off", 108 | "no-self-assign": "error", 109 | "no-self-compare": "error", 110 | "no-sequences": "error", 111 | "no-shadow-restricted-names": "error", 112 | "no-sparse-arrays": "error", 113 | "no-tabs": "error", 114 | "no-template-curly-in-string": "error", 115 | "no-this-before-super": "error", 116 | "no-throw-literal": "error", 117 | "no-trailing-spaces": "error", 118 | "no-undef": "error", 119 | "no-undef-init": "error", 120 | "no-unexpected-multiline": "error", 121 | "no-unmodified-loop-condition": "error", 122 | "no-unneeded-ternary": ["off", { "defaultAssignment": false }], 123 | "no-unreachable": "error", 124 | "no-unsafe-finally": "error", 125 | "no-unsafe-negation": "error", 126 | "no-unused-expressions": ["error", { "allowShortCircuit": true, "allowTernary": true, "allowTaggedTemplates": true }], 127 | "no-unused-vars": ["off", { "vars": "all", "args": "none", "ignoreRestSiblings": true }], 128 | "no-use-before-define": ["error", { "functions": false, "classes": false, "variables": false }], 129 | "no-useless-call": "error", 130 | "no-useless-computed-key": "error", 131 | "no-useless-constructor": "error", 132 | "no-useless-escape": "error", 133 | "no-useless-rename": "error", 134 | "no-useless-return": "off", 135 | "no-whitespace-before-property": "error", 136 | "no-with": "error", 137 | "object-property-newline": ["error", { "allowMultiplePropertiesPerLine": true }], 138 | "one-var": ["error", { "initialized": "never" }], 139 | "operator-linebreak": ["off", "after", { "overrides": { "?": "before", ":": "before" } }], 140 | "padded-blocks": ["error", { "blocks": "never", "switches": "never", "classes": "never" }], 141 | "prefer-promise-reject-errors": "off", 142 | "quotes": ["error", "single", { "avoidEscape": true, "allowTemplateLiterals": true }], 143 | "rest-spread-spacing": ["error", "never"], 144 | "semi": ["error", "never"], 145 | "semi-spacing": ["error", { "before": false, "after": true }], 146 | "space-before-blocks": ["error", "always"], 147 | "space-before-function-paren": ["error", "always"], 148 | "space-in-parens": ["error", "never"], 149 | "space-infix-ops": "off", 150 | "space-unary-ops": ["error", { "words": true, "nonwords": false }], 151 | "spaced-comment": ["off", "always", { 152 | "line": { "markers": ["*package", "!", "/", ","] }, 153 | "block": { "balanced": true, "markers": ["*package", "!", ",", ":", "::", "flow-include"], "exceptions": ["*"] } 154 | }], 155 | "symbol-description": "error", 156 | "template-curly-spacing": ["error", "never"], 157 | "template-tag-spacing": ["error", "never"], 158 | "unicode-bom": ["error", "never"], 159 | "use-isnan": "error", 160 | "valid-typeof": ["error", { "requireStringLiterals": true }], 161 | "wrap-iife": ["error", "any", { "functionPrototypeMethods": true }], 162 | "yield-star-spacing": ["error", "both"], 163 | "yoda": ["error", "never"] 164 | } 165 | } -------------------------------------------------------------------------------- /test/compound-keys.js: -------------------------------------------------------------------------------- 1 | const test = require('ava') 2 | const {newDB, ts} = require('./lib/util') 3 | const DatArchive = require('node-dat-archive') 4 | const tempy = require('tempy') 5 | 6 | test.before(() => console.log('compound-keys.js')) 7 | 8 | var archives = [] 9 | 10 | async function setupNewDB 
(indexOpts) { 11 | async function def (fn) { 12 | const a = await DatArchive.create({localPath: tempy.directory()}) 13 | await a.mkdir('/multi') 14 | const write = (path, record) => a.writeFile(path, JSON.stringify(record)) 15 | await fn(write, a) 16 | return a 17 | } 18 | archives = [] 19 | for (let i = 0; i < 10; i++) { 20 | archives.push(await def(async write => { 21 | await write('/single.json', {first: 'first' + i, second: i, third: 'third' + i + 'single'}) 22 | await write('/multi/1.json', {first: 'first' + i, second: (i+1)*100, third: 'third' + i + 'multi1'}) 23 | await write('/multi/2.json', {first: 'first' + i, second: i, third: 'third' + i + 'multi2'}) 24 | await write('/multi/3.json', {first: 'first' + i + 'b', second: i, third: 'third' + i + 'multi3'}) 25 | })) 26 | } 27 | 28 | const testDB = newDB() 29 | testDB.define('single', { 30 | path: '/single.json', 31 | map (value, meta, emit) { 32 | let obj = JSON.parse(value) 33 | emit([meta.origin, obj.first], meta.url) 34 | } 35 | }) 36 | testDB.define('single-reduced', { 37 | path: '/single.json', 38 | map (value, meta, emit) { 39 | let obj = JSON.parse(value) 40 | emit([meta.origin, obj.first], 1) 41 | }, 42 | reduce (acc, value, key) { 43 | return (acc||0) + 1 44 | } 45 | }) 46 | testDB.define('multi', { 47 | path: '/multi/*.json', 48 | map (value, meta, emit) { 49 | let obj = JSON.parse(value) 50 | emit([meta.origin, obj.first], meta.url) 51 | } 52 | }) 53 | testDB.define('multi-reduced', { 54 | path: '/multi/*.json', 55 | map (value, meta, emit) { 56 | let obj = JSON.parse(value) 57 | emit([meta.origin, obj.first], 1) 58 | }, 59 | reduce (acc, value, key) { 60 | return (acc||0) + 1 61 | } 62 | }) 63 | for (let a of archives) { 64 | await testDB.index(a, indexOpts) 65 | } 66 | return testDB 67 | } 68 | 69 | test('get()', async t => { 70 | const testDB = await setupNewDB() 71 | 72 | t.deepEqual(await testDB.get('single', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: [archives[0].url + '/single.json']}) 73 | t.deepEqual(await testDB.get('single', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: [archives[1].url + '/single.json']}) 74 | t.deepEqual(await testDB.get('single-reduced', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: 1}) 75 | t.deepEqual(await testDB.get('single-reduced', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: 1}) 76 | t.deepEqual(await testDB.get('multi', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 77 | t.deepEqual(await testDB.get('multi', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 78 | t.deepEqual(await testDB.get('multi-reduced', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: 2}) 79 | t.deepEqual(await testDB.get('multi-reduced', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: 2}) 80 | 81 | await testDB.close() 82 | }) 83 | 84 | test('list()', async t => { 85 | const testDB = await setupNewDB() 86 | 87 | var res1 = await testDB.list('single') 88 | t.is(res1.length, archives.length) 89 | for (let i = 0; i < res1.length; i++) { 90 | let ai = archives.findIndex(a => a.url === res1[i].key[0]) 91 | t.deepEqual(res1[i], {key: [archives[ai].url, `first${ai}`], value: archives[ai].url + '/single.json'}) 92 | } 93 | 94 | var res2 = await testDB.list('single-reduced') 95 | 
t.is(res2.length, archives.length) 96 | for (let i = 0; i < archives.length; i++) { 97 | t.is(res2[i].value, 1) 98 | } 99 | 100 | var res3 = await testDB.list('multi') 101 | t.is(res3.length, archives.length * 3) 102 | for (let i = 0; i < archives.length; i++) { 103 | let ai = archives.findIndex(a => a.url === res3[i*3].key[0]) 104 | t.deepEqual(res3[i*3 + 0], {key: [archives[ai].url, `first${ai}`], value: archives[ai].url + '/multi/1.json'}) 105 | t.deepEqual(res3[i*3 + 1], {key: [archives[ai].url, `first${ai}`], value: archives[ai].url + '/multi/2.json'}) 106 | t.deepEqual(res3[i*3 + 2], {key: [archives[ai].url, `first${ai}b`], value: archives[ai].url + '/multi/3.json'}) 107 | } 108 | 109 | var res4 = await testDB.list('multi-reduced') 110 | t.is(res4.length, archives.length * 2) 111 | for (let i = 0; i < archives.length; i++) { 112 | t.is(res4[i*2 + 0].value, 2) 113 | t.is(res4[i*2 + 1].value, 1) 114 | } 115 | 116 | var res5 = await testDB.list('single', {gt: [archives[4].url, 'first4']}) 117 | t.truthy(res5.length < archives.length) 118 | for (let i = 0; i < res5.length; i++) { 119 | let ai = archives.findIndex(a => a.url === res5[i].key[0]) 120 | t.deepEqual(res5[i].key, [archives[ai].url, `first${ai}`]) 121 | } 122 | 123 | var res6 = await testDB.list('single', {gt: [archives[4].url, 'first4']}) 124 | t.truthy(res6.length < archives.length) 125 | for (let i = 0; i < res6.length; i++) { 126 | let ai = archives.findIndex(a => a.url === res6[i].key[0]) 127 | t.deepEqual(res6[i].key, [archives[ai].url, `first${ai}`]) 128 | } 129 | 130 | var res7 = await testDB.list('single', {reverse: true}) 131 | var res7b = await testDB.list('single', {reverse: false}) 132 | t.deepEqual(res7, res7b.reverse()) 133 | 134 | var res8 = await testDB.list('single', {limit: 3}) 135 | var res8b = await testDB.list('single', {limit: undefined}) 136 | t.is(res8.length, 3) 137 | t.deepEqual(res8, res8b.slice(0, 3)) 138 | 139 | await testDB.close() 140 | }) 141 | 142 | test('correctly index changed files', async t => { 143 | const testDB = await setupNewDB() 144 | 145 | // test initial 146 | t.deepEqual(await testDB.get('single', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: [archives[0].url + '/single.json']}) 147 | t.deepEqual(await testDB.get('single', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: [archives[1].url + '/single.json']}) 148 | t.deepEqual(await testDB.get('single-reduced', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: 1}) 149 | t.deepEqual(await testDB.get('single-reduced', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: 1}) 150 | t.deepEqual(await testDB.get('multi', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 151 | t.deepEqual(await testDB.get('multi', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 152 | t.deepEqual(await testDB.get('multi-reduced', [archives[0].url, 'first0']), {key: [archives[0].url, 'first0'], value: 2}) 153 | t.deepEqual(await testDB.get('multi-reduced', [archives[1].url, 'first1']), {key: [archives[1].url, 'first1'], value: 2}) 154 | 155 | // make changes & index 156 | for (let i = 0; i < 10; i++) { 157 | await archives[i].writeFile('/single.json', JSON.stringify({first: 'first' + (10 - i), second: i, third: 'third' + i + 'single'})) 158 | await 
archives[i].writeFile('/multi/1.json', JSON.stringify({first: 'first' + (10 - i), second: (i+1)*100, third: 'third' + i + 'multi1'})) 159 | await archives[i].writeFile('/multi/2.json', JSON.stringify({first: 'first' + (10 - i), second: i, third: 'third' + i + 'multi2'})) 160 | await archives[i].writeFile('/multi/3.json', JSON.stringify({first: 'first' + (10 - i) + 'b', second: i, third: 'third' + i + 'multi3'})) 161 | } 162 | for (let a of archives) { 163 | await testDB.index(a) 164 | } 165 | 166 | // test changed 167 | t.deepEqual(await testDB.get('single', [archives[0].url, 'first10']), {key: [archives[0].url, 'first10'], value: [archives[0].url + '/single.json']}) 168 | t.deepEqual(await testDB.get('single', [archives[1].url, 'first9']), {key: [archives[1].url, 'first9'], value: [archives[1].url + '/single.json']}) 169 | t.deepEqual(await testDB.get('single-reduced', [archives[0].url, 'first10']), {key: [archives[0].url, 'first10'], value: 1}) 170 | t.deepEqual(await testDB.get('single-reduced', [archives[1].url, 'first9']), {key: [archives[1].url, 'first9'], value: 1}) 171 | t.deepEqual(await testDB.get('multi', [archives[0].url, 'first10']), {key: [archives[0].url, 'first10'], value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 172 | t.deepEqual(await testDB.get('multi', [archives[1].url, 'first9']), {key: [archives[1].url, 'first9'], value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 173 | t.deepEqual(await testDB.get('multi-reduced', [archives[0].url, 'first10']), {key: [archives[0].url, 'first10'], value: 2}) 174 | t.deepEqual(await testDB.get('multi-reduced', [archives[1].url, 'first9']), {key: [archives[1].url, 'first9'], value: 2}) 175 | 176 | await testDB.close() 177 | }) 178 | -------------------------------------------------------------------------------- /test/view.js: -------------------------------------------------------------------------------- 1 | const test = require('ava') 2 | const {newDB, ts} = require('./lib/util') 3 | const DatArchive = require('node-dat-archive') 4 | const tempy = require('tempy') 5 | 6 | test.before(() => console.log('view.js')) 7 | 8 | var archives = [] 9 | 10 | async function setupNewDB (indexOpts) { 11 | async function def (fn) { 12 | const a = await DatArchive.create({localPath: tempy.directory()}) 13 | await a.mkdir('/multi') 14 | const write = (path, record) => a.writeFile(path, JSON.stringify(record)) 15 | await fn(write, a) 16 | return a 17 | } 18 | archives = [] 19 | for (let i = 0; i < 10; i++) { 20 | archives.push(await def(async write => { 21 | await write('/single.json', {first: 'first' + i, second: i, third: 'third' + i + 'single'}) 22 | await write('/multi/1.json', {first: 'first' + i, second: (i+1)*100, third: 'third' + i + 'multi1'}) 23 | await write('/multi/2.json', {first: 'first' + i, second: i, third: 'third' + i + 'multi2'}) 24 | await write('/multi/3.json', {first: 'first' + i + 'b', second: i, third: 'third' + i + 'multi3'}) 25 | })) 26 | } 27 | 28 | const testDB = newDB() 29 | testDB.define('single', { 30 | path: '/single.json', 31 | map (value, meta, emit) { 32 | let obj = JSON.parse(value) 33 | emit(obj.first, meta.url) 34 | } 35 | }) 36 | testDB.define('single-reduced', { 37 | path: '/single.json', 38 | map (value, meta, emit) { 39 | emit(meta.origin, 1) 40 | }, 41 | reduce (acc, value, key) { 42 | return (acc||0) + 1 43 | } 44 | }) 45 | testDB.define('multi', { 46 | path: '/multi/*.json', 47 | map (value, meta, emit) { 48 | let obj = JSON.parse(value) 49 | 
emit(obj.first, meta.url) 50 | } 51 | }) 52 | testDB.define('multi-reduced', { 53 | path: '/multi/*.json', 54 | map (value, meta, emit) { 55 | emit(meta.origin, 1) 56 | }, 57 | reduce (acc, value, key) { 58 | return (acc||0) + 1 59 | } 60 | }) 61 | for (let a of archives) { 62 | await testDB.index(a, indexOpts) 63 | } 64 | return testDB 65 | } 66 | 67 | test('get()', async t => { 68 | const testDB = await setupNewDB() 69 | 70 | t.deepEqual(await testDB.get('single', 'first0'), {key: 'first0', value: [archives[0].url + '/single.json']}) 71 | t.deepEqual(await testDB.get('single', 'first1'), {key: 'first1', value: [archives[1].url + '/single.json']}) 72 | t.deepEqual(await testDB.get('single-reduced', archives[0].url), {key: archives[0].url, value: 1}) 73 | t.deepEqual(await testDB.get('single-reduced', archives[1].url), {key: archives[1].url, value: 1}) 74 | t.deepEqual(await testDB.get('multi', 'first0'), {key: 'first0', value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 75 | t.deepEqual(await testDB.get('multi', 'first1'), {key: 'first1', value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 76 | t.deepEqual(await testDB.get('multi-reduced', archives[0].url), {key: archives[0].url, value: 3}) 77 | t.deepEqual(await testDB.get('multi-reduced', archives[1].url), {key: archives[1].url, value: 3}) 78 | 79 | await testDB.close() 80 | }) 81 | 82 | test('list()', async t => { 83 | const testDB = await setupNewDB() 84 | 85 | var res1 = await testDB.list('single') 86 | t.is(res1.length, archives.length) 87 | for (let i = 0; i < archives.length; i++) { 88 | t.deepEqual(res1[i], {key: `first${i}`, value: archives[i].url + '/single.json'}) 89 | } 90 | 91 | var res2 = await testDB.list('single-reduced') 92 | t.is(res2.length, archives.length) 93 | for (let i = 0; i < archives.length; i++) { 94 | t.is(res2[i].value, 1) 95 | } 96 | 97 | var res3 = await testDB.list('multi') 98 | t.is(res3.length, archives.length * 3) 99 | for (let i = 0; i < archives.length; i++) { 100 | t.deepEqual(res3[i*3 + 0], {key: `first${i}`, value: archives[i].url + '/multi/1.json'}) 101 | t.deepEqual(res3[i*3 + 1], {key: `first${i}`, value: archives[i].url + '/multi/2.json'}) 102 | t.deepEqual(res3[i*3 + 2], {key: `first${i}b`, value: archives[i].url + '/multi/3.json'}) 103 | } 104 | 105 | var res4 = await testDB.list('multi-reduced') 106 | t.is(res4.length, archives.length) 107 | for (let i = 0; i < archives.length; i++) { 108 | t.is(res4[i].value, 3) 109 | } 110 | 111 | var res5 = await testDB.list('single', {gt: 'first4'}) 112 | t.is(res5.length, archives.length - 5) 113 | for (let i = 5; i < archives.length; i++) { 114 | t.deepEqual(res5[i - 5], {key: `first${i}`, value: archives[i].url + '/single.json'}) 115 | } 116 | 117 | var res6 = await testDB.list('single', {gte: 'first4'}) 118 | t.is(res6.length, archives.length - 4) 119 | for (let i = 4; i < archives.length; i++) { 120 | t.deepEqual(res6[i - 4], {key: `first${i}`, value: archives[i].url + '/single.json'}) 121 | } 122 | 123 | var res7 = await testDB.list('single', {lt: 'first5'}) 124 | t.is(res7.length, archives.length - 5) 125 | for (let i = 0; i < archives.length - 5; i++) { 126 | t.deepEqual(res7[i], {key: `first${i}`, value: archives[i].url + '/single.json'}) 127 | } 128 | 129 | var res7b = await testDB.list('single', {lte: 'first5'}) 130 | t.is(res7b.length, archives.length - 4) 131 | for (let i = 0; i < archives.length - 4; i++) { 132 | t.deepEqual(res7b[i], {key: `first${i}`, value: archives[i].url + 
'/single.json'}) 133 | } 134 | 135 | var res8 = await testDB.list('single', {reverse: true}) 136 | t.is(res8.length, archives.length) 137 | for (let i = 0; i < archives.length; i++) { 138 | t.deepEqual(res8[archives.length - i - 1], {key: `first${i}`, value: archives[i].url + '/single.json'}) 139 | } 140 | 141 | var res9 = await testDB.list('single', {limit: 3}) 142 | t.is(res9.length, 3) 143 | for (let i = 0; i < 3; i++) { 144 | t.deepEqual(res9[i], {key: `first${i}`, value: archives[i].url + '/single.json'}) 145 | } 146 | 147 | await testDB.close() 148 | }) 149 | 150 | test('correctly index changed files', async t => { 151 | const testDB = await setupNewDB() 152 | 153 | // test initial 154 | t.deepEqual(await testDB.get('single', 'first0'), {key: 'first0', value: [archives[0].url + '/single.json']}) 155 | t.deepEqual(await testDB.get('single', 'first1'), {key: 'first1', value: [archives[1].url + '/single.json']}) 156 | t.deepEqual(await testDB.get('single-reduced', archives[0].url), {key: archives[0].url, value: 1}) 157 | t.deepEqual(await testDB.get('single-reduced', archives[1].url), {key: archives[1].url, value: 1}) 158 | t.deepEqual(await testDB.get('multi', 'first0'), {key: 'first0', value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 159 | t.deepEqual(await testDB.get('multi', 'first1'), {key: 'first1', value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 160 | t.deepEqual(await testDB.get('multi-reduced', archives[0].url), {key: archives[0].url, value: 3}) 161 | t.deepEqual(await testDB.get('multi-reduced', archives[1].url), {key: archives[1].url, value: 3}) 162 | 163 | // make changes & index 164 | for (let i = 0; i < 10; i++) { 165 | await archives[i].writeFile('/single.json', JSON.stringify({first: 'first' + (10 - i), second: i, third: 'third' + i + 'single'})) 166 | await archives[i].writeFile('/multi/1.json', JSON.stringify({first: 'first' + (10 - i), second: (i+1)*100, third: 'third' + i + 'multi1'})) 167 | await archives[i].writeFile('/multi/2.json', JSON.stringify({first: 'first' + (10 - i), second: i, third: 'third' + i + 'multi2'})) 168 | await archives[i].writeFile('/multi/3.json', JSON.stringify({first: 'first' + (10 - i) + 'b', second: i, third: 'third' + i + 'multi3'})) 169 | } 170 | for (let a of archives) { 171 | await testDB.index(a) 172 | } 173 | 174 | // test changed 175 | t.deepEqual(await testDB.get('single', 'first10'), {key: 'first10', value: [archives[0].url + '/single.json']}) 176 | t.deepEqual(await testDB.get('single', 'first9'), {key: 'first9', value: [archives[1].url + '/single.json']}) 177 | t.deepEqual(await testDB.get('single-reduced', archives[0].url), {key: archives[0].url, value: 1}) 178 | t.deepEqual(await testDB.get('single-reduced', archives[1].url), {key: archives[1].url, value: 1}) 179 | t.deepEqual(await testDB.get('multi', 'first10'), {key: 'first10', value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 180 | t.deepEqual(await testDB.get('multi', 'first9'), {key: 'first9', value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 181 | t.deepEqual(await testDB.get('multi-reduced', archives[0].url), {key: archives[0].url, value: 3}) 182 | t.deepEqual(await testDB.get('multi-reduced', archives[1].url), {key: archives[1].url, value: 3}) 183 | 184 | await testDB.close() 185 | }) 186 | 187 | test('correctly index changed files (using watch)', async t => { 188 | const testDB = await setupNewDB({watch: true}) 189 | 190 | // test initial 191 | 
t.deepEqual(await testDB.get('single', 'first0'), {key: 'first0', value: [archives[0].url + '/single.json']}) 192 | t.deepEqual(await testDB.get('single', 'first1'), {key: 'first1', value: [archives[1].url + '/single.json']}) 193 | t.deepEqual(await testDB.get('single-reduced', archives[0].url), {key: archives[0].url, value: 1}) 194 | t.deepEqual(await testDB.get('single-reduced', archives[1].url), {key: archives[1].url, value: 1}) 195 | t.deepEqual(await testDB.get('multi', 'first0'), {key: 'first0', value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 196 | t.deepEqual(await testDB.get('multi', 'first1'), {key: 'first1', value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 197 | t.deepEqual(await testDB.get('multi-reduced', archives[0].url), {key: archives[0].url, value: 3}) 198 | t.deepEqual(await testDB.get('multi-reduced', archives[1].url), {key: archives[1].url, value: 3}) 199 | 200 | // make changes & index 201 | var ps = [], resolves = [] 202 | for (let i = 0; i < 10; i++) { 203 | ps.push(new Promise((resolve, reject) => { 204 | resolves.push(resolve) 205 | })) 206 | } 207 | testDB.on('indexes-updated', ({origin, version}) => { 208 | for (let i = 0; i < 10; i++) { 209 | if (origin === archives[i].url) resolves[i]() 210 | } 211 | }) 212 | for (let i = 0; i < 10; i++) { 213 | await archives[i].writeFile('/single.json', JSON.stringify({first: 'first' + (10 - i), second: i, third: 'third' + i + 'single'})) 214 | await archives[i].writeFile('/multi/1.json', JSON.stringify({first: 'first' + (10 - i), second: (i+1)*100, third: 'third' + i + 'multi1'})) 215 | await archives[i].writeFile('/multi/2.json', JSON.stringify({first: 'first' + (10 - i), second: i, third: 'third' + i + 'multi2'})) 216 | await archives[i].writeFile('/multi/3.json', JSON.stringify({first: 'first' + (10 - i) + 'b', second: i, third: 'third' + i + 'multi3'})) 217 | } 218 | await Promise.all(ps) 219 | 220 | // test changed 221 | t.deepEqual(await testDB.get('single', 'first10'), {key: 'first10', value: [archives[0].url + '/single.json']}) 222 | t.deepEqual(await testDB.get('single', 'first9'), {key: 'first9', value: [archives[1].url + '/single.json']}) 223 | t.deepEqual(await testDB.get('single-reduced', archives[0].url), {key: archives[0].url, value: 1}) 224 | t.deepEqual(await testDB.get('single-reduced', archives[1].url), {key: archives[1].url, value: 1}) 225 | t.deepEqual(await testDB.get('multi', 'first10'), {key: 'first10', value: [archives[0].url + '/multi/1.json', archives[0].url + '/multi/2.json']}) 226 | t.deepEqual(await testDB.get('multi', 'first9'), {key: 'first9', value: [archives[1].url + '/multi/1.json', archives[1].url + '/multi/2.json']}) 227 | t.deepEqual(await testDB.get('multi-reduced', archives[0].url), {key: archives[0].url, value: 3}) 228 | t.deepEqual(await testDB.get('multi-reduced', archives[1].url), {key: archives[1].url, value: 3}) 229 | 230 | await testDB.close() 231 | }) -------------------------------------------------------------------------------- /lib/indexer.js: -------------------------------------------------------------------------------- 1 | const anymatch = require('anymatch') 2 | const _debounce = require('lodash.debounce') 3 | const View = require('./view') 4 | const LevelUtil = require('./util-level') 5 | const {debug, veryDebug, lock} = require('./util') 6 | 7 | const READ_TIMEOUT = 30e3 8 | 9 | // typedefs 10 | // = 11 | 12 | /** 13 | * @typedef {Object} RelevantFile 14 | * @prop {string} url 15 | * @prop {View} view 16 | 
* 17 | * @typedef {Object} Update 18 | * @prop {string} type 19 | * @prop {string} path 20 | * @prop {number} version 21 | */ 22 | 23 | // globals 24 | // = 25 | 26 | var archiveFileEvents = {} 27 | 28 | // exported api 29 | // = 30 | 31 | /** 32 | * @param {Object} db 33 | * @param {Object} archive 34 | * @param {Object} opts 35 | * @param {boolean} opts.watch 36 | * @returns {Promise} 37 | */ 38 | exports.addArchive = async function (db, archive, {watch}) { 39 | veryDebug('Indexer.addArchive', archive.url, {watch}) 40 | 41 | // process the archive 42 | await ( 43 | indexArchive(db, archive) 44 | .then(() => { 45 | if (watch) exports.watchArchive(db, archive) 46 | }) 47 | .catch(e => onFailInitialIndex(e, db, archive, {watch})) 48 | ) 49 | } 50 | 51 | /** 52 | * @param {Object} db 53 | * @param {Object} archive 54 | * @returns {Promise} 55 | */ 56 | exports.removeArchive = async function (db, archive) { 57 | veryDebug('Indexer.removeArchive', archive.url) 58 | await unindexArchive(db, archive) 59 | exports.unwatchArchive(db, archive) 60 | } 61 | 62 | /** 63 | * @param {Object} db 64 | * @param {Object} archive 65 | * @returns {Promise} 66 | */ 67 | exports.watchArchive = async function (db, archive) { 68 | veryDebug('Indexer.watchArchive', archive.url) 69 | if (archiveFileEvents[archive.url]) { 70 | console.error('watchArchive() called on archive that is already being watched', archive.url) 71 | return 72 | } 73 | if (archive._loadPromise) { 74 | // HACK node-dat-archive fix 75 | // Because of a weird API difference btwn node-dat-archive and beaker's DatArchive... 76 | // ...the event-stream methods need await _loadPromise 77 | // -prf 78 | await archive._loadPromise 79 | } 80 | archiveFileEvents[archive.url] = archive.createFileActivityStream(db._viewFilePatterns) // TODO switch to watch() 81 | // autodownload all changes to the watched files 82 | archiveFileEvents[archive.url].addEventListener('invalidated', ({path}) => archive.download(path)) 83 | // autoindex on changes 84 | archiveFileEvents[archive.url].addEventListener('changed', _debounce(({path}) => { 85 | indexArchive(db, archive) 86 | }, 500)) 87 | } 88 | 89 | /** 90 | * @param {Object} db 91 | * @param {Object} archive 92 | * @returns {void} 93 | */ 94 | exports.unwatchArchive = function (db, archive) { 95 | veryDebug('Indexer.unwatchArchive', archive.url) 96 | if (archiveFileEvents[archive.url]) { 97 | archiveFileEvents[archive.url].close() 98 | archiveFileEvents[archive.url] = null 99 | } 100 | } 101 | 102 | /** 103 | * @param {Object} db 104 | * @param {string} viewName 105 | * @returns {Promise} 106 | */ 107 | exports.resetIndex = async function (db, viewName) { 108 | debug(`Indexer.resetIndex(${viewName})`) 109 | await db.views[viewName].clearData() 110 | } 111 | 112 | /** 113 | * @desc 114 | * figure out what changes need to be processed, 115 | * then update the indexes 116 | * 117 | * @param {Object} db 118 | * @param {Object} archive 119 | * @returns {Promise} 120 | */ 121 | async function indexArchive (db, archive) { 122 | debug('Indexer.indexArchive', archive.url) 123 | var release = await lock(`index:${archive.url}`) 124 | try { 125 | // sanity check 126 | if (!db.isOpen && !db.isBeingOpened) { 127 | return veryDebug('Indexer.indexArchive aborted, not open') 128 | } 129 | if (!db.level) { 130 | return console.log('indexArchive called on corrupted db') 131 | } 132 | 133 | // fetch the current archive state 134 | var archiveMeta = await archive.getInfo({timeout: READ_TIMEOUT}) 135 | 136 | for (let viewName in db.views) 
{ 137 | let view = db.views[viewName] 138 | let version = await LevelUtil.get(view.archiveVersionLevel, archive.url) 139 | version = +version || 0 140 | try { 141 | debug('Indexer.indexArchive', view.name, archive.url, 'start', version, 'end', archiveMeta.version) 142 | db.emit('archive-indexing', { 143 | view: view.name, 144 | origin: archive.url, 145 | start: version, 146 | end: archiveMeta.version 147 | }) 148 | } catch (e) { 149 | console.error(e) 150 | } 151 | 152 | // find and apply all changes which haven't yet been processed 153 | var updates = await scanArchiveHistoryForUpdates(view, archive, { 154 | start: version + 1, 155 | end: archiveMeta.version + 1 156 | }) 157 | await applyUpdates(db, view, archive, updates) 158 | debug('Indexer.indexArchive applied', updates.length, 'updates from', archive.url, 'in', view.name) 159 | 160 | // emit 161 | try { 162 | db.emit('archive-indexed', { 163 | view: view.name, 164 | origin: archive.url, 165 | version: archiveMeta.version 166 | }) 167 | } catch (e) { 168 | console.error(e) 169 | } 170 | } 171 | 172 | db.emit('indexes-updated', { 173 | origin: archive.url, 174 | version: archiveMeta.version 175 | }) 176 | } finally { 177 | release() 178 | } 179 | } 180 | exports.indexArchive = indexArchive 181 | 182 | /** 183 | * @desc 184 | * delete all records generated from the archive 185 | * 186 | * @param {Object} db 187 | * @param {Object} archive 188 | * @returns {Promise} 189 | */ 190 | async function unindexArchive (db, archive) { 191 | var release = await lock(`index:${archive.url}`) 192 | try { 193 | // find any relevant records and delete them from the indexes 194 | var foundFiles = await scanArchiveForRelevantFiles(db, archive) 195 | await Promise.all(foundFiles.map(foundFile => 196 | foundFile.view.clearEntriesByFile(foundFile.url) 197 | )) 198 | for (let view of Object.values(db.views)) { 199 | await LevelUtil.del(view.archiveVersionLevel, archive.url) 200 | } 201 | } finally { 202 | release() 203 | } 204 | } 205 | exports.unindexArchive = unindexArchive 206 | 207 | /** 208 | * @desc 209 | * read the file, find the matching view, validate, then store 210 | * 211 | * @param {Object} db 212 | * @param {View} view 213 | * @param {Object} archive 214 | * @param {string} filepath 215 | * @returns {Promise} 216 | */ 217 | async function readAndIndexFile (db, view, archive, filepath) { 218 | const fileUrl = archive.url + filepath 219 | try { 220 | // read file 221 | let value = await archive.readFile(filepath, {timeout: READ_TIMEOUT}) 222 | 223 | // clear past entries for this file 224 | var oldEntryKeys = await view.getEntryKeysByFile(fileUrl) 225 | await view.clearEntriesByFile(fileUrl) 226 | 227 | // run map function 228 | let keys = new Set(oldEntryKeys) 229 | let entries = [] 230 | let meta = {origin: archive.url, url: fileUrl, pathname: filepath} 231 | try { 232 | view.map(value, meta, (key, value) => { 233 | keys.add(key) 234 | entries.push({key, value}) 235 | }) 236 | } catch (e) { 237 | console.error('Error in map function for', view.name) 238 | throw e 239 | } 240 | 241 | // store entries 242 | await view.addEntries(fileUrl, entries) 243 | 244 | // run reduce 245 | if (view.reduce) { 246 | for (let key of keys) { 247 | let acc 248 | let entries = (await view.getEntries(key)) || [] 249 | for (let entry of entries) { 250 | acc = view.reduce(acc, entry.value, key) 251 | } 252 | await view.putReducedValue(key, acc) 253 | } 254 | } 255 | } catch (e) { 256 | console.log('Failed to index', fileUrl, e) 257 | throw e 258 | } 259 | } 260 | 
exports.readAndIndexFile = readAndIndexFile 261 | 262 | /** 263 | * @param {Object} db 264 | * @param {View} view 265 | * @param {Object} archive 266 | * @param {string} filepath 267 | * @returns {Promise} 268 | */ 269 | async function unindexFile (db, view, archive, filepath) { 270 | const fileUrl = archive.url + filepath 271 | try { 272 | // clear past entries for this file 273 | var oldEntryKeys = await view.getEntryKeysByFile(fileUrl) 274 | await view.clearEntriesByFile(fileUrl) 275 | 276 | // run reduce 277 | if (view.reduce) { 278 | for (let key of oldEntryKeys) { 279 | let acc 280 | let entries = (await view.getEntries(key)) || [] // guard: a key may have no entries left after the clear 281 | for (let entry of entries) { 282 | acc = view.reduce(acc, entry.value, key) 283 | } 284 | await view.putReducedValue(key, acc) 285 | } 286 | } 287 | } catch (e) { 288 | console.error('Failed to unindex', fileUrl, e) 289 | throw e 290 | } 291 | } 292 | exports.unindexFile = unindexFile 293 | 294 | // internal methods 295 | // = 296 | 297 | /** 298 | * @desc 299 | * helper for when the first indexArchive() fails: 300 | * emit an error and, if the failure was a timeout, keep looking for the archive 301 | * 302 | * @param {Object} e 303 | * @param {Object} db 304 | * @param {Object} archive 305 | * @param {Object} opts 306 | * @param {boolean} opts.watch 307 | * @returns {Promise} 308 | */ 309 | async function onFailInitialIndex (e, db, archive, {watch}) { 310 | if (e.name === 'TimeoutError') { 311 | debug('Indexer.onFailInitialIndex starting retry loop', archive.url) 312 | db.emit('archive-missing', {origin: archive.url}) 313 | while (true) { 314 | veryDebug('Indexer.onFailInitialIndex attempting load', archive.url) 315 | // try again every 30 seconds 316 | await new Promise(resolve => setTimeout(resolve, 30e3)) 317 | // is the archive still registered with the db? 318 | if (!db.isOpen || !(archive.url in db._archives)) { 319 | return 320 | } 321 | // re-attempt the index 322 | try { 323 | await indexArchive(db, archive) 324 | veryDebug('Indexer.onFailInitialIndex successfully loaded', archive.url) 325 | break // made it!
326 | } catch (e) { 327 | // abort if we get a non-timeout error 328 | if (e.name !== 'TimeoutError') { 329 | veryDebug('Indexer.onFailInitialIndex failed attempt, aborting', archive.url, e) 330 | return 331 | } 332 | } 333 | } 334 | // success 335 | db.emit('archive-found', {origin: archive.url}) 336 | if (watch) exports.watchArchive(db, archive) 337 | } else { 338 | db.emit('archive-error', {origin: archive.url, error: e}) 339 | } 340 | } 341 | 342 | /** 343 | * @desc 344 | * look through the given history slice, 345 | * match against the view's path patterns, and 346 | * return the *latest* change to each matching file, as an array ordered by version 347 | * 348 | * @param {View} view 349 | * @param {Object} archive 350 | * @param {Object} opts 351 | * @param {number} opts.start 352 | * @param {number} opts.end 353 | * @returns {Promise} 354 | */ 355 | async function scanArchiveHistoryForUpdates (view, archive, {start, end}) { 356 | var history = await archive.history({start, end, timeout: READ_TIMEOUT}) 357 | 358 | // pull the latest update to each file 359 | var updates = {} 360 | history.forEach(update => { 361 | if (anymatch(view.filePattern, update.path)) { 362 | updates[update.path] = update 363 | } 364 | }) 365 | 366 | // return an array ordered by version 367 | return Object.values(updates).sort((a, b) => a.version - b.version) 368 | } 369 | 370 | /** 371 | * @param {Object} db 372 | * @param {Object} archive 373 | * @returns {Promise} 374 | */ 375 | async function scanArchiveForRelevantFiles (db, archive) { 376 | var foundFiles = [] 377 | var filepaths = await archive.readdir('/', {recursive: true}) 378 | for (let filepath of filepaths) { 379 | let url = archive.url + filepath 380 | for (let view of Object.values(db.views)) { 381 | if (anymatch(view.filePattern, filepath)) { 382 | foundFiles.push({url, view}) 383 | } 384 | } 385 | } 386 | return foundFiles 387 | } 388 | 389 | /** 390 | * @desc 391 | * iterate the updates and apply them one by one, 392 | * updating the metadata as each is applied successfully 393 | * 394 | * @param {Object} db 395 | * @param {View} view 396 | * @param {Object} archive 397 | * @param {Update[]} updates 398 | * @returns {Promise} 399 | */ 400 | async function applyUpdates (db, view, archive, updates) { 401 | for (let i = 0; i < updates.length; i++) { 402 | // process update 403 | let update = updates[i] 404 | if (update.type === 'del') { 405 | await unindexFile(db, view, archive, update.path) 406 | } else { 407 | await readAndIndexFile(db, view, archive, update.path) 408 | } 409 | 410 | // update meta 411 | await LevelUtil.put(view.archiveVersionLevel, archive.url, update.version) 412 | try { 413 | db.emit('archive-index-progress', { 414 | view: view.name, 415 | origin: archive.url, 416 | current: (i + 1), 417 | total: updates.length 418 | }) 419 | } catch (e) { 420 | console.error(e) 421 | } 422 | } 423 | } 424 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dat-archive-map-reduce 2 | 3 | Index files in Dat archives with map-reduce to create queryable data views. 4 | 5 | **Not yet stable**.
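In Node, you'll also want to hand the constructor a `DatArchive` implementation (see [`new DatArchiveMapReduce`](#new-datarchivemapreducename-opts) below). A minimal Node setup sketch, assuming [node-dat-archive](https://npm.im/node-dat-archive) is installed:

```js
const DatArchive = require('node-dat-archive')
const DatArchiveMapReduce = require('@beaker/dat-archive-map-reduce')

// 'views' names the LevelDB folder where the indexes are stored
const damr = new DatArchiveMapReduce('views', {DatArchive})
```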
6 | 7 | ```js 8 | // in beaker 9 | import DatArchiveMapReduce from 'dat://map-reduce.beakerbrowser.com/v/1.0.0/index.js' 10 | // in node 11 | const DatArchiveMapReduce = require('@beaker/dat-archive-map-reduce') 12 | 13 | // create instance 14 | const damr = new DatArchiveMapReduce() 15 | ``` 16 | 17 | ```js 18 | // define your view 19 | damr.define('site-posts-by-date', { 20 | path: '/.data/unwalled.garden/posts/*.json', 21 | map (value, meta, emit) { 22 | let obj = JSON.parse(value) 23 | if (isPost(obj)) { 24 | let timestamp = Number(new Date(obj.createdAt)) 25 | emit([meta.origin, timestamp], meta.url) 26 | } 27 | } 28 | }) 29 | function isPost (obj) { 30 | if (obj.type !== 'unwalled.garden/post') return false 31 | if (!obj.content || typeof obj.content !== 'string') return false 32 | if (!obj.createdAt || typeof obj.createdAt !== 'string') return false 33 | return true 34 | } 35 | 36 | // index sites (index() resolves once each site is fully indexed) 37 | await damr.index('dat://pfrazee.com', {watch: true}) 38 | await damr.index('dat://mafintosh.com', {watch: true}) 39 | await damr.index('dat://andrewosh.com', {watch: true}) 40 | 41 | // list the most recent 30 posts by pfrazee.com 42 | await damr.list('site-posts-by-date', { 43 | gt: ['dat://pfrazee.com', 0], 44 | lt: ['dat://pfrazee.com', Infinity], 45 | limit: 30, 46 | reverse: true 47 | }) 48 | // list the posts in the last 5 days by mafintosh.com 49 | await damr.list('site-posts-by-date', { 50 | gte: ['dat://mafintosh.com', Date.now() - ms('5d')], // ms() from the npm "ms" module 51 | lte: ['dat://mafintosh.com', Date.now()], 52 | reverse: true 53 | }) 54 | ``` 55 | 56 | ```js 57 | // reduce example 58 | damr.define('site-posts-count', { 59 | path: '/.data/unwalled.garden/posts/*.json', 60 | map (value, meta, emit) { 61 | let obj = JSON.parse(value) 62 | if (isPost(obj)) { 63 | emit(meta.origin, meta.pathname) 64 | } 65 | }, 66 | reduce (acc, value, key) { 67 | return (acc||0) + 1 68 | } 69 | }) 70 | await damr.get('site-posts-count', 'dat://pfrazee.com') 71 | ``` 72 | 73 | 74 | 75 | **Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* 76 | 77 | - [Class: DatArchiveMapReduce](#class-datarchivemapreduce) 78 | - [new DatArchiveMapReduce([name, opts])](#new-datarchivemapreducename-opts) 79 | - [Instance: DatArchiveMapReduce](#instance-datarchivemapreduce) 80 | - [damr.open()](#damropen) 81 | - [damr.close()](#damrclose) 82 | - [damr.destroy()](#damrdestroy) 83 | - [damr.define(name, definition)](#damrdefinename-definition) 84 | - [damr.reset(view)](#damrresetview) 85 | - [damr.get(view, key)](#damrgetview-key) 86 | - [damr.list(view, opts)](#damrlistview-opts) 87 | - [damr.index(url[, opts])](#damrindexurl-opts) 88 | - [damr.unindex(url)](#damrunindexurl) 89 | - [damr.indexFile(archive, filepath)](#damrindexfilearchive-filepath) 90 | - [damr.indexFile(url)](#damrindexfileurl) 91 | - [damr.unindexFile(archive, filepath)](#damrunindexfilearchive-filepath) 92 | - [damr.unindexFile(url)](#damrunindexfileurl) 93 | - [damr.listIndexed()](#damrlistindexed) 94 | - [damr.isIndexed(url)](#damrisindexedurl) 95 | - [Event: 'open'](#event-open) 96 | - [Event: 'open-failed'](#event-open-failed) 97 | - [Event: 'view-reset'](#event-view-reset) 98 | - [Event: 'archive-indexing'](#event-archive-indexing) 99 | - [Event: 'archive-index-progress'](#event-archive-index-progress) 100 | - [Event: 'archive-indexed'](#event-archive-indexed) 101 | - [Event: 'archive-missing'](#event-archive-missing) 102 | - [Event: 'archive-found'](#event-archive-found) 103 | - [Event: 'archive-error'](#event-archive-error) 104 | 105 | 106 | 107
| ## Class: DatArchiveMapReduce 108 | 109 | ### new DatArchiveMapReduce([name, opts]) 110 | 111 | ```js 112 | var damr = new DatArchiveMapReduce('views') 113 | ``` 114 | 115 | - `name` String. Defaults to `'views'`. If run in the browser, this will be the name of the IndexedDB instance. If run in NodeJS, this will be the path of the LevelDB folder. 116 | - `opts` Object. 117 | - `DatArchive` Constructor. The class constructor for dat archive instances. If in node, you should specify [node-dat-archive](https://npm.im/node-dat-archive). 118 | 119 | Create a new `DatArchiveMapReduce` instance. 120 | The given `name` will control where the indexes are saved. 121 | You can specify different names to run multiple DatArchiveMapReduce instances at once. 122 | 123 | ## Instance: DatArchiveMapReduce 124 | 125 | ### damr.open() 126 | 127 | ```js 128 | await damr.open() 129 | ``` 130 | 131 | - Returns Promise<Void>. 132 | 133 | Opens the internal databases. Will be called automatically by other methods, so you usually don't need to call this method. 134 | 135 | ### damr.close() 136 | 137 | ```js 138 | await damr.close() 139 | ``` 140 | 141 | - Returns Promise<Void>. 142 | 143 | Closes the DatArchiveMapReduce instance. 144 | 145 | ### damr.destroy() 146 | 147 | ```js 148 | await damr.destroy() 149 | ``` 150 | 151 | - Returns Promise<Void>. 152 | 153 | Closes and deletes all indexes in the DatArchiveMapReduce instance. 154 | 155 | You can `.destroy()` and then `.open()` a DatArchiveMapReduce to recreate its indexes. 156 | 157 | ```js 158 | await damr.destroy() 159 | await damr.open() 160 | ``` 161 | 162 | ### damr.define(name, definition) 163 | 164 | - `name` String. The name of the view. 165 | - `definition` Object. 166 | - `path` String or Array<String>. An [anymatch](https://www.npmjs.com/package/anymatch) list of files to index. 167 | - `map` Function(value, meta, emit). A method to accept a new or changed file and emit new stored entries in the view. 168 | - `value` String. 169 | - `meta` Object. 170 | - `url` String. The URL of the file (eg 'dat://foo.com/bar.json'). 171 | - `origin` String. The origin of the file's site (eg 'dat://foo.com'). 172 | - `pathname` String. The path of the file in the site (eg '/bar.json'). 173 | - `emit` Function(key, value). Call this to emit new mapped values. 174 | - `key` String or Array<String>. The key to store the new entry at. 175 | - `value` Any. The value to store for the entry. 176 | - `reduce` Function(agg, value, key). A method to aggregate mapped entries into a single value. 177 | - `agg` Any. The current value of the reduce method's output. 178 | - `value` Any. The next mapped value to process. 179 | - `key` Any. The key of the entry being processed. 180 | - Must return the current value of the reduced entry. 181 | - Returns Promise<Void>. 182 | 183 | Creates a new view on the `damr` object. 
184 | 185 | Example: 186 | 187 | ```js 188 | // create a view that counts the number of posts by each user 189 | damr.define('site-posts-count', { 190 | path: '/.data/unwalled.garden/posts/*.json', 191 | map (value, meta, emit) { 192 | let obj = JSON.parse(value) 193 | if (isPost(obj)) { 194 | emit(meta.origin, meta.pathname) 195 | } 196 | }, 197 | reduce (acc, value, key) { 198 | return (acc||0) + 1 199 | } 200 | }) 201 | 202 | // get the number of posts by dat://pfrazee.com 203 | await damr.index('dat://pfrazee.com') 204 | await damr.get('site-posts-count', 'dat://pfrazee.com') 205 | ``` 206 | 207 | ### damr.reset(view) 208 | 209 | ```js 210 | await damr.reset('site-posts-by-date') 211 | ``` 212 | 213 | - `view` String. The name of the view to reset. 214 | 215 | Clears all data indexed in the view. This should be used when the view definition has changed and needs to be rebuilt. 216 | 217 | ### damr.get(view, key) 218 | 219 | ```js 220 | // get the post by pfrazee.com that was created at "Tue, 23 Jul 2019 18:23:57 GMT" 221 | var post = await damr.get('site-posts-by-date', ['dat://pfrazee.com', Number(new Date('Tue, 23 Jul 2019 18:23:57 GMT'))]) 222 | ``` 223 | 224 | - `view` String. The name of the view to query. 225 | - `key` Any. The key of the entry to fetch. 226 | - Returns Promise&lt;Object&gt;. 227 | - `key` Any. The key of the entry. 228 | - `value` Any. The value of the entry. 229 | 230 | Get the entry at the given key. 231 | 232 | ### damr.list(view, opts) 233 | 234 | ```js 235 | // list the most recent 30 posts by pfrazee.com 236 | await damr.list('site-posts-by-date', { 237 | gte: ['dat://pfrazee.com', 0], 238 | lte: ['dat://pfrazee.com', Infinity], 239 | limit: 30, 240 | reverse: true 241 | }) 242 | // list the posts in the last 5 days by mafintosh.com 243 | await damr.list('site-posts-by-date', { 244 | gte: ['dat://mafintosh.com', Date.now() - ms('5d')], // ms() from the npm "ms" module 245 | lte: ['dat://mafintosh.com', Date.now()], 246 | reverse: true 247 | }) 248 | ``` 249 | 250 | - `view` String. The name of the view to query. 251 | - `opts` Object. 252 | - `gt` Any. The start key in the range to query (exclusive). 253 | - `gte` Any. The start key in the range to query (inclusive). 254 | - `lt` Any. The end key in the range to query (exclusive). 255 | - `lte` Any. The end key in the range to query (inclusive). 256 | - `reverse` Boolean. Reverse the order of the output? Defaults to false. 257 | - `limit` Number. Limit the number of entries returned. Defaults to no limit. 258 | - Returns Promise&lt;Array&lt;Object&gt;&gt;. 259 | - `key` Any. The key of the entry. 260 | - `value` Any. The value of the entry. 261 | 262 | List a range of entries from a view. (For cursor-based paging with these options, see the sketch after the `damr.unindex` section below.) 263 | 264 | ### damr.index(url[, opts]) 265 | 266 | ```js 267 | await damr.index('dat://foo.com') 268 | ``` 269 | 270 | - `url` String or DatArchive. The site to index. 271 | - `opts` Object. 272 | - `watch` Boolean. Should DatArchiveMapReduce watch the archive for changes, and index them immediately? Defaults to false. 273 | - Returns Promise&lt;Void&gt;. 274 | 275 | Add a dat:// site to be indexed. 276 | The method will return when the site has been fully indexed. 277 | 278 | ### damr.unindex(url) 279 | 280 | ```js 281 | await damr.unindex('dat://foo.com') 282 | ``` 283 | 284 | - `url` String or DatArchive. The site to deindex. 285 | - Returns Promise&lt;Void&gt;. 286 | 287 | Remove a dat:// site from the dataset. 288 | The method will return when the site has been fully de-indexed.
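A note on paging `list()` results: because `lt` is exclusive, the last key returned by one page can serve as the upper bound of the next. A sketch against the `site-posts-by-date` view from the examples above (the `listPage` helper is hypothetical, not part of the API):

```js
// fetch one page of posts for an origin, newest first (hypothetical helper)
async function listPage (damr, origin, cursorKey) {
  return damr.list('site-posts-by-date', {
    gte: [origin, 0],
    lt: cursorKey || [origin, Infinity], // exclusive, so the cursor entry isn't repeated
    limit: 30,
    reverse: true
  })
}

let page = await listPage(damr, 'dat://pfrazee.com')
while (page.length) {
  // ...render the page...
  page = await listPage(damr, 'dat://pfrazee.com', page[page.length - 1].key)
}
```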
289 | 290 | ### damr.indexFile(archive, filepath) 291 | 292 | ```js 293 | await damr.indexFile(fooArchive, '/bar.json') 294 | ``` 295 | 296 | - `archive` DatArchive. The site containing the file to index. 297 | - `filepath` String. The path of the file to index. 298 | - Returns Promise&lt;Void&gt;. 299 | 300 | Add a single file to the dataset. 301 | The method will return when the file has been indexed. 302 | 303 | This will not add the file or its archive to the list returned by `listIndexed()`. 304 | DatArchiveMapReduce will not watch the file after this call. 305 | 306 | ### damr.indexFile(url) 307 | 308 | ```js 309 | await damr.indexFile('dat://foo.com/bar.json') 310 | ``` 311 | 312 | - `url` String. The URL of the file to index. 313 | - Returns Promise&lt;Void&gt;. 314 | 315 | Add a single file to the dataset. 316 | The method will return when the file has been indexed. 317 | 318 | This will not add the file or its archive to the list returned by `listIndexed()`. 319 | DatArchiveMapReduce will not watch the file after this call. 320 | 321 | ### damr.unindexFile(archive, filepath) 322 | 323 | ```js 324 | await damr.unindexFile(fooArchive, '/bar.json') 325 | ``` 326 | 327 | - `archive` DatArchive. The site containing the file to deindex. 328 | - `filepath` String. The path of the file to deindex. 329 | - Returns Promise&lt;Void&gt;. 330 | 331 | Remove a single file from the dataset. 332 | The method will return when the file has been de-indexed. 333 | 334 | ### damr.unindexFile(url) 335 | 336 | ```js 337 | await damr.unindexFile('dat://foo.com/bar.json') 338 | ``` 339 | 340 | - `url` String. The URL of the file to deindex. 341 | - Returns Promise&lt;Void&gt;. 342 | 343 | Remove a single file from the dataset. 344 | The method will return when the file has been de-indexed. 345 | 346 | ### damr.listIndexed() 347 | 348 | ```js 349 | var urls = await damr.listIndexed() 350 | ``` 351 | 352 | - Returns Promise&lt;Array&lt;String&gt;&gt;. 353 | 354 | Lists the URLs of the dat:// sites which are included in the dataset. 355 | 356 | ### damr.isIndexed(url) 357 | 358 | ```js 359 | var yesno = await damr.isIndexed('dat://foo.com') 360 | ``` 361 | 362 | - Returns Promise&lt;Boolean&gt;. 363 | 364 | Is the given dat:// URL included in the dataset? 365 | 366 | ### Event: 'open' 367 | 368 | ```js 369 | damr.on('open', () => { 370 | console.log('DatArchiveMapReduce is ready for use') 371 | }) 372 | ``` 373 | 374 | Emitted when the DatArchiveMapReduce instance has been opened using [`open()`](#damropen). 375 | 376 | ### Event: 'open-failed' 377 | 378 | ```js 379 | damr.on('open-failed', (err) => { 380 | console.log('DatArchiveMapReduce failed to open', err) 381 | }) 382 | ``` 383 | 384 | - `error` Error. 385 | 386 | Emitted when the DatArchiveMapReduce instance fails to open during [`open()`](#damropen). 387 | 388 | ### Event: 'view-reset' 389 | 390 | ```js 391 | damr.on('view-reset', ({view}) => { 392 | console.log('DatArchiveMapReduce has reset the indexes for', view) 393 | }) 394 | ``` 395 | 396 | - `view` String. The name of the view that was reset. 397 | 398 | Emitted when `reset()` has been called on a view. All map/reduced entries are cleared for the view. 399 | 400 | ### Event: 'archive-indexing' 401 | 402 | ```js 403 | damr.on('archive-indexing', ({view, origin, start, end}) => { 404 | console.log(view, 'is updating for', origin, 'from version', start, 'to', end) 405 | }) 406 | ``` 407 | 408 | - `view` String. The view that is indexing. 409 | - `origin` String. The archive that was updated. 410 | - `start` Number. The version which is being indexed from.
411 | - `end` Number. The version which is being indexed to. 412 | 413 | Emitted when the DatArchiveMapReduce instance has started to index the given archive. 414 | 415 | ### Event: 'archive-index-progress' 416 | 417 | ```js 418 | damr.on('archive-index-progress', ({view, origin, current, total}) => { 419 | console.log(view, 'update for', origin, 'is', Math.round(current / total * 100), '% complete') 420 | }) 421 | ``` 422 | 423 | - `view` String. The view that is indexing. 424 | - `origin` String. The archive that was updated. 425 | - `current` Number. The current update being applied. 426 | - `total` Number. The total number of updates being applied. 427 | 428 | Emitted when an update has been applied during an indexing process. 429 | 430 | ### Event: 'archive-indexed' 431 | 432 | ```js 433 | damr.on('archive-indexed', ({view, origin, version}) => { 434 | console.log(view, 'was updated for', origin, 'at version', version) 435 | }) 436 | ``` 437 | 438 | - `view` String. The view that is indexing. 439 | - `origin` String. The archive that was updated. 440 | - `version` Number. The version which was updated to. 441 | 442 | Emitted when the DatArchiveMapReduce instance has indexed the given archive. 443 | This fires every time an archive is indexed, whether or not the indexing results in changes to the view. 444 | 445 | ### Event: 'archive-missing' 446 | 447 | ```js 448 | damr.on('archive-missing', ({origin}) => { 449 | console.log('DatArchiveMapReduce couldn\'t find', origin, '- now searching') 450 | }) 451 | ``` 452 | 453 | - `origin` String. The archive that is missing. 454 | 455 | Emitted when an archive's data was not available locally or found on the network. 456 | When this occurs, DatArchiveMapReduce will continue searching for the data, and emit `'archive-found'` on success. 457 | 458 | ### Event: 'archive-found' 459 | 460 | ```js 461 | damr.on('archive-found', ({origin}) => { 462 | console.log('DatArchiveMapReduce has found and indexed', origin) 463 | }) 464 | ``` 465 | 466 | - `origin` String. The archive that was found. 467 | 468 | Emitted when an archive's data is found after initially being missing during indexing. 469 | This event will only be emitted after `'archive-missing'` is emitted. 470 | 471 | ### Event: 'archive-error' 472 | 473 | ```js 474 | damr.on('archive-error', ({origin, error}) => { 475 | console.log('DatArchiveMapReduce failed to index', origin, error) 476 | }) 477 | ``` 478 | 479 | - `origin` String. The archive that failed. 480 | - `error` Error. The error emitted. 481 | 482 | Emitted when an archive fails to load. 483 | --------------------------------------------------------------------------------