├── .gitignore ├── changedb.js ├── collaborators.md ├── index.js ├── package.json ├── pool.js ├── readme.md ├── test.js └── worker.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .DS_Store -------------------------------------------------------------------------------- /changedb.js: -------------------------------------------------------------------------------- 1 | var sublevel = require('subleveldown') 2 | var changes = require('changes-feed') 3 | var changesdown = require('changesdown') 4 | 5 | module.exports = function (opts) { 6 | var feed = changes(sublevel(opts.db, 'feed')) 7 | var db = changesdown(sublevel(opts.db, 'db', opts), feed, opts) 8 | return { 9 | feed: feed, 10 | db: db 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /collaborators.md: -------------------------------------------------------------------------------- 1 | ## Collaborators 2 | 3 | atomic-queue is only possible due to the excellent work of the following collaborators: 4 | 5 | 6 | 7 |
- maxogden (GitHub/maxogden)
- jcrugzz (GitHub/jcrugzz)
8 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var events = require('events') 2 | 3 | var inherits = require('inherits') 4 | var memdb = require('memdb') 5 | var through = require('through2') 6 | var pump = require('pumpify') 7 | var duplex = require('duplexify') 8 | var uuid = require('hat') 9 | 10 | var createPool = require('./pool.js') 11 | var createChangeDB = require('./changedb.js') 12 | 13 | var debug = require('debug')('atomic-queue') 14 | 15 | module.exports = Queue 16 | 17 | function Queue (worker, opts) { 18 | var self = this 19 | if (!(this instanceof Queue)) return new Queue(worker, opts) 20 | if (!opts) opts = {} 21 | 22 | this.concurrency = opts.concurrency || 1 23 | this.db = opts.db || memdb() 24 | this.opts = opts 25 | 26 | this.pool = createPool(worker, opts) 27 | this.changes = createChangeDB({ 28 | db: this.db, 29 | keyEncoding: 'json', 30 | valueEncoding: 'json' 31 | }) 32 | 33 | this.inflight = {} 34 | this.latestChange = 0 35 | this.highestChange = 0 36 | this.pending = 0 37 | 38 | this.stream = this.createDuplexStream() 39 | this.stream._queue = this 40 | // 41 | // Make the reference to the pool available so we can access it via the stream 42 | // 43 | this.stream.pool = this.pool 44 | 45 | this.pool.on('start', function start (data, worker, change) { 46 | var changeNum = change.change 47 | debug('start', changeNum) 48 | self.inflight.jobs[changeNum] = {change: changeNum, finished: false} 49 | }) 50 | 51 | this.pool.on('finish', function finish (output, data, worker, change) { 52 | var changeNum = change.change 53 | debug('finish', changeNum) 54 | // 55 | // When we are dealing with concurrent changes we can hit cases where the 56 | // change here is a seq less than the previous. Lets also keep track of the 57 | // highest change in order to serve both cases. 58 | // 59 | self.latestChange = self.changes.db.db.change 60 | self.highestChange = changeNum < self.highestChange ? 
self.highestChange : changeNum 61 | self.inflight.jobs[changeNum] = {change: changeNum, finished: true} 62 | }) 63 | 64 | this.stream.on('update-start', function updateStart (data) { 65 | debug('update-start', data) 66 | self.updatingInflight = true 67 | }) 68 | 69 | this.stream.on('update-end', function updateEnd (data) { 70 | if (self.pending === 0) self.stream.emit('idle') 71 | debug('update-end', data) 72 | self.updatingInflight = false 73 | }) 74 | 75 | events.EventEmitter.call(this) 76 | 77 | return this.stream 78 | } 79 | 80 | inherits(Queue, events.EventEmitter) 81 | 82 | Queue.prototype.initialize = function initialize (cb) { 83 | var self = this 84 | 85 | self.db.get('inflight', function doneGet (err, inflightData) { 86 | if (err && err.type !== 'NotFoundError') return cb(err) 87 | if (!inflightData) inflightData = {since: 0, jobs: {}} 88 | debug('inflight-load', inflightData) 89 | self.inflight = inflightData 90 | cb(null) 91 | }) 92 | } 93 | 94 | Queue.prototype.createDuplexStream = function createDuplexStream (opts) { 95 | var self = this 96 | 97 | this.initialize(function ready (err) { 98 | if (err) return self.stream.destroy(err) 99 | self.stream.emit('ready', self.inflight) 100 | var readStream = self.createWorkStream({since: self.inflight.since, live: true}) 101 | duplexStream.setReadable(readStream) 102 | }) 103 | 104 | var writeStream = through.obj( 105 | function write (obj, enc, cb) { 106 | self.changes.db.put(uuid(), obj, function stored (err) { 107 | cb(err) 108 | }) 109 | }, 110 | function end (done) { 111 | finish(self.inflight) 112 | 113 | function finish (inflight) { 114 | debug('finish?', [self.pending, self.latestChange, self.highestChange, inflight.since]) 115 | if (self.pending === 0 && self.highestChange === inflight.since) { 116 | debug('uncorking') 117 | duplexStream.uncork() 118 | done() 119 | } else { 120 | self.stream.once('update-end', finish) 121 | } 122 | } 123 | } 124 | ) 125 | 126 | var duplexStream = duplex.obj(writeStream) 127 | 128 | // one weird trick from mafintosh (makes 'finish' wait for writable end) 129 | duplexStream.on('prefinish', function prefinish () { 130 | if (self.pending) duplexStream.cork() 131 | }) 132 | 133 | return duplexStream 134 | } 135 | 136 | Queue.prototype.createWorkStream = function createWorkStream (opts) { 137 | var self = this 138 | 139 | var changeStream = this.changes.db.createChangesStream(opts) 140 | 141 | var splitStream = through.obj( 142 | function split (data, enc, cb) { 143 | self.pending++ 144 | 145 | self.pool.getFree(function gotWorker (proc) { 146 | // call cb so we get more data written to us 147 | cb() 148 | 149 | // also kick off the worker 150 | proc.work(data.value.value, doneWorking, data) 151 | }) 152 | 153 | function doneWorking (err, output) { 154 | self.pending-- 155 | 156 | if (err) return self.stream.destroy(err) 157 | 158 | // TODO implement purging. 
should remove processed entries from the changes feed 159 | 160 | var inflight = self.inflightWorkers() 161 | 162 | update() 163 | 164 | function update () { 165 | if (self.updatingInflight) return self.stream.once('update-end', update) 166 | self.stream.emit('update-start', inflight) 167 | self.db.put('inflight', inflight, function updated (err) { 168 | self.inflight = inflight 169 | self.stream.emit('update-end', inflight) 170 | if (err) self.stream.destroy(err) 171 | if (output) splitStream.push(output) 172 | }) 173 | } 174 | } 175 | } 176 | ) 177 | 178 | var pipeline = pump(changeStream, splitStream) 179 | return pipeline 180 | } 181 | 182 | Queue.prototype.inflightWorkers = function inflightWorkers () { 183 | var self = this 184 | 185 | var inflight = Object.keys(this.inflight.jobs) 186 | .map(function expand (el) { 187 | return self.inflight.jobs[el] 188 | }) 189 | .sort(function changeSort (a, b) { 190 | return a.change > b.change 191 | }) 192 | 193 | var lastJob = inflight[inflight.length - 1] 194 | var lastChange = lastJob && lastJob.change >= this.highestChange ? lastJob.change : this.highestChange 195 | var startIndex, startChange 196 | 197 | for (var i = 0; i < inflight.length; i++) { 198 | var el = inflight[i] 199 | if (el.finished === false) { 200 | startIndex = i 201 | startChange = el.change 202 | break 203 | } 204 | } 205 | 206 | if (typeof startIndex === 'undefined') return {since: lastChange, jobs: {}} // all workers are done 207 | else inflight = inflight.slice(startIndex) 208 | 209 | // turn back into object 210 | var inflightObj = {} 211 | inflight.forEach(function (el) { 212 | inflightObj[el.change] = el 213 | }) 214 | 215 | return {since: startChange, jobs: inflightObj} 216 | } 217 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "atomic-queue", 3 | "version": "5.0.4", 4 | "description": "a crash friendly queue that persists queue state and can restart. 
uses a worker pool and has configurable concurrency", 5 | "main": "index.js", 6 | "author": "max ogden", 7 | "license": "BSD", 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/maxogden/atomic-queue.git" 11 | }, 12 | "scripts": { 13 | "test": "standard && node test.js" 14 | }, 15 | "bugs": { 16 | "url": "https://github.com/maxogden/atomic-queue/issues" 17 | }, 18 | "homepage": "https://github.com/maxogden/atomic-queue", 19 | "dependencies": { 20 | "changes-feed": "^1.1.0", 21 | "changesdown": "^2.3.0", 22 | "debug": "^2.1.2", 23 | "duplexify": "^3.2.0", 24 | "hat": "0.0.3", 25 | "inherits": "^2.0.1", 26 | "memdb": "^0.2.0", 27 | "pumpify": "^1.3.3", 28 | "subleveldown": "^2.0.0", 29 | "through2": "^0.6.3" 30 | }, 31 | "devDependencies": { 32 | "standard": "^2.10.0", 33 | "tape": "^3.5.0" 34 | }, 35 | "keywords": [ 36 | "leveldb", 37 | "levelup" 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /pool.js: -------------------------------------------------------------------------------- 1 | var events = require('events') 2 | var inherits = require('inherits') 3 | var createWorker = require('./worker.js') 4 | var debug = require('debug')('atomic-queue-pool') 5 | 6 | module.exports = Pool 7 | 8 | function Pool (workerTemplate, opts) { 9 | if (!(this instanceof Pool)) return new Pool(workerTemplate, opts) 10 | if (!opts) opts = {} 11 | this.workerTemplate = workerTemplate 12 | this.working = 0 13 | this.limit = opts.concurrency || 1 14 | this.workers = this.createWorkers() 15 | events.EventEmitter.call(this) 16 | } 17 | 18 | inherits(Pool, events.EventEmitter) 19 | 20 | Pool.prototype.createWorkers = function createWorkers () { 21 | var self = this 22 | var workers = [] 23 | var useExistingWorkers = false 24 | if (Array.isArray(this.workerTemplate)) useExistingWorkers = true 25 | 26 | for (var i = 0; i < this.limit; i++) { 27 | var workFn = useExistingWorkers ? 
this.workerTemplate[i] : this.workerTemplate 28 | 29 | // if insufficient number of workers was passed in then return early 30 | if (!workFn) return workers 31 | 32 | var worker = createWorker(workFn) 33 | 34 | // consolidate events 35 | worker.on('start', function onStart (data, change) { 36 | self.emit('start', data, worker, change) 37 | }) 38 | 39 | worker.on('finish', function onFinish (output, data, change) { 40 | self.emit('finish', output, data, worker, change) 41 | }) 42 | 43 | workers.push(worker) 44 | } 45 | 46 | debug('created workers', {count: workers.length}) 47 | 48 | return workers 49 | } 50 | 51 | Pool.prototype.getFree = function getFree (cb) { 52 | var self = this 53 | 54 | // try to get a free worker 55 | for (var i = 0; i < this.workers.length; i++) { 56 | var worker = this.workers[i] 57 | if (!worker.available) continue 58 | debug('found free worker') 59 | worker.available = false 60 | return cb(worker) 61 | } 62 | 63 | // otherwise wait for one to finish 64 | wait() 65 | 66 | function wait () { 67 | debug('waiting on free worker') 68 | self.once('finish', function finish (output, data, worker, change) { 69 | // handle case where getFree is waiting on multiple workers 70 | process.nextTick(function next () { 71 | if (!worker.available) return wait() 72 | debug('waited for free worker, just got one') 73 | worker.available = false 74 | cb(worker) 75 | }) 76 | }) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # atomic-queue 2 | 3 | a crash friendly queue that persists queue state and can restart. uses a worker pool and has configurable concurrency 4 | 5 | [![NPM](https://nodei.co/npm/atomic-queue.png)](https://nodei.co/npm/atomic-queue/) 6 | 7 | [![js-standard-style](https://raw.githubusercontent.com/feross/standard/master/badge.png)](https://github.com/feross/standard) 8 | 9 | ## API 10 | 11 | for example usage see `test.js` 12 | 13 | ### `var queue = require('atomic-queue')(worker, opts)` 14 | 15 | initialize a new queue with a `worker` function and optional options. `queue` is a stream 16 | 17 | you queue things by writing them to the queue stream: 18 | 19 | ```js 20 | queue.write('hello') 21 | queue.write('goodbye') 22 | queue.write({name: 'bob'}) 23 | ``` 24 | 25 | `worker` must be a function that has this API: 26 | 27 | ```js 28 | function work (data, done) { 29 | // do work, then call done with (err) if there was an error 30 | } 31 | ``` 32 | 33 | `data` in the worker function will be the data you wrote into the queue above 34 | 35 | ### events 36 | 37 | in addition to standard stream events you can also listen to the following: 38 | 39 | #### queue.on('ready') 40 | 41 | emitted after startup when the queue state has been read from disk and the queue is now ready to start working 42 | 43 | #### queue.on('error') 44 | 45 | when a catastrophic error has occurred. you **must** handle this. receiving this also means the queue stream has been destroyed. 46 | 47 | #### queue.on('idle') 48 | 49 | when the number of pending jobs reaches 0. 
may be called multiple times 50 | 51 | #### queue.on('finish') 52 | 53 | when the writable side of the queue has been ended *and* all jobs have finished processing 54 | 55 | #### queue.on('update-start') 56 | 57 | when the queue starts flushing its state to disk 58 | 59 | #### queue.on('update-end') 60 | 61 | when the queue finishes flushing its state to disk 62 | 63 | #### queue.pool.on('start') 64 | 65 | when a job starts working 66 | 67 | #### queue.pool.on('finish') 68 | 69 | when a job finishes working 70 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | var test = require('tape') 2 | var createQueue = require('./') 3 | 4 | test('process 6 normal items', function test (t) { 5 | var queue = createQueue(doWork, {concurrency: 1}) 6 | var pending = 6 7 | 8 | queue.on('finish', function end () { 9 | t.equal(pending, 0, 'pending is 0') 10 | t.end() 11 | }) 12 | 13 | queue.write('a') 14 | queue.write('b') 15 | queue.write('c') 16 | queue.write('d') 17 | queue.write('e') 18 | queue.write('f') 19 | queue.end() 20 | 21 | function doWork (data, cb) { 22 | console.error('processing', data) 23 | pending-- 24 | cb() 25 | } 26 | }) 27 | 28 | test('handle error', function test (t) { 29 | var queue = createQueue(doWork, {concurrency: 1}) 30 | var pending = 6 31 | 32 | queue.write('a') 33 | queue.write('b') 34 | queue.write('c') 35 | queue.write('d') 36 | queue.write('e') 37 | queue.write('f') 38 | queue.end() 39 | 40 | queue.on('error', function error (err) { 41 | t.equals(err.message, 'oh god the humanity', 'got error') 42 | t.end() 43 | }) 44 | 45 | function doWork (data, cb) { 46 | console.error('processing', data) 47 | pending-- 48 | if (pending === 3) return cb(new Error('oh god the humanity')) 49 | cb() 50 | } 51 | }) 52 | 53 | test('handle concurrency', function test (t) { 54 | var queue = createQueue(doWork, {concurrency: 2}) 55 | var pending = 6 56 | 57 | queue.write('a') 58 | queue.write('b') 59 | queue.write('c') 60 | queue.write('d') 61 | queue.write('e') 62 | queue.write('f') 63 | queue.end() 64 | 65 | queue.on('finish', function end () { 66 | t.equal(pending, 0, 'pending is 0') 67 | t.end() 68 | }) 69 | 70 | function doWork (data, cb) { 71 | console.error('processing', data) 72 | pending-- 73 | setTimeout(cb, pending * 100) 74 | } 75 | }) 76 | -------------------------------------------------------------------------------- /worker.js: -------------------------------------------------------------------------------- 1 | var events = require('events') 2 | var inherits = require('inherits') 3 | var debug = require('debug')('atomic-queue-worker') 4 | 5 | module.exports = Worker 6 | 7 | function Worker (workFn) { 8 | if (!(this instanceof Worker)) return new Worker(workFn) 9 | this.available = true 10 | this.workFn = workFn 11 | events.EventEmitter.call(this) 12 | } 13 | 14 | inherits(Worker, events.EventEmitter) 15 | 16 | Worker.prototype.work = function work (data, cb, change) { 17 | var self = this 18 | self.available = false 19 | this.emit('start', data, change) 20 | debug('start', change.change) 21 | this.workFn(data, function done (err, output) { 22 | self.available = true 23 | debug('finish', change.change) 24 | self.emit('finish', output, data, change) 25 | cb(err) 26 | }, change) 27 | } 28 | --------------------------------------------------------------------------------
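
the readme above only shows in-memory usage; since index.js defaults `opts.db` to `memdb()` and persists queue state through whatever database is supplied, a crash-friendly setup needs an on-disk store passed in as `opts.db`. below is a minimal sketch of that, assuming a levelup-compatible store such as `level` (not a dependency of atomic-queue) and a made-up `./queue-state` path:

```js
// minimal sketch: persist queue state so unfinished jobs survive a restart.
// `level` and the './queue-state' path are illustrative assumptions.
var level = require('level')
var createQueue = require('atomic-queue')

var db = level('./queue-state')
var queue = createQueue(work, {db: db, concurrency: 2})

queue.on('ready', function (inflight) {
  // after a restart, the work stream resumes from the persisted inflight state
  console.log('queue ready, resuming from change', inflight.since)
})

queue.on('idle', function () {
  console.log('no pending jobs right now')
})

queue.write({name: 'job-a'})
queue.write({name: 'job-b'})
queue.end()

function work (data, done) {
  // do the real work here, then call done(err) when finished
  console.log('working on', data)
  done()
}
```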
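
the worker API section says to call `done(err)`, but index.js also forwards a second argument: when a worker calls `done(null, output)`, the `output` is pushed to the readable side of the queue stream (`if (output) splitStream.push(output)`), so results can be piped onwards. a small sketch of that, using `through2` purely to consume the results:

```js
// sketch of the duplex behaviour: worker output flows out of the queue's
// readable side and can be piped like any other object stream
var createQueue = require('atomic-queue')
var through = require('through2') // used here only for illustration

var queue = createQueue(function work (data, done) {
  // pass a result as the second argument to surface it downstream
  done(null, {input: data, doneAt: Date.now()})
})

queue.pipe(through.obj(function (result, enc, cb) {
  console.log('job result', result)
  cb()
}))

queue.write('a')
queue.write('b')
queue.end()
```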
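
pool.js also accepts an array of worker functions in place of a single one (`Array.isArray(this.workerTemplate)`), giving each concurrency slot its own worker. this mode is not covered in the readme; the sketch below infers it from pool.js, and the connection objects are hypothetical stand-ins for per-slot resources:

```js
// sketch of the array-of-workers mode: one worker function per concurrency
// slot, each owning its own (hypothetical) connection
var createQueue = require('atomic-queue')

var connectionA = {send: function (data, done) { setTimeout(done, 50) }}
var connectionB = {send: function (data, done) { setTimeout(done, 50) }}

function makeWorker (connection) {
  return function work (data, done) {
    // each slot reuses its own connection instead of sharing one
    connection.send(data, done)
  }
}

var workers = [makeWorker(connectionA), makeWorker(connectionB)]
var queue = createQueue(workers, {concurrency: workers.length})

// pool-level events report per-change progress
queue.pool.on('start', function (data, worker, change) {
  console.log('change', change.change, 'started')
})

queue.pool.on('finish', function (output, data, worker, change) {
  console.log('change', change.change, 'finished')
})

queue.write('a')
queue.write('b')
queue.end()
```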