├── .gitignore
├── changedb.js
├── collaborators.md
├── index.js
├── package.json
├── pool.js
├── readme.md
├── test.js
└── worker.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .DS_Store
--------------------------------------------------------------------------------
/changedb.js:
--------------------------------------------------------------------------------
1 | var sublevel = require('subleveldown')
2 | var changes = require('changes-feed')
3 | var changesdown = require('changesdown')
4 |
5 | module.exports = function (opts) {
6 | var feed = changes(sublevel(opts.db, 'feed'))
7 | var db = changesdown(sublevel(opts.db, 'db', opts), feed, opts)
8 | return {
9 | feed: feed,
10 | db: db
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/collaborators.md:
--------------------------------------------------------------------------------
1 | ## Collaborators
2 |
3 | atomic-queue is only possible due to the excellent work of the following collaborators:
4 |
5 |
8 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | var events = require('events')
2 |
3 | var inherits = require('inherits')
4 | var memdb = require('memdb')
5 | var through = require('through2')
6 | var pump = require('pumpify')
7 | var duplex = require('duplexify')
8 | var uuid = require('hat')
9 |
10 | var createPool = require('./pool.js')
11 | var createChangeDB = require('./changedb.js')
12 |
13 | var debug = require('debug')('atomic-queue')
14 |
15 | module.exports = Queue
16 |
/**
 * Queue constructor (callable with or without `new`).
 *
 * Sets up the worker pool, the changes database and the bookkeeping counters,
 * then wires pool/stream events together.
 *
 * NOTE: the value handed back to the caller is the duplex stream, not the
 * Queue instance. The instance stays reachable as `stream._queue` and the pool
 * as `stream.pool`.
 *
 * @param {Function|Function[]} worker - passed straight to the pool
 * @param {Object} [opts] - `concurrency` (default 1) and `db` (default memdb())
 *   are read here; the whole object is also given to the pool
 * @returns {Object} the duplex stream created by createDuplexStream()
 */
function Queue (worker, opts) {
  if (!(this instanceof Queue)) return new Queue(worker, opts)

  var self = this
  opts = opts || {}

  self.concurrency = opts.concurrency || 1
  self.db = opts.db || memdb()
  self.opts = opts

  self.pool = createPool(worker, opts)
  self.changes = createChangeDB({
    db: self.db,
    keyEncoding: 'json',
    valueEncoding: 'json'
  })

  // bookkeeping; `inflight` is replaced once initialize() has loaded the
  // persisted state
  self.inflight = {}
  self.latestChange = 0
  self.highestChange = 0
  self.pending = 0

  self.stream = self.createDuplexStream()
  self.stream._queue = self
  // expose the pool on the stream so consumers can listen to its events
  self.stream.pool = self.pool

  self.pool.on('start', onJobStart)
  self.pool.on('finish', onJobFinish)
  self.stream.on('update-start', onUpdateStart)
  self.stream.on('update-end', onUpdateEnd)

  events.EventEmitter.call(self)

  return self.stream

  // a job was handed to a worker: record it as unfinished
  function onJobStart (data, jobWorker, change) {
    var seq = change.change
    debug('start', seq)
    self.inflight.jobs[seq] = {change: seq, finished: false}
  }

  // a worker reported completion: record it as finished
  function onJobFinish (output, data, jobWorker, change) {
    var seq = change.change
    debug('finish', seq)
    // With concurrent workers a job can finish with a lower change number
    // than one that finished earlier, so the highest change seen is tracked
    // separately from the latest one.
    self.latestChange = self.changes.db.db.change
    if (!(seq < self.highestChange)) self.highestChange = seq
    self.inflight.jobs[seq] = {change: seq, finished: true}
  }

  // the inflight state is being written to the db
  function onUpdateStart (data) {
    debug('update-start', data)
    self.updatingInflight = true
  }

  // the inflight write completed; announce 'idle' when nothing is pending
  function onUpdateEnd (data) {
    if (self.pending === 0) self.stream.emit('idle')
    debug('update-end', data)
    self.updatingInflight = false
  }
}
79 |
80 | inherits(Queue, events.EventEmitter)
81 |
/**
 * Load the persisted 'inflight' record from the db into `this.inflight`.
 *
 * An error whose `type` is 'NotFoundError' is treated as "nothing stored yet"
 * and an empty state ({since: 0, jobs: {}}) is used instead.
 *
 * @param {Function} cb - called with (err) on failure, (null) on success
 */
Queue.prototype.initialize = function initialize (cb) {
  var queue = this

  queue.db.get('inflight', function onInflight (err, stored) {
    var notFound = err && err.type === 'NotFoundError'
    if (err && !notFound) return cb(err)

    var state = stored || {since: 0, jobs: {}}
    debug('inflight-load', state)
    queue.inflight = state
    cb(null)
  })
}
93 |
/**
 * Build the duplex stream that is the public face of the queue.
 *
 * Writable side: every written object is stored in the changes db under a
 * fresh uuid. Readable side: attached after initialize() completes, reading
 * the work stream from the persisted `since` position in live mode.
 *
 * @param {Object} [opts] - accepted but not read by this function
 * @returns {Object} the duplexify object stream
 */
Queue.prototype.createDuplexStream = function createDuplexStream (opts) {
  var self = this
  var duplexStream // assigned below, before the initialize callback fires

  this.initialize(function ready (err) {
    if (err) return self.stream.destroy(err)
    self.stream.emit('ready', self.inflight)
    var workStream = self.createWorkStream({since: self.inflight.since, live: true})
    duplexStream.setReadable(workStream)
  })

  var writeStream = through.obj(storeJob, flushWhenDrained)

  duplexStream = duplex.obj(writeStream)

  // one weird trick from mafintosh (makes 'finish' wait for writable end)
  duplexStream.on('prefinish', function prefinish () {
    if (self.pending) duplexStream.cork()
  })

  return duplexStream

  // persist one written object as a new entry in the changes db
  function storeJob (obj, enc, cb) {
    self.changes.db.put(uuid(), obj, function stored (err) {
      cb(err)
    })
  }

  // flush handler: only complete once no job is pending and the highest
  // finished change equals the `since` of the inflight state; otherwise retry
  // on the next 'update-end'
  function flushWhenDrained (done) {
    attempt(self.inflight)

    function attempt (inflight) {
      debug('finish?', [self.pending, self.latestChange, self.highestChange, inflight.since])
      var drained = self.pending === 0 && self.highestChange === inflight.since
      if (!drained) {
        self.stream.once('update-end', attempt)
        return
      }
      debug('uncorking')
      duplexStream.uncork()
      done()
    }
  }
}
135 |
/**
 * Build the readable pipeline that turns change-feed entries into jobs.
 *
 * Each entry from the changes stream is handed to a free worker from the pool.
 * When the worker is done, the inflight state is recomputed and written to the
 * db under the 'inflight' key; writes are serialized through the
 * 'update-start' / 'update-end' events on the queue stream.
 *
 * @param {Object} opts - forwarded to `changes.db.createChangesStream`
 * @returns {Object} the pumpify pipeline (changes stream -> split stream)
 */
Queue.prototype.createWorkStream = function createWorkStream (opts) {
  var self = this

  var changeStream = this.changes.db.createChangesStream(opts)

  var splitStream = through.obj(
    function split (data, enc, cb) {
      self.pending++

      self.pool.getFree(function gotWorker (proc) {
        // call cb so we get more data written to us
        cb()

        // also kick off the worker
        proc.work(data.value.value, doneWorking, data)
      })

      function doneWorking (err, output) {
        self.pending--

        if (err) return self.stream.destroy(err)

        // TODO implement purging. should remove processed entries from the changes feed

        var inflight = self.inflightWorkers()

        update()

        function update () {
          // only one inflight write at a time; retry after the current one ends
          if (self.updatingInflight) return self.stream.once('update-end', update)
          self.stream.emit('update-start', inflight)
          self.db.put('inflight', inflight, function updated (err) {
            self.inflight = inflight
            // always emit 'update-end', even on failure, so queued updates
            // and the updatingInflight flag are released
            self.stream.emit('update-end', inflight)
            // a failed write destroys the stream; do not push output onto it
            if (err) return self.stream.destroy(err)
            if (output) splitStream.push(output)
          })
        }
      }
    }
  )

  var pipeline = pump(changeStream, splitStream)
  return pipeline
}
181 |
/**
 * Compute the inflight state to persist, from `this.inflight.jobs` and
 * `this.highestChange`.
 *
 * Jobs are ordered by change number. Everything before the first unfinished
 * job is dropped; `since` becomes that job's change number. When every job is
 * finished, `since` is the larger of the last job's change and
 * `highestChange`, and `jobs` is empty.
 *
 * @returns {{since: number, jobs: Object}} jobs keyed by change number
 */
Queue.prototype.inflightWorkers = function inflightWorkers () {
  var self = this

  var inflight = Object.keys(this.inflight.jobs)
    .map(function expand (el) {
      return self.inflight.jobs[el]
    })
    .sort(function changeSort (a, b) {
      // a comparator must return a negative/zero/positive number; returning a
      // boolean never yields a negative value and leaves the order unreliable
      return a.change - b.change
    })

  var lastJob = inflight[inflight.length - 1]
  var lastChange = lastJob && lastJob.change >= this.highestChange ? lastJob.change : this.highestChange
  var startIndex, startChange

  // locate the oldest job that is still running
  for (var i = 0; i < inflight.length; i++) {
    var el = inflight[i]
    if (el.finished === false) {
      startIndex = i
      startChange = el.change
      break
    }
  }

  if (typeof startIndex === 'undefined') return {since: lastChange, jobs: {}} // all workers are done
  else inflight = inflight.slice(startIndex)

  // turn back into object
  var inflightObj = {}
  inflight.forEach(function (el) {
    inflightObj[el.change] = el
  })

  return {since: startChange, jobs: inflightObj}
}
217 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "atomic-queue",
3 | "version": "5.0.4",
4 | "description": "a crash friendly queue that persists queue state and can restart. uses a worker pool and has configurable concurrency",
5 | "main": "index.js",
6 | "author": "max ogden",
7 | "license": "BSD",
8 | "repository": {
9 | "type": "git",
10 | "url": "https://github.com/maxogden/atomic-queue.git"
11 | },
12 | "scripts": {
13 | "test": "standard && node test.js"
14 | },
15 | "bugs": {
16 | "url": "https://github.com/maxogden/atomic-queue/issues"
17 | },
18 | "homepage": "https://github.com/maxogden/atomic-queue",
19 | "dependencies": {
20 | "changes-feed": "^1.1.0",
21 | "changesdown": "^2.3.0",
22 | "debug": "^2.1.2",
23 | "duplexify": "^3.2.0",
24 | "hat": "0.0.3",
25 | "inherits": "^2.0.1",
26 | "memdb": "^0.2.0",
27 | "pumpify": "^1.3.3",
28 | "subleveldown": "^2.0.0",
29 | "through2": "^0.6.3"
30 | },
31 | "devDependencies": {
32 | "standard": "^2.10.0",
33 | "tape": "^3.5.0"
34 | },
35 | "keywords": [
36 | "leveldb",
37 | "levelup"
38 | ]
39 | }
40 |
--------------------------------------------------------------------------------
/pool.js:
--------------------------------------------------------------------------------
1 | var events = require('events')
2 | var inherits = require('inherits')
3 | var createWorker = require('./worker.js')
4 | var debug = require('debug')('atomic-queue-pool')
5 |
6 | module.exports = Pool
7 |
/**
 * Pool constructor (callable with or without `new`).
 *
 * @param {Function|Function[]} workerTemplate - one work function shared by
 *   all workers, or an array with one work function per worker
 * @param {Object} [opts] - `concurrency` sets the worker limit (default 1)
 */
function Pool (workerTemplate, opts) {
  var calledWithNew = this instanceof Pool
  if (!calledWithNew) return new Pool(workerTemplate, opts)

  var options = opts || {}

  this.workerTemplate = workerTemplate
  this.working = 0
  this.limit = options.concurrency || 1
  // createWorkers() reads workerTemplate and limit, so it must come after them
  this.workers = this.createWorkers()

  events.EventEmitter.call(this)
}
17 |
18 | inherits(Pool, events.EventEmitter)
19 |
/**
 * Create up to `this.limit` workers and re-emit their 'start' / 'finish'
 * events on the pool, adding the emitting worker as an argument.
 *
 * When `workerTemplate` is an array, worker i uses `workerTemplate[i]`; if the
 * array is shorter than the limit, fewer workers are created.
 *
 * @returns {Object[]} the created workers
 */
Pool.prototype.createWorkers = function createWorkers () {
  var self = this
  var workers = []
  var useExistingWorkers = Array.isArray(this.workerTemplate)

  for (var i = 0; i < this.limit; i++) {
    var workFn = useExistingWorkers ? this.workerTemplate[i] : this.workerTemplate

    // if insufficient number of workers was passed in then stop early
    if (!workFn) break

    workers.push(spawn(workFn))
  }

  debug('created workers', {count: workers.length})

  return workers

  // Each worker is created in its own function scope so the handlers close
  // over *their* worker. With a `var` declared inside the loop, every handler
  // would see the last worker created and report the wrong one.
  function spawn (fn) {
    var worker = createWorker(fn)

    // consolidate events
    worker.on('start', function onStart (data, change) {
      self.emit('start', data, worker, change)
    })

    worker.on('finish', function onFinish (output, data, change) {
      self.emit('finish', output, data, worker, change)
    })

    return worker
  }
}
50 |
/**
 * Hand a worker to `cb`, marking it unavailable first.
 *
 * If a worker is available right now, `cb` is invoked synchronously with it.
 * Otherwise the pool waits for a 'finish' event and, on the next tick, claims
 * the worker carried by that event — or waits again if it was already taken.
 *
 * @param {Function} cb - called with (worker)
 */
Pool.prototype.getFree = function getFree (cb) {
  var pool = this

  // fast path: first worker that is currently available
  for (var idx = 0; idx < pool.workers.length; idx++) {
    var candidate = pool.workers[idx]
    if (candidate.available) {
      debug('found free worker')
      candidate.available = false
      return cb(candidate)
    }
  }

  // slow path: nobody is free, wait for a worker to finish
  waitForFinish()

  function waitForFinish () {
    debug('waiting on free worker')
    pool.once('finish', function onFinish (output, data, finished, change) {
      // several getFree calls may be waiting on the same event; defer one tick
      // and re-check so only one of them claims the worker
      process.nextTick(function claim () {
        if (!finished.available) return waitForFinish()
        debug('waited for free worker, just got one')
        finished.available = false
        cb(finished)
      })
    })
  }
}
79 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # atomic-queue
2 |
3 | a crash friendly queue that persists queue state and can restart. uses a worker pool and has configurable concurrency
4 |
5 | [atomic-queue on npm](https://nodei.co/npm/atomic-queue/)
6 |
7 | [js-standard-style](https://github.com/feross/standard)
8 |
9 | ## API
10 |
11 | for example usage see `test.js`
12 |
13 | ### `var queue = require('atomic-queue')(worker, opts)`
14 |
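15 | initialize a new queue with a `worker` function and an optional `opts` object. `queue` is a duplex stream. supported `opts`: `concurrency` (number of workers, defaults to `1`) and `db` (the database used to persist queue state, defaults to an in-memory `memdb`)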
16 |
17 | you queue things by writing them to the queue stream:
18 |
19 | ```js
20 | queue.write('hello')
21 | queue.write('goodbye')
22 | queue.write({name: 'bob'})
23 | ```
24 |
25 | `worker` must be a function that has this API:
26 |
27 | ```js
28 | function work (data, done) {
29 | // do work, then call done with (err) if there was an error
30 | }
31 | ```
32 |
33 | `data` in the worker function will be the data you wrote into the queue above
34 |
35 | ### events
36 |
37 | in addition to standard stream events you can also listen to the following:
38 |
39 | #### queue.on('ready')
40 |
41 | emitted after startup when the queue state has been read from disk and the queue is now ready to start working
42 |
43 | #### queue.on('error')
44 |
45 | when a catastrophic error has occurred. you **must** handle this. receiving this also means the queue stream has been destroyed.
46 |
47 | #### queue.on('idle')
48 |
49 | emitted when the number of pending jobs reaches 0. may be emitted multiple times
50 |
51 | #### queue.on('finish')
52 |
53 | when the writable side of the queue has been ended *and* all jobs have finished processing
54 |
55 | #### queue.on('update-start')
56 |
57 | when the queue starts flushing its state to disk
58 |
59 | #### queue.on('update-end')
60 |
61 | when the queue finishes flushing its state to disk
62 |
63 | #### queue.pool.on('start')
64 |
65 | when a job starts working
66 |
67 | #### queue.pool.on('finish')
68 |
69 | when a job finishes working
70 |
--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
1 | var test = require('tape')
2 | var createQueue = require('./')
3 |
// Six items through a single worker: 'finish' must fire only after all six
// have been processed.
test('process 6 normal items', function processSixItems (t) {
  var remaining = 6
  var items = ['a', 'b', 'c', 'd', 'e', 'f']
  var queue = createQueue(doWork, {concurrency: 1})

  queue.on('finish', function end () {
    t.equal(remaining, 0, 'pending is 0')
    t.end()
  })

  items.forEach(function enqueue (item) {
    queue.write(item)
  })
  queue.end()

  // worker: count the item down and complete immediately
  function doWork (data, cb) {
    console.error('processing', data)
    remaining--
    cb()
  }
})
27 |
// A worker error must surface as an 'error' event on the queue stream.
test('handle error', function handleError (t) {
  var remaining = 6
  var items = ['a', 'b', 'c', 'd', 'e', 'f']
  var queue = createQueue(doWork, {concurrency: 1})

  items.forEach(function enqueue (item) {
    queue.write(item)
  })
  queue.end()

  queue.on('error', function error (err) {
    t.equals(err.message, 'oh god the humanity', 'got error')
    t.end()
  })

  // worker: fail on the third item, succeed otherwise
  function doWork (data, cb) {
    console.error('processing', data)
    remaining--
    if (remaining === 3) {
      cb(new Error('oh god the humanity'))
      return
    }
    cb()
  }
})
52 |
// Two workers with staggered completion times: 'finish' must still wait for
// every job.
test('handle concurrency', function handleConcurrency (t) {
  var remaining = 6
  var items = ['a', 'b', 'c', 'd', 'e', 'f']
  var queue = createQueue(doWork, {concurrency: 2})

  items.forEach(function enqueue (item) {
    queue.write(item)
  })
  queue.end()

  queue.on('finish', function end () {
    t.equal(remaining, 0, 'pending is 0')
    t.end()
  })

  // worker: earlier items take longer, so jobs complete out of order
  function doWork (data, cb) {
    console.error('processing', data)
    remaining--
    setTimeout(cb, remaining * 100)
  }
})
76 |
--------------------------------------------------------------------------------
/worker.js:
--------------------------------------------------------------------------------
1 | var events = require('events')
2 | var inherits = require('inherits')
3 | var debug = require('debug')('atomic-queue-worker')
4 |
5 | module.exports = Worker
6 |
/**
 * Worker constructor (callable with or without `new`).
 *
 * A worker starts out available and keeps the function it will run for each
 * job.
 *
 * @param {Function} workFn - invoked by work() as workFn(data, done, change)
 */
function Worker (workFn) {
  var calledWithNew = this instanceof Worker
  if (!calledWithNew) return new Worker(workFn)

  this.available = true
  this.workFn = workFn

  events.EventEmitter.call(this)
}
13 |
14 | inherits(Worker, events.EventEmitter)
15 |
/**
 * Run the work function for one job.
 *
 * Marks the worker unavailable, emits 'start' (data, change), calls
 * `workFn(data, done, change)`, and when `done` is invoked marks the worker
 * available again, emits 'finish' (output, data, change) and calls `cb`.
 *
 * @param {*} data - the job payload
 * @param {Function} cb - called with (err, output) once the work function is
 *   done; `output` was previously dropped, so callers never received it
 * @param {Object} change - the change record for this job; `change.change` is
 *   used for debug logging
 */
Worker.prototype.work = function work (data, cb, change) {
  var self = this
  self.available = false
  this.emit('start', data, change)
  debug('start', change.change)
  this.workFn(data, function done (err, output) {
    self.available = true
    debug('finish', change.change)
    self.emit('finish', output, data, change)
    // forward the output as well as the error
    cb(err, output)
  }, change)
}
28 |
--------------------------------------------------------------------------------