├── .gitignore ├── LICENSE ├── README.md ├── bench.js ├── example.js ├── index.js ├── package.json └── worker.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | sandbox.js 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Hyperdivision ApS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fast-async-zlib 2 | 3 | Speed up zlib operations by running them using the sync APIs but in a [Worker](https://nodejs.org/api/worker_threads.html). 4 | 5 | ``` 6 | npm install fast-async-zlib 7 | ``` 8 | 9 | ## Usage 10 | 11 | Works similar to the core zlib module, except it uses a Worker to batch pending zips 12 | which can be quite faster than using the normal `zlib.gzip(data, cb)` API. 13 | 14 | ``` js 15 | const ZLibWorker = require('fast-async-zlib') 16 | 17 | const z = new ZLibWorker({ 18 | maxBatchBytes: 1024 * 1024 // how large a batch buffer should be used? (1 MB default) 19 | }) 20 | 21 | const buf = await z.gzip('some data') 22 | console.log('gzipped:', buf) 23 | ``` 24 | 25 | There is a small bench included that benches three approaches to zipping 100k ~1kb strings. 26 | On my laptop it produces the following result: 27 | 28 | ``` 29 | running bench 30 | using core sync: 3.383s 31 | using core async: 4.640s 32 | using worker: 2.870s 33 | re-running bench 34 | using core sync: 3.873s 35 | using core async: 4.843s 36 | using worker: 2.929s 37 | ``` 38 | 39 | Ie. `worker.gzip` is ~10% faster than `zlib.gzipSync` and ~40% faster than `zlib.gzip(data, cb)`. 40 | 41 | ## API 42 | 43 | #### `const z = new ZLibWorker([options])` 44 | 45 | Create a new worker instance. Will use a Worker thread in the background to run the actual gzip, using a SharedArrayBuffer to pass data back and fourth. 46 | Options include: 47 | 48 | ``` 49 | { 50 | maxBatch: 512, // how many entries to max batch to the worker 51 | maxBatchBytes: 1MB // how much memory to use for the shared array buffer 52 | } 53 | ``` 54 | 55 | Note that `maxBatchBytes` must be larger than largest payload you pass to `z.gzip(payload)`, 56 | otherwise that method will throw an exception. 57 | 58 | If this is a big problem to you, open an issue and we'll see if can make the buffer autogrow easily. 59 | 60 | #### `const buf = await z.gzip(inp)` 61 | 62 | Gzip a string or buffer using the worker. 63 | 64 | #### `z.destroy()` 65 | 66 | Fully destroy the worker. Only needed if you for some reason want to get rid of it while the program is running. 67 | 68 | #### `const pool = ZLibWorker.pool(size, [options])` 69 | 70 | Make a simple worker pool of the given size. 71 | Has the same API as the `ZLibWorker` but will use `size` workers behind the scenes to spread out the load. 72 | 73 | ## Future 74 | 75 | If you have a need for gunzip, inflate, deflate etc open an issue and we'll see about adding it. 76 | 77 | ## License 78 | 79 | MIT 80 | -------------------------------------------------------------------------------- /bench.js: -------------------------------------------------------------------------------- 1 | const zlib = require('zlib') 2 | const ZW = require('./') 3 | 4 | const b = require('crypto').randomBytes(800).toString('base64') 5 | 6 | const strings = new Array(100000) 7 | for (let i = 0; i < strings.length; i++) strings[i] = b 8 | 9 | start() 10 | 11 | async function start () { 12 | console.log('running bench') 13 | await sync() 14 | await async() 15 | await worker() 16 | console.log('re-running bench') 17 | await sync() 18 | await async() 19 | await worker() 20 | } 21 | 22 | function worker () { 23 | return new Promise(resolve => { 24 | const pending = strings.slice(0) 25 | let missing = pending.length 26 | 27 | console.time('using worker') 28 | const pool = ZW.pool(3) 29 | 30 | let max = pending.length 31 | 32 | while (pending.length && max-- > 0) { 33 | pool.gzip(pending.pop()).then(ondone) 34 | } 35 | 36 | function ondone (out) { 37 | missing-- 38 | if (pending.length) return pool.gzip(pending.pop()).then(ondone) 39 | if (!missing) { 40 | console.timeEnd('using worker') 41 | pool.destroy() 42 | resolve() 43 | } 44 | } 45 | }) 46 | } 47 | 48 | // sync 49 | function sync () { 50 | const pending = strings.slice(0) 51 | console.time('using core sync') 52 | while (pending.length) { 53 | zlib.gzipSync(pending.pop()) 54 | } 55 | console.timeEnd('using core sync') 56 | } 57 | 58 | // async 59 | function async () { 60 | return new Promise(resolve => { 61 | const pending = strings.slice(0) 62 | let missing = pending.length 63 | console.time('using core async') 64 | for (let i = 0; i < 20; i++) { 65 | work() 66 | } 67 | 68 | function work () { 69 | const next = pending.pop() 70 | zlib.gzip(next, ondone) 71 | } 72 | 73 | function ondone (_, data) { 74 | missing-- 75 | if (pending.length) return work() 76 | if (!missing) { 77 | console.timeEnd('using core async') 78 | resolve() 79 | } 80 | } 81 | }) 82 | } 83 | -------------------------------------------------------------------------------- /example.js: -------------------------------------------------------------------------------- 1 | const ZLib = require('./') 2 | const z = new ZLib() 3 | 4 | main() 5 | 6 | async function main () { 7 | const a = z.gzip('a') 8 | const b = z.gzip('b') 9 | const c = z.gzip('c') 10 | 11 | console.log(await a, await b, await c) 12 | } 13 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const { Worker } = require('worker_threads') 2 | const path = require('path') 3 | 4 | const GZIP_OVERHEAD = 30 5 | 6 | class WorkerPool { 7 | constructor (size, opts) { 8 | this.workers = new Array(size) 9 | this.tick = 0 10 | 11 | for (let i = 0; i < this.workers.length; i++) { 12 | this.workers[i] = new ZlibWorker(opts) 13 | } 14 | } 15 | 16 | gzip (inp) { 17 | return this.next().gzip(inp) 18 | } 19 | 20 | next () { 21 | if (this.tick === this.workers.length - 1) this.tick = 0 22 | else this.tick++ 23 | return this.workers[this.tick] 24 | } 25 | 26 | destroy () { 27 | for (const w of this.workers) w.destroy() 28 | } 29 | } 30 | 31 | class ZlibWorker { 32 | constructor (opts = {}) { 33 | const maxBatch = opts.maxBatch || 512 34 | const maxBatchBytes = opts.maxBatchBytes || 1024 * 1024 35 | const ints = maxBatch + 2 36 | 37 | this.destroyed = false 38 | 39 | this._maxBytes = maxBatchBytes 40 | this._maxBatch = maxBatch 41 | this._input = new SharedArrayBuffer(ints * 4 + maxBatchBytes) 42 | this._output = new SharedArrayBuffer(ints * 4 + (GZIP_OVERHEAD * maxBatch) + maxBatchBytes) 43 | this._batch = new Int32Array(this._input, 0, ints) 44 | this._inputData = Buffer.from(this._input, ints * 4) 45 | this._outputData = Buffer.from(this._output) 46 | 47 | this._worker = new Worker(path.join(__dirname, './worker.js'), { 48 | workerData: { 49 | input: this._input, 50 | output: this._output, 51 | maxBatch 52 | } 53 | }) 54 | 55 | this._pending = null 56 | this._size = 0 57 | this._batches = [] 58 | this._freeSpace = 0 59 | this._freeEntries = 0 60 | this._runQueued = false 61 | 62 | this._worker.on('message', () => { // batch done, signal message 63 | this._runQueued = false 64 | 65 | let offset = 0 66 | 67 | for (let i = 0; i < this._pending.length; i++) { 68 | const b = this._outputData.slice(offset, offset += this._batch[i + 2]) 69 | this._pending[i][1](Buffer.from(b)) 70 | } 71 | 72 | this._pending = null 73 | 74 | if (this._batches.length && !this._runQueued) { 75 | this._runQueued = true 76 | this._run() 77 | } else { 78 | this._maybeUnref() 79 | } 80 | }) 81 | 82 | this._worker.on('online', () => this._maybeUnref()) 83 | } 84 | 85 | _maybeUnref () { 86 | if (!this._pending && !this._batches.length) this._worker.unref() 87 | } 88 | 89 | get queued () { 90 | let queued = 0 91 | for (const b of this._batches) queued += b.length 92 | return queued 93 | } 94 | 95 | gzip (inp) { 96 | let resolve 97 | let reject 98 | 99 | return new Promise((res, rej) => { 100 | resolve = res 101 | reject = rej 102 | 103 | const len = inp.length 104 | 105 | if (this.destroyed) { 106 | return reject(new Error('Worker is destroyed')) 107 | } 108 | if (len >= this._maxBytes) { 109 | return reject(new Error('Input does not fit in buffer. Increase maxBatchBytes')) 110 | } 111 | 112 | if (this._freeSpace - len < 0 || !this._freeEntries) { 113 | this._freeEntries = this._maxBatch 114 | this._freeSpace = this._maxBytes 115 | this._batches.push([]) 116 | this._worker.ref() 117 | } 118 | 119 | this._batches[this._batches.length - 1].push([inp, resolve, reject]) 120 | this._freeSpace -= len 121 | this._freeEntries-- 122 | 123 | if (!this._runQueued) { 124 | this._runQueued = true 125 | process.nextTick(run, this) 126 | } 127 | }) 128 | } 129 | 130 | _run () { 131 | if (this.destroyed) return 132 | 133 | const inputs = this._pending = this._batches.shift() 134 | 135 | this._freeSpace = this._freeEntries = 0 136 | 137 | let offset = 0 138 | let i = 2 139 | 140 | for (const [b] of inputs) { 141 | if (typeof b === 'string') { 142 | const len = this._inputData.write(b, offset) 143 | offset += len 144 | this._batch[i++] = len 145 | } else { 146 | b.copy(this._inputData, offset) 147 | offset += b.length 148 | this._batch[i++] = b.length 149 | } 150 | } 151 | this._batch[0] = 1 // gzip 152 | this._batch[1] = inputs.length 153 | Atomics.notify(this._batch, 0, 1) 154 | } 155 | 156 | destroy () { 157 | if (this.destroyed) return 158 | this.destroyed = true 159 | 160 | this._worker.terminate() 161 | if (this._pending) rejectAll(this._pending, new Error('Worker is destroyed')) 162 | for (const b of this._batches) rejectAll(b, new Error('Worker is destroyed')) 163 | } 164 | 165 | static pool (size, opts) { 166 | return new WorkerPool(size, opts) 167 | } 168 | } 169 | 170 | module.exports = ZlibWorker 171 | 172 | function rejectAll (batch, err) { 173 | for (const [inp, resolve, reject] of batch) { 174 | reject(err) 175 | } 176 | } 177 | 178 | function run (self) { 179 | self._run() 180 | } 181 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fast-async-zlib", 3 | "version": "1.0.0", 4 | "description": "Speed up zlib operations by running them using the sync APIs but in a Worker", 5 | "main": "index.js", 6 | "dependencies": {}, 7 | "devDependencies": {}, 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/hyperdivision/fast-async-zlib.git" 11 | }, 12 | "author": "Mathias Buus (@mafintosh)", 13 | "license": "MIT", 14 | "bugs": { 15 | "url": "https://github.com/hyperdivision/fast-async-zlib/issues" 16 | }, 17 | "homepage": "https://github.com/hyperdivision/fast-async-zlib" 18 | } 19 | -------------------------------------------------------------------------------- /worker.js: -------------------------------------------------------------------------------- 1 | const { workerData, parentPort } = require('worker_threads') 2 | const zlib = require('zlib') 3 | 4 | const { input, output, maxBatch } = workerData 5 | const ints = maxBatch + 2 6 | 7 | const batch = new Int32Array(input, 0, ints) 8 | const inp = Buffer.from(input, ints * 4, input.byteLength - 4 * ints) 9 | const out = Buffer.from(output) 10 | 11 | while (true) { 12 | Atomics.wait(batch, 0, 0) 13 | 14 | // const method = batch[0] // only gzip support atm 15 | const batchEnd = batch[1] + 2 16 | 17 | let inOffset = 0 18 | let outOffset = 0 19 | 20 | for (let i = 2; i < batchEnd; i++) { 21 | const g = new zlib.Gzip() 22 | const chunk = inp.slice(inOffset, inOffset += batch[i]) 23 | g._outBuffer = out.slice(outOffset) 24 | const res = g._processChunk(chunk, g._finishFlushFlag) 25 | batch[i] = res.length 26 | outOffset += res.length 27 | } 28 | 29 | batch[0] = 0 30 | 31 | parentPort.postMessage(null) 32 | } 33 | --------------------------------------------------------------------------------