├── sample ├── data │ └── sample.wasm └── index.html ├── .gitignore ├── .gitmodules ├── src ├── native │ ├── xzwasm.h │ ├── xzwasm.c │ └── memcmp.c └── xzwasm.js ├── package.json ├── webpack.config.js ├── Makefile └── README.md /sample/data/sample.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SteveSanderson/xzwasm/HEAD/sample/data/sample.wasm -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | dist/ 3 | node_modules/ 4 | sample/lib/ 5 | sample/data/random* 6 | sample/data/sample.wasm.xz 7 | sample/data/sample.wasm-brotli.br 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "module/xz-embedded"] 2 | path = module/xz-embedded 3 | url = https://git.tukaani.org/xz-embedded.git 4 | [submodule "module/walloc"] 5 | path = module/walloc 6 | url = https://github.com/wingo/walloc.git 7 | -------------------------------------------------------------------------------- /src/native/xzwasm.h: --------------------------------------------------------------------------------
/*
 * Shared declarations for the xzwasm native glue code (see xzwasm.c).
 *
 * NOTE(review): the names of the two system headers below were lost in this
 * copy of the file. <stdint.h> and <stdlib.h> are the ones the visible code
 * needs (uint8_t, size_t, malloc/free) - confirm against the repository.
 */
#ifndef XZWASM_H
#define XZWASM_H

#include <stdint.h>
#include <stdlib.h>
#include "../../module/xz-embedded/linux/include/linux/xz.h"

/* Capacity, in bytes, of each of the fixed input and output staging buffers. */
#define BUFSIZE 65536

/*
 * State for one decompression stream.
 *
 * src/xzwasm.js reads this structure straight out of WebAssembly memory as an
 * array of 32-bit words: word 0 is bufsize, and words 1-5 are the leading
 * fields of b (in, in_pos, in_size, out, out_pos). Keep bufsize and b as the
 * first two members, in this order, or update the JavaScript side as well.
 */
typedef struct DecompressionContextStruct {
    size_t bufsize;       /* size of in[] and out[]; always BUFSIZE */
    struct xz_buf b;      /* cursor state handed to the decoder */
    struct xz_dec* s;     /* decoder state from xz_dec_init() */
    uint8_t in[BUFSIZE];  /* compressed bytes written by the caller */
    uint8_t out[BUFSIZE]; /* decompressed bytes produced by the decoder */
} DecompressionContext;

#endif /* XZWASM_H */
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "xzwasm", 3 | "version": "0.1.2", 4 | "description": "XZ decompression for the browser", 5 | "main": "dist/package/xzwasm.js", 6 | "scripts": { 7 | 
"test": "echo \"Error: no test specified\" && exit 1", 8 | "webpack": "webpack" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git+https://github.com/SteveSanderson/xzwasm.git" 13 | }, 14 | "keywords": [ 15 | "xz", 16 | "wasm" 17 | ], 18 | "author": "Steven Sanderson", 19 | "license": "MIT", 20 | "bugs": { 21 | "url": "https://github.com/SteveSanderson/xzwasm/issues" 22 | }, 23 | "homepage": "https://github.com/SteveSanderson/xzwasm#readme", 24 | "devDependencies": { 25 | "webpack": "^5.41.0", 26 | "webpack-cli": "^4.7.2" 27 | }, 28 | "files": [ 29 | "dist/package/**" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /src/native/xzwasm.c: --------------------------------------------------------------------------------
#include "xzwasm.h"

/* Non-zero once the CRC tables used by xz-embedded have been initialised. */
static int has_initialized = 0;

/*
 * Allocates a decompression context and its decoder state.
 *
 * The decoder is created in XZ_DYNALLOC mode with a dictionary limit of
 * 1 << 26 bytes (64 MiB). The input buffer starts empty and the output buffer
 * starts with its full BUFSIZE capacity available.
 *
 * Returns the new context, or NULL if either the context or the decoder state
 * could not be allocated. Release it with destroy_context().
 */
DecompressionContext* create_context(void) {
    if (!has_initialized) {
        has_initialized = 1;
        xz_crc32_init();
        xz_crc64_init();
    }

    DecompressionContext* context = malloc(sizeof *context);
    if (context == NULL) {
        return NULL;
    }

    context->bufsize = BUFSIZE;
    context->s = xz_dec_init(XZ_DYNALLOC, 1 << 26);
    if (context->s == NULL) {
        /* Do not hand out a context whose decoder is missing. */
        free(context);
        return NULL;
    }

    struct xz_buf *b = &context->b;
    b->in = context->in;
    b->in_pos = 0;
    b->in_size = 0;
    b->out = context->out;
    b->out_pos = 0;
    b->out_size = BUFSIZE;

    return context;
}

/*
 * Frees the decoder state and the context itself.
 * Passing NULL is allowed and does nothing.
 */
void destroy_context(DecompressionContext* context) {
    if (context == NULL) {
        return;
    }

    xz_dec_end(context->s);
    free(context);
}

/*
 * Tells the decoder that the caller has written in_size fresh bytes at the
 * start of context->in. The read position is rewound to 0.
 *
 * in_size is clamped to the buffer capacity so the decoder can never be told
 * to read past the end of context->in.
 */
void supply_input(DecompressionContext* context, size_t in_size) {
    struct xz_buf *b = &context->b;

    if (in_size > context->bufsize) {
        in_size = context->bufsize;
    }

    b->in_pos = 0;
    b->in_size = in_size;
}

/*
 * Runs the decoder once over the current input and output buffers and returns
 * its status code unchanged.
 *
 * An in_size of 0 is passed to the decoder as the "finish" flag, i.e. the
 * caller signals end of input by supplying zero bytes. This function never
 * resets out_pos; the caller does that after consuming the output.
 */
enum xz_ret get_next_output(DecompressionContext* context) {
    struct xz_buf *b = &context->b;

    return xz_dec_catrun(context->s, b, b->in_size == 0);
}
-------------------------------------------------------------------------------- /webpack.config.js: 
-------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const webpack = require('webpack'); 3 | const TerserPlugin = require('terser-webpack-plugin'); 4 | 5 | module.exports = { 6 | mode: 'none', 7 | entry: { 8 | 'xzwasm': './src/xzwasm.js', 9 | 'xzwasm.min': './src/xzwasm.js', 10 | }, 11 | devtool: false, 12 | output: { 13 | filename: '[name].js', 14 | path: path.resolve(__dirname, 'dist/package'), 15 | library: 'xzwasm', 16 | libraryTarget: 'umd', 17 | }, 18 | module: { 19 | rules: [{ 20 | test: /\.wasm/, 21 | type: 'asset/inline' 22 | }] 23 | }, 24 | optimization: { 25 | minimize: true, 26 | minimizer: [new TerserPlugin({ include: /\.min\.js$/, extractComments: false })] 27 | }, 28 | plugins: [ 29 | new webpack.BannerPlugin({ 30 | banner: '' 31 | + 'xzwasm (c) Steve Sanderson. License: MIT - https://github.com/SteveSanderson/xzwasm\n' 32 | + 'Contains xz-embedded by Lasse Collin and Igor Pavlov. License: Public domain - https://tukaani.org/xz/embedded.html\n' 33 | + 'and walloc (c) 2020 Igalia, S.L. License: MIT - https://github.com/wingo/walloc' 34 | }) 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /src/native/memcmp.c: --------------------------------------------------------------------------------
// NOTE(review): the names of the two system headers below were lost in this
// copy of the file. <stddef.h> and <stdint.h> provide the size_t, uint8_t and
// uintptr_t used here - confirm against the repository.
#include <stddef.h>
#include <stdint.h>

// We have to polyfill this because, even with WebAssembly bulk memory extensions,
// there's no built-in memcmp.
//
// Compares the first len bytes of s1 and s2 as unsigned chars.
// Returns 0 if they are equal, 1 if the first differing byte is larger in s1,
// and -1 if it is larger in s2.
int memcmp(const void *s1, const void *s2, size_t len)
{
    const unsigned char *p = s1;
    const unsigned char *q = s2;

    // Identical pointers always compare equal; skip the scan.
    if (s1 == s2) {
        return 0;
    }

    for (; len > 0; len--, p++, q++) {
        if (*p != *q) {
            return (*p > *q) ? 1 : -1;
        }
    }

    return 0;
}

// ------------------------------------------------------------------------------------------------
//
// The remainder would be provided by WebAssembly bulk memory extensions (if compiling with -mbulk-memory),
// but that's not yet supported by Safari, so provide our own. Note that Safari Technology Preview
// does support bulk memory, so these will be redundant soon.

// Copies size bytes from src to dest and returns dest. The regions may
// overlap: bytes are copied front-to-back when dest lies below src and
// back-to-front when it lies above, so no source byte is overwritten before
// it has been read.
void* memmove(void *dest, const void *src, size_t size)
{
    uint8_t *d = dest;
    const uint8_t *s = src;

    if (d < s) {
        for (size_t i = 0; i < size; ++i) {
            d[i] = s[i];
        }
    } else if (d > s) {
        for (size_t i = size; i-- > 0; ) {
            d[i] = s[i];
        }
    }

    return dest;
}

// Fills the first len bytes of s with (unsigned char) c and returns s.
void* memset(void *s, int c, size_t len) {
    unsigned char *dst = s;

    for (; len > 0; len--) {
        *dst++ = (unsigned char) c;
    }

    return s;
}

// Word type used for bulk copying. Reading and writing arbitrary objects
// through a plain `long *` breaks the strict-aliasing rule, so the type is
// marked may_alias where the compiler supports that attribute.
#if defined(__GNUC__) || defined(__clang__)
typedef long __attribute__((__may_alias__)) copy_word;
#else
typedef unsigned char copy_word;
#endif

// From https://stackoverflow.com/questions/17591624
//
// Copies len bytes from src to dst and returns dst. The regions must not
// overlap (use memmove for that).
void* memcpy(void *dst, const void *src, size_t len)
{
    unsigned char *d = dst;
    const unsigned char *s = src;

    // If both pointers are aligned on word boundaries, copy as many whole
    // words as fit; whatever is left over falls through to the byte loop.
    if ((uintptr_t)dst % sizeof(copy_word) == 0
        && (uintptr_t)src % sizeof(copy_word) == 0) {

        copy_word *dw = dst;
        const copy_word *sw = src;
        size_t words = len / sizeof(copy_word);

        for (size_t i = 0; i < words; i++) {
            dw[i] = sw[i];
        }

        size_t copied = words * sizeof(copy_word);
        d += copied;
        s += copied;
        len -= copied;
    }

    for (size_t i = 0; i < len; i++) {
        d[i] = s[i];
    }

    return dst;
}
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | xzdir := module/xz-embedded 2 | xzlibdir := $(xzdir)/linux/lib/xz 3 | 4 | ifeq ($(wasisdkroot),) 5 | $(error wasisdkroot is not set) 6 | endif 7 | 8 | ifeq ($(shell grep -o WSL2 /proc/version ),WSL2) 9 | # Runs a lot faster for me 10 | webpackcommand := cmd.exe /c npm run webpack 11 | else 12 | webpackcommand := npm run webpack 13 | endif 14 | 15 | .PHONY: all clean sample run-sample package 16 | 17 | all: dist/native/xzwasm.wasm sample/lib/*.* sample/data/random* 18 | 19 | package: dist/package/xzwasm.js 20 | 21 | dist/native/xzwasm.wasm: src/native/* $(xzdir)/**/* Makefile 22 | mkdir -p dist/native 23 | $(wasisdkroot)/bin/clang --sysroot=$(wasisdkroot)/share/wasi-sysroot \ 24 | --target=wasm32 -DNDEBUG -Os -s -nostdlib -Wl,--no-entry \ 25 | -DXZ_DEC_CONCATENATED -DXZ_USE_CRC64 \ 26 | -Wl,--export=create_context \ 27 | -Wl,--export=destroy_context \ 28 | -Wl,--export=supply_input \ 29 | -Wl,--export=get_next_output \ 30 | -o dist/native/xzwasm.wasm \ 31 | -I$(xzdir)/userspace/ \ 32 | -I$(xzdir)/linux/include/linux/ \ 33 | module/walloc/walloc.c \ 34 | src/native/*.c \ 35 | $(xzlibdir)/xz_crc32.c \ 36 | $(xzlibdir)/xz_crc64.c \ 37 | $(xzlibdir)/xz_dec_stream.c \ 38 | $(xzlibdir)/xz_dec_lzma2.c 39 | 40 | dist/package/xzwasm.js: webpack.config.js src/*.* dist/native/xzwasm.wasm package.json 41 | @$(webpackcommand) 42 | 43 | sample/lib/*.*: dist/package/xzwasm.js 44 | mkdir -p sample/lib 45 | cp 
dist/package/xzwasm.js sample/lib 46 | 47 | sample/data/random*: 48 | # Random data isn't very realistic. The sample wasm file compresses in a more relevant way. 49 | dd if=/dev/urandom of=sample/data/random-10K.bin bs=1K count=10 iflag=fullblock 50 | dd if=/dev/urandom of=sample/data/random-1M.bin bs=1M count=1 iflag=fullblock 51 | dd if=/dev/urandom of=sample/data/random-10M.bin bs=1M count=10 iflag=fullblock 52 | xz --check=crc64 -9 -k sample/data/random-10K.bin 53 | xz --check=crc32 -9 -k sample/data/random-1M.bin 54 | xz --check=crc32 -9 -k sample/data/random-10M.bin 55 | xz --check=crc32 -9 -k sample/data/sample.wasm 56 | brotli sample/data/random-10K.bin -o sample/data/random-10K.bin-brotli.br 57 | brotli sample/data/random-1M.bin -o sample/data/random-1M.bin-brotli.br 58 | brotli sample/data/random-10M.bin -o sample/data/random-10M.bin-brotli.br 59 | brotli sample/data/sample.wasm -o sample/data/sample.wasm-brotli.br 60 | 61 | run-sample: 62 | http-server sample/ -b 63 | 64 | clean: 65 | rm -rf dist 66 | rm -rf sample/lib 67 | rm sample/data/random* 68 | rm sample/data/sample.wasm.xz 69 | rm sample/data/sample.wasm-brotli.br 70 | -------------------------------------------------------------------------------- /sample/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | Perf test 5 |

6 | Scenario: 7 | 13 |

14 |

15 | 16 | Uncompressed file 17 | 18 |

19 |

20 | 21 | Brotli compressed 22 | 23 |

24 |

25 | 26 | XZ compressed 27 | 28 |

29 |
30 | 31 | 32 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/xzwasm.js: -------------------------------------------------------------------------------- 1 | import xzwasmBytes from '../dist/native/xzwasm.wasm'; 2 | 3 | const XZ_OK = 0; 4 | const XZ_STREAM_END = 1; 5 | 6 | class XzContext { 7 | constructor(moduleInstance) { 8 | this.exports = moduleInstance.exports; 9 | this.memory = this.exports.memory; 10 | this.ptr = this.exports.create_context(); 11 | this._refresh(); 12 | this.bufSize = this.mem32[0]; 13 | this.inStart = this.mem32[1] - this.ptr; 14 | this.inEnd = this.inStart + this.bufSize; 15 | this.outStart = this.mem32[4] - this.ptr; 16 | } 17 | 18 | supplyInput(sourceDataUint8Array) { 19 | const inBuffer = this.mem8.subarray(this.inStart, this.inEnd); 20 | inBuffer.set(sourceDataUint8Array, 0); 21 | this.exports.supply_input(this.ptr, sourceDataUint8Array.byteLength); 22 | this._refresh(); 23 | } 24 | 25 | getNextOutput() { 26 | const result = this.exports.get_next_output(this.ptr); 27 | this._refresh(); 28 | if (result !== XZ_OK && result !== XZ_STREAM_END) { 29 | throw new Error(`get_next_output failed with error code ${result}`); 30 | } 31 | const outChunk = this.mem8.subarray(this.outStart, this.outStart + /* outPos */ this.mem32[5]); 32 | return { outChunk, finished: result === XZ_STREAM_END }; 33 | } 34 | 35 | needsMoreInput() { 36 | return /* inPos */ this.mem32[2] === /* inSize */ this.mem32[3]; 37 | } 38 | 39 | outputBufferIsFull() { 40 | return /* outPos */ this.mem32[5] === this.bufSize; 41 | } 42 | 43 | resetOutputBuffer() { 44 | this.outPos = this.mem32[5] = 0; 45 | } 46 | 47 | dispose() { 48 | this.exports.destroy_context(this.ptr); 49 | this.exports = null; 50 | } 51 | 52 | _refresh() { 53 | if (this.memory.buffer !== this.mem8?.buffer) { 54 | this.mem8 = new Uint8Array(this.memory.buffer, this.ptr); 55 | this.mem32 = new Uint32Array(this.memory.buffer, this.ptr); 56 | } 57 | } 58 | } 
59 | 60 | export class XzReadableStream extends ReadableStream { 61 | static _moduleInstancePromise; 62 | static _moduleInstance; 63 | static async _getModuleInstance() { 64 | const wasmBytes = await (await fetch(xzwasmBytes)).arrayBuffer(); 65 | const wasmResponse = new Response(wasmBytes, { headers: { 'Content-Type': 'application/wasm' } }); 66 | const wasmOptions = {}; 67 | const module = typeof WebAssembly.instantiateStreaming === 'function' 68 | ? await WebAssembly.instantiateStreaming(wasmResponse, wasmOptions) 69 | : await WebAssembly.instantiate(await wasmResponse.arrayBuffer(), wasmOptions); 70 | XzReadableStream._moduleInstance = module.instance; 71 | } 72 | 73 | constructor(compressedStream) { 74 | let xzContext; 75 | let unconsumedInput = null; 76 | const compressedReader = compressedStream.getReader(); 77 | 78 | super({ 79 | async start(controller) { 80 | if (!XzReadableStream._moduleInstance) { 81 | await (XzReadableStream._moduleInstancePromise || (XzReadableStream._moduleInstancePromise = XzReadableStream._getModuleInstance())); 82 | } 83 | xzContext = new XzContext(XzReadableStream._moduleInstance); 84 | }, 85 | 86 | async pull(controller) { 87 | if (xzContext.needsMoreInput()) { 88 | if (unconsumedInput === null || unconsumedInput.byteLength === 0) { 89 | const { done, value } = await compressedReader.read(); 90 | if (!done) { 91 | unconsumedInput = value; 92 | } 93 | } 94 | const nextInputLength = Math.min(xzContext.bufSize, unconsumedInput.byteLength); 95 | xzContext.supplyInput(unconsumedInput.subarray(0, nextInputLength)); 96 | unconsumedInput = unconsumedInput.subarray(nextInputLength); 97 | } 98 | 99 | const nextOutputResult = xzContext.getNextOutput(); 100 | controller.enqueue(nextOutputResult.outChunk); 101 | xzContext.resetOutputBuffer(); 102 | 103 | if (nextOutputResult.finished) { 104 | xzContext.dispose(); // Not sure if this always happens 105 | controller.close(); 106 | } 107 | }, 108 | cancel() { 109 | xzContext.dispose(); // Not 
sure if this always happens 110 | return compressedReader.cancel(); 111 | } 112 | }); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xzwasm - XZ decompression for the browser 2 | 3 | This is a browser-compatible NPM package that can decompress XZ streams. You can use this if you want your web server to return XZ-encoded content and have your JavaScript code see the uncompressed data. **It's an alternative to Gzip or Brotli compression for HTTP responses.** 4 | 5 | Skip to: [Installation](#installation) or [How to use](#how-to-use) 6 | 7 | ## Why would anyone do this? 8 | 9 | Although Brotli is an excellent general-purpose compression algorithm, there are some kinds of data for which XZ gives better compression ratios. 10 | 11 | * Brotli is based around a large dictionary of web-related text snippets, and usually outperforms XZ for text-based content (e.g., HTML, JS, CSS, WOFF, etc.). 12 | * XZ (or rather, the underlying LZMA2 algorithm) is not so oriented around text content and - in my experiments - usually compresses bitcode (e.g., WebAssembly `.wasm` files, .NET `.dll` files) better than Brotli 13 | 14 | Example, in each case using the highest available compression level: 15 | 16 | | Scenario | Uncompressed | Gzip | Brotli | XZ | 17 | | -------- | ------------ | ---- | ------ | -- | 18 | | `.wasm` file | 2220KB | 894KB | 763KB | 744KB | 19 | | `.dll` file bundle | 3058KB | 1159KB | 988KB | 963KB | 20 | 21 | The main drawbacks to using XZ in the browser are: 22 | 23 | * You have to bundle your own decompressor. Using this `xzwasm` library adds slightly under 8KB to your site (assuming it's served minified and Brotli-compressed). 24 | * Decompressing XZ content takes more CPU time than decompressing Brotli. 
In part that's because of the overhead of WebAssembly vs pure native code, but in part it is inherent to the algorithms. As an estimate, the `.wasm` file from above takes my browser 21ms to decompress if served as Brotli, or 69ms if served as XZ and decompressed with `xzwasm`. 25 | 26 | So, you would only benefit from using XZ: 27 | 28 | * If the best available alternative is Gzip 29 | * Or, if you're serving very large bundles of bytecode. 30 | * Or, if you care a lot about *compression* (not decompression) speed. XZ can be 5-10x faster to compress than Brotli, especially at the highest compression level. 31 | 32 | **In most applications the added complexity of XZ via a custom decompressor library won't be worth the small bandwidth saving.** But it would be nice if browsers supported XZ natively. It's also a good demonstration of how a technology like WebAssembly can effectively extend the capabilities of a browser. 33 | 34 | ## Installation 35 | 36 | ### Option 1: As an NPM package 37 | 38 | ``` 39 | npm install --save xzwasm 40 | ``` 41 | 42 | You can then import things from `xzwasm` in your existing JavaScript/TypeScript files. Example: 43 | 44 | ```js 45 | import { XzReadableStream } from 'xzwasm'; 46 | ``` 47 | 48 | ### Option 2: As a plain `<script>` tag 49 | 50 | Reference the built `xzwasm.js` (or `xzwasm.min.js`) bundle from your page: 51 | 52 | ```html 53 | <script src="xzwasm.js"></script> 54 | ``` 55 | 56 | Your page will now have `xzwasm` in global scope. For example, you can call `new xzwasm.XzReadableStream(...)` - see below. 57 | 58 | ## How to use 59 | 60 | First, [install xzwasm into your project](#installation). 61 | 62 | Now, if you have an XZ-compressed stream, such as a `fetch` response body, you can get a decompressed response by wrapping it with `XzReadableStream`. 
Example: 63 | 64 | ```js 65 | const compressedResponse = await fetch('somefile.xz'); 66 | 67 | const decompressedResponse = new Response( 68 | new XzReadableStream(compressedResponse.body) 69 | ); 70 | 71 | // We now have a regular Response object, so can use standard APIs to parse its body data, 72 | // such as .text(), .json(), or .arrayBuffer(): 73 | const text = await decompressedResponse.text(); 74 | ``` 75 | 76 | The API is designed to be as JavaScript-standard as possible, so `XzReadableStream` is a [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) instance, which in turn means you can feed it into a [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response), and in turn get a blob, an ArrayBuffer, JSON data, or anything else that your browser can do with a `Response`. 77 | 78 | **Note:** If you're using a `