├── .editorconfig ├── .gitattributes ├── .github └── workflows │ └── publish.yml ├── .gitignore ├── .travis.yml ├── dictionary.d.ts ├── dictionary.js ├── index.d.ts ├── index.js ├── license ├── package-lock.json ├── package.json ├── readme.md └── test ├── compatto.js ├── dictionary.js └── snapshots ├── dictionary.js.md └── dictionary.js.snap /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | end_of_line = lf 6 | charset = utf-8 7 | trim_trailing_whitespace = true 8 | insert_final_newline = true 9 | 10 | [*.yml] 11 | indent_style = space 12 | indent_size = 2 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | publish-npm: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: actions/setup-node@v1 13 | with: 14 | node-version: 14 15 | registry-url: https://registry.npmjs.org/ 16 | - run: npm ci 17 | - run: npm run transpile 18 | - run: npm publish --access public 19 | env: 20 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | yarn.lock 3 | yarn-error.log 4 | .nyc_output 5 | coverage 6 | cjs 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | node_js: 4 | - "14" 5 | 6 
| env: 7 | global: 8 | secure: ZUnK3J9J4Y2avSEiKfUWBSYBJN7Z9Fta/Ipf5LEc6yamPbwl3uYZgHf1kGHjzyWoPwUnFwKXmaO+L4MB/K+Q3pZi72jyBxS6WcPj7O2S8BWE2nLRN429YbP/Gegtx4VWS1ecw1Swczx4couvQo8RJakqBoMrWsTiGmB0Rj8JKIUWQopfqd+eMlnSha8Ce7GI16LLbmKt9jNj2LtZTaoib0Xz8bUprV7Sa6GNRxAHIc29YR9r9N3ORA0BgvrDv5DdRdCJUx73aRBSelqko5yJ3YPgPHfssmGbiZ53UVEgKsS5ECjqaTNCvp+no6jLjuYhPSuTjeeDIz3a1P2JL2Cz3mWhQW4yOuXaAOzLxAmaW1pVlVTNxQWA+7KGIsiiTSqea1H6JAYrBjXUmivQ3QoPm5utWQBuT/9v73Wmiex4BQa16ZXAbw01YP+MjyoL+JMWrA6d0ac2+0Tn4Dm3ac2tuqTEf0Y4HFMHUhyH9/XLOSbRr5oOrWfUQ8Wbyme/ZKn5Sx006yYkSpsCPgPvSWDnpgyYztOy1GEB+x0jke/pdNPN2lv6HV6QNzSHPe9vMBvqgI40DHskFJ7tGRW4G7cKuo4G5x/aPZV691nOdMQNOT2N+ZKNHJ90UzuSxabOz3bcpVA1vYToXdVPlGMQkPOazqFNe6Bfy2GlRvhvFJG8fcg= 9 | 10 | after_success: npm run coverage 11 | -------------------------------------------------------------------------------- /dictionary.d.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * `compatto`'s standard dictionary. 3 | * Even if it is the standard one it must be explicitly set by the user. 
4 | * 5 | * @example 6 | * import { dictionary } from 'compatto/dictionary' 7 | * import { compatto } from 'compatto' 8 | * 9 | * compatto({ dictionary }) 10 | * => { compress, decompress } 11 | */ 12 | export const dictionary: string[] 13 | -------------------------------------------------------------------------------- /dictionary.js: -------------------------------------------------------------------------------- 1 | export const dictionary = [ 2 | ' ', 3 | 'the', 4 | 'e', 5 | 't', 6 | 'a', 7 | 'of', 8 | 'o', 9 | 'and', 10 | 'i', 11 | 'n', 12 | 's', 13 | 'e ', 14 | 'r', 15 | ' th', 16 | ' t', 17 | 'in', 18 | 'he', 19 | 'th', 20 | 'h', 21 | 'he ', 22 | 'to', 23 | '\r\n', 24 | 'l', 25 | 's ', 26 | 'd', 27 | ' a', 28 | 'an', 29 | 'er', 30 | 'c', 31 | ' o', 32 | 'd ', 33 | 'on', 34 | ' of', 35 | 're', 36 | 'of ', 37 | 't ', 38 | ', ', 39 | 'is', 40 | 'u', 41 | 'at', 42 | ' ', 43 | 'n ', 44 | 'or', 45 | 'which', 46 | 'f', 47 | 'm', 48 | 'as', 49 | 'it', 50 | 'that', 51 | '\n', 52 | 'was', 53 | 'en', 54 | ' ', 55 | ' w', 56 | 'es', 57 | ' an', 58 | ' i', 59 | '\r', 60 | 'f ', 61 | 'g', 62 | 'p', 63 | 'nd', 64 | ' s', 65 | 'nd ', 66 | 'ed ', 67 | 'w', 68 | 'ed', 69 | 'http://', 70 | 'for', 71 | 'te', 72 | 'ing', 73 | 'y ', 74 | 'The', 75 | ' c', 76 | 'ti', 77 | 'r ', 78 | 'his', 79 | 'st', 80 | ' in', 81 | 'ar', 82 | 'nt', 83 | ',', 84 | ' to', 85 | 'y', 86 | 'ng', 87 | ' h', 88 | 'with', 89 | 'le', 90 | 'al', 91 | 'to ', 92 | 'b', 93 | 'ou', 94 | 'be', 95 | 'were', 96 | ' b', 97 | 'se', 98 | 'o ', 99 | 'ent', 100 | 'ha', 101 | 'ng ', 102 | 'their', 103 | '"', 104 | 'hi', 105 | 'from', 106 | ' f', 107 | 'in ', 108 | 'de', 109 | 'ion', 110 | 'me', 111 | 'v', 112 | '.', 113 | 've', 114 | 'all', 115 | 're ', 116 | 'ri', 117 | 'ro', 118 | 'is ', 119 | 'co', 120 | 'f t', 121 | 'are', 122 | 'ea', 123 | '. 
', 124 | 'her', 125 | ' m', 126 | 'er ', 127 | ' p', 128 | 'es ', 129 | 'by', 130 | 'they', 131 | 'di', 132 | 'ra', 133 | 'ic', 134 | 'not', 135 | 's, ', 136 | 'd t', 137 | 'at ', 138 | 'ce', 139 | 'la', 140 | 'h ', 141 | 'ne', 142 | 'as ', 143 | 'tio', 144 | 'on ', 145 | 'n t', 146 | 'io', 147 | 'we', 148 | ' a ', 149 | 'om', 150 | ', a', 151 | 's o', 152 | 'ur', 153 | 'li', 154 | 'll', 155 | 'ch', 156 | 'had', 157 | 'this', 158 | 'e t', 159 | 'g ', 160 | 'e\r\n', 161 | ' wh', 162 | 'ere', 163 | ' co', 164 | 'e o', 165 | 'a ', 166 | 'us', 167 | ' d', 168 | 'ss', 169 | '\n\r\n', 170 | '\r\n\r', 171 | '="', 172 | ' be', 173 | ' e', 174 | 's a', 175 | 'ma', 176 | 'one', 177 | 't t', 178 | 'or ', 179 | 'but', 180 | 'el', 181 | 'so', 182 | 'l ', 183 | 'e s', 184 | 's,', 185 | 'no', 186 | 'ter', 187 | ' wa', 188 | 'iv', 189 | 'ho', 190 | 'e a', 191 | ' r', 192 | 'hat', 193 | 's t', 194 | 'ns', 195 | 'ch ', 196 | 'wh', 197 | 'tr', 198 | 'ut', 199 | '/', 200 | 'have', 201 | 'ly ', 202 | 'ta', 203 | ' ha', 204 | ' on', 205 | 'tha', 206 | '-', 207 | ' l', 208 | 'ati', 209 | 'en ', 210 | 'pe', 211 | ' re', 212 | 'there', 213 | 'ass', 214 | 'si', 215 | ' fo', 216 | 'wa', 217 | 'ec', 218 | 'our', 219 | 'who', 220 | 'its', 221 | 'z', 222 | 'fo', 223 | 'rs', 224 | '>', 225 | 'ot', 226 | 'un', 227 | '<', 228 | 'im', 229 | 'th ', 230 | 'nc', 231 | 'ate', 232 | '><', 233 | 'ver', 234 | 'ad', 235 | ' we', 236 | 'ly', 237 | 'ee', 238 | ' n', 239 | 'id', 240 | ' cl', 241 | 'ac', 242 | 'il', 243 | ' Uint8Array [ 155, 56, 172, 62, 195, 70 ] 37 | */ 38 | compress(string: string): Uint8Array 39 | 40 | /** 41 | * Decompress the return value `compress()` back to a human-readable string. 42 | * 43 | * @param {Uint8Array} bytes A correctly-compressed array of bytes. 44 | * @returns {string} The decompressed string. 45 | * @throws `DecompressError` if the `bytes` parameter is not correct. 
46 | * 47 | * @example 48 | * import { dictionary } from 'compatto/dictionary' 49 | * import { compatto } from 'compatto' 50 | * 51 | * const { decompress } = compatto({ dictionary }) 52 | * 53 | * decompress(Uint8Array.of(...[ 155, 56, 172, 62, 195, 70 ])) 54 | * => 'this is a string' 55 | */ 56 | decompress(bytes: Uint8Array): string 57 | } 58 | 59 | /** 60 | * Create a new object, that implements the `Compatto` interface, with some defaults overridden with your own. 61 | * 62 | * @param {Options} options An object containing the new options. 63 | * @returns The newly created object that uses the provided options. 64 | * @throws `TypeError` if the dictionary is not an array of strings with a maximum length of 254. 65 | * 66 | * @example 67 | * import { dictionary } from 'compatto/dictionary' 68 | * import { compatto } from 'compatto' 69 | * 70 | * const { compress, decompress } = compatto({ dictionary }) 71 | */ 72 | export const compatto: (options: Options) => Compatto 73 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const { push } = Array.prototype 2 | const { min } = Math 3 | 4 | const textEncoder = new TextEncoder() 5 | const textDecoder = new TextDecoder() 6 | 7 | export class DecompressError extends Error { 8 | constructor(message) { 9 | super(message) 10 | 11 | this.name = 'DecompressError' 12 | } 13 | } 14 | 15 | const createMessage = (...messages) => { 16 | return messages.join(' ') 17 | } 18 | 19 | const createTrie = (strings) => { 20 | const trieRoot = new Map() 21 | 22 | for (const [index, string] of strings.entries()) { 23 | let trie = trieRoot 24 | 25 | const characters = string[Symbol.iterator]() 26 | 27 | for (const character of characters) { 28 | let branch = trie.get(character) 29 | 30 | if (!branch) { 31 | const newBranch = new Map() 32 | 33 | trie.set(character, newBranch) 34 | 35 | branch = newBranch 36 | } 37 | 
38 | trie = branch 39 | } 40 | 41 | trie.value = index 42 | } 43 | 44 | return trieRoot 45 | } 46 | 47 | const retrieveWord = (trie, characters, characterAt) => { 48 | let trieNode = trie 49 | let index = characterAt 50 | 51 | const retrievedCharacters = [] 52 | 53 | do { 54 | trieNode = trieNode.get(characters[index]) 55 | 56 | if (trieNode) { 57 | retrievedCharacters.push({ 58 | character: characters[index], 59 | value: trieNode.value, 60 | index 61 | }) 62 | 63 | index += 1 64 | } 65 | } while (trieNode && characters[index]) 66 | 67 | let retrievedIndex = retrievedCharacters.length - 1 68 | 69 | while ( 70 | retrievedIndex > 0 && 71 | retrievedCharacters[retrievedIndex].value === undefined 72 | ) { 73 | retrievedIndex -= 1 74 | } 75 | 76 | return [ 77 | retrievedCharacters[retrievedIndex] && 78 | retrievedCharacters[retrievedIndex].value, 79 | ((retrievedCharacters[retrievedIndex] && 80 | retrievedCharacters[retrievedIndex].index) || 81 | characterAt) + 1 82 | ] 83 | } 84 | 85 | const flushVerbatim = (verbatim, bytesToRemove) => { 86 | const chunk = [] 87 | 88 | if (bytesToRemove > 1) { 89 | chunk.push(255, bytesToRemove - 1) 90 | } else { 91 | chunk.push(254) 92 | } 93 | 94 | push.apply(chunk, verbatim.splice(0, bytesToRemove)) 95 | 96 | return chunk 97 | } 98 | 99 | const compress = (string, trie) => { 100 | const characters = [...string] 101 | const charactersLength = characters.length 102 | 103 | const verbatim = [] 104 | let characterIndex = 0 105 | 106 | const bytes = [] 107 | 108 | while (characterIndex < charactersLength) { 109 | const [byte, nextIndex] = retrieveWord(trie, characters, characterIndex) 110 | 111 | // eslint-disable-next-line no-negated-condition 112 | if (byte !== undefined) { 113 | while (verbatim.length > 0) { 114 | push.apply(bytes, flushVerbatim(verbatim, min(256, verbatim.length))) 115 | } 116 | 117 | bytes.push(byte) 118 | } else { 119 | push.apply(verbatim, textEncoder.encode(characters[characterIndex])) 120 | 121 | while 
(verbatim.length >= 256) { 122 | push.apply(bytes, flushVerbatim(verbatim, 256)) 123 | } 124 | } 125 | 126 | characterIndex = nextIndex 127 | } 128 | 129 | if (verbatim.length > 0) { 130 | push.apply(bytes, flushVerbatim(verbatim, verbatim.length)) 131 | } 132 | 133 | try { 134 | return Uint8Array.of(...bytes) 135 | } catch { 136 | return new Uint8Array(bytes) 137 | } 138 | } 139 | 140 | const handleChunk254 = (bytes, index, bytesLength) => { 141 | const byteIndex = index + 1 142 | 143 | if (byteIndex >= bytesLength) { 144 | throw new DecompressError( 145 | createMessage( 146 | `The \`bytes\` argument is malformed because it has ${bytesLength} elements.`, 147 | `It wants to read at index ${byteIndex}.` 148 | ) 149 | ) 150 | } 151 | 152 | return [bytes.slice(byteIndex, byteIndex + 1), byteIndex + 1] 153 | } 154 | 155 | const handleChunk255 = (bytes, index, bytesLength) => { 156 | const bytesCountIndex = index + 1 157 | 158 | if (bytesCountIndex >= bytesLength) { 159 | throw new DecompressError( 160 | createMessage( 161 | `The \`bytes\` argument is malformed because it has ${bytesLength} elements.`, 162 | `It wants to read at index ${bytesCountIndex}.` 163 | ) 164 | ) 165 | } 166 | 167 | const verbatimStart = bytesCountIndex + 1 168 | const verbatimEnd = bytesCountIndex + bytes[bytesCountIndex] + 2 169 | 170 | if (verbatimEnd > bytesLength) { 171 | throw new DecompressError( 172 | createMessage( 173 | `The \`bytes\` argument is malformed because it has ${bytesLength} elements.`, 174 | `It wants to read from index ${verbatimStart} to ${verbatimEnd}.` 175 | ) 176 | ) 177 | } 178 | 179 | return [bytes.slice(verbatimStart, verbatimEnd), verbatimEnd] 180 | } 181 | 182 | const decompress = (bytes, dictionary) => { 183 | const bytesLength = bytes.length 184 | let index = 0 185 | 186 | let string = '' 187 | 188 | while (index < bytesLength) { 189 | const chunk = bytes[index] 190 | 191 | if (chunk === 254) { 192 | const [byteToDecode, nextIndex] = handleChunk254( 193 | bytes, 
194 | index, 195 | bytesLength 196 | ) 197 | 198 | string += textDecoder.decode(byteToDecode) 199 | 200 | index = nextIndex 201 | 202 | continue 203 | } 204 | 205 | if (chunk === 255) { 206 | const chunkToDecode = [] 207 | 208 | do { 209 | const [bytesToDecode, nextIndex] = handleChunk255( 210 | bytes, 211 | index, 212 | bytesLength 213 | ) 214 | 215 | push.apply(chunkToDecode, bytesToDecode) 216 | 217 | index = nextIndex 218 | } while (bytes[index] === 255) 219 | 220 | if (bytes[index] === 254) { 221 | const [byteToDecode, nextIndex] = handleChunk254( 222 | bytes, 223 | index, 224 | bytesLength 225 | ) 226 | 227 | chunkToDecode.push(byteToDecode) 228 | 229 | index = nextIndex 230 | } 231 | 232 | string += textDecoder.decode(Uint8Array.of(...chunkToDecode)) 233 | 234 | continue 235 | } 236 | 237 | string += dictionary[chunk] 238 | 239 | index += 1 240 | } 241 | 242 | return string 243 | } 244 | 245 | export const compatto = ({ dictionary } = {}) => { 246 | if (!Array.isArray(dictionary) || dictionary.length > 254) { 247 | throw new TypeError( 248 | createMessage( 249 | 'The `dictionary` option must be an array with at most 254 elements.', 250 | Array.isArray(dictionary) 251 | ? `It has ${dictionary.length} elements.` 252 | : `It is \`${ 253 | dictionary === undefined || dictionary === null 254 | ? 
dictionary 255 | : dictionary.constructor.name 256 | }\`.` 257 | ) 258 | ) 259 | } 260 | 261 | const trie = createTrie(dictionary) 262 | 263 | return { 264 | compress(string) { 265 | if (typeof string !== 'string') { 266 | throw new TypeError( 267 | createMessage( 268 | 'The `string` argument must be of type ’string’.', 269 | `Its type is \`${typeof string}\`.` 270 | ) 271 | ) 272 | } 273 | 274 | return compress(string, trie) 275 | }, 276 | decompress(bytes) { 277 | if ( 278 | bytes === undefined || 279 | bytes === null || 280 | bytes.constructor.name !== 'Uint8Array' 281 | ) { 282 | throw new TypeError( 283 | createMessage( 284 | 'The `buffer` argument must be an instance of ’Uint8Array’.', 285 | `It is ${ 286 | bytes === undefined || bytes === null 287 | ? `\`${bytes}\`` 288 | : `an instance of \`${bytes.constructor.name}\`` 289 | }.` 290 | ) 291 | ) 292 | } 293 | 294 | return decompress(bytes, dictionary) 295 | } 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Raul Macarie (macarie.me) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "compatto", 3 | "version": "2.1.5", 4 | "description": "Tiny and fast string compression library with Unicode support", 5 | "license": "MIT", 6 | "type": "module", 7 | "main": "./cjs/index.cjs", 8 | "exports": { 9 | ".": { 10 | "import": "./index.js", 11 | "require": "./cjs/index.cjs" 12 | }, 13 | "./dictionary": { 14 | "import": "./dictionary.js", 15 | "require": "./cjs/dictionary.cjs" 16 | } 17 | }, 18 | "types": "./index.d.ts", 19 | "repository": { 20 | "type": "git", 21 | "url": "git+https://github.com/macarie/compatto.git" 22 | }, 23 | "bugs": { 24 | "url": "https://github.com/macarie/compatto/issues" 25 | }, 26 | "homepage": "https://github.com/macarie/compatto#readme", 27 | "author": { 28 | "name": "Raul Macarie", 29 | "email": "raul@macarie.me", 30 | "url": "https://macarie.me" 31 | }, 32 | "engines": { 33 | "node": ">=11" 34 | }, 35 | "scripts": { 36 | "test:xo": "xo", 37 | "test:ava": "c8 ava --verbose", 38 | "test": "run-p -s test:*", 39 | "coverage": "c8 report --reporter=text-lcov | codecov --pipe --disable=gcov", 40 | "transpile": "esbuild --format=cjs --platform=node --outdir=cjs --out-extension:.js=.cjs ./index.js ./dictionary.js" 41 | }, 42 | "files": [ 43 | "index.js", 44 | "index.d.ts", 45 | "dictionary.js", 46 | "dictionary.d.ts", 47 | "cjs/index.cjs", 48 | "cjs/dictionary.cjs" 49 | ], 50 | "keywords": [ 51 | "smaz", 52 | "data", 53 | "string", 54 | "strings", 55 | "archive", 56 | "gzip", 57 | "zip", 58 | "tar", 59 | "compress", 60 | "compression", 61 | "decompress", 62 | "decompression", 63 | 
"dictionary", 64 | "minify", 65 | "minifier", 66 | "browser" 67 | ], 68 | "devDependencies": { 69 | "ava": "^3.15.0", 70 | "c8": "^7.7.1", 71 | "codecov": "^3.8.1", 72 | "esbuild": "^0.11.16", 73 | "npm-run-all": "^4.1.5", 74 | "prettier": "^2.2.1", 75 | "xo": "^0.39.1" 76 | }, 77 | "xo": { 78 | "prettier": true, 79 | "rules": { 80 | "unicorn/import-index": [ 81 | "off" 82 | ], 83 | "import/extensions": [ 84 | "off" 85 | ], 86 | "import/no-useless-path-segments": [ 87 | "off" 88 | ], 89 | "unicorn/no-fn-reference-in-iterator": [ 90 | "off" 91 | ] 92 | }, 93 | "ignores": [ 94 | "*.ts" 95 | ] 96 | }, 97 | "prettier": { 98 | "semi": false, 99 | "bracketSpacing": true, 100 | "useTabs": true, 101 | "endOfLine": "lf" 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # 📮 [compatto](https://github.com/macarie/compatto) [![Release Version](https://img.shields.io/npm/v/compatto.svg?label=&color=0080FF)](https://www.npmjs.com/package/compatto) 2 | 3 | > Compatto is a tiny and fast compression library with Unicode support, that works well with small strings too 4 | 5 | [![Build Status](https://img.shields.io/travis/com/macarie/compatto)](https://travis-ci.com/macarie/compatto) [![Coverage Status](https://img.shields.io/codecov/c/github/macarie/compatto)](https://codecov.io/gh/macarie/compatto/) 6 | [![License](https://img.shields.io/npm/l/compatto?color=42cdad)](https://github.com/macarie/compatto/blob/master/license) 7 | 8 | Compatto is based on [antirez](https://github.com/antirez/)'s [smaz](https://github.com/antirez/smaz) concept. It targets modern browsers and Node.js. For older browsers and Node.js versions, you will need to transpile and use a [`TextEncoder`](https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder) and [`TextDecoder`](https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder) polyfill. 
9 | 10 | ## Features 11 | 12 | - ⏱ [Very fast](#performance) to compress, even faster to decompress 13 | - 🍯 Support for Unicode characters, like emojis 14 | - 🗄 User-definable dictionary 15 | 16 | ## Compression ratio 17 | 18 | Because compatto is a dictionary-based compression algorithm, its compression ratio is heavily influenced by the dictionary one uses. 19 | 20 | With the default dictionary the compression ratio is around `1.67` for [The Great Gatsby](http://gutenberg.net.au/ebooks02/0200041.txt): it is compressed from `269,716` bytes to just `161,583` bytes, in `70ms`. A simple string like `this is a string`, though, is compressed from `16` bytes to `6`, so the compression ratio would be `2.67`... Results may vary, I guess 😅 21 | 22 | ## Install 23 | 24 | ```console 25 | $ npm install compatto 26 | ``` 27 | 28 | Or if you prefer using Yarn: 29 | 30 | ```console 31 | $ yarn add compatto 32 | ``` 33 | 34 | ## Usage 35 | 36 | ```javascript 37 | import { dictionary } from "compatto/dictionary" 38 | import { compatto, DecompressError } from "compatto" 39 | 40 | const { compress, decompress } = compatto({ dictionary }) 41 | 42 | const compressedString = compress("this is a string") 43 | // => Uint8Array [ 155, 56, 172, 62, 195, 70 ] 44 | 45 | const decompressedString = decompress(compressedString) 46 | // => 'this is a string' 47 | ``` 48 | 49 | ## API 50 | 51 | ### compatto(options) 52 | 53 | Create a new object that implements the [`Compatto`](#compatto) interface, using the options you provide. 54 | 55 | #### options 56 | 57 | Type: `object` 58 | 59 | ##### dictionary 60 | 61 | Type: `string[]` 62 | 63 | A dictionary used to compress and decompress strings. If its length is greater than `254` a `TypeError` will be thrown. 64 | 65 | _Please note that, as of `v2.0`, this option has no default value, the user has to explicitly pass it._ 66 | 67 | ### Compatto 68 | 69 | Compatto is an interface that has two methods: [`compress()`](#compressstring) and [`decompress()`](#decompressbytes).
70 | 71 | The returned value of [`compatto()`](#compattooptions) implements this interface. 72 | 73 | #### compress(string) 74 | 75 | Compress a string into an array of bytes, returned as an instance of `Uint8Array`. 76 | 77 | Throws a `TypeError` if the argument is not the correct type. 78 | 79 | ##### string 80 | 81 | Type: `string` 82 | 83 | A string to compress. 84 | 85 | #### decompress(bytes) 86 | 87 | Decompress an instance of `Uint8Array` to the original, uncompressed, string. 88 | 89 | Throws a `TypeError` if the argument is not the correct type. 90 | 91 | Throws a `DecompressError` if the buffer is not correctly encoded. It can be imported along with `compatto()` if you want to check if the error thrown is an instance of this class. 92 | 93 | ##### bytes 94 | 95 | Type: `Uint8Array` 96 | 97 | An array of bytes representing a compressed string. 98 | 99 | _Please note that if the dictionary used to compress a string is not the same used to decompress the generated buffer, the result of the decompression will most likely **not** be correct._ 100 | 101 | ### dictionary 102 | 103 | Type: `string[]` 104 | 105 | This is compatto's standard dictionary. Remember that even if it is the _standard_ one, it must be explicitly set by the user! 106 | 107 | ## Performance 108 | 109 | Since `v2.0`, compatto generates a trie from the dictionary that is used to compress every string. Before `v2.0`, compatto tried to get a substring as long as the longest word in the dictionary and see if that substring was in it. If it wasn't, it tried again with a substring that was one character shorter, and so on until the substring was one character. 110 | 111 | For compressible strings it was not _that_ slow, but if a word had characters that were not inside the dictionary that approach was _really_ slow! 
112 | 113 | This implementation change gave compatto a big performance boost 🚌💨 114 | 115 | In `v2.1` the `compress()` algorithm was simplified, thus leading to a performance improvement of about 20% compared to `v2.0` 🐌 116 | 117 | Below is a little table that indicates `compress()`'s performance improvements over the various versions. The file used to test the library is `/usr/share/dict/words`: in the first row, the file was split on `\n`, while in the second row the whole file was used as a long piece of text. 118 | 119 | | Data | v1.0 | v2.0 | v2.1 | 120 | | :------------- | :------: | :------: | :------: | 121 | | 235,887 words | `~500ms` | `~370ms` | `~295ms` | 122 | | 2.5MB raw text | `~700ms` | `~465ms` | `~365ms` | 123 | 124 | As you can see the performance improved a lot: now compressing a lot of small words takes about 40% less time, and almost 50% less to compress a long piece of text if we keep `v1.0` as reference! 125 | 126 | Is there room for improvement? **Absolutely**! I guess that the compression algorithm can be further improved, and keep in mind that I didn't have time to do code profiling. 127 | 128 | ## Browser support 129 | 130 | The latest versions of Chrome, Firefox, Safari, and Edge. 131 | 132 | ## Node.js support 133 | 134 | Compatto requires Node.js 11 or later. 135 | 136 | ## Related 137 | 138 | - [hex-my-bytes](https://github.com/macarie/hex-my-bytes) - Display byte sequences as strings of hexadecimal digits.
139 | -------------------------------------------------------------------------------- /test/compatto.js: -------------------------------------------------------------------------------- 1 | import { readFileSync } from 'fs' 2 | 3 | import test from 'ava' 4 | 5 | import { dictionary } from '../dictionary.js' 6 | import { compatto, DecompressError } from '../index.js' 7 | 8 | const { compress, decompress } = compatto({ dictionary }) 9 | 10 | const words = readFileSync('/usr/share/dict/words', 'utf-8') 11 | 12 | test('`compress()` basic functionality', (t) => { 13 | const buffer = compress('this is a string') 14 | 15 | t.deepEqual(buffer, Uint8Array.of(...[155, 56, 172, 62, 195, 70])) 16 | }) 17 | 18 | test('`compress()` should work with unicode characters', (t) => { 19 | const buffer = compress('this is a string 👍🏼') 20 | 21 | t.deepEqual( 22 | buffer, 23 | Uint8Array.of( 24 | ...[ 25 | 155, 26 | 56, 27 | 172, 28 | 62, 29 | 195, 30 | 70, 31 | 0, 32 | 255, 33 | 7, 34 | 240, 35 | 159, 36 | 145, 37 | 141, 38 | 240, 39 | 159, 40 | 143, 41 | 188 42 | ] 43 | ) 44 | ) 45 | }) 46 | 47 | test('`compress()` should work with large inputs', (t) => { 48 | t.notThrows(() => { 49 | compress(words) 50 | }) 51 | }) 52 | 53 | test('`compress()` should flush the verbatim buffer when it gets to 256 elements', (t) => { 54 | const compressed = compress('='.repeat(260)) 55 | 56 | t.is(compressed[0], 255) 57 | t.is(compressed[1], 255) 58 | t.is(compressed[258], 255) 59 | }) 60 | 61 | test('`compress()` cannot use an argument that is not a string', (t) => { 62 | t.throws(() => { 63 | compress(['hello']) 64 | }) 65 | }) 66 | 67 | test('`decompress()` basic functionality', (t) => { 68 | const string = decompress(Uint8Array.of(...[155, 56, 172, 62, 195, 70])) 69 | 70 | t.is(string, 'this is a string') 71 | }) 72 | 73 | test('`decompress()` should work with unicode characters', (t) => { 74 | const string = 'this is a string 🙌🏼, but does it work? 
🕵🏼‍♂️' 75 | const compressed = compress(string) 76 | const decompressed = decompress(compressed) 77 | 78 | t.is(decompressed, string) 79 | }) 80 | 81 | test('`decompress()` should work with extra-long incompressible strings', (t) => { 82 | let string = `${'='.repeat(254)}📮` 83 | let compressed = compress(string) 84 | let decompressed = decompress(compressed) 85 | 86 | t.is(compressed.length, 262) 87 | t.is(decompressed, string) 88 | 89 | string = `${'='.repeat(253)}📮` 90 | compressed = compress(string) 91 | decompressed = decompress(compressed) 92 | 93 | t.is(compressed.length, 260) 94 | t.is(decompressed, string) 95 | }) 96 | 97 | test('`decompress()` should work with large buffers', (t) => { 98 | t.notThrows(() => { 99 | const compressed = compress(words) 100 | 101 | decompress(compressed) 102 | }) 103 | }) 104 | 105 | test('`decompress()` cannot use buffer that is not instance of `Uint8Array`', (t) => { 106 | t.throws( 107 | () => { 108 | decompress([155, 56, 172, 62, 195, 70]) 109 | }, 110 | { 111 | instanceOf: TypeError, 112 | message: 113 | 'The `buffer` argument must be an instance of ’Uint8Array’. It is an instance of `Array`.' 114 | } 115 | ) 116 | 117 | t.throws( 118 | () => { 119 | decompress(null) 120 | }, 121 | { 122 | instanceOf: TypeError, 123 | message: 124 | 'The `buffer` argument must be an instance of ’Uint8Array’. It is `null`.' 125 | } 126 | ) 127 | 128 | t.throws( 129 | () => { 130 | decompress() 131 | }, 132 | { 133 | instanceOf: TypeError, 134 | message: 135 | 'The `buffer` argument must be an instance of ’Uint8Array’. It is `undefined`.' 136 | } 137 | ) 138 | }) 139 | 140 | test('`decompress()` cannot use malformed buffer', (t) => { 141 | t.throws( 142 | () => { 143 | decompress(Uint8Array.of(...[254])) 144 | }, 145 | { 146 | instanceOf: DecompressError, 147 | message: 148 | 'The `bytes` argument is malformed because it has 1 elements. It wants to read at index 1.' 
149 | } 150 | ) 151 | 152 | t.throws( 153 | () => { 154 | decompress(Uint8Array.of(...[255])) 155 | }, 156 | { 157 | instanceOf: DecompressError, 158 | message: 159 | 'The `bytes` argument is malformed because it has 1 elements. It wants to read at index 1.' 160 | } 161 | ) 162 | 163 | t.throws( 164 | () => { 165 | decompress(Uint8Array.of(...[255, 50, 160])) 166 | }, 167 | { 168 | instanceOf: DecompressError, 169 | message: 170 | 'The `bytes` argument is malformed because it has 3 elements. It wants to read from index 2 to 53.' 171 | } 172 | ) 173 | 174 | t.throws( 175 | () => { 176 | decompress( 177 | Uint8Array.of( 178 | ...[ 179 | 255, 180 | 255, 181 | '61 '.repeat(256).split(' ').filter(Boolean), 182 | 254 183 | ].flat() 184 | ) 185 | ) 186 | }, 187 | { 188 | instanceOf: DecompressError, 189 | message: 190 | 'The `bytes` argument is malformed because it has 259 elements. It wants to read at index 259.' 191 | } 192 | ) 193 | }) 194 | 195 | test('Verify `compress()`’s output with `decompress()`', (t) => { 196 | const decompressedWords = decompress(compress(words)) 197 | 198 | t.deepEqual(decompressedWords, words) 199 | 200 | const compressedWords = words.split('\n').map((word) => compress(word)) 201 | 202 | t.deepEqual(compressedWords.map((word) => decompress(word)).join('\n'), words) 203 | }) 204 | 205 | test('`compatto()` should create a new `compatto` object', (t) => { 206 | const compattoCopy = compatto({ dictionary }) 207 | 208 | const compattoKeys = ['compress', 'decompress'] 209 | const compattoCopyKeys = Object.keys(compattoCopy) 210 | 211 | t.deepEqual(compattoCopyKeys, compattoKeys) 212 | }) 213 | 214 | test('`compatto()` should create a working new `compatto` object', (t) => { 215 | const compattoCopy = compatto({ dictionary }) 216 | 217 | const string = 'this is a basic string 📮' 218 | const compressed = compress(string) 219 | const decompressed = decompress(compressed) 220 | 221 | t.deepEqual(compattoCopy.compress(string), compressed) 222 | 
t.is(compattoCopy.decompress(compressed), decompressed) 223 | }) 224 | 225 | test('`compatto()` should use a new dictionary', (t) => { 226 | const compattoCopy = compatto({ dictionary: ['aa '] }) 227 | 228 | const compressed = compattoCopy.compress('aa b') 229 | 230 | t.deepEqual(compressed, Uint8Array.of(...[0, 254, 98])) 231 | }) 232 | 233 | test('`compatto()` cannot use malformed dictionary', (t) => { 234 | t.throws( 235 | () => { 236 | compatto({ dictionary: Array.from({ length: 300 }) }) 237 | }, 238 | { 239 | instanceOf: TypeError, 240 | message: 241 | 'The `dictionary` option must be an array with at most 254 elements. It has 300 elements.' 242 | } 243 | ) 244 | t.throws( 245 | () => { 246 | compatto({ dictionary: null }) 247 | }, 248 | { 249 | instanceOf: TypeError, 250 | message: 251 | 'The `dictionary` option must be an array with at most 254 elements. It is `null`.' 252 | } 253 | ) 254 | t.throws( 255 | () => { 256 | compatto({ 257 | dictionary: 'hello' 258 | }) 259 | }, 260 | { 261 | instanceOf: TypeError, 262 | message: 263 | 'The `dictionary` option must be an array with at most 254 elements. It is `String`.' 264 | } 265 | ) 266 | }) 267 | -------------------------------------------------------------------------------- /test/dictionary.js: -------------------------------------------------------------------------------- 1 | import test from 'ava' 2 | 3 | import { dictionary } from '../dictionary.js' 4 | 5 | test('`dictionary` should never change', (t) => { 6 | t.snapshot(dictionary) 7 | }) 8 | -------------------------------------------------------------------------------- /test/snapshots/dictionary.js.md: -------------------------------------------------------------------------------- 1 | # Snapshot report for `test/dictionary.js` 2 | 3 | The actual snapshot is saved in `dictionary.js.snap`. 4 | 5 | Generated by [AVA](https://avajs.dev). 
6 | 7 | ## `dictionary` should never change 8 | 9 | > Snapshot 1 10 | 11 | [ 12 | ' ', 13 | 'the', 14 | 'e', 15 | 't', 16 | 'a', 17 | 'of', 18 | 'o', 19 | 'and', 20 | 'i', 21 | 'n', 22 | 's', 23 | 'e ', 24 | 'r', 25 | ' th', 26 | ' t', 27 | 'in', 28 | 'he', 29 | 'th', 30 | 'h', 31 | 'he ', 32 | 'to', 33 | `␍␊ 34 | `, 35 | 'l', 36 | 's ', 37 | 'd', 38 | ' a', 39 | 'an', 40 | 'er', 41 | 'c', 42 | ' o', 43 | 'd ', 44 | 'on', 45 | ' of', 46 | 're', 47 | 'of ', 48 | 't ', 49 | ', ', 50 | 'is', 51 | 'u', 52 | 'at', 53 | ' ', 54 | 'n ', 55 | 'or', 56 | 'which', 57 | 'f', 58 | 'm', 59 | 'as', 60 | 'it', 61 | 'that', 62 | `␊ 63 | `, 64 | 'was', 65 | 'en', 66 | ' ', 67 | ' w', 68 | 'es', 69 | ' an', 70 | ' i', 71 | `␍ 72 | `, 73 | 'f ', 74 | 'g', 75 | 'p', 76 | 'nd', 77 | ' s', 78 | 'nd ', 79 | 'ed ', 80 | 'w', 81 | 'ed', 82 | 'http://', 83 | 'for', 84 | 'te', 85 | 'ing', 86 | 'y ', 87 | 'The', 88 | ' c', 89 | 'ti', 90 | 'r ', 91 | 'his', 92 | 'st', 93 | ' in', 94 | 'ar', 95 | 'nt', 96 | ',', 97 | ' to', 98 | 'y', 99 | 'ng', 100 | ' h', 101 | 'with', 102 | 'le', 103 | 'al', 104 | 'to ', 105 | 'b', 106 | 'ou', 107 | 'be', 108 | 'were', 109 | ' b', 110 | 'se', 111 | 'o ', 112 | 'ent', 113 | 'ha', 114 | 'ng ', 115 | 'their', 116 | '"', 117 | 'hi', 118 | 'from', 119 | ' f', 120 | 'in ', 121 | 'de', 122 | 'ion', 123 | 'me', 124 | 'v', 125 | '.', 126 | 've', 127 | 'all', 128 | 're ', 129 | 'ri', 130 | 'ro', 131 | 'is ', 132 | 'co', 133 | 'f t', 134 | 'are', 135 | 'ea', 136 | '. 
', 137 | 'her', 138 | ' m', 139 | 'er ', 140 | ' p', 141 | 'es ', 142 | 'by', 143 | 'they', 144 | 'di', 145 | 'ra', 146 | 'ic', 147 | 'not', 148 | 's, ', 149 | 'd t', 150 | 'at ', 151 | 'ce', 152 | 'la', 153 | 'h ', 154 | 'ne', 155 | 'as ', 156 | 'tio', 157 | 'on ', 158 | 'n t', 159 | 'io', 160 | 'we', 161 | ' a ', 162 | 'om', 163 | ', a', 164 | 's o', 165 | 'ur', 166 | 'li', 167 | 'll', 168 | 'ch', 169 | 'had', 170 | 'this', 171 | 'e t', 172 | 'g ', 173 | `e␍␊ 174 | `, 175 | ' wh', 176 | 'ere', 177 | ' co', 178 | 'e o', 179 | 'a ', 180 | 'us', 181 | ' d', 182 | 'ss', 183 | `␊ 184 | ␍␊ 185 | `, 186 | `␍␊ 187 | ␍ 188 | `, 189 | '="', 190 | ' be', 191 | ' e', 192 | 's a', 193 | 'ma', 194 | 'one', 195 | 't t', 196 | 'or ', 197 | 'but', 198 | 'el', 199 | 'so', 200 | 'l ', 201 | 'e s', 202 | 's,', 203 | 'no', 204 | 'ter', 205 | ' wa', 206 | 'iv', 207 | 'ho', 208 | 'e a', 209 | ' r', 210 | 'hat', 211 | 's t', 212 | 'ns', 213 | 'ch ', 214 | 'wh', 215 | 'tr', 216 | 'ut', 217 | '/', 218 | 'have', 219 | 'ly ', 220 | 'ta', 221 | ' ha', 222 | ' on', 223 | 'tha', 224 | '-', 225 | ' l', 226 | 'ati', 227 | 'en ', 228 | 'pe', 229 | ' re', 230 | 'there', 231 | 'ass', 232 | 'si', 233 | ' fo', 234 | 'wa', 235 | 'ec', 236 | 'our', 237 | 'who', 238 | 'its', 239 | 'z', 240 | 'fo', 241 | 'rs', 242 | '>', 243 | 'ot', 244 | 'un', 245 | '<', 246 | 'im', 247 | 'th ', 248 | 'nc', 249 | 'ate', 250 | '><', 251 | 'ver', 252 | 'ad', 253 | ' we', 254 | 'ly', 255 | 'ee', 256 | ' n', 257 | 'id', 258 | ' cl', 259 | 'ac', 260 | 'il', 261 | '