├── devbuild.sh ├── .github └── CODEOWNERS ├── .gitignore ├── .travis.yml ├── test ├── buster.js └── tests │ └── datasquasher.js ├── package.json ├── GruntFile.js ├── README.md └── lib └── ftdatasquasher.js /devbuild.sh: -------------------------------------------------------------------------------- 1 | npm install 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @Financial-Times/apps 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | docs/ 3 | node_modules/ 4 | coverage 5 | build/ 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | script: 2 | - "npm test" 3 | - "npm run coveralls" 4 | 5 | language: node_js 6 | 7 | node_js: 8 | - "4" 9 | -------------------------------------------------------------------------------- /test/buster.js: -------------------------------------------------------------------------------- 1 | var config = module.exports; 2 | 3 | config.datasquasher = { 4 | rootPath: '../', 5 | sources: [ 6 | "lib/*.js" 7 | ], 8 | environment: 'node', 9 | "buster-istanbul": { 10 | outputDirectory: "coverage", 11 | format: ["lcov", "html"] 12 | }, 13 | tests: [ 14 | 'test/tests/*.js' 15 | ], 16 | extensions: [ 17 | require('buster-istanbul') 18 | ] 19 | }; 20 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ftdatasquasher", 3 | "version": "0.0.4", 4 | "description": "Data compression and decompression support, packing base64 into UTF8 high and low bytes", 5 | "main": "lib/ftdatasquasher.js", 6 | 
"scripts": { 7 | "test": "./node_modules/.bin/grunt test", 8 | "coveralls": "cat coverage/lcov.info | coveralls" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git@github.com:ftlabs/ftdatasquasher.git" 13 | }, 14 | "devDependencies": { 15 | "grunt": "~0.4.1", 16 | "grunt-buster": "~0.3.1", 17 | "grunt-browserify": "~1.2.11", 18 | "buster-istanbul": "~0.1.10", 19 | "buster": "~0.7.8", 20 | "grunt-jsdoc": "~0.4.1", 21 | "grunt-cli": "~0.1.10", 22 | "coveralls": "~2.3.0" 23 | }, 24 | "author": "Rowan Beentje", 25 | "license": "MIT", 26 | "readmeFilename": "README.md" 27 | } 28 | -------------------------------------------------------------------------------- /GruntFile.js: -------------------------------------------------------------------------------- 1 | module.exports = function(grunt) { 2 | 3 | // Project configuration. 4 | grunt.initConfig({ 5 | pkg: grunt.file.readJSON('package.json'), 6 | 7 | buster: { 8 | test: {} 9 | }, 10 | 11 | browserify: { 12 | src: 'lib/ftdatasquasher.js', 13 | dest: 'build/ftdatasquasher.js', 14 | options: { 15 | standalone: 'DataSquasher' 16 | }, 17 | }, 18 | 19 | jsdoc: { 20 | dist: { 21 | src: ['lib/*.js', 'README.md'], 22 | options: { 23 | destination: 'doc' 24 | } 25 | } 26 | } 27 | }); 28 | 29 | grunt.loadNpmTasks('grunt-buster'); 30 | grunt.loadNpmTasks('grunt-browserify'); 31 | grunt.loadNpmTasks('grunt-jsdoc'); 32 | 33 | // Default task. 
34 | grunt.registerTask('default', ['browserify:build']); 35 | grunt.registerTask('test', ['buster:test']); 36 | }; 37 | -------------------------------------------------------------------------------- /test/tests/datasquasher.js: -------------------------------------------------------------------------------- 1 | var buster = require('buster'); 2 | var squasher = require('../../lib/ftdatasquasher'); 3 | var assert = buster.assertions.assert; 4 | 5 | buster.testCase('datasquasher', { 6 | 7 | "A known string should compress and decompress to expected values": function () { 8 | 9 | // A base 64 encoded version of 'The quick brown fox jumps over the lazy dog' 10 | var sample = "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZw=="; 11 | var expected = "噇桬䥈䘱慗乲䥇䩹戳摵䥇婶敃䉱摗ㅷ捹䉶摭噹䥈副婓䉳奘瀵䥇剶婷㴽"; 12 | var actual = squasher.compress(sample); 13 | 14 | assert.equals(expected, actual); 15 | 16 | // And decompress again... 17 | var actual = squasher.decompress(actual); 18 | assert.equals(sample, actual); 19 | }, 20 | 21 | "Edge case - check when string not even number of characters": function () { 22 | var sample = "abc"; 23 | var expected = "慢挠"; 24 | var actual = squasher.compress(sample); 25 | 26 | assert.equals(expected, actual); 27 | 28 | // And decompress again... 29 | var actual = squasher.decompress(actual); 30 | 31 | // Not quite the same - has a space at the end. 
32 | assert.equals(sample + ' ', actual); 33 | } 34 | 35 | }); 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ftdatasquasher [![Build Status](https://api.travis-ci.com/ftlabs/ftdatasquasher.png)](https://travis-ci.com/github/ftlabs/ftdatasquasher) [![Coverage Status](https://coveralls.io/repos/ftlabs/ftdatasquasher/badge.png)](https://coveralls.io/r/ftlabs/ftdatasquasher) 2 | ============== 3 | 4 | Data compression and decompression support, packing base64 into UTF8 high and 5 | low bytes that we use to 'compress' base 64 encoded to maximise the amount of 6 | binary data we can store offline on [described](http://bit.ly/unireencode) by 7 | [@triblondon](http://www.twitter.com/triblondon). 8 | 9 | ## The problem 10 | 11 | If you want to store binary data for offline use browsers today don't officially 12 | support it. (localStorage only stores strings, WebSQL stores a few different 13 | types - but not binary, and IndexedDB doesn't support it on all browsers) 14 | 15 | Also IndexedDB is unlikely to ever support it properly across all browsers as 16 | [File API should one day fulfill the storing binary files for offline use case](http://www.w3.org/TR/FileAPI/). 17 | 18 | But you can work around it. If you base 64 the binary data it becomes a string 19 | and that string can be stored with the storage technology of your choosing. 20 | 21 | Though, in reality it is only really practical to use IndexedDB (falling back 22 | to WebSQL when IndexedDB is not available as localStorage has limited capacity 23 | and base 64 encoded binary files tend to be quite large. 24 | 25 | When data in a web browser's offline storage is stored (for example 26 | IndexedDB, WebSQL or localStorage) it stores that data as UTF16. 
Because UTF-16 is capable of encoding over a million different characters and
Base 64 only uses 64 of them, it turns out that UTF-16 is not a very efficient
format for storing base 64 encoded data.

For a more detailed explanation see:
http://labs.ft.com/2012/06/text-re-encoding-for-optimising-storage-capacity-in-the-browser/

## What do we do

We squash the characters together (if you look at them in dev tools the string
often comes out as a mixture of characters from East Asian languages). This
algorithm allows us to store more than twice as much base 64 encoded data as
we could without it.

## Installation

```
npm install ftdatasquasher
```

## Compatibility

It's just plain JavaScript. It should work everywhere. Across all browsers
(via a compiler like Browserify) and in NodeJS.

## Usage

[See the docs](http://ftlabs.github.io/ftdatasquasher/doc/module-ftdatasquasher.html)

## Credits and collaboration ##

The lead developer of ftdatasquasher is
[Rowan Beentje](http://twitter.com/rowanbeentje) at FT Labs. This fork is
currently maintained by [Matt Andrews](http://twitter.com/andrewsmatt). All open
source code released by FT Labs is licenced under the MIT licence. We welcome
comments, feedback and suggestions. Please feel free to raise an issue or pull
request. Enjoy.
--------------------------------------------------------------------------------
/lib/ftdatasquasher.js:
--------------------------------------------------------------------------------
/**
 * FT Data Squasher
 *
 * Data compression and decompression support, packing base64 into UTF8 high and
 * low bytes.
 *
 * A requireable module of the compression and decompression algorithm
 * {@link http://bit.ly/unireencode|described by @triblondon}.
9 | * 10 | * @module ftdatasquasher 11 | * @copyright The Financial Times Limited [All rights reserved] 12 | */ 13 | 14 | /*jshint laxbreak:true*/ 15 | 16 | /** 17 | * Squash the (assumed base 64 encoded) string using the algorithm 18 | * 19 | * @access public 20 | * @param {string} data The un-squashed string 21 | * @returns string 22 | */ 23 | function compress(data) { 24 | var i, l, out = ''; 25 | 26 | // If string is not an even number of characters, pad it with a space, so that 27 | // when these bytes are read as UTF-16 data, the final character is complete 28 | if (data.length % 2 !== 0) { 29 | data += ' '; 30 | } 31 | 32 | for (i = 0, l = data.length; i < l; i += 2) { 33 | out += String.fromCharCode((data.charCodeAt(i) * 256) 34 | + data.charCodeAt(i + 1)); 35 | } 36 | 37 | return out; 38 | 39 | } 40 | 41 | /** 42 | * Unsquaash the data using the algorithm 43 | * 44 | * @access public 45 | * @param {string} data The compressed data to uncompress 46 | * @returns string 47 | */ 48 | function decompress(data) { 49 | 50 | // Findings when optimising this function for homescreen iOS 6: 51 | // 1) Bitwise maths is significantly faster - ~1.25x faster 52 | // 2) Caching fromCharCode method slightly faster - ~1.03x faster 53 | // 3) Eliminating temporary storage variables - ~1.1x faster 54 | // 4) Passing multiple arguments to fromCharCode is complex; with just two, 55 | // slower (!) - ~1.10x slower - but combined with unrolling, faster, 56 | // 5) Unrolling the loop is faster, although with diminishing returns - never 57 | // near linear. 58 | // 6) Combining unrolling with multiple arguments to fromCharCode leads to a 59 | // bigger speed increase due to batched string creation. 
60 | var i, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14, n15, n16; 61 | var getCharacterCode = String.fromCharCode; 62 | var decompressedData = ''; 63 | 64 | // While l is ultimately the length to process, unrolling the loop needs to 65 | // process the data in batches, in this case of 16; so start with the length 66 | // rounded to a multiple of 16. 67 | var l = (data.length >> 4 << 4); 68 | 69 | // In a loop, process the data in batches of 16 characters. 70 | for (i = 0; i < l; i++) { 71 | 72 | // Copy to local variables representing the character code at the positions 73 | n1 = data.charCodeAt(i); 74 | n2 = data.charCodeAt(++i); 75 | n3 = data.charCodeAt(++i); 76 | n4 = data.charCodeAt(++i); 77 | n5 = data.charCodeAt(++i); 78 | n6 = data.charCodeAt(++i); 79 | n7 = data.charCodeAt(++i); 80 | n8 = data.charCodeAt(++i); 81 | n9 = data.charCodeAt(++i); 82 | n10 = data.charCodeAt(++i); 83 | n11 = data.charCodeAt(++i); 84 | n12 = data.charCodeAt(++i); 85 | n13 = data.charCodeAt(++i); 86 | n14 = data.charCodeAt(++i); 87 | n15 = data.charCodeAt(++i); 88 | n16 = data.charCodeAt(++i); 89 | 90 | // Use String.fromCharCode (or a cached version of same) to get the ascii 91 | // characters from the high and low parts of each of the characters. In 92 | // other words, each character from the passed-in data is converted via: 93 | // decompressedData += String.fromCharCode(n >> 8) 94 | // + String.fromCharCode(n & 255) 95 | decompressedData += getCharacterCode( 96 | n1 >> 8, n1 & 255, n2 >> 8, n2 & 255, n3 >> 8, n3 & 255, n4 >> 8, n4 & 255, 97 | n5 >> 8, n5 & 255, n6 >> 8, n6 & 255, n7 >> 8, n7 & 255, n8 >> 8, n8 & 255, 98 | n9 >> 8, n9 & 255, n10 >> 8, n10 & 255, n11 >> 8, n11 & 255, n12 >> 8, n12 & 255, 99 | n13 >> 8, n13 & 255, n14 >> 8, n14 & 255, n15 >> 8, n15 & 255, n16 >> 8, n16 & 255 100 | ); 101 | } 102 | 103 | // Finally, output the end of the string, by processing any characters left 104 | // over after the groups of 16 have been handled. 
105 | for (l = data.length; i < l; i++) { 106 | n1 = data.charCodeAt(i); 107 | decompressedData += getCharacterCode(n1 >> 8) + getCharacterCode(n1 & 255); 108 | } 109 | 110 | return decompressedData; 111 | } 112 | 113 | 114 | module.exports = { 115 | compress: compress, 116 | decompress: decompress 117 | }; 118 | --------------------------------------------------------------------------------