├── .gitignore ├── .travis.yml ├── History.md ├── Makefile ├── README.md ├── bench.js ├── index.js ├── package.json └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | *.sw* 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: node_js 3 | node_js: 4 | - 4 5 | - 5 6 | - 6 7 | -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | 2 | 0.1.1 / 2014-05-28 3 | ================== 4 | 5 | * always return arrays when fed arrays 6 | * refactor 7 | 8 | 0.1.0 / 2014-05-19 9 | ================== 10 | 11 | * add passing test for false positive sub key matches 12 | * docs 13 | * add multi match support 14 | * add near/far benchmarks 15 | * use matcha for benchmarks 16 | 17 | 0.0.1 / 2014-03-18 18 | ================== 19 | 20 | * fix escaping 21 | 22 | 0.0.0 / 2014-03-17 23 | ================== 24 | 25 | * initial release 26 | 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | test: 3 | @node_modules/.bin/mocha --reporter spec 4 | 5 | bench: 6 | @./node_modules/.bin/matcha bench 7 | 8 | .PHONY: test bench 9 | 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # binary-extract 3 | 4 | Extract one or more values from a buffer of json without parsing the whole thing. 
5 | 6 | [![build status](https://secure.travis-ci.org/juliangruber/binary-extract.png)](http://travis-ci.org/juliangruber/binary-extract) 7 | 8 | ## Example 9 | 10 | ```js 11 | var extract = require('binary-extract'); 12 | 13 | var buf = new Buffer(JSON.stringify({ 14 | foo: 'bar', 15 | bar: 'baz', 16 | nested: { 17 | bar: 'nope' 18 | } 19 | })); 20 | 21 | var value = extract(buf, 'bar'); 22 | // => 'baz' 23 | 24 | var values = extract(buf, ['foo', 'nested']) 25 | // => ["bar", {"bar":"nope"}] 26 | ``` 27 | 28 | ## Perf 29 | 30 | With the object from `bench.js`, `extract()` is ~2-4x faster than 31 | `JSON.parse(buf.toString())`. It is also way more memory efficient as the 32 | blob stays out of the V8 heap. 33 | 34 | The big perf gain comes mainly from not parsing everything and not 35 | converting the buffer to a string. 36 | 37 | ## Installation 38 | 39 | ```bash 40 | $ npm install binary-extract 41 | ``` 42 | 43 | ## API 44 | 45 | ### extract(buf, keys) 46 | 47 | Extract the value of `keys` in the json `buf`. 48 | 49 | The value can be any valid JSON structure. 50 | 51 | If `keys` is a __String__, returns a value. If `keys` is an __Array__ of 52 | keys, returns an array of values. 53 | 54 | ## Sponsors 55 | 56 | This module is proudly supported by my [Sponsors](https://github.com/juliangruber/sponsors)! 57 | 58 | Do you want to support modules like this to improve their quality, stability and weigh in on new features? Then please consider donating to my [Patreon](https://www.patreon.com/juliangruber). Not sure how much of my modules you're using? Try [feross/thanks](https://github.com/feross/thanks)! 
59 | 60 | ## License 61 | 62 | MIT 63 | 64 | -------------------------------------------------------------------------------- /bench.js: -------------------------------------------------------------------------------- 1 | 2 | var extract = require('./'); 3 | var equal = require('assert').equal; 4 | 5 | var near = Buffer(JSON.stringify( 6 | { properties: 7 | { selected: '2', 8 | lastName: '', 9 | username: 'someone', 10 | category: 'Wedding Venues', 11 | firstName: '', 12 | product: 'planner', 13 | location: '', 14 | platform: 'ios', 15 | email: 'someone@yahoo.com', 16 | member_id: '12312313123123', 17 | filtered: 'false', 18 | viewed: 3 }, 19 | projectId: 'foobarbaz', 20 | userId: '123123123123123', 21 | sessionId: 'FF8D19D8-123123-449E-A0B9-2181C4886020', 22 | requestId: 'F3C49DEB-123123-4A54-BB72-D4BE591E4B29', 23 | action: 'Track', 24 | event: 'Vendor Category Viewed', 25 | timestamp: '2014-04-23T20:55:19.000Z', 26 | context: 27 | { providers: 28 | { Crittercism: false, 29 | Amplitude: false, 30 | Mixpanel: false, 31 | Countly: false, 32 | Localytics: false, 33 | 'Google Analytics': false, 34 | Flurry: false, 35 | Tapstream: false, 36 | Bugsnag: false }, 37 | appReleaseVersion: '2.3.1', 38 | osVersion: '7.1', 39 | os: 'iPhone OS', 40 | appVersion: '690', 41 | screenHeight: 480, 42 | 'library-version': '0.10.3', 43 | traits: 44 | { lastName: '', 45 | product: 'planner', 46 | member_id: '123123123123123', 47 | firstName: '', 48 | email: 'someone@yahoo.com', 49 | platform: 'ios', 50 | username: 'someone' }, 51 | screenWidth: 320, 52 | deviceManufacturer: 'Apple', 53 | library: 'analytics-ios', 54 | idForAdvertiser: '1323232-A0ED-47AB-BE4F-274F2252E4B4', 55 | deviceModel: 'iPad3,4' }, 56 | requestTime: '2014-04-23T20:55:44.211Z', 57 | version: 1, 58 | channel: 'server' } 59 | )); 60 | 61 | var far = Buffer(JSON.stringify( 62 | { properties: 63 | { selected: '2', 64 | lastName: '', 65 | username: 'someone', 66 | category: 'Wedding Venues', 67 | firstName: '', 68 | 
product: 'planner', 69 | location: '', 70 | platform: 'ios', 71 | email: 'someone@yahoo.com', 72 | member_id: '12312313123123', 73 | filtered: 'false', 74 | viewed: 3 }, 75 | userId: '123123123123123', 76 | sessionId: 'FF8D19D8-123123-449E-A0B9-2181C4886020', 77 | requestId: 'F3C49DEB-123123-4A54-BB72-D4BE591E4B29', 78 | action: 'Track', 79 | event: 'Vendor Category Viewed', 80 | timestamp: '2014-04-23T20:55:19.000Z', 81 | context: 82 | { providers: 83 | { Crittercism: false, 84 | Amplitude: false, 85 | Mixpanel: false, 86 | Countly: false, 87 | Localytics: false, 88 | 'Google Analytics': false, 89 | Flurry: false, 90 | Tapstream: false, 91 | Bugsnag: false }, 92 | appReleaseVersion: '2.3.1', 93 | osVersion: '7.1', 94 | os: 'iPhone OS', 95 | appVersion: '690', 96 | screenHeight: 480, 97 | 'library-version': '0.10.3', 98 | traits: 99 | { lastName: '', 100 | product: 'planner', 101 | member_id: '123123123123123', 102 | firstName: '', 103 | email: 'someone@yahoo.com', 104 | platform: 'ios', 105 | username: 'someone' }, 106 | screenWidth: 320, 107 | deviceManufacturer: 'Apple', 108 | library: 'analytics-ios', 109 | idForAdvertiser: '1323232-A0ED-47AB-BE4F-274F2252E4B4', 110 | deviceModel: 'iPad3,4' }, 111 | projectId: 'foobarbaz', 112 | requestTime: '2014-04-23T20:55:44.211Z', 113 | version: 1, 114 | channel: 'server' } 115 | )); 116 | 117 | suite('near', function(){ 118 | bench('native', function(){ 119 | var obj = JSON.parse(near.toString()); 120 | var val = obj.projectId; 121 | }) 122 | 123 | bench('extract', function(){ 124 | var val = extract(near, 'projectId'); 125 | }) 126 | }) 127 | 128 | suite('far', function(){ 129 | bench('native', function(){ 130 | var obj = JSON.parse(far.toString()); 131 | var val = obj.projectId; 132 | }) 133 | 134 | bench('extract', function(){ 135 | var val = extract(far, 'projectId'); 136 | }) 137 | }) 138 | -------------------------------------------------------------------------------- /index.js: 

/**
 * Expose `extract`.
 */

module.exports = extract;

/**
 * Char codes.
 */

var comma = code(',');
var obrace = code('{');
var cbrace = code('}');
var obracket = code('[');
var cbracket = code(']');
var colon = code(':');
var mark = code('"');
var backslash = code('\\');
var space = code(' ');
var tab = code('\t');
var newline = code('\n');
var creturn = code('\r');

/**
 * Extract the value of `keys` in the json `buf`.
 *
 * If `keys` is a single key, returns the value.
 * If `keys` is an array of keys, returns an array of values, where
 * `values[n]` belongs to `keys[n]`.
 *
 * Only keys of the outermost object are considered (anything nested
 * deeper than one `{` is skipped) and the first occurrence of a key wins.
 * A key that is not found yields `undefined` (single key) or leaves its
 * slot in the returned array unset (array of keys).
 *
 * NOTE(review): keys are compared as `charCodeAt()` code units against raw
 * buffer bytes, so this presumably only works reliably for ASCII keys that
 * need no JSON escaping -- confirm before relying on anything else.
 *
 * @param {Buffer} buf
 * @param {Array|String} keys
 * @return {Mixed}
 * @api public
 */

function extract(buf, keys){
  var multi = Array.isArray(keys);
  if (!multi) keys = [keys];

  var values = [];
  // Indexed by position in `keys`. A plain object keyed by name would
  // report inherited names such as "toString" as already matched.
  var done = [];
  // Number of keys extracted so far. `values.length` cannot be used for
  // this: `values` is sparse when a later key is found before an earlier one.
  var found = 0;
  var isKey = true;
  var inString = false;
  var level = 0;
  var chars = keys.map(strToCharCodes);
  var c;
  var idx;
  var sep;
  var start;
  var end;

  for (var i = 0; i < buf.length; i++) {
    c = buf[i];

    // Skip the escaped byte so that `\"` does not toggle `inString`.
    if (c === backslash) {
      i++;
      continue;
    }

    if (c === mark) {
      inString = !inString;
      continue;
    }

    if (!inString) {
      if (c === colon) isKey = false;
      else if (c === comma) isKey = true;
      else if (c === obrace) level++;
      else if (c === cbrace) level--;
    }
    if (!isKey || level > 1) continue;

    idx = -1;
    for (var j = 0; j < keys.length; j++) {
      if (!done[j] && isMatch(buf, i, chars[j])) {
        idx = j;
        break;
      }
    }
    if (idx === -1) continue;

    // A real key is followed by `:` (optionally surrounded by whitespace).
    // This rejects string elements of a top-level array that merely look
    // like the key, e.g. the "bar" in `{"foo":["bar"],"bar":1}`.
    sep = skipWhitespace(buf, i + chars[idx].length + 1);
    if (buf[sep] !== colon) continue;

    done[idx] = true;
    start = skipWhitespace(buf, sep + 1);
    end = findEnd(buf, start);

    values[idx] = parse(buf, start, end);
    if (++found === keys.length) break;
  }

  return multi
    ? values
    : values[0];
}

/**
 * Get the char code of the first character of `str`.
 *
 * @param {String} str
 * @return {Number}
 * @api private
 */

function code(str) {
  return str.charCodeAt(0);
}

/**
 * Convert `str` to an array of char codes.
 *
 * @param {String} str
 * @return {Array[Number]}
 * @api private
 */

function strToCharCodes(str) {
  var chars = [];
  for (var i = 0; i < str.length; i++) {
    chars[i] = str.charCodeAt(i);
  }
  return chars;
}

/**
 * Return the index of the first byte at or after `i` in `buf`
 * that is not JSON whitespace (space, tab, line feed, carriage return).
 *
 * @param {Buffer} buf
 * @param {Number} i
 * @return {Number}
 * @api private
 */

function skipWhitespace(buf, i) {
  var c = buf[i];
  while (c === space || c === tab || c === newline || c === creturn) {
    c = buf[++i];
  }
  return i;
}

/**
 * Check if `buf[i-1] - buf[i+n]` equals `"chars"`,
 * i.e. `chars` sits at `i` and is enclosed in double quotes.
 *
 * @param {Buffer} buf
 * @param {Number} i
 * @param {Array[Number]} chars
 * @return {Boolean}
 * @api private
 */

function isMatch(buf, i, chars){
  if (buf[i - 1] !== mark) return false;
  for (var j = 0; j < chars.length; j++) {
    if (buf[i + j] !== chars[j]) return false;
  }
  return buf[i + chars.length] === mark;
}

/**
 * Find the end index (exclusive) of the value
 * that starts at `start` in `buf`.
 *
 * Objects and arrays end after their matching closing bracket, every
 * other value ends at the `,`, `}` or `]` that follows it. Bytes inside
 * string literals are ignored, so a `,` or `}` in a string does not
 * terminate the value. Returns `undefined` when no end is found.
 *
 * @param {Buffer} buf
 * @param {Number} start
 * @return {Number}
 * @api private
 */

function findEnd(buf, start) {
  var level = 0;
  var inString = false;
  var s = buf[start];
  var c;

  for (var i = start; i < buf.length; i++) {
    c = buf[i];

    if (inString) {
      // Skip the escaped byte so that `\"` does not close the string.
      if (c === backslash) i++;
      else if (c === mark) inString = false;
      continue;
    }
    if (c === mark) {
      inString = true;
      continue;
    }

    if (c === obrace || c === obracket) {
      level++;
      continue;
    } else if (c === cbrace || c === cbracket) {
      if (--level > 0) continue;
    }
    if (
      level < 0
      || level === 0 && (c === comma || c === cbrace || c === cbracket)
    ) {
      // Containers include their own closing bracket, scalars stop
      // right before the delimiter.
      return s === obrace || s === obracket
        ? i + 1
        : i;
    }
  }
}

/**
 * Parse the json in `buf` from `start` to `end`.
 */
178 | * 179 | * @param {Buffer} buf 180 | * @param {Number} start 181 | * @param {Number} end 182 | * @return {Mixed} 183 | * @api private 184 | */ 185 | 186 | function parse(buf, start, end) { 187 | var json = buf.toString('utf8', start, end); 188 | return JSON.parse(json); 189 | } 190 | 191 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "binary-extract", 3 | "description": "Extract values from a binary json blob", 4 | "version": "0.1.2", 5 | "repository": "segmentio/binary-extract", 6 | "license": "MIT", 7 | "devDependencies": { 8 | "matcha": "~0.7.0", 9 | "mocha": "^3.0.0" 10 | }, 11 | "scripts": { 12 | "test": "make test" 13 | }, 14 | "publishConfig": { 15 | "registry": "https://registry.npmjs.org" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | 2 | var assert = require('assert'); 3 | var equal = assert.deepEqual; 4 | var extract = require('./'); 5 | 6 | describe('extract(buf, key)', function(){ 7 | it('should extract a value', function(){ 8 | var buf = toBuf({ foo: 'bar' }); 9 | equal(extract(buf, 'foo'), 'bar'); 10 | }) 11 | it('should extract multiple values', function(){ 12 | var buf = toBuf({ a: '0', b: '1', c: '2' }); 13 | equal(extract(buf, ['a', 'c']), ['0', '2']); 14 | }) 15 | it('should always return arrays when fed arrays', function(){ 16 | var buf = toBuf({ foo: 'bar' }); 17 | equal(extract(buf, ['foo']), ['bar']); 18 | }) 19 | it('should end on ,', function(){ 20 | var buf = toBuf({ foo: 'bar', bar: 'baz' }); 21 | equal(extract(buf, 'foo'), 'bar'); 22 | }) 23 | it('should ignore values when looking for keys', function(){ 24 | var buf = toBuf({ foo: 'bar', bar: 'baz' }); 25 | equal(extract(buf, 'bar'), 'baz'); 26 | }) 27 | it('should ignore too deeply nested 
values', function(){ 28 | var buf = toBuf({ foo: { beep: 'boop', bar: 'oops' }, bar: 'baz' }); 29 | equal(extract(buf, 'bar'), 'baz'); 30 | buf = toBuf({ foo: [{ bar: 'oops' }], bar: 'baz' }); 31 | equal(extract(buf, 'bar'), 'baz'); 32 | }) 33 | it('should ignore strings with special chars', function(){ 34 | var buf = toBuf({ foo: ',bar', bar: 'baz' }); 35 | equal(extract(buf, 'bar'), 'baz'); 36 | }) 37 | it('should extract objects', function(){ 38 | var buf = toBuf({ foo: { bar: 'baz' }}); 39 | equal(extract(buf, 'foo'), { bar: 'baz' }); 40 | }) 41 | it('should extract arrays', function(){ 42 | var buf = toBuf({ foo: ['bar', 'baz']}); 43 | equal(extract(buf, 'foo'), ['bar', 'baz']); 44 | }) 45 | it('should escape with backslash', function(){ 46 | var buf = toBuf({ beep: '\"', foo: 'bar' }); 47 | equal(extract(buf, 'foo'), 'bar'); 48 | var buf = toBuf({ foo: 'bar\"baz' }); 49 | equal(extract(buf, 'foo'), 'bar\"baz'); 50 | }); 51 | it('should ignore sub key matches', function(){ 52 | var buf = toBuf({ _a: '0', a_: '1', _a_: '2', a: '3' }); 53 | equal(extract(buf, 'a'), '3'); 54 | }); 55 | }) 56 | 57 | function toBuf(obj){ 58 | return new Buffer(JSON.stringify(obj)); 59 | } 60 | --------------------------------------------------------------------------------