├── package.json ├── license ├── index.js ├── readme.md └── test.js /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jsonl", 3 | "version": "1.1.2", 4 | "description": "Transform a stream of JSON into a stream of Line Delimited JSON", 5 | "author": "Stephen Sawchuk (http://stephenplusplus.com)", 6 | "main": "index.js", 7 | "repository": "stephenplusplus/jsonl", 8 | "files": [ 9 | "index.js", 10 | "LICENSE" 11 | ], 12 | "keywords": [ 13 | "json", 14 | "stream", 15 | "newline" 16 | ], 17 | "dependencies": { 18 | "jsonparse": "0.0.6", 19 | "through2": "^0.6.3" 20 | }, 21 | "devDependencies": { 22 | "mocha": "^2.0.1" 23 | }, 24 | "scripts": { 25 | "test": "mocha" 26 | }, 27 | "license": "MIT" 28 | } 29 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Stephen Sawchuk (stephenplusplus.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | "use strict" 2 | 3 | var jsonparse = require("jsonparse") 4 | var through = require("through2") 5 | 6 | module.exports = function (opts) { 7 | opts = opts || {} 8 | 9 | var parser = new jsonparse() 10 | var json = {} 11 | 12 | if (opts.toBufferStream) opts.objectMode = true 13 | if (opts.pluck) opts.pluck = Array.isArray(opts.pluck) ? opts.pluck : [opts.pluck] 14 | 15 | opts.depth = typeof opts.depth === "number"? opts.depth : opts.toBufferStream ? 0 : 1 16 | 17 | var jsonl = through(opts, function (chunk, enc, next) { 18 | parser.write(Buffer.isBuffer(chunk) ? 
chunk : JSON.stringify(chunk)) 19 | next() 20 | }) 21 | 22 | parser.onValue = function (value) { 23 | var skip = true 24 | 25 | if (this.stack.length === opts.depth) { 26 | if (!opts.pluck) 27 | skip = false 28 | else if (this.key && opts.pluck.indexOf(this.key.toString().toLowerCase()) > -1) 29 | json[this.key] = value 30 | } 31 | 32 | if (this.stack.length <= opts.depth && Object.keys(json).length > 0) { 33 | skip = false 34 | value = json 35 | json = {} 36 | } 37 | 38 | if (skip) return 39 | 40 | if (opts.objectMode && !opts.toBufferStream) 41 | jsonl.push(value) 42 | else if (opts.toBufferStream) 43 | jsonl.push(new Buffer(JSON.stringify(value) + (opts.separator || "\n"))) 44 | else 45 | jsonl.push(JSON.stringify(value) + (opts.separator || "\n")) 46 | } 47 | 48 | return jsonl 49 | } 50 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # jso\nl 2 | 3 | > Transform a stream of JSON into a stream of [Line Delimited JSON](http://en.wikipedia.org/wiki/Line_Delimited_JSON) 4 | 5 | ## Install 6 | ```sh 7 | $ npm install --save jsonl 8 | ``` 9 | 10 | ## Use 11 | 12 | ### From Buffers 13 | ```js 14 | var fs = require("fs") 15 | var jsonl = require("jsonl") 16 | 17 | fs.createReadStream("./in.json") 18 | .pipe(jsonl()) 19 | .pipe(fs.createWriteStream("./out.json")) 20 | ``` 21 | #### `in.json` 22 | ```json 23 | [{"test":"value"},{"test":"value"},{"test":"value"},{"test":"value"}] 24 | ``` 25 | #### `out.json` 26 | ```json 27 | {"test":"value"} 28 | {"test":"value"} 29 | {"test":"value"} 30 | {"test":"value"} 31 | 32 | ``` 33 | 34 | ### From Objects 35 | ```js 36 | var fs = require("fs") 37 | var jsonl = require("jsonl") 38 | var through = require("through2") 39 | var stream = through.obj() 40 | 41 | stream.pipe(jsonl({toBufferStream:true})) 42 | .pipe(fs.createWriteStream("./out.json")) 43 | 44 | stream.push({test:"value"}) 45 | 
stream.push({test:"value"}) 46 | stream.push({test:"value"}) 47 | stream.push({test:"value"}) 48 | stream.end() 49 | ``` 50 | #### `out.json` 51 | ```json 52 | {"test":"value"} 53 | {"test":"value"} 54 | {"test":"value"} 55 | {"test":"value"} 56 | 57 | ``` 58 | 59 | ### Depth 60 | To get the results you expect, you will likely need to know the structure of your incoming data. You may have to pass a `depth` property, which corresponds to the layer of the property in a serialized, nested JSON object. 61 | 62 | By default, jsonl will use a depth of 1 when reading data from a Buffer stream (expecting objects to be nested in an array), and a depth of 0 from a stream in object mode. 63 | 64 | ```js 65 | /*0*/[ 66 | /*1*/ { 67 | /*2*/ test: "value" 68 | /*1*/ }, 69 | /*1*/ { 70 | /*2*/ test: "value" 71 | /*1*/ } 72 | /*0*/] 73 | ``` 74 | 75 | ### Plucking 76 | To filter the incoming data based on properties, you can select specific fields to be plucked out of the incoming object. 77 | 78 | \* You will need to specify a `depth` property for the nested level of the property. 79 | 80 | 81 | ```js 82 | var fs = require("fs") 83 | var jsonl = require("jsonl") 84 | 85 | fs.createReadStream("./in.json") 86 | .pipe(jsonl({pluck:["category"], depth:2})) 87 | .pipe(fs.createWriteStream("./out.json")) 88 | ``` 89 | #### `in.json` 90 | ```json 91 | [{"category": "cactus heights", "question":"?", "answer": "!"},{"category": "giraffe shoe sizes", "question":"?", "answer": "!"}] 92 | ``` 93 | #### `out.json` 94 | ```json 95 | {"category":"cactus heights"} 96 | {"category":"giraffe shoe sizes"} 97 | 98 | ``` 99 | 100 | ## API 101 | 102 | ### var jsonl = require("jsonl")([opts]) 103 | 104 | #### opts.depth 105 | - Type: `Number` (default: `1`) 106 | 107 | The depth of the objects in the incoming data to pluck out. 
"use strict"

var assert = require("assert")
var jsonl = require("./")
var through = require("through2")

/**
 * Gather every "data" chunk emitted by `stream` and, on "end", compare the
 * gathered list with `expected` before telling mocha the test is finished.
 */
function expectOutput(stream, expected, done) {
  var seen = []

  stream
    .on("data", function (chunk) {
      seen.push(chunk)
    })
    .on("end", function () {
      assert.deepEqual(seen, expected)
      done()
    })
}

/**
 * Buffers for {"hi":0} .. {"hi":4}, each followed by `separator`.
 */
function hiBuffers(separator) {
  return [0, 1, 2, 3, 4].map(function (n) {
    return Buffer.from('{"hi":' + n + '}' + separator)
  })
}

describe("jsonl", function () {
  it("chunks out new lines", function (done) {
    var stream = through()

    expectOutput(stream.pipe(jsonl()), hiBuffers("\n"), done)

    stream.write('[{"hi":0},{"hi":1},{"hi":2},{"hi":3},{"hi":4}]')
    stream.end()
  })

  it("allows overriding depth", function (done) {
    var stream = through()

    expectOutput(stream.pipe(jsonl({depth: 0})), hiBuffers("\n"), done)

    stream.write('{"hi":0}')
    stream.write('{"hi":1}')
    stream.write('{"hi":2}')
    stream.write('{"hi":3}')
    stream.write('{"hi":4}')
    stream.end()
  })

  it("allows overriding separator", function (done) {
    var stream = through()

    expectOutput(stream.pipe(jsonl({separator: "&&"})), hiBuffers("&&"), done)

    // NOTE(review): the input has no closing "]" -- kept as in the original
    // fixture; confirm whether that is intentional.
    stream.write('[{"hi":0},{"hi":1},{"hi":2},{"hi":3},{"hi":4}')
    stream.end()
  })

  it("returns a stream in object mode", function (done) {
    var stream = through()

    expectOutput(stream.pipe(jsonl({objectMode: true})), [
      { hi: 0 },
      { hi: 1 },
      { hi: 2 },
      { hi: 3 },
      { hi: 4 }
    ], done)

    // NOTE(review): the input has no closing "]" -- kept as in the original
    // fixture; confirm whether that is intentional.
    stream.write('[{"hi":0},{"hi":1},{"hi":2},{"hi":3},{"hi":4}')
    stream.end()
  })

  it("converts object mode to buffers", function (done) {
    var stream = through.obj()

    expectOutput(stream.pipe(jsonl({toBufferStream: true})), hiBuffers("\n"), done)

    stream.push({hi: 0})
    stream.push({hi: 1})
    stream.push({hi: 2})
    stream.push({hi: 3})
    stream.push({hi: 4})
    stream.end()
  })

  it("plucks [obj] properties from same depth", function (done) {
    var stream = through.obj()
    var plucking = jsonl({
      toBufferStream: true, depth: 3, pluck: ["one","two"]
    })

    expectOutput(stream.pipe(plucking), [
      Buffer.from('{"one":{"yo":"hey"}}\n'),
      Buffer.from('{"two":["h","e","y"]}\n'),
      Buffer.from('{"one":{"yo":"hey"}}\n'),
      Buffer.from('{"two":["h","e","y"]}\n'),
      Buffer.from('{"one":{"yo":"hey"}}\n')
    ], done)

    // The leading comments mark the nesting depth of each line.
    var obj =
    /*0*/{
    /*111*/hi: [
    /*22222*/{
    /*3333333*/one: {
    /*444444444*/yo: "hey"
    /*3333333*/}
    /*22222*/}
    /*111*/]
    /*0*/}
    var arr =
    /*0*/{
    /*111*/hi: [
    /*22222*/{
    /*3333333*/two: ["h","e","y"]
    /*22222*/}
    /*111*/]
    /*0*/}

    stream.push(obj)
    stream.push(arr)
    stream.push(obj)
    stream.push(arr)
    stream.push(obj)
    stream.end()
  })
})