├── .gitignore ├── index.js ├── package.json ├── readme.md └── test ├── data ├── dummy.csv └── dummy.json └── parse.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var pumpify = require('pumpify') 2 | var peek = require('peek-stream') 3 | var detectJSON = require('detect-json-style') 4 | var detectCSV = require('detect-csv') 5 | var JSONStream = require('JSONStream') 6 | var csv = require('csv-parser') 7 | var through = require('through2') 8 | var debug = require('debug')('parse-input-stream') 9 | 10 | module.exports = parseStream 11 | 12 | function parseStream (opts) { 13 | if (!opts) opts = {} 14 | 15 | switch (opts.format || opts.f) { 16 | case 'csv': return parseCSV(opts.separator) 17 | case 'tsv': return parseCSV('\t') 18 | case 'json': return parseJSON(opts.jsonpath) 19 | case 'objects': return parseObjects() 20 | } 21 | 22 | var detectMax = opts.detectMax || 8000 23 | 24 | return peek({newline: false, maxBuffer: detectMax}, function (data, swap) { 25 | if (!Buffer.isBuffer(data)) return swap(null, parseObjects()) 26 | var jsonStyle = detectJSON(data) 27 | if (jsonStyle) { 28 | jsonStyle.format = 'json' 29 | return swap(null, parseJSON(jsonStyle.selector)) 30 | } 31 | var isCSV = detectCSV(data) 32 | if (isCSV) { 33 | return swap(null, parseCSV(isCSV.delimiter)) 34 | } 35 | swap(new Error('Could not auto detect input type. Please specify the format.')) 36 | }) 37 | 38 | function parseCSV (separator) { 39 | debug('parsing csv') 40 | return combine([ 41 | csv({ 42 | headers: opts.headerRow === false && opts.columns, 43 | separator: separator 44 | }) 45 | ]) 46 | } 47 | 48 | function parseJSON (selector) { 49 | debug('parsing json') 50 | return combine([ 51 | JSONStream.parse(selector), 52 | parseObjects() 53 | ]) 54 | } 55 | 56 | function parseObjects () { 57 | debug('parsing objects') 58 | return through.obj() // empty through obj stream 59 | } 60 | } 61 | 62 | function combine (streams) { 63 | if (streams.length === 1) return streams[0] 64 | return pumpify.obj(streams) 65 | } 66 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parse-input-stream", 3 | "version": "1.0.1", 4 | "description": "[![NPM](https://nodei.co/npm/parse-input-stream.png)](https://nodei.co/npm/parse-input-stream/)", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "standard && tape test/*.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/karissa/parse-input-stream.git" 12 | }, 13 | "author": "Karissa McKelvey (http://karissamck.com/)", 14 | "license": "ISC", 15 | "bugs": { 16 | "url": "https://github.com/karissa/parse-input-stream/issues" 17 | }, 18 | "homepage": "https://github.com/karissa/parse-input-stream", 19 | "dependencies": { 20 | "JSONStream": "^1.0.4", 21 | "csv-parser": "^1.7.0", 22 | "debug": "^2.2.0", 23 | "detect-csv": "^1.1.0", 24 | "detect-json-style": "^1.0.2", 25 | "peek-stream": "^1.1.1", 26 | "through2": "^2.0.0" 27 | }, 28 | "devDependencies": { 29 | "pumpify": "^1.3.4", 30 | "standard": "^6.0.8", 31 | "tape": "^4.5.1" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # parse-input-stream 2 | 3 | ![dat](http://img.shields.io/badge/Development%20sponsored%20by-dat-green.svg?style=flat) 4 | 5 | Parse a tabular input stream. Can be used to pipe a buffered stream and attempts to parse it as a table -- csv, json, objects, or ndjson are supported. 6 | 7 | ``` 8 | npm install -g parse-input-stream 9 | ``` 10 | 11 | For CSV files, it will attempt to guess the delimiter. Quoted csvs work! 12 | 13 | For JSON files, the selector will be auto-detected. Experimental. 14 | 15 | ## Example 16 | 17 | ```js 18 | var parseInputStream = require('parse-input-stream') 19 | 20 | var inputStream = fs.createReadStream('/path/to/file.csv') 21 | 22 | var args = { 23 | "format": "csv" 24 | } 25 | 26 | inputStream.pipe(parseInputStream(args)).pipe(process.stdout) 27 | ``` 28 | 29 | ## Options 30 | 31 | `format`: string. attempt to parse the stream into the given format. support 'json', 'objects', 'csv', or 'tsv' 32 | 33 | `detectMax`: number. the maximum buffer amount to pre-read in order to detect the file type, delimiter, etc. Default 8000 -------------------------------------------------------------------------------- /test/data/dummy.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /test/data/dummy.json: -------------------------------------------------------------------------------- 1 | {"foo":"bar","name":"josie","age":"35"} 2 | {"foo":"baz","name":"eloise"} 3 | {"foo":"baz","name":"francoise"} 4 | -------------------------------------------------------------------------------- /test/parse.js: -------------------------------------------------------------------------------- 1 | var test = require('tape') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var parseInputStream = require('../') 5 | 6 | test('parse json', function (t) { 7 | var opts = {'format': 'json'} 8 | collectResults('dummy.json', opts, verify) 9 | function verify (err, lines) { 10 | t.false(err, 'no err') 11 | t.same(lines[0], { foo: 'bar', name: 'josie', age: '35' }, 'first row is an object') 12 | t.equal(lines.length, 3, '3 rows') 13 | t.end() 14 | } 15 | }) 16 | 17 | test('parse csv', function (t) { 18 | var opts = {'format': 'csv'} 19 | collectResults('dummy.csv', opts, verify) 20 | function verify (err, lines) { 21 | t.false(err, 'no err') 22 | t.same(lines[0], {a: '1', b: '2', c: '3'}, 'first row is an object with csv headers as object keys') 23 | t.equal(lines.length, 1, '1 row') 24 | t.end() 25 | } 26 | }) 27 | 28 | // helpers 29 | 30 | function fixture (name) { 31 | return path.join(__dirname, 'data', name) 32 | } 33 | 34 | function collectResults (file, opts, cb) { 35 | if (typeof opts === 'function') return collectResults(file, null, opts) 36 | var data = fs.createReadStream(fixture(file)) 37 | var lines = [] 38 | var parser = parseInputStream(opts) 39 | data.pipe(parser) 40 | .on('data', function (line) { 41 | lines.push(line) 42 | }) 43 | .on('error', function (err) { cb(err, lines) }) 44 | .on('end', function () { cb(false, lines) }) 45 | return parser 46 | } 47 | --------------------------------------------------------------------------------