├── .gitignore ├── Gruntfile.js ├── README.md ├── data └── demo.csv ├── examples ├── cmd.js ├── server.js └── transform.js ├── index.js ├── lib ├── demo.js ├── generatedemo.js ├── parser.js └── streamcombiner.js └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Folder view configuration files 2 | .DS_Store 3 | Desktop.ini 4 | 5 | # Thumbnail cache files 6 | ._* 7 | Thumbs.db 8 | 9 | # Files that might appear on external disks 10 | .Spotlight-V100 11 | .Trashes 12 | 13 | # Compiled Python files 14 | *.pyc 15 | 16 | # Application specific files 17 | venv 18 | node_modules 19 | components 20 | .sass-cache 21 | tmp -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | module.exports = function(grunt) { 2 | 3 | var globalConfig = { 4 | scripts: ['*.js', 'lib/**/*.js', 'bin/**/*.js'] 5 | }; 6 | 7 | // Project configuration 8 | grunt.initConfig({ 9 | 10 | globalConfig: globalConfig, 11 | 12 | // Lint js files 13 | jshint: { 14 | all: { 15 | options: { 16 | 'node': true, 17 | 'laxcomma': true, 18 | 'sub': true, 19 | 'debug': true 20 | }, 21 | src: ['<%= globalConfig.scripts %>'] 22 | } 23 | }, 24 | 25 | // Watch files: lint js and compile templates 26 | watch: { 27 | scripts: { 28 | files: ['<%= globalConfig.scripts %>'], 29 | tasks: ['jshint'] 30 | } 31 | } 32 | }); 33 | 34 | // Load tasks from plugins 35 | grunt.loadNpmTasks('grunt-contrib-jshint'); 36 | grunt.loadNpmTasks('grunt-contrib-watch'); 37 | 38 | // Default task 39 | grunt.registerTask('default', 'jshint'); 40 | 41 | }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Example Stream Parser 2 | 3 | An example Node.js streaming data file parser. 
4 | 5 | See: [http://nicolashery.com/parse-data-files-using-nodejs-streams/](http://nicolashery.com/parse-data-files-using-nodejs-streams/). 6 | 7 | Turns this: 8 | 9 | ``` 10 | Game Export (v1.2) 11 | GameId,1234567 12 | Player,1,Homer Simpson 13 | Player,2,Bart Simpson 14 | Player,3,Marge Simpson 15 | Map,101,Crossroads 16 | Time Range,2013-01-11 02:50:40,2013-01-12 05:34:56 17 | Number of Records,100 18 | Index,Timestamp,Event Type,Player Id,Event Data 19 | 1,2013-01-11 02:54:42,ResourcesGathered,3,"resource_type=Wood, quantity=11" 20 | 2,2013-01-11 03:00:26,ResourcesGathered,2,"resource_type=Gold, quantity=7" 21 | 3,2013-01-11 03:05:42,ResourcesGathered,1,"resource_type=Gold, quantity=2" 22 | 4,2013-01-11 03:08:05,UnitTrained,3,"unit_type=Knight, health=270, damage=12-15" 23 | 5,2013-01-11 03:24:05,DestroyedEnemy,1,"unit_type=Pig Farm" 24 | ``` 25 | 26 | Into this: 27 | 28 | ``` 29 | {"header":{"Title":"Game Export (v1.2)","GameId":"1234567","Players":[{"id":"1","name":"Homer Simpson"},{"id":"2","name":"Bart Simpson"},{"id":"3","name":"Marge Simpson"}],"Map":{"id":"101","name":"Crossroads"},"Time Range":{"start":"2013-01-11 02:50:40","end":"2013-01-12 05:34:56"},"Number of Records":"100","Columns":["Index","Timestamp","Event Type","Player Id","Event Data"]}} 30 | {"Index":"1","Timestamp":"2013-01-11 02:54:42","Event Type":"ResourcesGathered","Player Id":"3","Event Data":{"resource_type":"Wood","quantity":"11"}} 31 | {"Index":"2","Timestamp":"2013-01-11 03:00:26","Event Type":"ResourcesGathered","Player Id":"2","Event Data":{"resource_type":"Gold","quantity":"7"}} 32 | {"Index":"3","Timestamp":"2013-01-11 03:05:42","Event Type":"ResourcesGathered","Player Id":"1","Event Data":{"resource_type":"Gold","quantity":"2"}} 33 | {"Index":"4","Timestamp":"2013-01-11 03:08:05","Event Type":"UnitTrained","Player Id":"3","Event Data":{"unit_type":"Knight","health":"270","damage":"12-15"}} 34 | {"Index":"5","Timestamp":"2013-01-11 03:24:05","Event 
Type":"DestroyedEnemy","Player Id":"1","Event Data":{"unit_type":"Pig Farm"}} 35 | ``` 36 | 37 | ## Usage: command line 38 | 39 | See [examples/cmd.js](examples/cmd.js). 40 | 41 | Example: 42 | 43 | ```bash 44 | cat data/demo.csv | node examples/cmd 45 | ``` 46 | 47 | This writes the parsed result to stdout. 48 | 49 | ## Usage: JavaScript API 50 | 51 | See [examples/transform.js](examples/transform.js). 52 | 53 | It's just a Node.js [Transform Stream](http://nodejs.org/api/stream.html#stream_class_stream_transform) that takes a stream of the data file coming in, and emits parsed JavaScript objects. 54 | 55 | Example: 56 | 57 | ```javascript 58 | var JSONStream = require('JSONStream'); 59 | var parser = require('./index.js'); 60 | process.stdin 61 | .pipe(parser()) 62 | .pipe(JSONStream.stringify(false)) 63 | .pipe(process.stdout); 64 | ``` 65 | 66 | ## Usage: server 67 | 68 | See [examples/server.js](examples/server.js). 69 | 70 | (Inspired by [Max Ogden](https://github.com/maxogden)'s ["Gut: Hosted Open Data Filet Knives"](http://maxogden.com/gut-hosted-open-data-filets.html).) 
71 | 72 | Run the server in a separate terminal: 73 | 74 | ```bash 75 | $ node examples/server 76 | ``` 77 | 78 | Send some data to the server: 79 | 80 | ```bash 81 | $ curl -i -X POST http://localhost:8000/ -H "Content-Type: text/plain" --data-binary "@data/demo.csv" 82 | ``` 83 | 84 | You will get valid JSON back: 85 | 86 | ``` 87 | HTTP/1.1 200 OK 88 | Content-Type: application/json 89 | 90 | [ 91 | { 92 | "header": { 93 | "Title": "Game Export (v1.2)", 94 | "GameId": "1234567", 95 | "Players": [ 96 | { 97 | "id": "1", 98 | "name": "Homer Simpson" 99 | }, 100 | { 101 | "id": "2", 102 | "name": "Bart Simpson" 103 | }, 104 | { 105 | "id": "3", 106 | "name": "Marge Simpson" 107 | } 108 | ], 109 | "Map": { 110 | "id": "101", 111 | "name": "Crossroads" 112 | }, 113 | "Time Range": { 114 | "start": "2013-01-11 02:50:40", 115 | "end": "2013-01-12 05:34:56" 116 | }, 117 | "Number of Records": "100", 118 | "Columns": [ 119 | "Index", 120 | "Timestamp", 121 | "Event Type", 122 | "Player Id", 123 | "Event Data" 124 | ] 125 | } 126 | }, 127 | { 128 | "Index": "1", 129 | "Timestamp": "2013-01-11 02:54:42", 130 | "Event Type": "ResourcesGathered", 131 | "Player Id": "3", 132 | "Event Data": { 133 | "resource_type": "Wood", 134 | "quantity": "11" 135 | } 136 | }, 137 | { 138 | "Index": "2", 139 | "Timestamp": "2013-01-11 03:00:26", 140 | "Event Type": "ResourcesGathered", 141 | "Player Id": "2", 142 | "Event Data": { 143 | "resource_type": "Gold", 144 | "quantity": "7" 145 | } 146 | }, 147 | ... 
148 | ] 149 | ``` 150 | 151 | ## Generate demo data 152 | 153 | Use helper script: 154 | 155 | ```bash 156 | $ node lib/generatedemo > data/demo.csv 157 | ``` -------------------------------------------------------------------------------- /data/demo.csv: -------------------------------------------------------------------------------- 1 | Game Export (v1.2) 2 | GameId,1234567 3 | Player,1,Homer Simpson 4 | Player,2,Bart Simpson 5 | Player,3,Marge Simpson 6 | Map,101,Crossroads 7 | Time Range,2013-01-11 02:50:40,2013-01-12 05:34:56 8 | Number of Records,100 9 | Index,Timestamp,Event Type,Player Id,Event Data 10 | 1,2013-01-11 02:54:42,ResourcesGathered,3,"resource_type=Wood, quantity=11" 11 | 2,2013-01-11 03:00:26,ResourcesGathered,2,"resource_type=Gold, quantity=7" 12 | 3,2013-01-11 03:05:42,ResourcesGathered,1,"resource_type=Gold, quantity=2" 13 | 4,2013-01-11 03:08:05,UnitTrained,3,"unit_type=Knight, health=270, damage=12-15" 14 | 5,2013-01-11 03:24:05,DestroyedEnemy,1,"unit_type=Pig Farm" 15 | 6,2013-01-11 03:30:12,ResourcesGathered,2,"resource_type=Wood, quantity=7" 16 | 7,2013-01-11 04:53:22,ResourcesGathered,1,"resource_type=Pigs, quantity=10" 17 | 8,2013-01-11 05:17:35,ResourcesGathered,2,"resource_type=Wood, quantity=10" 18 | 9,2013-01-11 05:18:07,DestroyedEnemy,1,"unit_type=Castle" 19 | 10,2013-01-11 05:19:26,ResourcesGathered,1,"resource_type=Wood, quantity=19" 20 | 11,2013-01-11 05:35:37,ResourcesGathered,3,"resource_type=Gold, quantity=10" 21 | 12,2013-01-11 06:07:19,UnitTrained,2,"unit_type=Footman, health=60, damage=4-8" 22 | 13,2013-01-11 06:34:19,ResourcesGathered,2,"resource_type=Pigs, quantity=13" 23 | 14,2013-01-11 06:49:38,ResourcesGathered,3,"resource_type=Gold, quantity=6" 24 | 15,2013-01-11 07:09:09,ResourcesGathered,1,"resource_type=Gold, quantity=9" 25 | 16,2013-01-11 07:38:02,StructureBuilt,1,"structure_type=Archery, health=350" 26 | 17,2013-01-11 08:11:42,ResourcesGathered,2,"resource_type=Wood, quantity=18" 27 | 18,2013-01-11 
08:28:42,ResourcesGathered,1,"resource_type=Gold, quantity=6" 28 | 19,2013-01-11 08:28:59,StructureBuilt,1,"structure_type=Pig Farm, health=180" 29 | 20,2013-01-11 09:11:59,StructureBuilt,2,"structure_type=Barracks, health=250" 30 | 21,2013-01-11 09:12:06,StructureBuilt,3,"structure_type=Castle, health=480" 31 | 22,2013-01-11 09:26:20,UnitTrained,3,"unit_type=Archer, health=80, damage=3-9" 32 | 23,2013-01-11 09:29:59,StructureBuilt,1,"structure_type=Barracks, health=250" 33 | 24,2013-01-11 09:37:54,ResourcesGathered,1,"resource_type=Wood, quantity=9" 34 | 25,2013-01-11 10:14:21,ResourcesGathered,2,"resource_type=Wood, quantity=6" 35 | 26,2013-01-11 10:26:50,UnitTrained,2,"unit_type=Knight, health=270, damage=12-15" 36 | 27,2013-01-11 10:48:12,StructureBuilt,2,"structure_type=Castle, health=480" 37 | 28,2013-01-11 10:48:54,StructureBuilt,3,"structure_type=Pig Farm, health=180" 38 | 29,2013-01-11 10:50:22,ResourcesGathered,2,"resource_type=Gold, quantity=6" 39 | 30,2013-01-11 10:51:37,ResourcesGathered,3,"resource_type=Wood, quantity=14" 40 | 31,2013-01-11 10:57:22,StructureBuilt,2,"structure_type=Pig Farm, health=180" 41 | 32,2013-01-11 11:16:00,ResourcesGathered,3,"resource_type=Wood, quantity=6" 42 | 33,2013-01-11 11:19:04,ResourcesGathered,1,"resource_type=Pigs, quantity=15" 43 | 34,2013-01-11 11:22:51,ResourcesGathered,1,"resource_type=Wood, quantity=11" 44 | 35,2013-01-11 11:27:09,StructureBuilt,3,"structure_type=Castle, health=480" 45 | 36,2013-01-11 12:11:53,ResourcesGathered,2,"resource_type=Pigs, quantity=9" 46 | 37,2013-01-11 12:32:28,ResourcesGathered,3,"resource_type=Pigs, quantity=8" 47 | 38,2013-01-11 12:56:00,UnitTrained,2,"unit_type=Archer, health=80, damage=3-9" 48 | 39,2013-01-11 12:56:46,ResourcesGathered,2,"resource_type=Gold, quantity=6" 49 | 40,2013-01-11 13:23:10,UnitTrained,1,"unit_type=Peasant, health=40, damage=1-4" 50 | 41,2013-01-11 13:29:40,ResourcesGathered,3,"resource_type=Gold, quantity=4" 51 | 42,2013-01-11 
13:34:32,ResourcesGathered,1,"resource_type=Pigs, quantity=10" 52 | 43,2013-01-11 14:18:31,ResourcesGathered,1,"resource_type=Pigs, quantity=4" 53 | 44,2013-01-11 14:50:01,DestroyedEnemy,2,"unit_type=Pig Farm" 54 | 45,2013-01-11 15:34:32,ResourcesGathered,2,"resource_type=Pigs, quantity=15" 55 | 46,2013-01-11 15:46:24,ResourcesGathered,2,"resource_type=Wood, quantity=15" 56 | 47,2013-01-11 15:50:18,ResourcesGathered,2,"resource_type=Pigs, quantity=14" 57 | 48,2013-01-11 16:23:50,StructureBuilt,1,"structure_type=Castle, health=480" 58 | 49,2013-01-11 17:00:30,ResourcesGathered,2,"resource_type=Pigs, quantity=4" 59 | 50,2013-01-11 17:14:00,ResourcesGathered,2,"resource_type=Gold, quantity=7" 60 | 51,2013-01-11 17:38:58,UnitTrained,1,"unit_type=Footman, health=60, damage=4-8" 61 | 52,2013-01-11 17:46:32,DestroyedEnemy,3,"unit_type=Footman" 62 | 53,2013-01-11 17:49:02,UnitTrained,1,"unit_type=Footman, health=60, damage=4-8" 63 | 54,2013-01-11 17:53:00,ResourcesGathered,2,"resource_type=Wood, quantity=7" 64 | 55,2013-01-11 17:55:00,ResourcesGathered,3,"resource_type=Wood, quantity=19" 65 | 56,2013-01-11 18:09:48,ResourcesGathered,3,"resource_type=Gold, quantity=2" 66 | 57,2013-01-11 18:58:18,ResourcesGathered,1,"resource_type=Gold, quantity=6" 67 | 58,2013-01-11 19:39:03,StructureBuilt,1,"structure_type=Archery, health=350" 68 | 59,2013-01-11 19:55:31,ResourcesGathered,1,"resource_type=Pigs, quantity=4" 69 | 60,2013-01-11 20:02:15,DestroyedEnemy,3,"unit_type=Pig Farm" 70 | 61,2013-01-11 20:57:16,DestroyedEnemy,2,"unit_type=Knight" 71 | 62,2013-01-11 21:20:55,ResourcesGathered,2,"resource_type=Wood, quantity=12" 72 | 63,2013-01-11 21:28:04,ResourcesGathered,3,"resource_type=Pigs, quantity=13" 73 | 64,2013-01-11 21:44:37,ResourcesGathered,1,"resource_type=Gold, quantity=9" 74 | 65,2013-01-11 22:09:45,ResourcesGathered,2,"resource_type=Wood, quantity=15" 75 | 66,2013-01-11 22:16:50,ResourcesGathered,3,"resource_type=Gold, quantity=6" 76 | 67,2013-01-11 
22:54:49,ResourcesGathered,3,"resource_type=Gold, quantity=6" 77 | 68,2013-01-11 23:25:15,UnitTrained,2,"unit_type=Knight, health=270, damage=12-15" 78 | 69,2013-01-11 23:51:01,ResourcesGathered,2,"resource_type=Pigs, quantity=8" 79 | 70,2013-01-12 00:18:03,ResourcesGathered,2,"resource_type=Pigs, quantity=6" 80 | 71,2013-01-12 00:37:11,UnitTrained,2,"unit_type=Peasant, health=40, damage=1-4" 81 | 72,2013-01-12 00:39:57,UnitTrained,2,"unit_type=Footman, health=60, damage=4-8" 82 | 73,2013-01-12 01:14:41,UnitTrained,2,"unit_type=Footman, health=60, damage=4-8" 83 | 74,2013-01-12 01:33:42,ResourcesGathered,3,"resource_type=Pigs, quantity=14" 84 | 75,2013-01-12 02:02:47,StructureBuilt,3,"structure_type=Barracks, health=250" 85 | 76,2013-01-12 02:33:53,StructureBuilt,2,"structure_type=Castle, health=480" 86 | 77,2013-01-12 02:42:42,ResourcesGathered,1,"resource_type=Gold, quantity=1" 87 | 78,2013-01-12 02:50:35,ResourcesGathered,2,"resource_type=Gold, quantity=5" 88 | 79,2013-01-12 02:51:10,ResourcesGathered,1,"resource_type=Wood, quantity=14" 89 | 80,2013-01-12 03:44:56,ResourcesGathered,1,"resource_type=Wood, quantity=7" 90 | 81,2013-01-12 03:51:22,ResourcesGathered,3,"resource_type=Pigs, quantity=8" 91 | 82,2013-01-12 03:57:36,ResourcesGathered,3,"resource_type=Gold, quantity=8" 92 | 83,2013-01-12 04:01:37,ResourcesGathered,1,"resource_type=Pigs, quantity=14" 93 | 84,2013-01-12 04:14:38,ResourcesGathered,3,"resource_type=Gold, quantity=6" 94 | 85,2013-01-12 04:35:58,DestroyedEnemy,3,"unit_type=Footman" 95 | 86,2013-01-12 04:37:19,DestroyedEnemy,3,"unit_type=Archery" 96 | 87,2013-01-12 04:43:17,ResourcesGathered,3,"resource_type=Gold, quantity=8" 97 | 88,2013-01-12 04:53:46,UnitTrained,2,"unit_type=Archer, health=80, damage=3-9" 98 | 89,2013-01-12 04:57:35,UnitTrained,3,"unit_type=Footman, health=60, damage=4-8" 99 | 90,2013-01-12 05:08:09,DestroyedEnemy,2,"unit_type=Pig Farm" 100 | 91,2013-01-12 05:15:40,StructureBuilt,2,"structure_type=Archery, health=350" 101 | 
92,2013-01-12 05:16:07,ResourcesGathered,1,"resource_type=Wood, quantity=16" 102 | 93,2013-01-12 05:34:56,DestroyedEnemy,3,"unit_type=Archery" 103 | 94,2013-01-12 05:34:56,ResourcesGathered,1,"resource_type=Gold, quantity=4" 104 | 95,2013-01-12 05:34:56,StructureBuilt,2,"structure_type=Castle, health=480" 105 | 96,2013-01-12 05:34:56,DestroyedEnemy,1,"unit_type=Footman" 106 | 97,2013-01-12 05:34:56,ResourcesGathered,1,"resource_type=Wood, quantity=6" 107 | 98,2013-01-12 05:34:56,StructureBuilt,3,"structure_type=Archery, health=350" 108 | 99,2013-01-12 05:34:56,ResourcesGathered,3,"resource_type=Pigs, quantity=14" 109 | 100,2013-01-12 05:34:56,ResourcesGathered,3,"resource_type=Wood, quantity=9" 110 | -------------------------------------------------------------------------------- /examples/cmd.js: -------------------------------------------------------------------------------- 1 | // Example: command line 2 | // 3 | // Stringify parsed JavaScript objects and output to stdout 4 | 5 | var JSONStream = require('JSONStream') 6 | , parser = require('../index'); 7 | 8 | process.stdin 9 | .pipe(parser()) 10 | .pipe(JSONStream.stringify(false)) 11 | .pipe(process.stdout); 12 | 13 | process.stdout.on('error', process.exit); 14 | -------------------------------------------------------------------------------- /examples/server.js: -------------------------------------------------------------------------------- 1 | // Example: server 2 | // 3 | // Send data file to a server, get parsed data back 4 | 5 | var http = require('http') 6 | , JSONStream = require('JSONStream') 7 | , parser = require('../index'); 8 | 9 | var server = http.createServer(function(req, res){ 10 | res.setHeader('Content-Type', 'application/json'); 11 | req 12 | .pipe(parser()) 13 | .pipe(JSONStream.stringify()) 14 | .pipe(res); 15 | }); 16 | 17 | server.listen(process.argv[2] || 8000); 18 | -------------------------------------------------------------------------------- /examples/transform.js: 
// --------------------------------------------------------------------------------
// Example: transform
//
// Add some more transform logic after parsing the data file

var fs = require('fs')
  , Transform = require('stream').Transform
  , JSONStream = require('JSONStream')
  , parser = require('../index');

// Raw data file, streamed from disk
var dataFile = fs.createReadStream('data/demo.csv');

// Object-mode stream: parsed objects come in, enriched records go out
var transform = new Transform({objectMode: true});

// Filled in by the first object that carries a `header` property
transform.header = null;

// Swallow the header object (remembering it), then add a `Game` property
// built from that header to every record that follows.
//
// data:     object received from the upstream stream
// encoding: unused (object mode)
// done:     callback; called with an Error when a record shows up before
//           any header has been seen
transform._transform = function(data, encoding, done) {
  // First data object should be header
  if (data.header) {
    this.header = data.header;
    return done();
  }
  // Without a header there is nothing to enrich the record with; report the
  // problem through the stream instead of throwing on `this.header[...]`
  if (!this.header) {
    return done(new Error('Received a data record before the header'));
  }
  // Add some game info to all records using the header
  data['Game'] = {
    'id': this.header['GameId'],
    'players': this.header['Players'],
    'map': this.header['Map']
  };
  this.push(data);
  done();
};

// We could do something with our transformed data records,
// like save them to a database by piping them to a `Writable` stream
// that will handle that.
35 | // Here we'll just stringify them to stdout 36 | dataFile 37 | .pipe(parser()) 38 | .pipe(transform) 39 | .pipe(JSONStream.stringify(false)) 40 | .pipe(process.stdout); 41 | 42 | process.stdout.on('error', process.exit); 43 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var csv = require('csv-streamify') 2 | , StreamCombiner = require('./lib/streamcombiner') 3 | , parser = require('./lib/parser'); 4 | 5 | module.exports = function(options) { 6 | // Buffer of CSV data file going in 7 | // JavaScript objects going out 8 | return new StreamCombiner(csv({objectMode: true}), 9 | parser(options)); 10 | }; 11 | -------------------------------------------------------------------------------- /lib/demo.js: -------------------------------------------------------------------------------- 1 | // Generate demo data 2 | 3 | var util = require('util') 4 | , Readable = require('stream').Readable 5 | , Chance = require('chance/chance') // Should be fixed in next version 6 | , moment = require('moment') 7 | , _ = require('lodash'); 8 | 9 | var dateFormat = 'YYYY-MM-DD HH:mm:ss'; 10 | 11 | // Exponential random number generator 12 | // Time until next arrival 13 | function randomExponential(rate, randomUniform) { 14 | // http://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates 15 | rate = rate || 1; 16 | 17 | // Allow to pass a random uniform value or function 18 | // Default to Math.random() 19 | var U = randomUniform; 20 | if (typeof randomUniform === 'function') U = randomUniform(); 21 | if (!U) U = Math.random(); 22 | 23 | return -Math.log(U)/rate; 24 | } 25 | 26 | // Geometric random number generator 27 | // Number of failures before the first success, 28 | // supported on the set {0, 1, 2, 3, ...} 29 | function randomGeometric(successProbability, randomUniform) { 30 | // 
http://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions 31 | successProbability = successProbability || 1 - Math.exp(-1); // Equivalent to rate = 1 32 | 33 | var rate = -Math.log(1 - successProbability); 34 | 35 | return Math.floor(randomExponential(rate, randomUniform)); 36 | } 37 | 38 | // Generate demo data (header, records) as plain old objects 39 | function DemoDataGenerator(options) { 40 | options = options || {}; 41 | 42 | var seed = this.seed = options.seed || undefined, 43 | minTotalRecords = options.min || 1, 44 | maxTotalRecords = options.max || 100; 45 | 46 | var chance = this.chance = new Chance(seed); 47 | 48 | this.timeRange = { 49 | start: moment('2013-01-11').add('seconds', chance.natural({min: 1, max: 60*60*12})), 50 | end: moment('2013-01-12').add('seconds', chance.natural({min: 1, max: 60*60*12})) 51 | }; 52 | 53 | this.totalRecords = chance.natural({min: minTotalRecords, max: maxTotalRecords}); 54 | this.recordsCount = 0; 55 | 56 | // Used in generating next record timestamp 57 | this.lastEventTimestamp = this.timeRange.start; 58 | this.remainingSeconds = this.timeRange.end.diff(this.timeRange.start, 'seconds'); 59 | this.averageEventRate = this.totalRecords/this.remainingSeconds; 60 | 61 | this.eventGenerators = this.createEventGenerators(); 62 | this.eventTypes = _.keys(this.eventGenerators); 63 | // Make ResourcesGathered appear more often 64 | for (var i = 1; i <= 3; i = ++i) { 65 | this.eventTypes.push('ResourcesGathered'); 66 | } 67 | } 68 | 69 | DemoDataGenerator.prototype.getHeader = function() { 70 | // If header was already generated, return it 71 | if (this.header) return this.header; 72 | 73 | var chance = this.chance; 74 | 75 | var header = { 76 | title: 'Game Export (v1.2)', 77 | gameId: this.seed || 'random', 78 | players: [ 79 | {id: 1, name: 'Homer Simpson'}, 80 | {id: 2, name: 'Bart Simpson'} 81 | ], 82 | map: { 83 | id: chance.natural({min: 100, max: 200}), 84 | name: 'Crossroads' 85 | }, 86 | timeRange: { 
87 | start: this.timeRange.start.format(dateFormat), 88 | end: this.timeRange.end.format(dateFormat) 89 | }, 90 | totalRecords: this.totalRecords 91 | }; 92 | 93 | // Add a third player, or not 94 | if (chance.bool()) header.players.push({id: 3, name: 'Marge Simpson'}); 95 | 96 | // Save header so we always return the same one if method is called more 97 | // than once 98 | this.header = header; 99 | 100 | return header; 101 | }; 102 | 103 | // Return next record timestamp 104 | DemoDataGenerator.prototype._nextRecordTimestamp = function() { 105 | if (this.recordsCount >= this.totalRecords) { 106 | // Make sure last record has ending timestamp 107 | return this.timeRange.end; 108 | } 109 | else { 110 | // Not great for uniform random number, but need to seed 111 | var randomUniform = this.chance.natural({min: 0, max: 9999})/10000; 112 | var timeToNextEvent = Math.floor(randomExponential( 113 | this.averageEventRate, randomUniform)); 114 | // If we go over the end time, just return the end time 115 | // Shouldn't happen that much, especially with many events 116 | if (timeToNextEvent >= this.remainingSeconds) { 117 | // Make sure all further events will be at end time as well 118 | this.remainingSeconds = 0; 119 | this.lastEventTimestamp = this.timeRange.end; 120 | return this.lastEventTimestamp; 121 | } 122 | else { 123 | this.remainingSeconds = this.remainingSeconds - timeToNextEvent; 124 | this.lastEventTimestamp.add('seconds', timeToNextEvent); 125 | return this.lastEventTimestamp; 126 | } 127 | } 128 | }; 129 | 130 | // Return next record, null if reached limit 131 | DemoDataGenerator.prototype.nextRecord = function() { 132 | if (this.recordsCount >= this.totalRecords) return null; 133 | var record = {}; 134 | this.recordsCount = this.recordsCount + 1; 135 | 136 | record.id = this.recordsCount; 137 | record.timestamp = this._nextRecordTimestamp().format(dateFormat); 138 | record.playerId = this._recordPlayerId(); 139 | record.event = this._recordEvent(); 140 | 
141 | return record; 142 | }; 143 | 144 | DemoDataGenerator.prototype._recordPlayerId = function() { 145 | var header = this.header || this.getHeader(); 146 | 147 | var playerIndex = this.chance.natural( 148 | {min: 0, max: header.players.length - 1}); 149 | 150 | return header.players[playerIndex].id; 151 | }; 152 | 153 | DemoDataGenerator.prototype._recordEvent = function() { 154 | var eventTypeIndex = this.chance.natural( 155 | {min: 0, max: this.eventTypes.length - 1}); 156 | var eventType = this.eventTypes[eventTypeIndex]; 157 | return this.eventGenerators[eventType](); 158 | }; 159 | 160 | DemoDataGenerator.prototype.createEventGenerators = function() { 161 | // If already created, return them 162 | // (in case this function is called more than once) 163 | if (this.eventGenerators) return this.eventGenerators; 164 | 165 | var self = this, 166 | eventGenerators = {}; 167 | 168 | // ---------- ResourcesGathered 169 | var resources = { 170 | 'Gold': {min: 1, max: 10}, 171 | 'Wood': {min: 5, max: 20}, 172 | 'Pigs': {min: 3, max: 15} 173 | }; 174 | var resourceTypes = _.keys(resources); 175 | eventGenerators['ResourcesGathered'] = function() { 176 | var resourceType = resourceTypes[self.randomNatural( 177 | 0, resourceTypes.length - 1)]; 178 | var resource = resources[resourceType]; 179 | return { 180 | event_type: 'ResourcesGathered', 181 | resource_type: resourceType, 182 | quantity: self.randomNatural(resource.min, resource.max) 183 | }; 184 | }; 185 | 186 | // ---------- StructureBuilt 187 | var structures = { 188 | 'Pig Farm': {health: this.randomNatural(15, 25)*10}, 189 | 'Barracks': {health: this.randomNatural(25, 35)*10}, 190 | 'Archery': {health: this.randomNatural(25, 35)*10}, 191 | 'Castle': {health: this.randomNatural(45, 55)*10} 192 | }; 193 | var structureTypes = _.keys(structures); 194 | eventGenerators['StructureBuilt'] = function() { 195 | var structureType = structureTypes[self.randomNatural( 196 | 0, structureTypes.length - 1)]; 197 | var 
structure = structures[structureType]; 198 | return { 199 | event_type: 'StructureBuilt', 200 | structure_type: structureType, 201 | health: structure.health 202 | }; 203 | }; 204 | 205 | // ---------- UnitTrained 206 | var units = { 207 | 'Peasant': { 208 | health: this.randomNatural(2, 4)*10, 209 | damage: {min: this.randomNatural(1, 3), max: this.randomNatural(4, 7)} 210 | }, 211 | 'Footman': { 212 | health: this.randomNatural(5, 10)*10, 213 | damage: {min: this.randomNatural(4, 7), max: this.randomNatural(8, 10)} 214 | }, 215 | 'Archer': { 216 | health: this.randomNatural(5, 10)*10, 217 | damage: {min: this.randomNatural(3, 5), max: this.randomNatural(6, 9)} 218 | }, 219 | 'Knight': { 220 | health: this.randomNatural(20, 30)*10, 221 | damage: {min: this.randomNatural(11, 14), max: this.randomNatural(15, 20)} 222 | } 223 | }; 224 | var unitTypes = _.keys(units); 225 | eventGenerators['UnitTrained'] = function() { 226 | var unitType = unitTypes[self.randomNatural( 227 | 0, unitTypes.length - 1)]; 228 | var unit = units[unitType]; 229 | return { 230 | event_type: 'UnitTrained', 231 | unit_type: unitType, 232 | health: unit.health, 233 | damage: unit.damage.min + '-' + unit.damage.max 234 | }; 235 | }; 236 | 237 | // ---------- DestroyedEnemy 238 | var enemyTypes = structureTypes.concat(unitTypes); 239 | eventGenerators['DestroyedEnemy'] = function() { 240 | var enemyType = enemyTypes[self.randomNatural( 241 | 0, enemyTypes.length - 1)]; 242 | return { 243 | event_type: 'DestroyedEnemy', 244 | unit_type: enemyType 245 | }; 246 | }; 247 | 248 | return eventGenerators; 249 | }; 250 | 251 | // Helper to generate random natural numbers 252 | DemoDataGenerator.prototype.randomNatural = function(min, max) { 253 | return this.chance.natural({min: min, max: max}); 254 | }; 255 | 256 | 257 | // Stream demo data as CSV 258 | function DemoDataStream(options) { 259 | Readable.call(this, options); 260 | 261 | this.generator = new DemoDataGenerator(options); 262 | } 263 | 264 | 
265 | util.inherits(DemoDataStream, Readable); 266 | 267 | DemoDataStream.prototype._read = function() { 268 | if (!this.headerSent) { 269 | this.push(this._serializeHeader(this.generator.getHeader())); 270 | this.headerSent = true; 271 | } 272 | else { 273 | var record = this.generator.nextRecord(); 274 | // Stop streaming when we reach limit 275 | if (!record) return this.push(null); 276 | 277 | this.push(this._serializeRecord(record) + '\n'); 278 | } 279 | }; 280 | 281 | DemoDataStream.prototype._serializeHeader = function(header) { 282 | return [ 283 | header.title, 284 | 'GameId,' + header.gameId, 285 | _.map(header.players, function(player) { 286 | return ['Player', player.id, player.name].join(','); 287 | }).join('\n'), 288 | ['Map', header.map.id, header.map.name].join(','), 289 | ['Time Range', header.timeRange.start, header.timeRange.end].join(','), 290 | 'Number of Records,' + header.totalRecords, 291 | 'Index,Timestamp,Event Type,Player Id,Event Data\n' 292 | ].join('\n'); 293 | }; 294 | 295 | DemoDataStream.prototype._serializeRecord = function(record) { 296 | return [ 297 | record.id, 298 | record.timestamp, 299 | record.event.event_type, 300 | record.playerId, 301 | this._serializeEventData(record.event) 302 | ].join(','); 303 | }; 304 | 305 | DemoDataStream.prototype._serializeEventData = function(event) { 306 | var values = _.clone(event); 307 | delete values.event_type; 308 | values = _.pairs(values); 309 | values = _.map(values, function(pair) { 310 | return pair.join('='); 311 | }); 312 | values = values.join(', '); 313 | return '"' + values + '"'; 314 | }; 315 | 316 | module.exports = { 317 | DemoDataStream: DemoDataStream 318 | }; 319 | -------------------------------------------------------------------------------- /lib/generatedemo.js: -------------------------------------------------------------------------------- 1 | // Run to generate demo data to std out 2 | // Ex: 3 | // $ node lib/generatedemo > data/demo.csv 4 | // to see a sample: 5 
//   $ node lib/generatedemo | head -n 20

var DemoDataStream = require('./demo').DemoDataStream;

var stream = new DemoDataStream({seed: 1234567, min: 100, max: 100});
// var stream = new DemoDataStream({min: 10, max: 100});

stream.pipe(process.stdout);

// The `head` program will emit EPIPE error on stdout when it doesn't want
// data anymore
// Wrap `process.exit` so the error object the listener receives is not
// passed on as the exit code argument
process.stdout.on('error', function() {
  process.exit();
});

// --------------------------------------------------------------------------------
// /lib/parser.js:
// --------------------------------------------------------------------------------
var util = require('util')
  , Transform = require('stream').Transform
  , _ = require('lodash');

// The column names line is the last line of the header
var COLUMNS_LINE = 'Index,Timestamp,Event Type,Player Id,Event Data';
var COLUMNS = COLUMNS_LINE.split(',');

// Transform stream turning CSV rows (arrays of strings) into objects:
// first one `{header: ...}` object, then one object per data row.
//
// options: forwarded to the Transform constructor; `objectMode` is always
//          forced to true
function Parser(options) {
  // Parser expects objects coming in, and will emit objects going out
  // Work on a copy so the caller's options object is not modified
  options = _.extend({}, options || {}, {objectMode: true});

  Transform.call(this, options);

  // Header rows collected until the column names line is seen
  this._rawHeader = [];
  // Parsed header; stays null while the header is still being read
  this.header = null;
}

// Parser is a `Transform` stream (readable and writable)
// Pipe data through it and get parsed data out of it
util.inherits(Parser, Transform);

// Collect rows until the header is complete, then parse every further row.
// Anything thrown while parsing is handed to `done` so it surfaces as a
// stream error instead of escaping from the write call.
Parser.prototype._transform = function(data, encoding, done) {
  try {
    if (!this.header) {
      this._rawHeader.push(data);
      if (this._isHeaderEnd(data)) {
        this.header = this._parseRawHeader(this._rawHeader);
        // Let the world know we are done parsing the header
        this.emit('header', this.header);
        this.push({header: this.header});
      }
    }
    // After parsing the header, push data rows
    else {
      this.push(this._parseRow(data));
    }
  }
  catch (err) {
    return done(err);
  }
  done();
};

// Test if a line is the last header item
// `data` is a row given as an array of cells
Parser.prototype._isHeaderEnd = function(data) {
  return data.join(',') === COLUMNS_LINE;
};

// Make header lines one pretty object
46 | Parser.prototype._parseRawHeader = function(rawHeader) { 47 | var header = {} 48 | , self = this; 49 | _.forEach(rawHeader, function(row) { 50 | var parsedHeaderRow = self._parseHeaderRow(row); 51 | // Players are added to an array 52 | if (parsedHeaderRow['Player']) { 53 | if (!header['Players']) header['Players'] = []; 54 | header['Players'].push(parsedHeaderRow['Player']); 55 | } 56 | // The rest is just added to the header object 57 | else { 58 | _.extend(header, parsedHeaderRow); 59 | } 60 | }); 61 | return header; 62 | }; 63 | 64 | Parser.prototype._parseHeaderRow = function(row) { 65 | var result = {} 66 | , key = row[0]; 67 | if (key.match(/^Game Export/)) { 68 | result['Title'] = key; 69 | } 70 | else if (key === 'Player' || key === 'Map') { 71 | result[key] = { 72 | 'id': row[1], 73 | 'name': row[2] 74 | }; 75 | } 76 | else if (key === 'Time Range') { 77 | result[key] = { 78 | 'start': row[1], 79 | 'end': row[2] 80 | }; 81 | } 82 | // Indicates column names line 83 | else if (key === 'Index') { 84 | result['Columns'] = row; 85 | } 86 | // Default behavior 87 | else { 88 | result[key] = row[1]; 89 | } 90 | return result; 91 | }; 92 | 93 | // Parse a data row into an object 94 | Parser.prototype._parseRow = function(row) { 95 | var result = _.zipObject(COLUMNS, row); 96 | // "Expand" event data column 97 | var eventData = result['Event Data']; 98 | result['Event Data'] = {}; 99 | eventData = eventData.split(', '); 100 | _.forEach(eventData, function(item) { 101 | item = item.split('='); 102 | result['Event Data'][item[0]] = item[1]; 103 | }); 104 | return result; 105 | }; 106 | 107 | module.exports = function(options) { 108 | return new Parser(options); 109 | }; 110 | -------------------------------------------------------------------------------- /lib/streamcombiner.js: -------------------------------------------------------------------------------- 1 | /* StreamCombiner 2 | Combine a pipe of multiple streams into one stream. 
3 | 4 | Example: 5 | 6 | var stream3 = new StreamCombiner(stream1, stream2); 7 | process.stdin.pipe(stream3).pipe(process.stdout); 8 | // The line above will do this: 9 | // process.stdin.pipe(stream1).pipe(stream2).pipe(process.stdout); 10 | 11 | Thanks to Brandon Tilley (https://github.com/BinaryMuse) 12 | for this code snippet. 13 | */ 14 | 15 | var util = require('util') 16 | , PassThrough = require('stream').PassThrough; 17 | 18 | var StreamCombiner = function() { 19 | this.streams = Array.prototype.slice.apply(arguments); 20 | 21 | // When a source stream is piped to us, undo that pipe, and save 22 | // off the source stream piped into our internally managed streams. 23 | this.on('pipe', function(source) { 24 | source.unpipe(this); 25 | for(var i in this.streams) { 26 | source = source.pipe(this.streams[i]); 27 | } 28 | this.transformStream = source; 29 | }); 30 | }; 31 | 32 | util.inherits(StreamCombiner, PassThrough); 33 | 34 | // When we're piped to another stream, instead pipe our internal 35 | // transform stream to that destination. 
36 | StreamCombiner.prototype.pipe = function(dest, options) { 37 | return this.transformStream.pipe(dest, options); 38 | }; 39 | 40 | module.exports = StreamCombiner; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "example-stream-parser", 3 | "version": "0.0.0", 4 | "description": "An example Node.js streaming data file parser", 5 | "private": true, 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/nicolashery/example-stream-parser.git" 9 | }, 10 | "dependencies": { 11 | "chance": "~0.3.1", 12 | "moment": "~2.0.0", 13 | "lodash": "~1.3.1", 14 | "JSONStream": "~0.6.4", 15 | "csv-streamify": "~0.4.0" 16 | }, 17 | "devDependencies": { 18 | "grunt-contrib-watch": "~0.4.4", 19 | "grunt": "~0.4.1", 20 | "grunt-contrib-jshint": "~0.6.0" 21 | } 22 | } 23 | --------------------------------------------------------------------------------