├── .travis.yml ├── .npmignore ├── .gitignore ├── test ├── data │ ├── arraywelldata-no-headers.js │ ├── arraywelldata.js │ ├── welldata2.js │ ├── classdata.js │ └── welldata1.js └── test.js ├── .github └── FUNDING.yml ├── LICENSE ├── benchmark ├── index.js └── treeize-previous.js ├── package.json ├── lib └── treeize.js └── README.md /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: stable 3 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | Gruntfile.* 2 | /node_modules 3 | /test 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/node_modules 2 | /node_modules 3 | /npm-debug.log 4 | .DS_STORE 5 | -------------------------------------------------------------------------------- /test/data/arraywelldata-no-headers.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | ["RA", "LB", "RA-001", "LB", 5000, "12/12/2014"], 3 | ["RA", "LB", "RA-001", "LB", 5050, "12/13/2014"], 4 | ["RA", "LB", "RA-001", "LB", 6076, "12/14/2014"], 5 | ["RA", "UB", "RA-002", "UB", 4500, "12/12/2014"], 6 | ["SA", "MA", "SA-032", "MA", 2050, "12/12/2014"], 7 | ["SA", "MA", "SA-031", "MA", 850, "12/11/2014"], 8 | ]; 9 | -------------------------------------------------------------------------------- /test/data/arraywelldata.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | ["code", "reservoirs:code", "wells:uwi", "wells:reservoirs:code", "wells:log+:effluent", "wells:log+:date*"], 3 | ["RA", "LB", "RA-001", "LB", 5000, "12/12/2014"], 4 | ["RA", "LB", "RA-001", "LB", 5050, "12/13/2014"], 5 | ["RA", "LB", "RA-001", "LB", 6076, "12/14/2014"], 
6 | ["RA", "UB", "RA-002", "UB", 4500, "12/12/2014"], 7 | ["SA", "MA", "SA-032", "MA", 2050, "12/12/2014"], 8 | ["SA", "MA", "SA-031", "MA", 850, "12/11/2014"], 9 | ]; 10 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: kwhitley 4 | open_collective: kevinrwhitley 5 | # patreon: # Replace with a single Patreon username 6 | # ko_fi: # Replace with a single Ko-fi username 7 | # tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | # community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | # liberapay: # Replace with a single Liberapay username 10 | # issuehunt: # Replace with a single IssueHunt username 11 | # otechie: # Replace with a single Otechie username 12 | # custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /test/data/welldata2.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | { 3 | "code*": "RA", 4 | "reservoirs:code": "UB", 5 | "wells:uwi": "RA-002", 6 | "wells:reservoirs:code": "UB", 7 | }, 8 | { 9 | "code*": "SA", 10 | "reservoirs:code": "MA", 11 | "wells:uwi": "SA-032", 12 | "wells:reservoirs:code": "MA", 13 | "wells:log+:wc": 0.1, 14 | "wells:log+:date*": "12/12/2014", 15 | }, 16 | { 17 | "code*": "SA", 18 | "reservoirs:code": "MA", 19 | "wells:uwi": "SA-032", 20 | "wells:reservoirs:code": "MA", 21 | "wells:log+:wc": 0.2, 22 | "wells:log+:date*": "12/13/2014", 23 | } 24 | ]; 25 | -------------------------------------------------------------------------------- /test/data/classdata.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | 
["Subject+:Courses:Date Added", "Subject+:Courses:Name", "Subject+:Courses:Description", "Subject+:Courses:Link", "Subject+:Courses:Media", "Providers:Name", "Name", "Subject+:Courses:Instructor", "Subject+:Courses:Start Date", "Subject+:Courses:Duration", "Subject+:Name"], 3 | ["07/16/13", "9/11 and Its Aftermath -- Part I", "Provided by Duke University, this course investigates the forces leading up to the 9/11 attacks and the policies adopted by the US afterwards.", "https://www.coursera.org/course/911aftermath", "full course", "Coursera", "Duke University", "David Schanzer", "09/09/13", "7 weeks", "liberal-arts"], 4 | ["07/16/13", "Analysis of a Complex Kind", "This course educates students on the subject of complex analysis, which is the study of functions that live in the complex plane.", "https://www.coursera.org/course/complexanalysis", "full course", "Coursera", "Wesleyan University", "Dr. Petra Bonfert-Taylor", "10/21/13", "6 weeks", "math"], 5 | ]; 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Kevin Whitley 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /benchmark/index.js: -------------------------------------------------------------------------------- 1 | var util = require('util'); 2 | var TreeizeStable = require('./treeize-previous'); 3 | var TreeizeUnstable = require('../lib/treeize'); 4 | 5 | var welldata1 = require('../test/data/welldata1'); 6 | var welldata2 = require('../test/data/welldata2'); 7 | var arraywelldata = require('../test/data/arraywelldata'); 8 | var classdata = require('../test/data/classdata'); 9 | var Benchmark = require('benchmark'); 10 | var _ = require('lodash'); 11 | 12 | var suite = new Benchmark.Suite; 13 | 14 | console.log('Benchmarking Stable vs Unstable...') 15 | 16 | // add tests 17 | suite 18 | .add('Treeize[Unstable]', function() { 19 | var fields = new TreeizeUnstable(); 20 | fields 21 | .grow(welldata1) 22 | .grow(welldata2) 23 | .grow(arraywelldata) 24 | ; 25 | }) 26 | .add('Treeize[Stable]', function() { 27 | var fields = new TreeizeStable(); 28 | fields 29 | .grow(welldata1) 30 | .grow(welldata2) 31 | .grow(arraywelldata) 32 | ; 33 | }) 34 | // add listeners 35 | .on('cycle', function(event) { 36 | console.log(String(event.target)); 37 | }) 38 | .on('complete', function() { 39 | console.log('Fastest is ' + this.filter('fastest').map('name')); 40 | }) 41 | // run async 42 | .run({ 'async': true }) 43 | ; 44 | 45 | // console.log('KEYWORDS>', keywords + ''); 46 | // console.log('STATS>', 
util.inspect(keywords.stats, false, null)); 47 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "treeize", 3 | "version": "2.1.2", 4 | "description": "Converts tabular row data (as from SQL joins, flat JSON, etc) to deep object graphs based on simple column naming conventions - without the use of an ORM or models.", 5 | "main": "./lib/treeize.js", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/kwhitley/treeize.git" 9 | }, 10 | "keywords": [ 11 | "JSON", 12 | "SQL", 13 | "CSV", 14 | "excel", 15 | "tree", 16 | "object", 17 | "graph", 18 | "hydration", 19 | "incongrous", 20 | "multi-source", 21 | "model", 22 | "deep", 23 | "convert", 24 | "expand", 25 | "flat", 26 | "array", 27 | "ORM" 28 | ], 29 | "author": "Kevin R. Whitley (http://krwhitley.com/)", 30 | "licenses": [ 31 | { 32 | "type": "MIT", 33 | "url": "http://en.wikipedia.org/wiki/MIT_License" 34 | } 35 | ], 36 | "bugs": { 37 | "url": "https://github.com/kwhitley/treeize/issues" 38 | }, 39 | "dependencies": { 40 | "inflection": "^1.12.0", 41 | "lodash": "^4.17.4", 42 | "object-merge": "^2.5.1" 43 | }, 44 | "devDependencies": { 45 | "benchmark": "^2.1.4", 46 | "mocha": "latest", 47 | "nodemon": "^1.11.0", 48 | "should": "latest" 49 | }, 50 | "scripts": { 51 | "test": "mocha", 52 | "test:watch": "mocha --watch test/test.js .", 53 | "benchmark": "npm run test:watch & nodemon --watch . 
benchmark/index.js" 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /test/data/welldata1.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | { 3 | "code": "RA", 4 | "reservoirs:code": "LB", 5 | "wells:uwi": "RA-001", 6 | "wells:reservoirs:code": "LB", 7 | "wells:log+:oilrate": 5000, 8 | "wells:log+:date*": "12/12/2014", 9 | }, 10 | { 11 | "code": "RA", 12 | "reservoirs:code": "LB", 13 | "wells:uwi": "RA-001", 14 | "wells:reservoirs:code": "LB", 15 | "wells:log+:oilrate": 5000, 16 | "wells:log+:date*": "12/12/2014", 17 | }, 18 | { 19 | "code": "RA", 20 | "reservoirs:code": "LB", 21 | "wells:uwi": "RA-001", 22 | "wells:reservoirs:code": "LB", 23 | "wells:log+:oilrate": 5050, 24 | "wells:log+:date*": "12/13/2014", 25 | }, 26 | { 27 | "code": "RA", 28 | "reservoirs:code": "LB", 29 | "wells:uwi": "RA-001", 30 | "wells:reservoirs:code": "LB", 31 | "wells:log+:wc": 0.5, 32 | "wells:log+:date*": "12/13/2014", 33 | }, 34 | { 35 | "code": "RA", 36 | "reservoirs:code": null, 37 | "wells:uwi": "RA-002", 38 | "wells:reservoirs:code": null, 39 | "wells:reservoir": "UB", 40 | "wells:log+:oilrate": 4500, 41 | "wells:log+:date*": "12/12/2014", 42 | }, 43 | { 44 | "code": "SA", 45 | "reservoirs:code": "MA", 46 | "wells:uwi": "SA-032", 47 | "wells:reservoirs:code": "MA", 48 | "wells:log+:oilrate": 2050, 49 | "wells:log+:date*": "12/12/2014", 50 | }, 51 | ]; 52 | -------------------------------------------------------------------------------- /lib/treeize.js: -------------------------------------------------------------------------------- 1 | var inflection = require('inflection') 2 | var merge = require('object-merge') 3 | var _ = require('lodash') 4 | 5 | var isArray = function(item) { 6 | return _.isArray(item) 7 | } 8 | 9 | var isEmpty = function(item) { 10 | return !item || (typeof item === 'object' && !Object.keys(item).length) 11 | } 12 | 13 | var where = 
function(collection, props) { 14 | return collection.filter(item => { 15 | for (var attribute in props) { 16 | let value = props[attribute] 17 | 18 | if (item[attribute] !== value) { 19 | return false 20 | } 21 | } 22 | 23 | return true 24 | }) 25 | } 26 | 27 | var findWhere = function(collection, props) { 28 | return _.find(collection, props) 29 | } 30 | 31 | function Treeize(options) { 32 | this.baseOptions = { 33 | input: { 34 | delimiter: ':', 35 | detectCollections: true, 36 | uniformRows: false, 37 | }, 38 | output: { 39 | prune: true, 40 | objectOverwrite: true, 41 | resultsAsObject: false, 42 | }, 43 | log: false, 44 | } 45 | 46 | this.data = { 47 | signature: { 48 | nodes: [], 49 | type: null, 50 | }, 51 | seed: [], 52 | tree: [], 53 | } 54 | 55 | this.stats = { 56 | time: { 57 | total: 0, 58 | signatures: 0, 59 | }, 60 | rows: 0, 61 | sources: 0, 62 | } 63 | 64 | // set default options (below) 65 | this.resetOptions() 66 | 67 | if (options) { 68 | this.options(options) 69 | } 70 | 71 | return this 72 | } 73 | 74 | Treeize.prototype.log = function() { 75 | if (this._options.log) { 76 | console.log.apply(this, arguments) 77 | } 78 | 79 | return this 80 | } 81 | 82 | Treeize.prototype.getData = function() { 83 | return this.data.tree 84 | } 85 | 86 | Treeize.prototype.getSeedData = function() { 87 | return this.data.seed 88 | } 89 | 90 | Treeize.prototype.getStats = function() { 91 | return this.stats 92 | } 93 | 94 | /* 95 | Reads the signature from a given row to determine path mapping. 
If passed without params, assumes 96 | a forced reading which will last 97 | */ 98 | Treeize.prototype.signature = function(row, options, auto) { 99 | if (!row) { 100 | return this.data.signature 101 | } 102 | 103 | // start timer 104 | var t1 = (new Date()).getTime() 105 | 106 | // sets the signature as fixed (or not) when manually set 107 | this.data.signature.isFixed = auto !== true 108 | 109 | var nodes = this.data.signature.nodes = [] 110 | var isRowAnArray = isArray(row) 111 | var opt = merge(this._options, options || {}) 112 | 113 | this.data.signature.type = isArray ? 'array' : 'object' 114 | 115 | for (var key in row) { 116 | let value = row[key] 117 | var attr = {} 118 | 119 | attr.key = typeof key === 'number' ? key : key//.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 120 | attr.fullPath = isRowAnArray ? value : key 121 | attr.split = attr.fullPath.split(opt.input.delimiter) 122 | attr.path = attr.split.slice(0,attr.split.length-1).join(opt.input.delimiter) 123 | attr.parent = attr.split.slice(0,attr.split.length-2).join(opt.input.delimiter)//.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 124 | attr.node = attr.split[attr.split.length - 2] 125 | attr.attr = attr.split[attr.split.length - 1] 126 | 127 | if (attr.attr.match(/\*/gi)) { 128 | attr.attr = attr.attr.replace(/[\*]/gi,'') 129 | attr.pk = true 130 | } 131 | 132 | if (attr.pk) { 133 | this.log('primary key detected in node "' + attr.attr + '"') 134 | } 135 | 136 | // set up node reference 137 | var node = findWhere(nodes, { path: attr.path }) 138 | if (!node) { 139 | node = { path: attr.path, attributes: [], blueprint: [] } 140 | nodes.push(node) 141 | } 142 | 143 | node.isCollection = !attr.node || (opt.input.detectCollections && inflection.pluralize(attr.node) === attr.node) 144 | 145 | var collectionFlag = attr.node && attr.node.match(/^[\-\+]|[\-\+]$/g) 146 | if (collectionFlag) { 147 | //this.log('collection flag "' + collectionFlag + '" detected in node "' + attr.node + '"') 148 | node.flags = true 149 | 
node.isCollection = attr.node.match(/^\+|\+$/g) 150 | attr.node = attr.node.replace(/^[\*\-\+]|[\*\-\+]$/g,'') // clean node 151 | } 152 | 153 | node.name = attr.node 154 | node.depth = attr.split.length - 1 155 | node.parent = attr.split.slice(0, attr.split.length - 2).join(opt.input.delimiter) 156 | node.attributes.push({ name: attr.attr, key: attr.key }) 157 | if (attr.pk) { 158 | //this.log('adding node to blueprint') 159 | node.flags = true 160 | node.blueprint.push({ name: attr.attr, key: attr.key }) 161 | } 162 | } 163 | 164 | // backfill blueprint when not specifically defined 165 | nodes.forEach(function(node) { 166 | if (!node.blueprint.length) { 167 | node.blueprint = node.attributes 168 | } 169 | }) 170 | 171 | nodes.sort(function(a, b) { return a.depth < b.depth ? -1 : 1 }) 172 | 173 | // end timer and add time 174 | var t2 = ((new Date()).getTime() - t1) 175 | this.stats.time.signatures += t2 176 | this.stats.time.total += t2 177 | 178 | return this 179 | } 180 | 181 | Treeize.prototype.getSignature = function() { 182 | return this.signature() 183 | } 184 | 185 | Treeize.prototype.setSignature = function(row, options) { 186 | return this.signature(row, options) 187 | } 188 | 189 | Treeize.prototype.setSignatureAuto = function(row, options) { 190 | return this.signature(row, options, true) 191 | } 192 | 193 | Treeize.prototype.clearSignature = function() { 194 | this.data.signature = { nodes: [], type: null } 195 | this.data.signature.isFixed = false 196 | 197 | return this 198 | } 199 | 200 | 201 | Treeize.prototype.grow = function(data, options) { 202 | var opt = merge(this._options, options || {}) 203 | // chain past if no data to grow 204 | if (typeof data !== 'object' || !data.length) { 205 | return this 206 | } 207 | 208 | //this.log('OPTIONS>', opt) 209 | 210 | // locate existing signature (when sharing signatures between data sources) 211 | var signature = this.getSignature() 212 | 213 | // set data uniformity (locally) to true to avoid 
signature fetching on data rows 214 | if (isArray(data[0])) { 215 | opt.input.uniformRows = true 216 | } 217 | 218 | if (!signature.nodes.length) { 219 | //this.log('setting signature from first row of data (auto)') 220 | // set signature from first row 221 | signature = this.setSignatureAuto(data[0], options).getSignature() 222 | 223 | // remove header row in flat array data (avoids processing headers as actual values) 224 | if (isArray(data[0])) { 225 | var originalData = data 226 | data = [] 227 | 228 | // copy data without original signature row before processing 229 | originalData.forEach(function(row, index) { 230 | if (index > 0) { 231 | data.push(row) 232 | } 233 | }) 234 | } 235 | } 236 | 237 | if (opt.output.resultsAsObject && isArray(this.data.tree)) { 238 | this.data.tree = {} 239 | } 240 | 241 | //this.log('SIGNATURE>', util.inspect(this.getSignature(), false, null)) 242 | 243 | this.stats.sources++ 244 | var t1 = (new Date()).getTime() 245 | 246 | data.forEach(function(row) { 247 | this.data.seed.push(row) 248 | var trails = {} // LUT for trails (find parent of new node in trails path) 249 | var trail = base = this.data.tree // OPTIMIZATION: do we need to reset this trail for each row? 
250 | //this.log('CURRENT TRAIL STATUS>', trail) 251 | var t = null 252 | 253 | // set initial base object path for non-array datasets 254 | if (opt.output.resultsAsObject) { 255 | trails[''] = trail 256 | } 257 | 258 | if (!this.data.signature.isFixed && !opt.input.uniformRows) { 259 | //this.log('setting signature from new row of data (auto)') 260 | // get signature from each row 261 | this.setSignatureAuto(row, opt) 262 | //this.log('SIGNATURE>', util.inspect(this.getSignature(), false, null)) 263 | } 264 | 265 | this.stats.rows++ 266 | 267 | if (where(this.signature().nodes, { flags: true }).length) { 268 | // flags detected within signature, clean attributes of row 269 | for (var key in row) { 270 | let value = row[key] 271 | 272 | if (typeof key === 'string') { 273 | var clean = key.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 274 | if (clean !== key) { 275 | //this.log('cleaning key "' + key + '" and embedding as "' + clean + '"') 276 | row[key.replace(/^[\*\-\+]|[\*\-\+]$/g,'')] = value // simply embed value at clean path (if not already) 277 | } 278 | } 279 | } 280 | } 281 | 282 | this.signature().nodes.forEach(function(node) { 283 | //this.log('PROCESSING NODE>', node) 284 | var blueprint = {} 285 | var blueprintExtended = {} 286 | 287 | // create blueprint for locating existing nodes 288 | node.blueprint.forEach(function(attribute) { 289 | var key = (node.path ? (node.path + ':') : '') + attribute.name 290 | blueprint[attribute.name] = row[attribute.key] 291 | //this.log('creating attribute "' + attribute.name + '" within blueprint', row[attribute.key]) 292 | }, this) 293 | 294 | // create full node signature for insertion/updating 295 | node.attributes.forEach(function(attribute) { 296 | var key = (node.path ? 
(node.path + ':') : '') + attribute.name 297 | var value = row[attribute.key] 298 | 299 | // insert extended blueprint attributes when not empty (or not pruning) 300 | if (!opt.output.prune || (value !== null && value !== undefined)) { 301 | //this.log('creating attribute "' + attribute.name + '" within extended blueprint', row[attribute.key]) 302 | blueprintExtended[attribute.name] = row[attribute.key] 303 | } 304 | }, this) 305 | 306 | //this.log('EXTENDED BLUEPRINT>', blueprintExtended) 307 | //this.log('BLUEPRINT>', blueprint) 308 | 309 | // ONLY INSERT IF NOT PRUNED 310 | if (!opt.output.prune || !isEmpty(blueprintExtended)) { 311 | // IF 0 DEPTH AND RESULTSASOBJECT, EXTEND base 312 | if (opt.output.resultsAsObject && node.depth === 0) { 313 | Object.assign(trails[node.path] = trail = base, blueprintExtended) 314 | //this.log('extending blueprint onto base>', trail) 315 | 316 | // IF base TRAIL IS NOT YET MAPPED 317 | } else if (node.isCollection && !(trail = trails[node.parent])) { 318 | //this.log('PARENT TRAIL NOT FOUND (base?)') 319 | // set up target node if doesn't exist 320 | if (!(trail = findWhere(base, blueprint))) { 321 | base.push(trail = blueprintExtended) 322 | } else { 323 | Object.assign(trail, blueprintExtended) 324 | } 325 | trails[node.path] = trail 326 | 327 | // NORMAL NODE TRAVERSAL 328 | } else { 329 | // NOT base CASE 330 | if (node.isCollection) { 331 | // handle collection nodes 332 | //this.log('inserting into collection node', trail) 333 | if (!trail[node.name]) { 334 | // node attribute doesnt exist, create array with fresh blueprint 335 | trail[node.name] = [blueprintExtended] 336 | trails[node.path] = blueprintExtended 337 | } else { 338 | // node attribute exists, find or inject blueprint 339 | var t 340 | if (!(t = findWhere(trail[node.name], blueprint))) { 341 | trail[node.name].push(trail = blueprintExtended) 342 | } else { 343 | Object.assign(t, blueprintExtended) 344 | } 345 | trails[node.path] = t || trail 346 | } 347 | } 
else { 348 | // handle non-collection nodes 349 | if (trail == base && node.parent === '') { 350 | base.push(trails[node.parent] = trail = {}) 351 | //this.log('base insertion') 352 | } 353 | trail = trails[node.parent] 354 | 355 | // ON DEEP NODES, THE PARENT WILL BE TOO LONG AND FAIL ON THE NEXT IF STATEMENT BELOW 356 | // ASSUMPTION: in deep nodes, no signatures will be present, so entries will simply be pushed onto collections defined within 357 | 358 | if (!trail) { // do something to fix a broken trail (usually from too deep?) 359 | // backtrack from parent trail segments until trail found, then create creadcrumbs 360 | var breadcrumbs = [] 361 | var segments = node.parent.split(':') 362 | var numSegments = segments.length 363 | var pathAttempt = node.parent 364 | var segmentsStripped = 0 365 | 366 | //this.log('path MISSING for location "' + pathAttempt + '"') 367 | while (!(trail = trails[pathAttempt])) { 368 | segmentsStripped++ 369 | pathAttempt = segments.slice(0,numSegments-segmentsStripped).join(':') 370 | //this.log('..attempting path location for "' + pathAttempt + '"') 371 | 372 | //infinite loop kickout 373 | if (segmentsStripped > 15) break 374 | } 375 | //this.log('path FOUND for location for "' + pathAttempt + '" after removing ' + segmentsStripped + ' segments') 376 | 377 | // create stored nodes if they don't exist. 378 | segments.slice(numSegments - segmentsStripped).forEach(function(segment) { 379 | var isCollection = ((inflection.pluralize(segment) === segment) || segment.match(/^\+|\+$/)) && (!segment.match(/^\-|\-$/)) 380 | // TODO: add modifier detection 381 | //this.log('creating or trailing path segment ' + (isCollection ? 
'[collection]' : '{object}') + ' "' + segment + '"') 382 | 383 | segment = segment.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 384 | if (isCollection) { 385 | // retrieve or set collection segment and push new trail onto it 386 | (trail[segment] = trail[segment] || []).push(trail = {}) 387 | } else { 388 | trail = trail[segment] = trail[segment] || {} 389 | } 390 | }) 391 | } 392 | 393 | //this.log('inserting into non-collection node') 394 | //if (!trail[node.name]) { // TODO: CONSIDER: add typeof check to this for possible overwriting 395 | if (!trail[node.name] || (opt.output.objectOverwrite && (typeof trail[node.name] !== typeof blueprintExtended))) { 396 | // node attribute doesnt exist, create object 397 | //this.log('create object') 398 | trail[node.name] = blueprintExtended 399 | trails[node.path] = blueprintExtended 400 | } else { 401 | // node attribute exists, set path for next pass 402 | // TODO: extend trail?? 403 | //this.log('object at node "' + node.name + '" exists as "' + trail[node.name] + '", skipping insertion and adding trail') 404 | if (typeof trail[node.name] === 'object') { 405 | trail[node.name] = merge(trail[node.name], blueprintExtended) 406 | } 407 | //this.log('trail[node.name] updated to "' + trail[node.name]) 408 | trails[node.path] = trail[node.path] 409 | } 410 | } 411 | } 412 | // END PRUNE PASS 413 | } 414 | }, this) 415 | }, this) 416 | 417 | var t2 = ((new Date()).getTime() - t1) 418 | this.stats.time.total += t2 419 | 420 | // clear signature between growth sets - TODO: consider leaving this wipe pass off if processing multiple identical sources (add) 421 | if (!signature.isFixed) { 422 | this.signature([]) 423 | } 424 | 425 | return this 426 | } 427 | 428 | /* 429 | .[get|set]options (options) 430 | 431 | Get and sets global options. 
432 | */ 433 | 434 | Treeize.prototype.options = function(options) { 435 | if (!options) { 436 | return merge({}, this._options) 437 | } 438 | 439 | this._options = merge(this._options, options) 440 | 441 | return this 442 | } 443 | 444 | Treeize.prototype.getOptions = function() { 445 | return this._options 446 | } 447 | 448 | Treeize.prototype.setOptions = function(options) { 449 | return this.options(options) 450 | } 451 | 452 | Treeize.prototype.resetOptions = function() { 453 | this._options = merge({}, this.baseOptions) 454 | 455 | return this 456 | } 457 | 458 | Treeize.prototype.toString = function treeToString() { 459 | return 'WARNING: .toString() method of Treeize is deprecated' 460 | } 461 | 462 | module.exports = Treeize 463 | -------------------------------------------------------------------------------- /benchmark/treeize-previous.js: -------------------------------------------------------------------------------- 1 | var inflection = require('inflection') 2 | var merge = require('object-merge') 3 | 4 | var isArray = function(item) { 5 | return typeof item === 'object' && item.length !== undefined 6 | } 7 | 8 | var isEmpty = function(item) { 9 | return !item || (typeof item === 'object' && !Object.keys(item).length) 10 | } 11 | 12 | var where = function(collection, props) { 13 | return collection.map(item => { 14 | for (var attribute in props) { 15 | let value = props[attribute] 16 | 17 | if (item[attribute] !== value) { 18 | return false 19 | } 20 | } 21 | 22 | return true 23 | }) 24 | } 25 | 26 | var findWhere = function(collection, props) { 27 | let matches = collection.filter(item => { 28 | for (var attribute in props) { 29 | let value = props[attribute] 30 | 31 | if (item[attribute] !== value) { 32 | return false 33 | } 34 | } 35 | 36 | return true 37 | }) 38 | 39 | if (!matches.length) { 40 | return false 41 | } 42 | 43 | return matches[0] 44 | } 45 | 46 | function Treeize(options) { 47 | this.baseOptions = { 48 | input: { 49 | delimiter: ':', 
50 | detectCollections: true, 51 | uniformRows: false, 52 | }, 53 | output: { 54 | prune: true, 55 | objectOverwrite: true, 56 | resultsAsObject: false, 57 | }, 58 | log: false, 59 | } 60 | 61 | this.data = { 62 | signature: { 63 | nodes: [], 64 | type: null, 65 | }, 66 | seed: [], 67 | tree: [], 68 | } 69 | 70 | this.stats = { 71 | time: { 72 | total: 0, 73 | signatures: 0, 74 | }, 75 | rows: 0, 76 | sources: 0, 77 | } 78 | 79 | // set default options (below) 80 | this.resetOptions() 81 | 82 | if (options) { 83 | this.options(options) 84 | } 85 | 86 | return this 87 | } 88 | 89 | Treeize.prototype.log = function() { 90 | if (this._options.log) { 91 | console.log.apply(this, arguments) 92 | } 93 | 94 | return this 95 | } 96 | 97 | Treeize.prototype.getData = function() { 98 | return this.data.tree 99 | } 100 | 101 | Treeize.prototype.getSeedData = function() { 102 | return this.data.seed 103 | } 104 | 105 | Treeize.prototype.getStats = function() { 106 | return this.stats 107 | } 108 | 109 | /* 110 | Reads the signature from a given row to determine path mapping. If passed without params, assumes 111 | a forced reading which will last 112 | */ 113 | Treeize.prototype.signature = function(row, options, auto) { 114 | if (!row) { 115 | return this.data.signature 116 | } 117 | 118 | // start timer 119 | var t1 = (new Date()).getTime() 120 | 121 | // sets the signature as fixed (or not) when manually set 122 | this.data.signature.isFixed = auto !== true 123 | 124 | var nodes = this.data.signature.nodes = [] 125 | var isRowAnArray = isArray(row) 126 | var opt = merge(this._options, options || {}) 127 | 128 | this.data.signature.type = isArray ? 'array' : 'object' 129 | 130 | for (var key in row) { 131 | let value = row[key] 132 | var attr = {} 133 | 134 | attr.key = typeof key === 'number' ? key : key//.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 135 | attr.fullPath = isRowAnArray ? 
value : key 136 | attr.split = attr.fullPath.split(opt.input.delimiter) 137 | attr.path = attr.split.slice(0,attr.split.length-1).join(opt.input.delimiter) 138 | attr.parent = attr.split.slice(0,attr.split.length-2).join(opt.input.delimiter)//.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 139 | attr.node = attr.split[attr.split.length - 2] 140 | attr.attr = attr.split[attr.split.length - 1] 141 | 142 | if (attr.attr.match(/\*/gi)) { 143 | attr.attr = attr.attr.replace(/[\*]/gi,'') 144 | attr.pk = true 145 | } 146 | 147 | // if (attr.pk) { 148 | // this.log('primary key detected in node "' + attr.attr + '"') 149 | // } 150 | 151 | // set up node reference 152 | var node = findWhere(nodes, { path: attr.path }) 153 | if (!node) { 154 | node = { path: attr.path, attributes: [], blueprint: [] } 155 | nodes.push(node) 156 | } 157 | 158 | node.isCollection = !attr.node || (opt.input.detectCollections && inflection.pluralize(attr.node) === attr.node) 159 | 160 | var collectionFlag = attr.node && attr.node.match(/^[\-\+]|[\-\+]$/g) 161 | if (collectionFlag) { 162 | //this.log('collection flag "' + collectionFlag + '" detected in node "' + attr.node + '"') 163 | node.flags = true 164 | node.isCollection = attr.node.match(/^\+|\+$/g) 165 | attr.node = attr.node.replace(/^[\*\-\+]|[\*\-\+]$/g,'') // clean node 166 | } 167 | 168 | node.name = attr.node 169 | node.depth = attr.split.length - 1 170 | node.parent = attr.split.slice(0, attr.split.length - 2).join(opt.input.delimiter) 171 | node.attributes.push({ name: attr.attr, key: attr.key }) 172 | if (attr.pk) { 173 | //this.log('adding node to blueprint') 174 | node.flags = true 175 | node.blueprint.push({ name: attr.attr, key: attr.key }) 176 | } 177 | } 178 | 179 | // backfill blueprint when not specifically defined 180 | nodes.forEach(function(node) { 181 | if (!node.blueprint.length) { 182 | node.blueprint = node.attributes 183 | } 184 | }) 185 | 186 | nodes.sort(function(a, b) { return a.depth < b.depth ? 
-1 : 1 }) 187 | 188 | // end timer and add time 189 | var t2 = ((new Date()).getTime() - t1) 190 | this.stats.time.signatures += t2 191 | this.stats.time.total += t2 192 | 193 | return this 194 | } 195 | 196 | Treeize.prototype.getSignature = function() { 197 | return this.signature() 198 | } 199 | 200 | Treeize.prototype.setSignature = function(row, options) { 201 | return this.signature(row, options) 202 | } 203 | 204 | Treeize.prototype.setSignatureAuto = function(row, options) { 205 | return this.signature(row, options, true) 206 | } 207 | 208 | Treeize.prototype.clearSignature = function() { 209 | this.data.signature = { nodes: [], type: null } 210 | this.data.signature.isFixed = false 211 | 212 | return this 213 | } 214 | 215 | 216 | Treeize.prototype.grow = function(data, options) { 217 | var opt = merge(this._options, options || {}) 218 | // chain past if no data to grow 219 | if (typeof data !== 'object' || !data.length) { 220 | return this 221 | } 222 | 223 | //this.log('OPTIONS>', opt) 224 | 225 | // locate existing signature (when sharing signatures between data sources) 226 | var signature = this.getSignature() 227 | 228 | // set data uniformity (locally) to true to avoid signature fetching on data rows 229 | if (isArray(data[0])) { 230 | opt.input.uniformRows = true 231 | } 232 | 233 | if (!signature.nodes.length) { 234 | //this.log('setting signature from first row of data (auto)') 235 | // set signature from first row 236 | signature = this.setSignatureAuto(data[0], options).getSignature() 237 | 238 | // remove header row in flat array data (avoids processing headers as actual values) 239 | if (isArray(data[0])) { 240 | var originalData = data 241 | data = [] 242 | 243 | // copy data without original signature row before processing 244 | originalData.forEach(function(row, index) { 245 | if (index > 0) { 246 | data.push(row) 247 | } 248 | }) 249 | } 250 | } 251 | 252 | if (opt.output.resultsAsObject && isArray(this.data.tree)) { 253 | this.data.tree 
= {} 254 | } 255 | 256 | //this.log('SIGNATURE>', util.inspect(this.getSignature(), false, null)) 257 | 258 | this.stats.sources++ 259 | var t1 = (new Date()).getTime() 260 | 261 | data.forEach(function(row) { 262 | this.data.seed.push(row) 263 | var trails = {} // LUT for trails (find parent of new node in trails path) 264 | var trail = base = this.data.tree // OPTIMIZATION: do we need to reset this trail for each row? 265 | //this.log('CURRENT TRAIL STATUS>', trail) 266 | var t = null 267 | 268 | // set initial base object path for non-array datasets 269 | if (opt.output.resultsAsObject) { 270 | trails[''] = trail 271 | } 272 | 273 | if (!this.data.signature.isFixed && !opt.input.uniformRows) { 274 | //this.log('setting signature from new row of data (auto)') 275 | // get signature from each row 276 | this.setSignatureAuto(row, opt) 277 | //this.log('SIGNATURE>', util.inspect(this.getSignature(), false, null)) 278 | } 279 | 280 | this.stats.rows++ 281 | 282 | if (where(this.signature().nodes, { flags: true }).length) { 283 | // flags detected within signature, clean attributes of row 284 | for (var key in row) { 285 | let value = row[key] 286 | 287 | if (typeof key === 'string') { 288 | var clean = key.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 289 | if (clean !== key) { 290 | //this.log('cleaning key "' + key + '" and embedding as "' + clean + '"') 291 | row[key.replace(/^[\*\-\+]|[\*\-\+]$/g,'')] = value // simply embed value at clean path (if not already) 292 | } 293 | } 294 | } 295 | } 296 | 297 | this.signature().nodes.forEach(function(node) { 298 | //this.log('PROCESSING NODE>', node) 299 | var blueprint = {} 300 | var blueprintExtended = {} 301 | 302 | // create blueprint for locating existing nodes 303 | node.blueprint.forEach(function(attribute) { 304 | var key = (node.path ? 
(node.path + ':') : '') + attribute.name 305 | blueprint[attribute.name] = row[attribute.key] 306 | //this.log('creating attribute "' + attribute.name + '" within blueprint', row[attribute.key]) 307 | }, this) 308 | 309 | // create full node signature for insertion/updating 310 | node.attributes.forEach(function(attribute) { 311 | var key = (node.path ? (node.path + ':') : '') + attribute.name 312 | var value = row[attribute.key] 313 | 314 | // insert extended blueprint attributes when not empty (or not pruning) 315 | if (!opt.output.prune || (value !== null && value !== undefined)) { 316 | //this.log('creating attribute "' + attribute.name + '" within extended blueprint', row[attribute.key]) 317 | blueprintExtended[attribute.name] = row[attribute.key] 318 | } 319 | }, this) 320 | 321 | //this.log('EXTENDED BLUEPRINT>', blueprintExtended) 322 | //this.log('BLUEPRINT>', blueprint) 323 | 324 | // ONLY INSERT IF NOT PRUNED 325 | if (!opt.output.prune || !isEmpty(blueprintExtended)) { 326 | // IF 0 DEPTH AND RESULTSASOBJECT, EXTEND base 327 | if (opt.output.resultsAsObject && node.depth === 0) { 328 | Object.assign(trails[node.path] = trail = base, blueprintExtended) 329 | //this.log('extending blueprint onto base>', trail) 330 | 331 | // IF base TRAIL IS NOT YET MAPPED 332 | } else if (node.isCollection && !(trail = trails[node.parent])) { 333 | //this.log('PARENT TRAIL NOT FOUND (base?)') 334 | // set up target node if doesn't exist 335 | if (!(trail = findWhere(base, blueprint))) { 336 | base.push(trail = blueprintExtended) 337 | } else { 338 | Object.assign(trail, blueprintExtended) 339 | } 340 | trails[node.path] = trail 341 | 342 | // NORMAL NODE TRAVERSAL 343 | } else { 344 | // NOT base CASE 345 | if (node.isCollection) { 346 | // handle collection nodes 347 | //this.log('inserting into collection node', trail) 348 | if (!trail[node.name]) { 349 | // node attribute doesnt exist, create array with fresh blueprint 350 | trail[node.name] = [blueprintExtended] 351 
| trails[node.path] = blueprintExtended 352 | } else { 353 | // node attribute exists, find or inject blueprint 354 | var t 355 | if (!(t = findWhere(trail[node.name], blueprint))) { 356 | trail[node.name].push(trail = blueprintExtended) 357 | } else { 358 | Object.assign(t, blueprintExtended) 359 | } 360 | trails[node.path] = t || trail 361 | } 362 | } else { 363 | // handle non-collection nodes 364 | if (trail == base && node.parent === '') { 365 | base.push(trails[node.parent] = trail = {}) 366 | //this.log('base insertion') 367 | } 368 | trail = trails[node.parent] 369 | 370 | // ON DEEP NODES, THE PARENT WILL BE TOO LONG AND FAIL ON THE NEXT IF STATEMENT BELOW 371 | // ASSUMPTION: in deep nodes, no signatures will be present, so entries will simply be pushed onto collections defined within 372 | 373 | if (!trail) { // do something to fix a broken trail (usually from too deep?) 374 | // backtrack from parent trail segments until trail found, then create creadcrumbs 375 | var breadcrumbs = [] 376 | var segments = node.parent.split(':') 377 | var numSegments = segments.length 378 | var pathAttempt = node.parent 379 | var segmentsStripped = 0 380 | 381 | //this.log('path MISSING for location "' + pathAttempt + '"') 382 | while (!(trail = trails[pathAttempt])) { 383 | segmentsStripped++ 384 | pathAttempt = segments.slice(0,numSegments-segmentsStripped).join(':') 385 | //this.log('..attempting path location for "' + pathAttempt + '"') 386 | 387 | //infinite loop kickout 388 | if (segmentsStripped > 15) break 389 | } 390 | //this.log('path FOUND for location for "' + pathAttempt + '" after removing ' + segmentsStripped + ' segments') 391 | 392 | // create stored nodes if they don't exist. 
393 | segments.slice(numSegments - segmentsStripped).forEach(function(segment) { 394 | var isCollection = ((inflection.pluralize(segment) === segment) || segment.match(/^\+|\+$/)) && (!segment.match(/^\-|\-$/)) 395 | // TODO: add modifier detection 396 | //this.log('creating or trailing path segment ' + (isCollection ? '[collection]' : '{object}') + ' "' + segment + '"') 397 | 398 | segment = segment.replace(/^[\*\-\+]|[\*\-\+]$/g,'') 399 | if (isCollection) { 400 | // retrieve or set collection segment and push new trail onto it 401 | (trail[segment] = trail[segment] || []).push(trail = {}) 402 | } else { 403 | trail = trail[segment] = trail[segment] || {} 404 | } 405 | }) 406 | } 407 | 408 | //this.log('inserting into non-collection node') 409 | //if (!trail[node.name]) { // TODO: CONSIDER: add typeof check to this for possible overwriting 410 | if (!trail[node.name] || (opt.output.objectOverwrite && (typeof trail[node.name] !== typeof blueprintExtended))) { 411 | // node attribute doesnt exist, create object 412 | //this.log('create object') 413 | trail[node.name] = blueprintExtended 414 | trails[node.path] = blueprintExtended 415 | } else { 416 | // node attribute exists, set path for next pass 417 | // TODO: extend trail?? 
418 | //this.log('object at node "' + node.name + '" exists as "' + trail[node.name] + '", skipping insertion and adding trail') 419 | if (typeof trail[node.name] === 'object') { 420 | trail[node.name] = merge(trail[node.name], blueprintExtended) 421 | } 422 | //this.log('trail[node.name] updated to "' + trail[node.name]) 423 | trails[node.path] = trail[node.path] 424 | } 425 | } 426 | } 427 | // END PRUNE PASS 428 | } 429 | }, this) 430 | }, this) 431 | 432 | var t2 = ((new Date()).getTime() - t1) 433 | this.stats.time.total += t2 434 | 435 | // clear signature between growth sets - TODO: consider leaving this wipe pass off if processing multiple identical sources (add) 436 | if (!signature.isFixed) { 437 | this.signature([]) 438 | } 439 | 440 | return this 441 | } 442 | 443 | /* 444 | .[get|set]options (options) 445 | 446 | Get and sets global options. 447 | */ 448 | 449 | Treeize.prototype.options = function(options) { 450 | if (!options) { 451 | return merge({}, this._options) 452 | } 453 | 454 | this._options = merge(this._options, options) 455 | 456 | return this 457 | } 458 | 459 | Treeize.prototype.getOptions = function() { 460 | return this._options 461 | } 462 | 463 | Treeize.prototype.setOptions = function(options) { 464 | return this.options(options) 465 | } 466 | 467 | Treeize.prototype.resetOptions = function() { 468 | this._options = merge({}, this.baseOptions) 469 | 470 | return this 471 | } 472 | 473 | Treeize.prototype.toString = function treeToString() { 474 | return 'WARNING: .toString() method of Treeize is deprecated' 475 | } 476 | 477 | module.exports = Treeize 478 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Treeize.js 2 | 3 | [![Build Status via Travis CI](https://travis-ci.org/kwhitley/treeize.svg)](https://travis-ci.org/kwhitley/treeize) 4 | 5 | Converts row data (in JSON/associative array format or flat 
array format) to object/tree structure based on simple column naming conventions. 6 | 7 | ## Installation 8 | 9 | ``` 10 | npm install treeize 11 | ``` 12 | 13 | ## Why? 14 | 15 | Because APIs usually require data in a deep object graph/collection form, but SQL results (especially heavily joined data), excel, csv, and other flat data sources that we're often forced to drive our applications from represent data in a very "flat" way. Treeize takes this flattened data and based on simple column/attribute naming conventions, remaps it into a deep object graph - all without the overhead/hassle of hydrating a traditional ORM. 16 | 17 | #### What it does... 18 | 19 | ```js 20 | // Treeize turns flat associative data (as from SQL queries) like this: 21 | var peopleData = [ 22 | { 23 | 'name': 'John Doe', 24 | 'age': 34, 25 | 'pets:name': 'Rex', 26 | 'pets:type': 'dog', 27 | 'pets:toys:type': 'bone' 28 | }, 29 | { 30 | 'name': 'John Doe', 31 | 'age': 34, 32 | 'pets:name': 'Rex', 33 | 'pets:type': 'dog', 34 | 'pets:toys:type': 'ball' 35 | }, 36 | { 37 | 'name': 'Mary Jane', 38 | 'age': 19, 39 | 'pets:name': 'Mittens', 40 | 'pets:type': 'kitten', 41 | 'pets:toys:type': 'yarn' 42 | }, 43 | { 44 | 'name': 'Mary Jane', 45 | 'age': 19, 46 | 'pets:name': 'Fluffy', 47 | 'pets:type': 'cat' 48 | } 49 | ]; 50 | 51 | 52 | // ...or flat array-of-values data (as from CSV/excel) like this: 53 | var peopleData = [ 54 | ['name', 'age', 'pets:name', 'pets:type', 'pets:toys:type'], // header row 55 | ['John Doe', 34, 'Rex', 'dog', 'bone'], 56 | ['John Doe', 34, 'Rex', 'dog', 'ball'], 57 | ['Mary Jane', 19, 'Mittens', 'kitten', 'yarn'], 58 | ['Mary Jane', 19, 'Fluffy', 'cat', null] 59 | ]; 60 | 61 | 62 | // ...via a dead-simple implementation: 63 | var Treeize = require('treeize'); 64 | var people = new Treeize(); 65 | 66 | people.grow(peopleData); 67 | 68 | 69 | // ...into deep API-ready object graphs like this: 70 | people.getData() == [ 71 | { 72 | name: 'John Doe', 73 | age: 34, 74 | pets: 
[ 75 | { 76 | name: 'Rex', 77 | type: 'dog', 78 | toys: [ 79 | { type: 'bone' }, 80 | { type: 'ball' } 81 | ] 82 | } 83 | ] 84 | }, 85 | { 86 | name: 'Mary Jane', 87 | age: 19, 88 | pets: [ 89 | { 90 | name: 'Mittens', 91 | type: 'kitten', 92 | toys: [ 93 | { type: 'yarn' } 94 | ] 95 | }, 96 | { 97 | name: 'Fluffy', 98 | type: 'cat' 99 | } 100 | ] 101 | } 102 | ]; 103 | ``` 104 | 105 | 106 | # API Index 107 | 108 | ##### 1. get/set options (optional) 109 | 110 | - [`options([options])`](#options) - getter/setter for options 111 | - [`getOptions()`](#getOptions) - returns options 112 | - [`setOptions(options)`](#setOptions) - merges new `[options]` with existing 113 | - [`resetOptions()`](#resetOptions) - resets options to defaults 114 | 115 | ##### 2a. set data signature manually if needed (optional) 116 | 117 | - [`signature([row], [options])`](#signature) - getter/setter for signature definitions 118 | - [`getSignature()`](#getSignature) - returns currently defined signature 119 | - [`setSignature(row, [options])`](#setSignature) - sets signature using a specific row of data/headers (preserves signature between data sets if uniformity option is enabled) 120 | - [`clearSignature()`](#clearSignature) - clear signature between data sets (only needed when previously defined a uniform signature via `setSignature`) 121 | 122 | ##### 2b. grow tree from data set(s) 123 | 124 | - [`grow(data, [options])`](#grow) - grow flat `data`, with optional local `[options]` 125 | 126 | ##### 3. retrieve transformed data 127 | 128 | - [`getData()`](#getData) - gets current tree data 129 | - [`getSeedData()`](#getSeedData) - gets original, flat data 130 | 131 | ##### * misc/internal methods 132 | 133 | - [`getStats()`](#getStats) - returns object with growth statistics 134 | 135 | # API 136 | 137 | ### .options([options]) 138 | 139 | [Getter](#getOptions)/[Setter](#setOptions) for options. 
If an options object is passed, this is identical to [.setOptions(options)](#setOptions) and returns self (chainable). If no options are passed, this is identical to [.getOptions()](#getOptions) and returns current options as object. 140 | 141 | 142 | ### .setOptions(options) 143 | 144 | Sets options globally for the Treeize instance. This is an alias for `.options(options)`. Default options are as follows: 145 | 146 | ```js 147 | { 148 | input: { 149 | delimiter: ':', // delimiter between path segments, defaults to ':' 150 | detectCollections: true, // when true, plural path segments become collections 151 | uniformRows: false, // set to true if each row has identical signatures 152 | }, 153 | output: { 154 | prune: true, // remove blank/null values and empty nodes 155 | objectOverwrite: true, // incoming objects will overwrite placeholder ids 156 | resultsAsObject: false, // root structure defaults to array (instead of object) 157 | } 158 | } 159 | ``` 160 | 161 | For example, to change the delimiter, you would use the following: 162 | 163 | ```js 164 | .setOptions({ input: { delimiter: '|' }}); 165 | ``` 166 | 167 | #### Available Options 168 | 169 | `input.delimiter` 170 | This sets the delimiter to be used between path segments (e.g. the ":" in "children:mother:name"). 171 | [View test example](https://github.com/kwhitley/treeize/blob/feature/multi-format/test/test.js#L51-58) 172 | 173 | `input.detectCollections` 174 | Enables/disables the default behavior of turning plural path segments (e.g. "subjects" vs. "subject") into collections instead of object paths. **Note:** In order to properly merge multiple rows into the same collection item, the collection must have one or more base-level attributes acting as a signature. 
175 | [View test example (enabled)](https://github.com/kwhitley/treeize/blob/feature/multi-format/test/test.js#L79-86) | [or (disabled)](https://github.com/kwhitley/treeize/blob/feature/multi-format/test/test.js#L92-99) 176 | 177 | `input.uniformRows` 178 | By default row uniformity is disabled to allow the most flexible data merging. This means each and every row of data that is processed (unless flat array-of-array data) will be analyzed and mapped individually into the final structure. If your data rows have uniform attributes/columns, enable this option for a performance increase. 179 | 180 | `output.prune` 181 | Removes blank/empty nodes in the structure. This is enabled by default to prevent sparse data sets from injecting blanks and nulls everywhere in your final output. If nulls are important to preserve, disable this. 182 | [View test example](https://github.com/kwhitley/treeize/blob/feature/multi-format/test/test.js#L207-240) 183 | 184 | `output.objectOverwrite` 185 | To allow for merging objects directly onto existing placeholder values (e.g. foreign key ids), this is enabled by default. 186 | [View test example](https://github.com/kwhitley/treeize/blob/feature/multi-format/test/test.js#L159-203) 187 | 188 | `output.resultsAsObject` 189 | This creates a single root object (instead of the default array of objects). 190 | [View test example](https://github.com/kwhitley/treeize/blob/feature/multi-format/test/test.js#L245-278) 191 | 192 | ### .getOptions() 193 | 194 | Returns the current global options (as object). 195 | [View example format](#setOptions) 196 | 197 | 198 | ### .resetOptions() 199 | 200 | Resets all global options to [original defaults](#setOptions) and returns self (chainable). 201 | 202 | ### .signature([row], [options]) 203 | 204 | [Getter](#getSignature)/[Setter](#setSignature) for row signatures. If a row is passed, this is identical to [.setSignature(row, options)](#setSignature) and returns self (chainable). 
If no row is passed, this is identical to [.getSignature()](#getSignature) and returns currently defined signature as object. 205 | 206 | 207 | ### .setSignature(row, [options]) 208 | 209 | Manually defines the signature for upcoming data sets from argument `row`, with optional `options`. The row may be either in object (key/value) form or flat array form (array of paths). This method is only required if sharing a single signature across multiple data sources (when merging homogeneous data sets), or when the data itself has no header information (for instance, with bulk flat array-of-values data). Returns self (chainable). 210 | 211 | ```js 212 | // May be set from a single row of associative data 213 | .setSignature({ 214 | 'id': 1, 215 | 'name:first': 'Kevin', 216 | 'name:last': 'Whitley', 217 | 'hobbies:name': 'photography', 218 | 'hobbies:years': 12 219 | }) 220 | 221 | // Or from header row of flat array data 222 | .setSignature(['id', 'name:first', 'name:last', 'hobbies:name', 'hobbies:years']) 223 | ``` 224 | 225 | ### .getSignature() 226 | 227 | Returns currently defined signature. _For internal use only._ 228 | 229 | ### .clearSignature() 230 | 231 | Clears currently-defined signature if previously set via [`setSignature(row)`](#setSignature), and returns self (chainable). This is only required between data sets if signature auto-detection should be re-enabled. It is unlikely that you will need to use this. 232 | 233 | ### .getData() 234 | 235 | Returns current data tree. 236 | 237 | ```js 238 | var tree = new Treeize(); 239 | 240 | tree.grow([ 241 | { 'foo': 'bar', 'logs:a': 1 }, 242 | { 'foo': 'bar', 'logs:a': 2 }, 243 | { 'foo': 'baz', 'logs:a': 3 }, 244 | ]); 245 | 246 | console.log(tree.getData()); 247 | 248 | /* 249 | [ 250 | { foo: 'bar', logs: [{ a: 1 }, { a: 2 }] }, 251 | { foo: 'baz', logs: [{ a: 3 }]} 252 | ] 253 | */ 254 | ``` 255 | 256 | ### .getSeedData() 257 | 258 | Returns original, flat data. 
259 | 260 | ```js 261 | var tree = new Treeize(); 262 | 263 | tree.grow([ 264 | { 'foo': 'bar', 'logs:a': 1 }, 265 | { 'foo': 'bar', 'logs:a': 2 }, 266 | { 'foo': 'baz', 'logs:a': 3 } 267 | ]); 268 | 269 | console.log(tree.getSeedData()); 270 | 271 | /* 272 | [ 273 | { 'foo': 'bar', 'logs:a': 1 }, 274 | { 'foo': 'bar', 'logs:a': 2 }, 275 | { 'foo': 'baz', 'logs:a': 3 } 276 | ] 277 | */ 278 | ``` 279 | 280 | ### .getStats() 281 | 282 | Returns current growth statistics (e.g. number of sources processed, number of rows, etc). _Output and format subject to change - use at your own risk._ 283 | 284 | --- 285 | 286 | ### .grow(data, [options]) 287 | 288 | The `grow(data, [options])` method provides the core functionality of Treeize. This method expands flat data (of one or more sources) into the final deep tree output. Each attribute path is analyzed for injection into the final object graph. 289 | 290 | #### Path Naming 291 | 292 | Each column/attribute of each row will dictate its own destination path 293 | using the following format: 294 | 295 | ```js 296 | { 297 | 'path1:path2:pathX:attributeName': [value] 298 | } 299 | ``` 300 | 301 | Each "path" (up to n-levels deep) is optional and represents a single object node if the word is singular, or a collection if the word is plural (with optional +/- override modifiers). For example, a "favoriteMovie:name" path will add a "favoriteMovie" object to its path - where "favoriteMovies:name" would add a collection of movies (complete with a first entry) instead. For root nodes, include only the attribute name without any preceding paths. If you were creating a final output of a book collection for instance, the title of the book would likely be pathless as you would want the value on the high-level `books` collection. 302 | 303 | It's important to note that each row will create or find its path within the newly transformed output being created. Your flat feed may have mass-duplication, but the results will not. 
304 | 305 | ##### Merging Multiple Data Sources 306 | 307 | Treeize was designed to merge from multiple data sources of both attribute-value and array-of-value format (as long as signatures are provided in some manner), including ones with varying signatures. 308 | 309 | ```js 310 | var Treeize = require('treeize'); 311 | var arrayOfObjects = require('somesource1.js'); 312 | var arrayOfValues = require('somesource2.js'); 313 | 314 | var tree = new Treeize(); 315 | 316 | tree 317 | .grow(arrayOfObjects) 318 | .grow(arrayOfValues) // assumes header row as first row 319 | ; 320 | 321 | // tree.getData() == final merged results 322 | ``` 323 | 324 | ##### How to manually override the default pluralization scheme for collection-detection 325 | 326 | In the rare (but possible) case that plural/singular node names are not enough to properly detect collections, you may add specific overrides to the node name, using the `+` (for collections) and `-` (for singular objects) indicators. 327 | 328 | ```js 329 | { 330 | 'name': 'Bird', 331 | 'attributes:legs': 2, 332 | 'attributes:hasWings': true 333 | } 334 | 335 | // would naturally return 336 | 337 | [ 338 | { 339 | name: 'Bird', 340 | attributes: [ 341 | { 342 | legs: 2, 343 | hasWings: true 344 | } 345 | ] 346 | } 347 | ] 348 | 349 | // to tell treeize that the node (detected as a plural collection) 350 | // is NOT a collection, add a - to the path 351 | 352 | { 353 | 'name': 'Bird', 354 | 'attributes-:legs': 2, 355 | 'attributes-:hasWings': true 356 | } 357 | 358 | // results in 359 | 360 | [ 361 | { 362 | name: 'Bird', 363 | attributes: { 364 | legs: 2, 365 | hasWings: true 366 | } 367 | } 368 | ] 369 | 370 | // conversely, add a + to a path to force it into a collection 371 | 372 | ``` 373 | 374 | ##### Specifying Your Own Key/Blueprint For Collections 375 | 376 | By default, all known attributes of a collection node level define a "blueprint" by which to match future rows. 
For example, in a collection of people, if both `name` and `age` attributes are defined within each row, future rows will require both the `name` and `age` values to match for the additional information to be merged into that record. To override this default behavior and specify your own criteria, simply _mark each required attribute with a leading or trailing `*` modifier._ 377 | 378 | ```js 379 | [ 380 | { 381 | 'date': '1/1/2014', 382 | 'temperatureF': 90, 383 | 'temperatureC': 32 384 | }, 385 | { 386 | 'date': '1/1/2014', 387 | 'humidity': .1 388 | } 389 | ] 390 | 391 | // ...would normally grow into: 392 | [ 393 | { 394 | date: '1/1/2014', 395 | temperatureF: 90, 396 | temperatureC: 32 397 | }, 398 | { 399 | date: '1/1/2014', 400 | humidity: 0.1 401 | } 402 | ] 403 | 404 | // ...but by specifying only the "date" attribute as the blueprint/key 405 | [ 406 | { 407 | 'date*': '1/1/2014', 408 | 'temperatureF': 90, 409 | 'temperatureC': 32 410 | }, 411 | { 412 | 'date*': '1/1/2014', 413 | 'humidity': .1 414 | } 415 | ] 416 | 417 | // ...the data merges appropriately 418 | [ 419 | { 420 | date: '1/1/2014', 421 | temperatureF: 90, 422 | temperatureC: 32, 423 | humidity: 0.1 424 | } 425 | ] 426 | ``` 427 | 428 | ### Notes 429 | 430 | - Each attribute name of the flat data must consist of the full path to its node & attribute, separated by the delimiter. `id` suggests an `id` attribute on a root element, whereas `name:first` implies a `first` attribute on a `name` object within a root element. 431 | - To imply a collection in the path/attribute-name, use a plural name (e.g. "subjects" instead of "subject"). Otherwise, use a singular name for a singular object. 432 | - Use a `:` delimiter (default) to separate path segments. To change this, modify the [`input.delimiter`](#optionsInputDelimiter) option. 
433 | 434 | --- 435 | 436 | # Examples 437 | 438 | In this short series of examples, we'll take a standard "join dump", originally keyed 439 | (via attribute names) to organize by movie - and demonstrate how other organizations can 440 | be easily derived from the same original feed... by simply modifying the column/attribute 441 | names in the output. 442 | 443 | #### Example 1 444 | 445 | In this example, we'll take our dump (as if from a CSV or SQL result) - and name the keys to 446 | group by movies (as if for an `/api/movies`). 447 | 448 | ```js 449 | var movieData = [ 450 | { 451 | 'title': 'The Prestige', 452 | 'director': 'Christopher Nolan', 453 | 'actors:name': 'Christian Bale', 454 | 'actors:as': 'Alfred Borden' 455 | }, 456 | { 457 | 'title': 'The Prestige', 458 | 'director': 'Christopher Nolan', 459 | 'actors:name': 'Hugh Jackman', 460 | 'actors:as': 'Robert Angier' 461 | }, 462 | { 463 | 'title': 'The Dark Knight Rises', 464 | 'director': 'Christopher Nolan', 465 | 'actors:name': 'Christian Bale', 466 | 'actors:as': 'Bruce Wayne' 467 | }, 468 | { 469 | 'title': 'The Departed', 470 | 'director': 'Martin Scorsese', 471 | 'actors:name': 'Leonardo DiCaprio', 472 | 'actors:as': 'Billy' 473 | }, 474 | { 475 | 'title': 'The Departed', 476 | 'director': 'Martin Scorsese', 477 | 'actors:name': 'Matt Damon', 478 | 'actors:as': 'Colin Sullivan' 479 | } 480 | ]; 481 | 482 | var Treeize = require('treeize'); 483 | var movies = new Treeize(); 484 | 485 | movies.grow(movieData); 486 | 487 | /* 488 | 489 | 'movies.getData()' now results in the following: 490 | 491 | [ 492 | { 493 | 'director': 'Christopher Nolan', 494 | 'title': 'The Prestige', 495 | 'actors': [ 496 | { 497 | 'as': 'Alfred Borden', 498 | 'name': 'Christian Bale' 499 | }, 500 | { 501 | 'as': 'Robert Angier', 502 | 'name': 'Hugh Jackman' 503 | } 504 | ] 505 | }, 506 | { 507 | 'director': 'Christopher Nolan', 508 | 'title': 'The Dark Knight Rises', 509 | 'actors': [ 510 | { 511 | 'as': 'Bruce Wayne', 
512 | 'name': 'Christian Bale' 513 | } 514 | ] 515 | }, 516 | { 517 | 'director': 'Martin Scorsese', 518 | 'title': 'The Departed', 519 | 'actors': [ 520 | { 521 | 'as': 'Billy', 522 | 'name': 'Leonardo DiCaprio' 523 | }, 524 | { 525 | 'as': 'Colin Sullivan', 526 | 'name': 'Matt Damon' 527 | } 528 | ] 529 | } 530 | ] 531 | 532 | */ 533 | ``` 534 | 535 | #### Example 2 536 | 537 | Taking the same feed, but modifying the target paths through the attribute/column 538 | names we can completely transform the data (as you would for another API endpoint, 539 | for example). This time we'll organize the data by actors, as you would for 540 | an endpoint like `/api/actors`. 541 | 542 | Notice the feed is left unchanged - only the attribute names have been modified to 543 | define their new target path. In this case, by changing the base node to the actor 544 | name (instead of the movie name), we group everything by actor at a high level. 545 | 546 | ```js 547 | var moviesDump = [ 548 | { 549 | 'movies:title': 'The Prestige', 550 | 'movies:director': 'Christopher Nolan', 551 | 'name': 'Christian Bale', 552 | 'movies:as': 'Alfred Borden' 553 | }, 554 | { 555 | 'movies:title': 'The Prestige', 556 | 'movies:director': 'Christopher Nolan', 557 | 'name': 'Hugh Jackman', 558 | 'movies:as': 'Robert Angier' 559 | }, 560 | { 561 | 'movies:title': 'The Dark Knight Rises', 562 | 'movies:director': 'Christopher Nolan', 563 | 'name': 'Christian Bale', 564 | 'movies:as': 'Bruce Wayne' 565 | }, 566 | { 567 | 'movies:title': 'The Departed', 568 | 'movies:director': 'Martin Scorsese', 569 | 'name': 'Leonardo DiCaprio', 570 | 'movies:as': 'Billy' 571 | }, 572 | { 573 | 'movies:title': 'The Departed', 574 | 'movies:director': 'Martin Scorsese', 575 | 'name': 'Matt Damon', 576 | 'movies:as': 'Colin Sullivan' 577 | } 578 | ]; 579 | 580 | var Treeize = require('treeize'); 581 | var actors = new Treeize(); 582 | 583 | actors.grow(moviesDump); 584 | 585 | /* 586 | 587 | 'actors.getData()' now 
results in the following: 588 | 589 | [ 590 | { 591 | 'name': 'Christian Bale', 592 | 'movies': [ 593 | { 594 | 'as': 'Alfred Borden', 595 | 'director': 'Christopher Nolan', 596 | 'title': 'The Prestige' 597 | }, 598 | { 599 | 'as': 'Bruce Wayne', 600 | 'director': 'Christopher Nolan', 601 | 'title': 'The Dark Knight Rises' 602 | } 603 | ] 604 | }, 605 | { 606 | 'name': 'Hugh Jackman', 607 | 'movies': [ 608 | { 609 | 'as': 'Robert Angier', 610 | 'director': 'Christopher Nolan', 611 | 'title': 'The Prestige' 612 | } 613 | ] 614 | }, 615 | { 616 | 'name': 'Leonardo DiCaprio', 617 | 'movies': [ 618 | { 619 | 'as': 'Billy', 620 | 'director': 'Martin Scorsese', 621 | 'title': 'The Departed' 622 | } 623 | ] 624 | }, 625 | { 626 | 'name': 'Matt Damon', 627 | 'movies': [ 628 | { 629 | 'as': 'Colin Sullivan', 630 | 'director': 'Martin Scorsese', 631 | 'title': 'The Departed' 632 | } 633 | ] 634 | } 635 | ] 636 | 637 | */ 638 | ``` 639 | 640 | # Changelog 641 | 642 | - **2.0.1** - performance tuning... 
~400% performance boost over 2.0.0 643 | - **2.0.2** - added `.getSeedData()` to retrieve original, flat data 644 | - **2.0.3** - internal variable renaming to avoid deprecation error 645 | - **2.1.0** - major (> 3x) performance improvement - required dropping support for .toString() and internal logging, removed lodash as a dependency 646 | - **2.1.1** - rollback to lodash dependency to solve edge case in mapping 647 | - **2.1.2** - solves issue in edge case with attributes named "length" 648 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | var Treeize = require('../lib/treeize') 2 | var treeize = new Treeize() 3 | var should = require('should') 4 | 5 | var welldata1 = require('./data/welldata1') 6 | var welldata2 = require('./data/welldata2') 7 | var arraywelldata = require('./data/arraywelldata') 8 | var arraywelldataNoHeaders = require('./data/arraywelldata-no-headers') 9 | var classdata = require('./data/classdata') 10 | 11 | describe('#getSeedData()', function() { 12 | it('should return original flat data', function() { 13 | var tree = new Treeize() 14 | tree 15 | .grow(welldata1) 16 | .getSeedData() 17 | .should.eql(welldata1) 18 | 19 | }) 20 | 21 | it('should return original flat data from multiple data sources', function() { 22 | var tree = new Treeize() 23 | 24 | tree 25 | .grow([{ 'foo': 'bar', 'logs:a': 1 }]) 26 | .grow([{ 'foo': 'bar', 'logs:b': 2 }]) 27 | .getSeedData() 28 | .should.eql([ 29 | { 'foo': 'bar', 'logs:a': 1 }, 30 | { 'foo': 'bar', 'logs:b': 2 } 31 | ]) 32 | 33 | }) 34 | }) 35 | 36 | describe('#getStats()', function() { 37 | var tree = new Treeize() 38 | var stats = tree.grow([ 39 | { 'foo': 'bar', 'logs:a': 1 }, 40 | { 'foo': 'bar', 'logs:a': 2 }, 41 | { 'foo': 'baz', 'logs:a': 3 }, 42 | ]).getStats() 43 | 44 | describe('.rows', function() { 45 | it('should return number of rows processed', function() { 46 
| stats.rows.should.equal(3) 47 | }) 48 | }) 49 | 50 | describe('.sources', function() { 51 | it('should return number of sources/growth passes', function() { 52 | stats.sources.should.equal(1) 53 | }) 54 | }) 55 | }) 56 | 57 | 58 | describe('#getOptions()', function() { 59 | it('should return options', function() { 60 | treeize.getOptions().log.should.be.false 61 | treeize.getOptions().input.delimiter.should.equal(':') 62 | }) 63 | }) 64 | 65 | 66 | describe('#setOptions()', function() { 67 | it('should be chainable', function() { 68 | treeize.setOptions({ input: { uniformRows: false }}).should.be.type('object') 69 | treeize.setOptions({ input: { uniformRows: true }}).should.have.property('grow') 70 | }) 71 | 72 | describe('input.delimiter', function() { 73 | it('should allow custom delimiters', function() { 74 | var tree = new Treeize() 75 | 76 | tree.setOptions({ input: { delimiter: '|' }}).grow([ 77 | { 'foo': 'bar', 'logs|a': 1 }, 78 | { 'foo': 'bar', 'logs|a': 2 }, 79 | { 'foo': 'baz', 'logs|a': 3 }, 80 | ]).getData().should.eql([ 81 | { foo: 'bar', logs: [{ a: 1 }, { a: 2 }] }, 82 | { foo: 'baz', logs: [{ a: 3 }]} 83 | ]) 84 | }) 85 | 86 | it('should be able to be set from grow() options', function() { 87 | var tree = new Treeize() 88 | 89 | tree.grow([ 90 | { 'foo': 'bar', 'logs|a': 1 }, 91 | { 'foo': 'bar', 'logs|a': 2 }, 92 | { 'foo': 'baz', 'logs|a': 3 }, 93 | ], { input: { delimiter: '|' }}).getData().should.eql([ 94 | { foo: 'bar', logs: [{ a: 1 }, { a: 2 }] }, 95 | { foo: 'baz', logs: [{ a: 3 }]} 96 | ]) 97 | }) 98 | }) 99 | 100 | describe('input.detectCollections', function() { 101 | it('should force plural nodes into collections when enabled', function() { 102 | var tree = new Treeize() 103 | 104 | tree.grow([ 105 | { 'foo': 'bar', 'logs:a': 1 }, 106 | { 'foo': 'bar', 'logs:a': 2 }, 107 | { 'foo': 'baz', 'logs:a': 3 }, 108 | ]).getData().should.eql([ 109 | { foo: 'bar', logs: [{ a: 1 }, { a: 2 }] }, 110 | { foo: 'baz', logs: [{ a: 3 }]} 111 | ]) 112 
| }) 113 | 114 | it('should ignore plural nodes when disabled', function() { 115 | var tree = new Treeize() 116 | 117 | tree.setOptions({ input: { detectCollections: false } }).grow([ 118 | { 'foo': 'bar', 'logs:a': 1 }, 119 | { 'foo': 'bar', 'logs:a': 2 }, 120 | { 'foo': 'baz', 'logs:a': 3 }, 121 | ]).getData().should.eql([ 122 | { foo: 'bar', logs: { a: 2 } }, 123 | { foo: 'baz', logs: { a: 3 } }, 124 | ]) 125 | }) 126 | }) 127 | 128 | describe('input.uniformRows', function() { 129 | it('should create unique row signature for each row when disabled (default)', function() { 130 | var fields = new Treeize() 131 | fields 132 | .grow(welldata1) 133 | 134 | 135 | fields.getData().should.eql([ 136 | { code: 'RA', 137 | wells: 138 | [ { uwi: 'RA-001', 139 | log: 140 | [ { oilrate: 5000, date: '12/12/2014' }, 141 | { oilrate: 5050, date: '12/13/2014', wc: 0.5 } ], 142 | reservoirs: [ { code: 'LB' } ] }, 143 | { uwi: 'RA-002', 144 | reservoir: 'UB', 145 | log: [ { oilrate: 4500, date: '12/12/2014' } ] } ], 146 | reservoirs: [ { code: 'LB' } ] }, 147 | { code: 'SA', 148 | wells: 149 | [ { uwi: 'SA-032', 150 | log: [ { oilrate: 2050, date: '12/12/2014' } ], 151 | reservoirs: [ { code: 'MA' } ] } ], 152 | reservoirs: [ { code: 'MA' } ] } 153 | ]) 154 | }) 155 | 156 | it('should use signature from first row when enabled', function() { 157 | var fields = new Treeize() 158 | fields 159 | .grow(welldata1, { input: { uniformRows: true }}) 160 | 161 | 162 | fields.getData().should.eql([ 163 | { code: 'RA', 164 | wells: 165 | [ { uwi: 'RA-001', 166 | log: 167 | [ { oilrate: 5000, date: '12/12/2014' }, 168 | { oilrate: 5050, date: '12/13/2014' } ], 169 | reservoirs: [ { code: 'LB' } ] }, 170 | { uwi: 'RA-002', 171 | log: [ { oilrate: 4500, date: '12/12/2014' } ] } ], 172 | reservoirs: [ { code: 'LB' } ] }, 173 | { code: 'SA', 174 | wells: 175 | [ { uwi: 'SA-032', 176 | log: [ { oilrate: 2050, date: '12/12/2014' } ], 177 | reservoirs: [ { code: 'MA' } ] } ], 178 | reservoirs: [ { 
code: 'MA' } ] } 179 | ]) 180 | }) 181 | }) 182 | 183 | describe('output.objectOverwrite', function() { 184 | var testDataOverwrite = [ 185 | { 186 | 'name*': 'dog', 187 | 'fk': 1, 188 | 'pet': 'Fido' 189 | }, 190 | { 191 | 'name': 'cat', 192 | 'fk:a': 'A', 193 | 'fk:b': 'B' 194 | }, 195 | { 196 | 'name*': 'dog', 197 | 'fk:a': 'X', 198 | 'fk:b': 'Y', 199 | 'pet': 'Mittens' 200 | }, 201 | ] 202 | 203 | it('should overwrite attribute/placeholder objects with real objects when enabled', function() { 204 | var tree = new Treeize() 205 | tree = tree 206 | .grow(testDataOverwrite) 207 | .getData() 208 | 209 | 210 | tree.should.eql([ 211 | { name: 'dog', fk: { a: 'X', b: 'Y' }, pet: 'Mittens' }, 212 | { name: 'cat', fk: { a: 'A', b: 'B' } } 213 | ]) 214 | }) 215 | 216 | it('should not overwrite attribute/placeholder objects with real objects when disabled', function() { 217 | var tree = new Treeize() 218 | tree = tree 219 | .setOptions({ output: { objectOverwrite: false }}) 220 | .grow(testDataOverwrite) 221 | .getData() 222 | 223 | 224 | tree.should.eql([ 225 | { name: 'dog', fk: 1, pet: 'Mittens' }, 226 | { name: 'cat', fk: { a: 'A', b: 'B' } } 227 | ]) 228 | }) 229 | }) 230 | 231 | describe('output.prune', function() { 232 | var pruneData = [ 233 | { 'name': null, 'age': 1 }, 234 | { 'name': 'Kevin', 'age': 12 }, 235 | { foo: null, bar: null } 236 | ] 237 | 238 | it('should prune empty nodes when enabled', function() { 239 | var tree = new Treeize() 240 | tree 241 | .setOptions({ input: { uniformRows: false } }) 242 | .grow(pruneData) 243 | 244 | 245 | tree.getData().should.have.a.length(2) 246 | tree.getData().should.eql([ 247 | { age: 1 }, 248 | { name: 'Kevin', age: 12 } 249 | ]) 250 | }) 251 | 252 | it('should leave empty nodes when disabled', function() { 253 | var tree = new Treeize() 254 | tree 255 | .setOptions({ input: { uniformRows: false }, output: { prune: false } }) 256 | .grow(pruneData) 257 | 258 | 259 | tree.getData().should.have.a.length(3) 260 | 
tree.getData().should.eql([ 261 | { name: null, age: 1 }, 262 | { name: 'Kevin', age: 12 }, 263 | { foo: null, bar: null } 264 | ]) 265 | }) 266 | }) 267 | 268 | describe('output.resultsAsObject', function() { 269 | it('should create single root object instead of array results', function() { 270 | var testDataRootObject = [ 271 | { 272 | 'name': 'kevin', 273 | 'pet': 'Fido' 274 | }, 275 | { 276 | 'age': 34, 277 | }, 278 | { 279 | 'comments:comment': 'I miss you', 280 | 'comments:date': '2014/09/10' 281 | }, 282 | { 283 | 'comments:comment': 'I really miss you', 284 | 'comments:date': '2014/09/11' 285 | } 286 | ] 287 | 288 | var tree = new Treeize() 289 | tree = tree 290 | .setOptions({ input: { uniformRows: false }, output: { resultsAsObject: true }}) 291 | .grow(testDataRootObject) 292 | .getData() 293 | 294 | 295 | tree.should.eql({ 296 | name: 'kevin', 297 | pet: 'Fido', 298 | age: 34, 299 | comments: [ 300 | { comment: 'I miss you', date: '2014/09/10' }, 301 | { comment: 'I really miss you', date: '2014/09/11' } 302 | ] 303 | }) 304 | }) 305 | }) 306 | }) 307 | 308 | 309 | describe('#resetOptions()', function() { 310 | it('should be chainable', function() { 311 | treeize.resetOptions().should.be.type('object') 312 | treeize.resetOptions().should.have.property('grow') 313 | }) 314 | 315 | it('should reset base options', function() { 316 | var baseOptions = treeize.getOptions() 317 | treeize.setOptions({ log: true }) 318 | treeize.getOptions().should.not.eql(baseOptions) 319 | treeize.getOptions().log.should.be.true 320 | treeize.resetOptions() 321 | treeize.getOptions().should.eql(baseOptions) 322 | treeize.getOptions().log.should.be.false 323 | }) 324 | }) 325 | 326 | 327 | describe('#setSignature()', function() { 328 | it('should be chainable', function() { 329 | treeize.setSignature([]).should.be.type('object') 330 | treeize.setSignature([]).should.have.property('grow') 331 | }) 332 | 333 | it('should force signature from a defined row', function() { 334 | 
var fields = new Treeize() 335 | fields 336 | .setSignature(welldata1[3]) 337 | .grow(welldata1) 338 | 339 | 340 | fields.getData().should.eql([ 341 | { code: 'RA', 342 | wells: 343 | [ { uwi: 'RA-001', 344 | log: [ { date: '12/12/2014' }, { date: '12/13/2014', wc: 0.5 } ], 345 | reservoirs: [ { code: 'LB' } ] }, 346 | { uwi: 'RA-002', log: [ { date: '12/12/2014' } ] } ], 347 | reservoirs: [ { code: 'LB' } ] }, 348 | { code: 'SA', 349 | wells: 350 | [ { uwi: 'SA-032', 351 | log: [ { date: '12/12/2014' } ], 352 | reservoirs: [ { code: 'MA' } ] } ], 353 | reservoirs: [ { code: 'MA' } ] } 354 | ]) 355 | }) 356 | 357 | it('should work with array data', function() { 358 | var fields = new Treeize() 359 | fields 360 | .setSignature(['id','name:first','age']) 361 | .grow([ 362 | [1, 'kevin', 34], 363 | [2, 'jimbo', 33], 364 | ]) 365 | 366 | 367 | fields.getData().should.eql([ 368 | { id: 1, name: { first: 'kevin' }, age: 34 }, 369 | { id: 2, name: { first: 'jimbo' }, age: 33 } 370 | ]) 371 | }) 372 | 373 | it('should persist between data sets when called manually', function() { 374 | var fields = new Treeize() 375 | fields 376 | .setSignature(welldata1[3]) 377 | .grow(welldata1) 378 | .grow(welldata2) 379 | 380 | 381 | fields.getData().should.eql([ 382 | { code: 'RA', 383 | wells: 384 | [ { uwi: 'RA-001', 385 | log: [ { date: '12/12/2014' }, { date: '12/13/2014', wc: 0.5 } ], 386 | reservoirs: [ { code: 'LB' } ] }, 387 | { uwi: 'RA-002', 388 | log: [ { date: '12/12/2014' } ], 389 | reservoirs: [ { code: 'UB' } ] } ], 390 | reservoirs: [ { code: 'LB' }, { code: 'UB' } ] }, 391 | { code: 'SA', 392 | wells: 393 | [ { uwi: 'SA-032', 394 | log: 395 | [ { date: '12/12/2014', wc: 0.1 }, 396 | { wc: 0.2, date: '12/13/2014' } ], 397 | reservoirs: [ { code: 'MA' } ] } ], 398 | reservoirs: [ { code: 'MA' } ] } 399 | ]) 400 | }) 401 | 402 | describe('modifiers', function() { 403 | it('-/+/* modifiers should only be stripped from head/tail of paths', function() { 404 | var 
testPlusMinus = [ 405 | { 406 | 'name': 'kevin', 407 | 'owned-pets:name': 'Fido', 408 | 'owned-pets:age': 12, 409 | 'a+b': 'why not?', 410 | 'log-ref+:date': '2014/1/1' 411 | }, 412 | { 413 | 'name': 'kevin', 414 | 'owned-pets:name': 'Fido', 415 | 'owned-pets:age': 12, 416 | 'a+b': 'why not?', 417 | 'log-ref+:date': '2014/1/2' 418 | }, 419 | ] 420 | 421 | var tree = new Treeize() 422 | tree = tree 423 | .grow(testPlusMinus) 424 | .getData() 425 | 426 | 427 | tree.should.eql([ { name: 'kevin', 428 | 'a+b': 'why not?', 429 | 'log-ref': [ { date: '2014/1/1' }, { date: '2014/1/2' } ], 430 | 'owned-pets': [ { name: 'Fido', age: 12 } ] } ] 431 | ) 432 | }) 433 | 434 | it('* modifier should define specific signature attributes', function() { 435 | var tree = new Treeize() 436 | 437 | tree.grow([ 438 | { 'foo*': 'bar', 'age': 1 }, 439 | { 'foo*': 'bar', 'age': 2 }, 440 | { 'foo*': 'baz', 'age': 3 }, 441 | ]).getData().should.have.length(2) 442 | }) 443 | 444 | it('+ modifier should force collection', function() { 445 | var tree = new Treeize() 446 | 447 | tree.grow([ 448 | { 'foo': 'bar', 'log+:a': 1 }, 449 | { 'foo': 'bar', 'log+:a': 2 }, 450 | { 'foo': 'baz', 'log+:a': 3 }, 451 | ]).getData().should.eql([ 452 | { foo: 'bar', log: [{ a: 1 }, { a: 2 }] }, 453 | { foo: 'baz', log: [{ a: 3 }]} 454 | ]) 455 | }) 456 | 457 | it('+ modifier should work on deep nodes', function() { 458 | var tree = new Treeize() 459 | 460 | tree.grow([ 461 | { 'foo': 'bar', 'log+:a:b': 1 }, 462 | { 'foo': 'bar', 'log+:a:b': 2 }, 463 | { 'foo': 'baz', 'log+:a:b': 3 }, 464 | ]).getData().should.eql([ 465 | { foo: 'bar', log: [{ a: { b: 1 } }, { a: { b: 2 } }] }, 466 | { foo: 'baz', log: [{ a: { b: 3 } }]} 467 | ]) 468 | }) 469 | 470 | it('+ modifier should work on deep edge (classdata) case', function() { 471 | var tree = new Treeize() 472 | 473 | tree.grow(classdata).getData().should.eql([ 474 | { Name: 'Duke University', 475 | Subject: 476 | [ { Name: 'liberal-arts', 477 | Courses: 478 | [ { 
'Date Added': '07/16/13', 479 | Name: '9/11 and Its Aftermath -- Part I', 480 | Description: 'Provided by Duke University, this course investigates the forces leading up to the 9/11 attacks and the policies adopted by the US afterwards.', 481 | Link: 'https://www.coursera.org/course/911aftermath', 482 | Media: 'full course', 483 | Instructor: 'David Schanzer', 484 | 'Start Date': '09/09/13', 485 | Duration: '7 weeks' } ] } ], 486 | Providers: [ { Name: 'Coursera' } ] }, 487 | { Name: 'Wesleyan University', 488 | Subject: 489 | [ { Name: 'math', 490 | Courses: 491 | [ { 'Date Added': '07/16/13', 492 | Name: 'Analysis of a Complex Kind', 493 | Description: 'This course educates students on the subject of complex analysis, which is the study of functions that live in the complex plane.', 494 | Link: 'https://www.coursera.org/course/complexanalysis', 495 | Media: 'full course', 496 | Instructor: 'Dr. Petra Bonfert-Taylor', 497 | 'Start Date': '10/21/13', 498 | Duration: '6 weeks' } ] } ], 499 | Providers: [ { Name: 'Coursera' } ] } 500 | ]) 501 | }) 502 | 503 | it('- modifier should force object (instead of collection) when plural name', function() { 504 | var tree = new Treeize() 505 | 506 | tree.grow([ 507 | { 'foo': 'bar', 'logs-:a': 1 }, 508 | { 'foo': 'bar', 'logs-:a': 2 }, 509 | { 'foo': 'baz', 'logs-:a': 3 }, 510 | ]).getData().should.eql([ 511 | { foo: 'bar', logs: { a: 2 } }, 512 | { foo: 'baz', logs: { a: 3 } } 513 | ]) 514 | }) 515 | 516 | it('- modifier should work on deep nodes', function() { 517 | var tree = new Treeize() 518 | 519 | tree.grow([ 520 | { 'foo': 'bar', 'logs-:a:b': 1 }, 521 | { 'foo': 'bar', 'logs-:a:b': 2 }, 522 | { 'foo': 'baz', 'logs-:a:b': 3 }, 523 | ]).getData().should.eql([ 524 | { foo: 'bar', logs: { a: { b: 2 } } }, 525 | { foo: 'baz', logs: { a: { b: 3 } } } 526 | ]) 527 | }) 528 | }) 529 | }) 530 | 531 | 532 | describe('#grow()', function() { 533 | it('should be chainable', function() { 534 | 
treeize.grow().should.be.type('object') 535 | treeize.grow().should.have.property('grow') 536 | }) 537 | 538 | it('passing options should not change global options (including input options)', function() { 539 | var pruneData = [ 540 | { 'name': null, 'age': 1 }, 541 | { 'name': 'Kevin', 'age': 12 }, 542 | { foo: null, bar: null } 543 | ] 544 | 545 | var tree = new Treeize() 546 | tree 547 | .grow(pruneData, { input: { uniformRows: false }, output: { prune: false } }) 548 | .getData() 549 | .should.eql([ 550 | { name: null, age: 1 }, 551 | { name: 'Kevin', age: 12 }, 552 | { foo: null, bar: null } 553 | ]) 554 | 555 | }) 556 | 557 | it('passing options should not change global options', function() { 558 | var tree = new Treeize() 559 | tree.setOptions({ input: { delimiter: '&' } }) 560 | tree.getOptions().input.delimiter.should.equal('&') 561 | 562 | tree.grow([], { input: { delimiter: '>' } }) 563 | tree.getOptions().input.delimiter.should.equal('&') 564 | }) 565 | 566 | it('passing options for signature reading should work', function() { 567 | var tree = new Treeize() 568 | tree.setOptions({ input: { delimiter: '&' } }) 569 | tree.getOptions().input.delimiter.should.equal('&') 570 | 571 | tree.grow([], { input: { delimiter: '>' } }) 572 | tree.getOptions().input.delimiter.should.equal('&') 573 | }) 574 | 575 | it('should create new entry for each unique node signature', function() { 576 | var tree = new Treeize() 577 | 578 | tree.grow([ 579 | { 'foo': 'bar', 'age': 1 }, 580 | { 'foo': 'bar', 'age': 2 }, 581 | { 'foo': 'baz', 'age': 3 }, 582 | ]).getData().should.have.length(3) 583 | }) 584 | 585 | it('should handle flat array data', function() { 586 | var fields = new Treeize() 587 | fields 588 | .grow(arraywelldata) 589 | 590 | 591 | fields.getData().should.eql([ 592 | { code: 'RA', 593 | wells: 594 | [ { uwi: 'RA-001', 595 | log: 596 | [ { effluent: 5000, date: '12/12/2014' }, 597 | { effluent: 5050, date: '12/13/2014' }, 598 | { effluent: 6076, date: 
'12/14/2014' } ], 599 | reservoirs: [ { code: 'LB' } ] }, 600 | { uwi: 'RA-002', 601 | log: [ { effluent: 4500, date: '12/12/2014' } ], 602 | reservoirs: [ { code: 'UB' } ] } ], 603 | reservoirs: [ { code: 'LB' }, { code: 'UB' } ] }, 604 | { code: 'SA', 605 | wells: 606 | [ { uwi: 'SA-032', 607 | log: [ { effluent: 2050, date: '12/12/2014' } ], 608 | reservoirs: [ { code: 'MA' } ] }, 609 | { uwi: 'SA-031', 610 | log: [ { effluent: 850, date: '12/11/2014' } ], 611 | reservoirs: [ { code: 'MA' } ] } ], 612 | reservoirs: [ { code: 'MA' } ] } 613 | ]) 614 | }) 615 | 616 | it('should be able to merge multiple data sources/types together', function() { 617 | var fields = new Treeize() 618 | fields 619 | .setOptions({ input: { uniformRows: false } }) 620 | .grow(welldata1) 621 | .grow(welldata2) 622 | .grow(arraywelldata) 623 | 624 | 625 | fields.getData().should.eql([ 626 | { code: 'RA', 627 | wells: 628 | [ { uwi: 'RA-001', 629 | log: 630 | [ { oilrate: 5000, date: '12/12/2014', effluent: 5000 }, 631 | { oilrate: 5050, date: '12/13/2014', wc: 0.5, effluent: 5050 }, 632 | { effluent: 6076, date: '12/14/2014' } ], 633 | reservoirs: [ { code: 'LB' } ] }, 634 | { uwi: 'RA-002', 635 | reservoir: 'UB', 636 | log: [ { oilrate: 4500, date: '12/12/2014', effluent: 4500 } ], 637 | reservoirs: [ { code: 'UB' } ] } ], 638 | reservoirs: [ { code: 'LB' }, { code: 'UB' } ] }, 639 | { code: 'SA', 640 | wells: 641 | [ { uwi: 'SA-032', 642 | log: 643 | [ { oilrate: 2050, date: '12/12/2014', wc: 0.1, effluent: 2050 }, 644 | { wc: 0.2, date: '12/13/2014' } ], 645 | reservoirs: [ { code: 'MA' } ] }, 646 | { uwi: 'SA-031', 647 | log: [ { effluent: 850, date: '12/11/2014' } ], 648 | reservoirs: [ { code: 'MA' } ] } ], 649 | reservoirs: [ { code: 'MA' } ] } 650 | ]) 651 | }) 652 | 653 | it('should handle deep object paths without existing definition', function() { 654 | var tree = new Treeize() 655 | 656 | tree.grow([ 657 | { 658 | 'id':1, 659 | 'user:a:b:c:d:e': 'kevin', 660 | 'user:age': 34 
661 | }, 662 | { 663 | 'id':1, 664 | 'user:a:b:c:def:e': 'jimbo', 665 | 'user:age': 34 666 | }, 667 | { 668 | 'id':1, 669 | 'user:a:b:c:d:efg': 'kelly', 670 | 'user:age': 34 671 | } 672 | ]).getData().should.eql([ 673 | { id: 1, 674 | user: 675 | { age: 34, 676 | a: { b: { c: { d: { e: 'kevin', efg: 'kelly' }, def: { e: 'jimbo' } } } } } } 677 | ]) 678 | }) 679 | 680 | it('should handle signature-less root node insertion', function() { 681 | var tree = new Treeize() 682 | 683 | tree.grow([ 684 | { 'foo:name': 'bar', 'foo:age': 1 }, 685 | { 'foo:name': 'baz', 'foo:age': 3 }, 686 | ]).getData().should.eql([ 687 | { foo: { name: 'bar', age: 1 } }, 688 | { foo: { name: 'baz', age: 3 } } 689 | ]) 690 | }) 691 | 692 | it('should handle attributes named "length"', function() { 693 | var tree = new Treeize() 694 | 695 | tree.grow([ 696 | { length: 86 }, 697 | ]).getData().should.eql([ 698 | { length: 86 } 699 | ]) 700 | }) 701 | 702 | it('should handle rows with nested objects', function() { 703 | var tree = new Treeize() 704 | 705 | tree.grow([{ 706 | primaryKey: 1, 707 | subObject: { 708 | key: 'field' 709 | }, 710 | 'subresources:id': 1, 711 | 'subresources:field': 'Subresource 1', 712 | }, { 713 | primaryKey: 1, 714 | subObject: { 715 | key: 'field' 716 | }, 717 | 'subresources:id': 2, 718 | 'subresources:field': 'Subresource 2', 719 | }]) 720 | .getData().should.eql([ 721 | { 722 | "primaryKey": 1, 723 | "subObject": { 724 | "key": "field" 725 | }, 726 | "subresources": [ 727 | { 728 | "id": 1, 729 | "field": "Subresource 1" 730 | }, 731 | { 732 | "id": 2, 733 | "field": "Subresource 2" 734 | } 735 | ] 736 | } 737 | ]) 738 | }) 739 | }) 740 | --------------------------------------------------------------------------------