├── .gitignore ├── examples ├── data │ ├── us-states │ │ ├── us-states.cpg │ │ ├── us-states.dbf │ │ ├── us-states.shp │ │ ├── us-states.shx │ │ └── us-states.prj │ ├── new-data.json │ ├── left-data.json │ ├── new-data-two.json │ ├── new-geo-data.json │ ├── left-data-nested-three.json │ ├── new-data-three.json │ ├── left-data-nested-four.json │ ├── left-data-nested-two.json │ ├── left-data-nested.json │ └── us-states.geojson ├── joinJson.js ├── joinJson-nest-under.js ├── joinJson-nested-join-keys.js ├── joinGeoJson-id.js └── joinGeoJson-prop.js ├── .npmignore ├── .travis.yml ├── bin ├── io │ ├── index.js │ ├── makeDirectoriesSync.js │ ├── makeDirectories.js │ ├── readFile.js │ ├── writeDataSync.js │ ├── parsers.js │ ├── readDbf.js │ ├── helpers.js │ ├── path.js │ ├── writeData.js │ ├── readData.js │ └── formatters.js └── index.js ├── src ├── .babelrc ├── utils │ └── report.js └── index.js ├── rollup.config.js ├── LICENSE ├── package.json ├── CHANGELOG.md ├── README.md ├── dist └── joiner.min.js └── test └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules -------------------------------------------------------------------------------- /examples/data/us-states/us-states.cpg: -------------------------------------------------------------------------------- 1 | UTF-8 -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | src 2 | examples 3 | test 4 | CHANGELOG.md 5 | rollup.config.js 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "16" 4 | - "14" 5 | - "12" 6 | -------------------------------------------------------------------------------- /examples/data/us-states/us-states.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mhkeller/joiner/HEAD/examples/data/us-states/us-states.dbf -------------------------------------------------------------------------------- /examples/data/us-states/us-states.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mhkeller/joiner/HEAD/examples/data/us-states/us-states.shp -------------------------------------------------------------------------------- /examples/data/us-states/us-states.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mhkeller/joiner/HEAD/examples/data/us-states/us-states.shx -------------------------------------------------------------------------------- /examples/data/us-states/us-states.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /bin/io/index.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | helpers: require('./helpers'), 3 | readData: require('./readData.js'), 4 | writeData: require('./writeData.js'), 5 | writeDataSync: require('./writeDataSync.js') 6 | } 7 | -------------------------------------------------------------------------------- /bin/io/makeDirectoriesSync.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | var mkdirp = require('mkdirp') 3 | 4 | function makeDirectoriesSync (outPath) { 5 | mkdirp.sync(path.dirname(outPath)) 6 | } 7 | 8 | module.exports = makeDirectoriesSync 9 | -------------------------------------------------------------------------------- /src/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | [ 4 | "es2015", 5 | { 6 | "modules": false 7 | } 8 | ] 9 | ], 10 | "plugins": [ 11 | "transform-object-assign", 12 | "external-helpers" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /examples/data/new-data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "state_name": "CO", 4 | "avg_temp": 34 5 | }, 6 | { 7 | "state_name": "UT", 8 | "avg_temp": 72 9 | }, 10 | { 11 | "state_name": "NM", 12 | "avg_temp": 45 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /examples/data/left-data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "name": "UT" 5 | }, { 6 | "id": "2", 7 | "name": "WY" 8 | }, { 9 | "id": "3", 10 | "name": "CO" 11 | }, { 12 | "id": "4", 13 | "name": "NM" 14 | } 15 | ] 16 | -------------------------------------------------------------------------------- /bin/io/makeDirectories.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | var mkdirp = require('mkdirp') 3 | 4 | function makeDirectories (outPath, cb) { 5 | mkdirp(path.dirname(outPath), function (err) { 6 | cb(err) 7 | }) 8 | } 9 | 10 | module.exports = makeDirectories 11 | -------------------------------------------------------------------------------- /examples/data/new-data-two.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "state_name": "Colorado", 4 | "avg_temp": 34 5 | }, 6 | { 7 | "state_name": "Utah", 8 | "avg_temp": 72 9 | }, 10 | { 11 | "state_name": "New Mexico", 12 | "avg_temp": 45 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /bin/io/readFile.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | 3 | module.exports = readFile 4 | 5 | function readFile (path, cb) { 6 | fs.readFile(path, 'utf8', function (err, data) { 7 | if (err) { 8 | cb(err) 9 | return false 10 | } 11 | cb(null, data) 12 | }) 13 | } 14 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import babel from 'rollup-plugin-babel' 2 | import nodeResolve from 'rollup-plugin-node-resolve' 3 | import commonjs from 'rollup-plugin-commonjs' 4 | 5 | export default { 6 | entry: 'src/index.js', 7 | format: 'cjs', 8 | plugins: [ babel(), nodeResolve(), commonjs() ], 9 | dest: 'dist/joiner.node.js' 10 | } 11 | -------------------------------------------------------------------------------- /examples/data/new-geo-data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "state_abbr": "CO", 4 | "state_name": "Colorado", 5 | "avg_temp": 34 6 | }, 7 | { 8 | "state_abbr": "UT", 9 | "state_name": "Utah", 10 | "avg_temp": 72 11 | }, 12 | { 13 | "state_abbr": "NM", 14 | "state_name": "New Mexico", 15 | "avg_temp": 45 16 | } 17 | ] 18 | -------------------------------------------------------------------------------- /examples/data/left-data-nested-three.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "name": "Utah", 5 | "values": {} 6 | }, { 7 | "id": "2", 8 | "name": "Wyoming", 9 | "values": {} 10 | }, { 11 | "id": "3", 12 | "name": "Colorado", 13 | "values": {} 14 | }, { 15 | "id": "4", 16 | "name": "New Mexico", 17 | "values": {} 18 | } 19 | ] 20 | -------------------------------------------------------------------------------- /bin/io/writeDataSync.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | var helpers = require('./helpers.js') 3 | var makeDirectoriesSync = require('./makeDirectoriesSync.js') 4 | 5 | function writeDataSync (outPath, data, opts) { 6 | var formattedData = helpers.formatData(outPath, data) 7 | if (opts.makeDirectories) { 8 | makeDirectoriesSync(outPath) 9 | } 10 | fs.writeFileSync(outPath, formattedData) 11 | } 12 | 13 | module.exports = writeDataSync 14 | -------------------------------------------------------------------------------- /examples/data/new-data-three.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "state", 4 | "data": { 5 | "state_name": "Colorado", 6 | "avg_temp": 34 7 | } 8 | }, 9 | { 10 | "type": "state", 11 | "data": { 12 | "state_name": "Utah", 13 | "avg_temp": 72 14 | } 15 | }, 16 | { 17 | "type": "state", 18 | "data": { 19 | "state_name": "New Mexico", 20 | "avg_temp": 45 21 | } 22 | } 23 | ] 24 | -------------------------------------------------------------------------------- /bin/io/parsers.js: -------------------------------------------------------------------------------- 1 | var dsv = require('d3-dsv') 2 | 3 | var parsers = { 4 | json: function (str) { 5 | return JSON.parse(str) 6 | }, 7 | csv: function (str) { 8 | return dsv.csvParse(str) 9 | }, 10 | tsv: function (str) { 11 | return dsv.tsvParse(str) 12 | }, 13 | psv: function (str) { 14 | return dsv.dsvFormat('|').parse(str) 15 | } 16 | } 17 | 18 | // Aliases 19 | parsers.geojson = parsers.json 20 | parsers.topojson = parsers.json 21 | 22 | module.exports = parsers 23 | -------------------------------------------------------------------------------- /examples/data/left-data-nested-four.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "name": "UT", 5 | "values": { 6 | "name": "Utah" 7 | } 8 | }, { 9 | "id": "2", 10 | "name": "WY", 11 | "values": { 12 | "name": "Wyoming" 13 | } 14 | }, { 15 | "id": "3", 16 | "name": "CO", 17 | "values": { 18 | "name": "Colorado" 19 | } 20 | }, { 21 | "id": "4", 22 | "name": "NM", 23 | "values": { 24 | "name": "New Mexico" 25 | } 26 | } 27 | ] 28 | -------------------------------------------------------------------------------- /examples/data/left-data-nested-two.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "name": "Utah", 5 | "values": { 6 | "weather": {} 7 | } 8 | }, { 9 | "id": "2", 10 | "name": "Wyoming", 11 | "values": { 12 | "weather": {} 13 | } 14 | }, { 15 | "id": "3", 16 | "name": "Colorado", 17 | "values": { 18 | "weather": {} 19 | } 20 | }, { 21 | "id": "4", 22 | "name": "New Mexico", 23 | "values": { 24 | "weather": {} 25 | } 26 | } 27 | ] 28 | -------------------------------------------------------------------------------- /bin/io/readDbf.js: -------------------------------------------------------------------------------- 1 | var shapefile = require('shapefile') 2 | 3 | module.exports = readDbf 4 | 5 | function readDbf (path, cb) { 6 | var values = [] 7 | shapefile.openDbf(path) 8 | .then(function (source) { 9 | return source.read() 10 | .then(function log (result) { 11 | if (result.done) return cb(null, values) 12 | values.push(result.value) 13 | return source.read().then(log) 14 | }) 15 | }) 16 | .catch(function (error) { 17 | return cb(error.stack) 18 | }) 19 | } 20 | -------------------------------------------------------------------------------- /bin/io/helpers.js: -------------------------------------------------------------------------------- 1 | var path = require('./path.js') 2 | var formatters = require('./formatters.js') 3 | 4 | function formatData (outPath, data) { 5 | var ext = discernFormat(outPath) 6 | return formatters[ext](data) 7 | } 8 | 9 | function discernFormat (fileName) { 10 | var extension = path.extname(fileName) 11 | if (extension === '') return false 12 | 13 | var formatName = extension.slice(1) 14 | return formatName 15 | } 16 | 17 | module.exports = { 18 | discernFormat: discernFormat, 19 | formatData: formatData 20 | } 21 | -------------------------------------------------------------------------------- /bin/io/path.js: -------------------------------------------------------------------------------- 1 | // -------------------------------------------- 2 | // 3 | // Browser-implementations of NodeJS path module, courtesy Rich Harris 4 | // https://github.com/rollup/rollup/blob/master/browser/path.js 5 | // 6 | // -------------------------------------------- 7 | 8 | function basename (path) { 9 | return path.split(/(\/|\\)/).pop() 10 | } 11 | 12 | function extname (path) { 13 | const match = /\.[^.]+$/.exec(basename(path)) 14 | if (!match) return '' 15 | return match[0] 16 | } 17 | 18 | module.exports = { 19 | basename: basename, 20 | extname: extname 21 | } 22 | -------------------------------------------------------------------------------- /examples/data/left-data-nested.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "name": "UT", 5 | "values": { 6 | "name": "Utah", 7 | "weather": {} 8 | } 9 | }, { 10 | "id": "2", 11 | "name": "WY", 12 | "values": { 13 | "name": "Wyoming", 14 | "weather": {} 15 | } 16 | }, { 17 | "id": "3", 18 | "name": "CO", 19 | "values": { 20 | "name": "Colorado", 21 | "weather": {} 22 | } 23 | }, { 24 | "id": "4", 25 | "name": "NM", 26 | "values": { 27 | "name": "New Mexico", 28 | "weather": {} 29 | } 30 | } 31 | ] 32 | -------------------------------------------------------------------------------- /bin/io/writeData.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | var helpers = require('./helpers.js') 3 | var makeDirectories = require('./makeDirectories.js') 4 | 5 | function writeData (outPath, data, opts, cb) { 6 | var formattedData = helpers.formatData(outPath, data) 7 | if (opts.makeDirectories) { 8 | makeDirectories(outPath, proceed) 9 | } else { 10 | proceed() 11 | } 12 | function proceed (err) { 13 | if (err) { 14 | cb(err) 15 | } 16 | fs.writeFile(outPath, formattedData, function (err) { 17 | cb(err, formattedData) 18 | }) 19 | } 20 | } 21 | 22 | module.exports = writeData 23 | -------------------------------------------------------------------------------- /bin/io/readData.js: -------------------------------------------------------------------------------- 1 | var helpers = require('./helpers.js') 2 | var readDbf = require('./readDbf.js') 3 | var readFile = require('./readFile.js') 4 | var parsers = require('./parsers.js') 5 | 6 | function readData (path, cb) { 7 | var readers = { 8 | csv: readFile, 9 | tsv: readFile, 10 | psv: readFile, 11 | json: readFile, 12 | geojson: readFile, 13 | topojson: readFile, 14 | shp: readDbf, 15 | dbf: readDbf 16 | } 17 | 18 | var ext = helpers.discernFormat(path) 19 | var reader = readers[ext] 20 | 21 | reader(path, function (err, result) { 22 | if (err) { 23 | cb(err) 24 | } 25 | var parser = parsers[ext] || function (d) { return d } 26 | cb(null, parser(result)) 27 | }) 28 | } 29 | 30 | module.exports = readData 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Michael Keller 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /bin/io/formatters.js: -------------------------------------------------------------------------------- 1 | var dsv = require('d3-dsv') 2 | var dbf = require('dbf') 3 | 4 | function reportParseError (format) { 5 | console.error('[joiner] Error converting your data to ' + format + '. Your data most likely contains objects or lists. Object values can only be strings for this format. Please convert before writing to file.') 6 | } 7 | 8 | var formatters = { 9 | json: function (file, writeOptions) { 10 | writeOptions = writeOptions || {} 11 | return JSON.stringify(file, writeOptions.replacer, writeOptions.indent) 12 | }, 13 | csv: function (file, writeOptions) { 14 | writeOptions = writeOptions || {} 15 | try { 16 | return dsv.csvFormat(file, writeOptions.columns) 17 | } catch (err) { 18 | reportParseError('csv') 19 | } 20 | }, 21 | tsv: function (file, writeOptions) { 22 | writeOptions = writeOptions || {} 23 | try { 24 | return dsv.tsvFormat(file, writeOptions.columns) 25 | } catch (err) { 26 | reportParseError('tsv') 27 | } 28 | }, 29 | psv: function (file, writeOptions) { 30 | writeOptions = writeOptions || {} 31 | try { 32 | return dsv.dsvFormat('|').format(file, writeOptions.columns) 33 | } catch (err) { 34 | reportParseError('psv') 35 | } 36 | }, 37 | txt: function (d) { return d }, 38 | dbf: function (file, writeOptions) { 39 | writeOptions = writeOptions || {} 40 | function toBuffer (ab) { 41 | var buffer = new Buffer(ab.byteLength) 42 | var view = new Uint8Array(ab) 43 | for (var i = 0; i < buffer.length; ++i) { 44 | buffer[i] = view[i] 45 | } 46 | return buffer 47 | } 48 | var buf = dbf.structure(file) 49 | return toBuffer(buf.buffer) 50 | } 51 | } 52 | 53 | module.exports = formatters 54 | -------------------------------------------------------------------------------- /examples/data/us-states.geojson: -------------------------------------------------------------------------------- 1 | {"type":"FeatureCollection","features":[ 2 | {"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-103.00051157559423,36.99999842346288],[-106.40314927871762,36.99999842346288],[-109.04485956299908,36.99999842346288],[-109.04485956299908,40.99944977889005],[-104.05217069691822,40.99944977889005],[-102.05294158231935,40.99892470978733],[-102.03858446120913,36.99999842346288],[-103.00051157559423,36.99999842346288]]]},"properties":{"name":"Colorado"},"id":"CO"}, 3 | {"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-109.04485956299908,36.99999842346288],[-106.40314927871762,36.99999842346288],[-103.00051157559423,36.99999842346288],[-103.0435829389249,35.89420289313209],[-103.06511862059024,32.00186563466003],[-106.66157745870169,32.000290427351864],[-108.21573581888357,31.777661127798083],[-108.21573581888357,31.327151837663315],[-109.04844884327663,31.326626768560594],[-109.04485956299908,36.99999842346288]]]},"properties":{"name":"New Mexico"},"id":"NM"}, 4 | {"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-111.05126723815307,41.9997064195739],[-111.05126723815307,40.99944977889005],[-109.04485956299908,40.99944977889005],[-109.04485956299908,36.99999842346288],[-114.04113770935749,37.003148838079206],[-114.04113770935749,42.00023148867662],[-111.05126723815307,41.9997064195739]]]},"properties":{"name":"Utah"},"id":"UT"}, 5 | {"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-104.05217069691822,40.99944977889005],[-109.04485956299908,40.99944977889005],[-111.05126723815307,40.99944977889005],[-111.05126723815307,41.9997064195739],[-111.05126723815307,44.99995127252268],[-108.82591346606814,44.99995127252268],[-104.05575997719579,44.99995127252268],[-104.05217069691822,40.99944977889005]]]},"properties":{"name":"Wyoming"},"id":"WY"} 6 | ]} 7 | -------------------------------------------------------------------------------- /examples/joinJson.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | var joiner = require('../src/index.js') 3 | 4 | var data = JSON.parse(fs.readFileSync('examples/data/left-data.json')) 5 | /* 6 | [ 7 | { 8 | "id": "1", 9 | "name": "UT" 10 | }, { 11 | "id": "2", 12 | "name": "WY" 13 | }, { 14 | "id": "3", 15 | "name": "CO" 16 | }, { 17 | "id": "4", 18 | "name": "NM" 19 | } 20 | ] 21 | */ 22 | var newData = JSON.parse(fs.readFileSync('examples/data/new-data.json')) 23 | /* 24 | [ 25 | { 26 | "state_name": "CO", 27 | "avg_temp": 34 28 | }, 29 | { 30 | "state_name": "UT", 31 | "avg_temp": 72 32 | }, 33 | { 34 | "state_name": "NM", 35 | "avg_temp": 45 36 | } 37 | ] 38 | */ 39 | 40 | var joinedData = joiner({ 41 | leftData: data, 42 | leftDataKey: 'name', 43 | rightData: newData, 44 | rightDataKey: 'state_name' 45 | }) 46 | 47 | console.log(joinedData) 48 | 49 | /* 50 | { 51 | "data": [ 52 | { 53 | "id": "1", 54 | "name": "UT", 55 | "avg_temp": 72 56 | }, 57 | { 58 | "id": "2", 59 | "name": "WY", 60 | "avg_temp": null 61 | }, 62 | { 63 | "id": "3", 64 | "name": "CO", 65 | "avg_temp": 34 66 | }, 67 | { 68 | "id": "4", 69 | "name": "NM", 70 | "avg_temp": 45 71 | } 72 | ], 73 | "report": { 74 | "diff": { 75 | "a": [ 76 | "CO", 77 | "NM", 78 | "UT", 79 | "WY" 80 | ], 81 | "b": [ 82 | "CO", 83 | "NM", 84 | "UT" 85 | ], 86 | "a_and_b": [ 87 | "CO", 88 | "NM", 89 | "UT" 90 | ], 91 | "a_not_in_b": [ 92 | "WY" 93 | ], 94 | "b_not_in_a": [] 95 | }, 96 | "prose": { 97 | "summary": "3 rows matched in A and B. 1 row in A not in B. All 3 rows in B in A.", 98 | "full": "Matches in A and B: CO, NM, UT. A not in B: WY." 99 | } 100 | } 101 | } 102 | */ 103 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "joiner", 3 | "version": "2.1.3", 4 | "description": "A simple utility for SQL-like joins with Json or GeoJson data in Node, the browser and on the command line. Also creates join reports so you can know how successful a given join was.", 5 | "main": "dist/joiner.node.js", 6 | "module": "src/index.js", 7 | "directories": { 8 | "example": "examples" 9 | }, 10 | "scripts": { 11 | "test": "standard src/**/* bin/**/* && mocha", 12 | "build:node": "rollup --config", 13 | "build:umd": "rollup -c --banner \"$(preamble)\" -f umd -n joiner -o dist/joiner.js -- src/index.js && uglifyjs -O preamble=\"\\\"$(preamble)\\\"\" -o dist/joiner.min.js -cm -- dist/joiner.js", 14 | "build": "npm run build:node && npm run build:umd", 15 | "start": "npm run build && npm test" 16 | }, 17 | "bin": { 18 | "joiner": "./bin/index.js" 19 | }, 20 | "repository": { 21 | "type": "git", 22 | "url": "https://github.com/mhkeller/joiner" 23 | }, 24 | "keywords": [ 25 | "join", 26 | "sql", 27 | "data join", 28 | "json", 29 | "geojson" 30 | ], 31 | "author": { 32 | "name": "Michael Keller", 33 | "url": "code@mhkeller.com" 34 | }, 35 | "license": "MIT", 36 | "bugs": { 37 | "url": "https://github.com/mhkeller/joiner/issues" 38 | }, 39 | "homepage": "https://github.com/mhkeller/joiner", 40 | "dependencies": { 41 | "d3-dsv": "^1.0.3", 42 | "d3-queue": "^3.0.3", 43 | "dbf": "^0.1.4", 44 | "mkdirp": "^0.5.1", 45 | "optimist": "^0.6.1", 46 | "shapefile": "^0.6.2", 47 | "underscore": "~1.6.0" 48 | }, 49 | "devDependencies": { 50 | "babel-plugin-external-helpers": "^6.22.0", 51 | "babel-plugin-transform-object-assign": "^6.22.0", 52 | "babel-preset-es2015": "^6.22.0", 53 | "chai": "^3.5.0", 54 | "indian-ocean": "^2.0.1", 55 | "lodash": "^4.17.4", 56 | "mocha": "^3.2.0", 57 | "package-preamble": "0.0.2", 58 | "rimraf": "^2.5.4", 59 | "rollup": "^0.41.4", 60 | "rollup-plugin-babel": "^2.7.1", 61 | "rollup-plugin-commonjs": "^7.0.0", 62 | "rollup-plugin-node-resolve": "^2.0.0", 63 | "standard": "^8.6.0", 64 | "uglify-js": "^3.13.5" 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/utils/report.js: -------------------------------------------------------------------------------- 1 | import intersection from 'lodash/intersection' 2 | import difference from 'lodash/difference' 3 | 4 | function init () { 5 | return { 6 | aKeys: [], 7 | bKeys: [] 8 | } 9 | } 10 | 11 | function create (reportData) { 12 | var a = reportData.aKeys.sort() 13 | var b = reportData.bKeys.sort() 14 | 15 | var report = { 16 | diff: {}, 17 | prose: { 18 | summary: '', 19 | full: '' 20 | } 21 | } 22 | report.diff.a = a 23 | report.diff.b = b 24 | report.diff.a_and_b = intersection(a, b) 25 | report.diff.a_not_in_b = difference(a, b) 26 | report.diff.b_not_in_a = difference(b, a) 27 | 28 | report.prose.summary = 'No matches. Try choosing different columns to match on.' 29 | 30 | // If it matched some things... 31 | if (report.diff.a_and_b.length !== 0) { 32 | // But it wasn't a perfect match... 33 | if (report.diff.a_not_in_b.length !== 0 || report.diff.b_not_in_a.length !== 0) { 34 | report.prose.summary = printRows(report.diff.a_and_b.length) + ' matched in A and B. ' 35 | report.prose.full = 'Matches in A and B: ' + report.diff.a_and_b.join(', ') + '. ' 36 | 37 | if (report.diff.a_not_in_b.length === 0) { 38 | report.prose.summary += 'All ' + printRows(report.diff.a.length) + ' in A find a match. ' 39 | } else { 40 | report.prose.summary += printRows(report.diff.a_not_in_b.length) + ' in A not in B. ' 41 | report.prose.full += 'A not in B: ' + report.diff.a_not_in_b.join(', ') + '. ' 42 | } 43 | 44 | if (report.diff.b_not_in_a.length === 0) { 45 | report.prose.summary += 'All ' + printRows(report.diff.b.length) + ' in B in A. ' 46 | } else { 47 | report.prose.summary += printRows(report.diff.b_not_in_a.length) + ' in B not in A. ' 48 | report.prose.full += 'B not in A: ' + report.diff.b_not_in_a.join(', ') + '. ' 49 | } 50 | } else { 51 | report.prose.summary = '100%, one-to-one match of ' + report.diff.a.length + ' rows!' 52 | } 53 | report.prose.summary = report.prose.summary.trim() 54 | report.prose.full = report.prose.full.trim() 55 | } 56 | return report 57 | } 58 | 59 | function printRows (length) { 60 | return length + ' row' + (length > 1 ? 's' : '') 61 | } 62 | 63 | export default { 64 | init, create 65 | } 66 | -------------------------------------------------------------------------------- /examples/joinJson-nest-under.js: -------------------------------------------------------------------------------- 1 | // -------------------------------------------- 2 | // 3 | // Attach joined data onto a nested key 4 | // 5 | // -------------------------------------------- 6 | var fs = require('fs') 7 | var joiner = require('../src/index.js') 8 | 9 | var data = JSON.parse(fs.readFileSync('examples/data/left-data-nested-three.json')) 10 | /* 11 | [ 12 | { 13 | "id": "1", 14 | "name": "Utah", 15 | "values": { 16 | "weather": {} 17 | } 18 | }, { 19 | "id": "2", 20 | "name": "Wyoming", 21 | "values": { 22 | "weather": {} 23 | } 24 | }, { 25 | "id": "3", 26 | "name": "Colorado", 27 | "values": { 28 | "weather": {} 29 | } 30 | }, { 31 | "id": "4", 32 | "name": "New Mexico", 33 | "values": { 34 | "weather": {} 35 | } 36 | } 37 | ] 38 | */ 39 | 40 | var newData = JSON.parse(fs.readFileSync('examples/data/new-data-two.json')) 41 | /* 42 | [ 43 | { 44 | "state_name": "Colorado", 45 | "avg_temp": 34 46 | }, 47 | { 48 | "state_name": "Utah", 49 | "avg_temp": 72 50 | }, 51 | { 52 | "state_name": "New Mexico", 53 | "avg_temp": 45 54 | } 55 | ] 56 | */ 57 | 58 | var joinedData = joiner({ 59 | leftData: data, 60 | leftDataKey: 'name', 61 | rightData: newData, 62 | rightDataKey: 'state_name', 63 | nestKey: 'values.weather' // If this didn't already exist, it would be created 64 | }) 65 | 66 | console.log(JSON.stringify(joinedData)) 67 | 68 | /* 69 | { 70 | "data": [ 71 | { 72 | "id": "1", 73 | "name": "Utah", 74 | "values": { 75 | "weather": { 76 | "avg_temp": 72 77 | } 78 | } 79 | }, 80 | { 81 | "id": "2", 82 | "name": "Wyoming", 83 | "values": { 84 | "weather": { 85 | "avg_temp": null 86 | } 87 | } 88 | }, 89 | { 90 | "id": "3", 91 | "name": "Colorado", 92 | "values": { 93 | "weather": { 94 | "avg_temp": 34 95 | } 96 | } 97 | }, 98 | { 99 | "id": "4", 100 | "name": "New Mexico", 101 | "values": { 102 | "weather": { 103 | "avg_temp": 45 104 | } 105 | } 106 | } 107 | ], 108 | "report": { 109 | "diff": { 110 | "a": [ 111 | "Colorado", 112 | "New Mexico", 113 | "Utah", 114 | "Wyoming", 115 | ], 116 | "b": [ 117 | "Colorado", 118 | "New Mexico", 119 | "Utah" 120 | ], 121 | "a_and_b": [ 122 | "Colorado", 123 | "New Mexico", 124 | "Utah" 125 | ], 126 | "a_not_in_b": [ 127 | "Wyoming" 128 | ], 129 | "b_not_in_a": [] 130 | }, 131 | "prose": { 132 | "summary": "3 rows matched in A and B. 1 row in A not in B. All 3 rows in B in A.", 133 | "full": "Matches in A and B: Colorado, New Mexico, Utah. A not in B: Wyoming." 134 | } 135 | } 136 | } 137 | */ 138 | -------------------------------------------------------------------------------- /examples/joinJson-nested-join-keys.js: -------------------------------------------------------------------------------- 1 | // -------------------------------------------- 2 | // 3 | // Join data when the keys are nested 4 | // 5 | // -------------------------------------------- 6 | 7 | var fs = require('fs') 8 | var joiner = require('../src/index.js') 9 | 10 | var data = JSON.parse(fs.readFileSync('examples/data/left-data-nested-four.json')) 11 | /* 12 | [ 13 | { 14 | "id": "1", 15 | "name": "UT", 16 | "values": { 17 | "name": "Utah" 18 | } 19 | }, { 20 | "id": "2", 21 | "name": "WY", 22 | "values": { 23 | "name": "Wyoming" 24 | } 25 | }, { 26 | "id": "3", 27 | "name": "CO", 28 | "values": { 29 | "name": "Colorado" 30 | } 31 | }, { 32 | "id": "4", 33 | "name": "NM", 34 | "values": { 35 | "name": "New Mexico" 36 | } 37 | } 38 | ] 39 | */ 40 | var newData = JSON.parse(fs.readFileSync('examples/data/new-data-three.json')) 41 | /* 42 | [ 43 | { 44 | "type": "state", 45 | "data": { 46 | "state_name": "Colorado", 47 | "avg_temp": 34 48 | } 49 | }, 50 | { 51 | "type": "state", 52 | "data": { 53 | "state_name": "Utah", 54 | "avg_temp": 72 55 | } 56 | }, 57 | { 58 | "type": "state", 59 | "data": { 60 | "state_name": "New Mexico", 61 | "avg_temp": 45 62 | } 63 | } 64 | ] 65 | */ 66 | 67 | var joinedData = joiner({ 68 | leftData: data, 69 | leftDataKey: 'values.name', 70 | rightData: newData, 71 | rightDataKey: 'data.state_name' 72 | }) 73 | 74 | console.log(JSON.stringify(joinedData)) 75 | 76 | /* 77 | { 78 | "data": [ 79 | { 80 | "id": "1", 81 | "name": "UT", 82 | "values": { 83 | "name": "Utah" 84 | }, 85 | "type": "state", 86 | "data": { 87 | "avg_temp": 72 88 | } 89 | }, 90 | { 91 | "id": "2", 92 | "name": "WY", 93 | "values": { 94 | "name": "Wyoming" 95 | }, 96 | "type": null, 97 | "data": null 98 | }, 99 | { 100 | "id": "3", 101 | "name": "CO", 102 | "values": { 103 | "name": "Colorado" 104 | }, 105 | "type": "state", 106 | "data": { 107 | "avg_temp": 34 108 | } 109 | }, 110 | { 111 | "id": "4", 112 | "name": "NM", 113 | "values": { 114 | "name": "New Mexico" 115 | }, 116 | "type": "state", 117 | "data": { 118 | "avg_temp": 45 119 | } 120 | } 121 | ], 122 | "report": { 123 | "diff": { 124 | "a": [ 125 | "Colorado", 126 | "New Mexico", 127 | "Utah", 128 | "Wyoming" 129 | ], 130 | "b": [ 131 | "Colorado", 132 | "New Mexico", 133 | "Utah" 134 | ], 135 | "a_and_b": [ 136 | "Colorado", 137 | "New Mexico", 138 | "Utah" 139 | ], 140 | "a_not_in_b": [ 141 | "Wyoming" 142 | ], 143 | "b_not_in_a": [] 144 | }, 145 | "prose": { 146 | "summary": "3 rows matched in A and B. 1 row in A not in B. All 3 rows in B in A.", 147 | "full": "Matches in A and B: Colorado, New Mexico, Utah. A not in B: Wyoming." 148 | } 149 | } 150 | } 151 | */ 152 | -------------------------------------------------------------------------------- /bin/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var optimist = require('optimist') 4 | var joiner = require('../dist/joiner.node.js') 5 | var queue = require('d3-queue').queue 6 | 7 | var io = require('./io/index.js') 8 | 9 | var argv = optimist 10 | .usage('Usage: joiner -a DATASET_A_PATH -k DATASET_A_KEY -b DATASET_B_PATH -j DATASET_B_KEY -o OUT_FILE_PATH [-r (summary|full) -n NEST_KEY --geojson]') 11 | .options('h', { 12 | alias: 'help', 13 | describe: 'Display help', 14 | default: false 15 | }) 16 | .options('a', { 17 | alias: 'apath', 18 | describe: 'Dataset A path' 19 | }) 20 | .options('k', { 21 | alias: 'akey', 22 | describe: 'Dataset A key' 23 | }) 24 | .options('b', { 25 | alias: 'bpath', 26 | describe: 'Dataset B path' 27 | }) 28 | .options('j', { 29 | alias: 'bkey', 30 | describe: 'Dataset B key' 31 | }) 32 | .options('g', { 33 | alias: 'geojson', 34 | describe: 'Is dataset A geojson?', 35 | default: false, 36 | boolean: true 37 | }) 38 | .options('n', { 39 | alias: 'nestkey', 40 | describe: 'Nested key name' 41 | }) 42 | .options('o', { 43 | alias: 'out', 44 | describe: 'Out path', 45 | default: null 46 | }) 47 | .options('r', { 48 | alias: 'report', 49 | describe: 'Report format', 50 | default: 'summary' 51 | }) 52 | .check(function (argv) { 53 | if ((!argv['a'] || !argv['adata']) && (!argv['a'] || !argv['adata']) && (!argv['b'] || !argv['bdata']) && (!argv['k'] || !argv['akey']) && (!argv['j'] || !argv['bkey'])) { 54 | throw 'What do you want to do?' // eslint-disable-line no-throw-literal 55 | } 56 | }) 57 | .argv 58 | 59 | if (argv.h || argv.help) { 60 | optimist.showHelp() 61 | } 62 | 63 | var aPath = argv.a || argv['apath'] 64 | var aKey = argv.k || argv['akey'] 65 | var bPath = argv.b || argv['bpath'] 66 | var bKey = argv.j || argv['bkey'] 67 | var geojson = argv.g || argv['geojson'] 68 | var nestKey = argv.n || argv['nestkey'] 69 | var outPath = argv.o || argv['out'] 70 | var reportDesc = argv.r || argv['report'] 71 | 72 | var q = queue() 73 | 74 | q.defer(io.readData, aPath) 75 | q.defer(io.readData, bPath) 76 | 77 | q.await(function (err, aData, bData) { 78 | console.log('adata', aData) 79 | console.log('bdata', bData) 80 | if (err) { 81 | throw new Error(err) 82 | } 83 | var config = { 84 | leftData: aData, 85 | leftDataKey: aKey, 86 | rightData: bData, 87 | rightDataKey: bKey, 88 | nestKey: nestKey, 89 | geoJson: geojson 90 | } 91 | 92 | // Join data 93 | var jd = joiner(config) 94 | if (outPath !== null) { 95 | io.writeData(outPath, jd.data, {makeDirectories: true}, function (err) { 96 | if (err) { 97 | console.error('Error writing data file', outPath) 98 | throw new Error(err) 99 | } 100 | }) 101 | io.writeDataSync(stripExtension(outPath) + 'report.json', jd.report, {makeDirectories: true}) 102 | } else { 103 | if (reportDesc === 'summary') { 104 | console.log(jd.report.prose.summary) 105 | } else { 106 | console.log(jd.report.prose.full) 107 | } 108 | } 109 | }) 110 | 111 | function stripExtension (fullPath) { 112 | var ext = io.helpers.discernFormat(fullPath) 113 | return fullPath.replace(new RegExp(ext + '$', 'g'), '') 114 | } 115 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import cloneDeep from 'lodash/clonedeep' 2 | import get from 'lodash/get' 3 | import set from 'lodash/set' 4 | import unset from 'lodash/unset' 5 | 6 | import joinReport from './utils/report.js' 7 | 8 | function addNulls (data, nullKeyObj, nestKey) { 9 | data.forEach(function (datum) { 10 | var nestedDestination 11 | if (nestKey) { 12 | // Set the nested destination to an object if it isn't already or doesn't exist 13 | nestedDestination = get(datum, nestKey) 14 | if (typeof nestedDestination !== 'object' || Array.isArray(nestedDestination) || typeof nestedDestination === 'function') { 15 | set(datum, nestKey, {}) 16 | } 17 | datum = get(datum, nestKey) 18 | } 19 | // Create copies of this so they don't get overwritten or messed up by the extend 20 | var datumPersist = cloneDeep(datum) 21 | // You could extend `nullKeyObjPersist` with `datum` but that would reverse the order of your keys 22 | // And always put your keys that have nulls (which are probably the least important keys) first. 23 | // This way will overwrite everything with nulls, then rewrite keys that have values. 24 | Object.assign(datum, nullKeyObj, datumPersist) 25 | }) 26 | return data 27 | } 28 | 29 | function addToNullMatch (keyMap, keys) { 30 | keys.forEach(function (key) { 31 | if (!keyMap.null_match[key]) { 32 | keyMap.null_match[key] = null 33 | } 34 | }) 35 | } 36 | 37 | function indexRightDataOnKey (rightData, rightKeyColumn, reportData) { 38 | var keyMap = { 39 | null_match: {} 40 | } 41 | rightData.forEach(function (datum) { 42 | // Copy this value because we're going to be deleting the match column 43 | // And we don't want that column to be deleted the next time we join, if we want to join without reloading data 44 | // This will delete the copy, but keep the original data next time the function is run 45 | var datumPersist = cloneDeep(datum) 46 | var rightKeyValue = get(datumPersist, rightKeyColumn) 47 | reportData.bKeys.push(rightKeyValue) 48 | if (!keyMap[rightKeyValue]) { 49 | // Get rid of the original name key since that will just be a duplicate 50 | unset(datumPersist, rightKeyColumn) 51 | set(keyMap, rightKeyValue, datumPersist) 52 | // Log the new keys that we've encountered for a comprehensive list at the end 53 | addToNullMatch(keyMap, Object.keys(datumPersist)) 54 | } else { 55 | console.error('[Joiner] Duplicate entry for "' + rightKeyValue + '"') 56 | } 57 | }) 58 | return keyMap 59 | } 60 | 61 | function joinOnMatch (leftData, leftKeyColumn, keyMap, nestKey, geoJson, reportData) { 62 | if (geoJson) { 63 | leftData = leftData.features 64 | } 65 | 66 | leftData.forEach(function (datum) { 67 | var leftKeyValue = get(datum, leftKeyColumn) 68 | var match = keyMap[leftKeyValue] 69 | reportData.aKeys.push(leftKeyValue) 70 | if (match) { 71 | if (typeof nestKey === 'string' && nestKey !== '') { 72 | set(datum, nestKey, Object.assign(get(datum, nestKey) || {}, match)) 73 | } else { 74 | Object.assign(datum, match) 75 | } 76 | } 77 | }) 78 | return leftData 79 | } 80 | 81 | function joinDataLeft (config) { 82 | var requiredKeys = ['leftData', 'rightData', 'rightDataKey'] 83 | requiredKeys.forEach(function (k) { 84 | if (!config[k]) { 85 | throw new Error('[joiner] `' + k + '` is required') 86 | } 87 | }) 88 | 89 | var leftData = cloneDeep(config.leftData) 90 | var leftDataKey = config.leftDataKey 91 | var rightData = cloneDeep(config.rightData) 92 | var rightDataKey = config.rightDataKey 93 | var nestKey = config.nestKey 94 | var geoJson = config.geoJson 95 | 96 | if (geoJson === true) { 97 | leftDataKey = config.leftDataKey || 'id' 98 | nestKey = config.nestKey || 'properties' 99 | } 100 | 101 | var reportData = joinReport.init() 102 | 103 | var keyMap = indexRightDataOnKey(rightData, rightDataKey, reportData) 104 | var joinedData = joinOnMatch(leftData, leftDataKey, keyMap, nestKey, geoJson, reportData) 105 | var joinedDataWithNull = addNulls(joinedData, keyMap.null_match, nestKey) 106 | 107 | var report = joinReport.create(reportData) 108 | // If it's geoJson, nest the collection back under a `FeatureCollection` 109 | if (geoJson) { 110 | joinedDataWithNull = { 111 | type: 'FeatureCollection', 112 | features: joinedDataWithNull 113 | } 114 | } 115 | return {data: joinedDataWithNull, report: report} 116 | } 117 | 118 | export default joinDataLeft 119 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | === 3 | 4 | # 2.1.3 5 | 6 | > 2021-04-30 7 | 8 | Fix [an error](https://github.com/mhkeller/joiner/issues/11) that was caused where `report.prose.full` was `undefined` and couldn't be `.trim()`ed. Update the build process to use Uglify 3.x 9 | 10 | * [309ea49318d6548554a797de5d51cafe4b5c3bc8](https://github.com/mhkeller/joiner/commit/309ea49318d6548554a797de5d51cafe4b5c3bc8) 11 | * [c37152db5f34b1f537bf1fa8ff778ff8707ff533](https://github.com/mhkeller/joiner/commit/c37152db5f34b1f537bf1fa8ff778ff8707ff533) 12 | 13 | 14 | # 2.1.2 15 | 16 | > 2017-03-14 17 | 18 | Slight reorganization, add `module` field to package.json. Skipped 2.1.2 because of error in version number in dist file comments. Functionally identical. 19 | 20 | * See above 21 | * [eccfa17eba838b9358831a603f8cfb22055f6ac0](https://github.com/mhkeller/joiner/commit/eccfa17eba838b9358831a603f8cfb22055f6ac0) 22 | 23 | # 2.1.0 24 | 25 | > 2017-02-25 26 | 27 | Move to rollup build process, export browser version and some small cleanup 28 | 29 | * Rollup 30 | * [dddf7719845013930fab1c06b36ca4e216f44f38](https://github.com/mhkeller/joiner/commit/dddf7719845013930fab1c06b36ca4e216f44f38) 31 | * [51b590f0f42abfbbc10246fe89fae13339addd0e](https://github.com/mhkeller/joiner/commit/51b590f0f42abfbbc10246fe89fae13339addd0e) 32 | * [6d5b0b056a7d63281ccd9bdf667685b94d91585a](https://github.com/mhkeller/joiner/commit/6d5b0b056a7d63281ccd9bdf667685b94d91585a) 33 | * Browser version 34 | * [a61d3a19f688af9543dfc10752212275fd48e092](https://github.com/mhkeller/joiner/commit/a61d3a19f688af9543dfc10752212275fd48e092) 35 | * Some extra tests for sort order on object props now that we're using Object.assign shim and not underscore's extend 36 | * [f861d072c27691b6dc7b9ac9c7e37d77af490733](https://github.com/mhkeller/joiner/commit/f861d072c27691b6dc7b9ac9c7e37d77af490733) 37 | * [a9ac8b157bef43f94d321e4e20ba7896747d99db](https://github.com/mhkeller/joiner/commit/a9ac8b157bef43f94d321e4e20ba7896747d99db) 38 | * [a9ac8b157bef43f94d321e4e20ba7896747d99db](https://github.com/mhkeller/joiner/commit/a9ac8b157bef43f94d321e4e20ba7896747d99db) 39 | * Sort keys in example comments 40 | * [14b2b64060fd280e03da3b75f5d007eb675bf58b](https://github.com/mhkeller/joiner/commit/14b2b64060fd280e03da3b75f5d007eb675bf58b) 41 | 42 | # 2.0.0 43 | 44 | > 2017-02-14 45 | 46 | Another rework of the API focused on clarity between json, geojson and nested variables. Much better and clearer support for targeting nested keys and adding results to a nested key through lodash's `get` and `set`. Started maintaining changelog. Remove indian-ocean dependency and include browser-safe portions. 47 | 48 | * Add many more test cases and some bug fixes to dbf reading and writing 49 | * [0fac3167d9e226925ccadb86a7735b33bda9afb2](https://github.com/mhkeller/joiner/commit/0fac3167d9e226925ccadb86a7735b33bda9afb2) 50 | * [09a5b07964be04fcad5230cabb102f7d507cdb0a](https://github.com/mhkeller/joiner/commit/09a5b07964be04fcad5230cabb102f7d507cdb0a) 51 | * [0cce72ef36b142ab654ef469c69853e7c6de8080](https://github.com/mhkeller/joiner/commit/0cce72ef36b142ab654ef469c69853e7c6de8080) 52 | * [629f7b4e8ece8c05f2be967565e936626163607d](https://github.com/mhkeller/joiner/commit/629f7b4e8ece8c05f2be967565e936626163607d) 53 | * [e278ba88e033d9b9682f36a19d62082198a3b206](https://github.com/mhkeller/joiner/commit/e278ba88e033d9b9682f36a19d62082198a3b206) 54 | * Remove dependency on indian-ocean for easier browser-compatibility and lighter-weight. 55 | * [d246c7cacbf74caf58b8b28bcbbe66734d65df27](https://github.com/mhkeller/joiner/commit/d246c7cacbf74caf58b8b28bcbbe66734d65df27) 56 | * [d246c7cacbf74caf58b8b28bcbbe66734d65df27](https://github.com/mhkeller/joiner/commit/d246c7cacbf74caf58b8b28bcbbe66734d65df27) 57 | * [104a89ee72e3024743845594b36356c44f91cf7b](https://github.com/mhkeller/joiner/commit/104a89ee72e3024743845594b36356c44f91cf7b) 58 | * [632b99c77aeba47ccaf7cad593a8a7708ffa57d4](https://github.com/mhkeller/joiner/commit/632b99c77aeba47ccaf7cad593a8a7708ffa57d4) 59 | * [590f7b85c78ccaf89a30ddb2c234c51d45b6b692](https://github.com/mhkeller/joiner/commit/590f7b85c78ccaf89a30ddb2c234c51d45b6b692) 60 | * Sort keys in the report 61 | * [f80344a4eb8b3a6f29861969b6f712c8108ae16b](https://github.com/mhkeller/joiner/commit/f80344a4eb8b3a6f29861969b6f712c8108ae16b) 62 | * Drop support for node 2 and below 63 | * [765f094c691e53c32ca26db5c51e99e0c3c6eaa1](https://github.com/mhkeller/joiner/commit/765f094c691e53c32ca26db5c51e99e0c3c6eaa1) 64 | * [558d972824b08d9733a134b8d5fa1ba73c9af760](https://github.com/mhkeller/joiner/commit/558d972824b08d9733a134b8d5fa1ba73c9af760) 65 | * [0c21638ff5c61b4ed28858259818da797763e6e1](https://github.com/mhkeller/joiner/commit/0c21638ff5c61b4ed28858259818da797763e6e1) 66 | 67 | # 1.0.1 68 | 69 | > 2017-01-08 70 | 71 | Minor fixes 72 | 73 | * Fix dependencies and test command in package.json 74 | * [43671fe3489f9271fc976e6563ebe7ebd0973640](https://github.com/mhkeller/joiner/commit/43671fe3489f9271fc976e6563ebe7ebd0973640) 75 | * SEO 76 | * [482bb3fb572a3b79c20d362828d1ffc801cdb739](https://github.com/mhkeller/joiner/commit/482bb3fb572a3b79c20d362828d1ffc801cdb739) 77 | 78 | 79 | # 1.0.0 80 | 81 | > 2017-01-08 82 | 83 | Major overhaul of API including adding tests. 84 | 85 | * Switch to a config-based API 86 | * [ba7b7f413b70cd508bf539dc6d3fe6f9bdd483fc](https://github.com/mhkeller/joiner/commit/ba7b7f413b70cd508bf539dc6d3fe6f9bdd483fc) 87 | * [9255f1328f4999db4df671a584fe49cb76cf13ac](https://github.com/mhkeller/joiner/commit/9255f1328f4999db4df671a584fe49cb76cf13ac) 88 | * Deep clone passed in objects 89 | * [397d34ff22ee148d613fdc9b47bcc567a532e375](https://github.com/mhkeller/joiner/commit/397d34ff22ee148d613fdc9b47bcc567a532e375) 90 | * Dbf support 91 | * [421864889fde07a17693e1c27c86cd13478635f8](https://github.com/mhkeller/joiner/commit/421864889fde07a17693e1c27c86cd13478635f8) 92 | * [2e2b1572c12135d05097066bf0872ae1dd120973](https://github.com/mhkeller/joiner/commit/2e2b1572c12135d05097066bf0872ae1dd120973) 93 | * Tests and CI 94 | * [397d34ff22ee148d613fdc9b47bcc567a532e375](https://github.com/mhkeller/joiner/commit/397d34ff22ee148d613fdc9b47bcc567a532e375) 95 | * [2cc9230f60a3a21386aef2cb4f55aedd135f8d5d](https://github.com/mhkeller/joiner/commit/2cc9230f60a3a21386aef2cb4f55aedd135f8d5d) 96 | 97 | 98 | # 0.4.2 99 | 100 | > 2015-01-11 101 | 102 | Initial commit. We'll start from here 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Joiner 2 | ====== 3 | 4 | [![Build Status](https://secure.travis-ci.org/mhkeller/joiner.png?branch=master&style=flat-square)](http://travis-ci.org/mhkeller/joiner) [![NPM version](https://badge.fury.io/js/joiner.png?style=flat)](http://badge.fury.io/js/joiner) [![npm](https://img.shields.io/npm/dm/joiner.svg)](https://www.npmjs.com/package/joiner) 5 | [![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat)](https://github.com/feross/standard) 6 | 7 | A simple utility for SQL-like joins with Json or geoJson data in Node, the browser and on the command line. Also creates join reports so you can know how successful a given join is. 8 | 9 | Try it in the browser --> https://mhkeller.github.io/join.report/ 10 | 11 | ```js 12 | var data = [ 13 | { "id": "1", "name": "UT" }, 14 | { "id": "4", "name": "NM" } 15 | ] 16 | 17 | var newData = [ 18 | { "state_name": "NM", "avg_temp": 45 } 19 | ] 20 | 21 | var joinedData = joiner({ 22 | leftData: data, 23 | leftDataKey: 'name', 24 | rightData: newData, 25 | rightDataKey: 'state_name' 26 | }) 27 | 28 | console.log(joinedData) 29 | /* 30 | { data: 31 | [ { id: '1', name: 'UT', avg_temp: null }, 32 | { id: '4', name: 'NM', avg_temp: 45 } 33 | ], 34 | report: 35 | { diff: 36 | { a: [ 'NM', 'UT' ], 37 | b: [ 'NM' ], 38 | a_and_b: [ 'NM' ], 39 | a_not_in_b: [ 'UT' ], 40 | b_not_in_a: [] 41 | }, 42 | prose: 43 | { summary: '1 row matched in A and B. 1 row in A not in B. All 1 row in B in A.', 44 | full: 'Matches in A and B: NM. A not in B: UT.' } } } 45 | */ 46 | 47 | ``` 48 | ## Examples 49 | 50 | See the **[`examples`](https://github.com/mhkeller/joiner/tree/master/examples)** folder for different file formats and options. Joiner is useful to verify whether all of your joins were successful and to spot any patterns among fields that didn't join properly. For example, you can see that the `county_01` row in dataset A didn't match with the `county_1` in dataset B and that you have a zero-padding issue going on. 51 | 52 | ## Installation 53 | 54 | To install as a Node.js module: 55 | 56 | ```` 57 | npm install --save joiner 58 | ```` 59 | 60 | Or to install as a command-line utility: 61 | 62 | ```` 63 | npm install joiner -g 64 | ```` 65 | 66 | To use as both, run both commands. 67 | 68 | ## Methods 69 | 70 | All joins return an object with the following structure: 71 | 72 | ```` 73 | data: [data object], 74 | report: { 75 | diff: { 76 | a: [data in A], 77 | b: [data in B], 78 | a_and_b: [data in A and B], 79 | a_not_in_b: [data in A not in B], 80 | b_not_in_a: [data in B not in A] 81 | }: 82 | prose: { 83 | summary: [summary description of join result, number of matches in A and B, A not in B, B not in A.] 84 | full: [full list of which rows were joined in each of the above categories] 85 | } 86 | } 87 | ```` 88 | 89 | ### _joiner(config)_ 90 | 91 | Perform a left join on the two array of object json datasets. It performs a deep clone using [lodash.clonedeep](https://www.npmjs.com/package/lodash.clonedeep) of the objects you pass in and returns the new object. 92 | 93 | Optionally, you can pass in a key name under `nestKey` in case the left data's attribute dictionary is nested under another key, such as in geoJson when it's under the `properties` object. More on that below. 94 | 95 | | parameter | type | description | 96 | | :------------|:-------- |:---------------| 97 | | leftData | Array | existing data | 98 | | leftDataKey | [String] | key to join on, defaults to `"id"` if not set and `geoJson: true` | 99 | | rightData | Array | new data | 100 | | rightDataKey | String | key to join on | 101 | | geoJson | [Boolean] default=false | optional, key name holding attribute | 102 | | nestKey | [String] | optional, key name holding attribute, feaults to `"properties"` if not set and `geoJson: true` | 103 | 104 | #### Joining to geoJson 105 | 106 | If `geoJson` is true, performs a left join onto the `properties` object of each feature in a geoJson array. 107 | 108 | If you want to join on the `"id"` property, omit `leftDataKey`. If you want to join on a value in the `properties` object, set `leftDataKey` to `'properties.'` and set `nestKey` to `'properties'`. See examples for more. 109 | 110 | ## Command line interface 111 | 112 | ```` 113 | Usage: joiner -a DATASET_A_PATH -k DATASET_A_KEY -b DATASET_B_PATH -j DATASET_B_KEY -o OUT_FILE_PATH [-r (summary|full) -n NEST_KEY --geojson] 114 | 115 | Options: 116 | -h, --help Display help [default: false] 117 | -a, --apath Dataset A path 118 | -k, --akey Dataset A key 119 | -b, --bpath Dataset B path 120 | -j, --bkey Dataset B key 121 | -g, --geojson Is dataset A geojson? [default: false] 122 | -n, --nestkey Nested key name 123 | -o, --out Out path 124 | -r, --report Report format [default: "summary"] 125 | 126 | ```` 127 | 128 | In most cases, the first four parameters (`--apath`, `--akey`, `--bpath` and `--bkey`) are required. `--akey` is not required if you have set geojson to true by using `-g` or `--geojson` since it will join on the `"id"` field. If you want to join on a property field in geojson, then set that using `--akey`. 129 | 130 | If you specify an output file, it will write the join result to the specified file and the report to the same directory. Intermediate directories will be created if they don't already exist. For example, `-o path/to/output.csv` will also write `-o path/to/output-report.json` and create the `to/` folder if it isn't already there. If you don't specify an output file, it will print the results to the console. 131 | 132 | If you don't specify an output file with `-o`, Joiner will print the join report to the console. By default, it will just specify the summary report. To print the full report, specify `-d full`. 133 | 134 | Setting `-g` or `--geojson` is the equivalent of setting `geojson: true` above. 135 | 136 | It converts the specified input file into json and writes the joined dataset to file using [indian ocean](https://github.com/mhkeller/indian-ocean), which currently supports the following formats: `json`, `geojson`, `csv`, `psv`, `tsv` and `dbf`. The format is inferred from the file extension of the input and output file paths. For example, `-a path/to/input/file.csv` will read in a csv and `-o path/to/output/file.csv` will write a csv. 137 | -------------------------------------------------------------------------------- /examples/joinGeoJson-id.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | var joiner = require('../src/index.js') 3 | 4 | var geoData = JSON.parse(fs.readFileSync('examples/data/us-states.geojson', 'utf-8')) 5 | var newData = JSON.parse(fs.readFileSync('examples/data/new-geo-data.json')) 6 | /* 7 | [ 8 | { 9 | "state_abbr": "CO", 10 | "state_name": "Colorado", 11 | "avg_temp": 34 12 | }, 13 | { 14 | "state_abbr": "UT", 15 | "state_name": "Utah", 16 | "avg_temp": 72 17 | }, 18 | { 19 | "state_abbr": "NM", 20 | "state_name": "New Mexico", 21 | "avg_temp": 45 22 | } 23 | ] 24 | */ 25 | 26 | var joinedData = joiner({ 27 | leftData: geoData, 28 | rightData: newData, 29 | rightDataKey: 'state_abbr', 30 | geoJson: true 31 | }) 32 | 33 | console.log(JSON.stringify(joinedData)) 34 | 35 | /* 36 | { 37 | "data": { 38 | "type": "FeatureCollection", 39 | "features": [ 40 | { 41 | "type": "Feature", 42 | "geometry": { 43 | "type": "Polygon", 44 | "coordinates": [ 45 | [ 46 | [ 47 | -103.00051157559423, 48 | 36.99999842346288 49 | ], 50 | [ 51 | -106.40314927871762, 52 | 36.99999842346288 53 | ], 54 | [ 55 | -109.04485956299908, 56 | 36.99999842346288 57 | ], 58 | [ 59 | -109.04485956299908, 60 | 40.99944977889005 61 | ], 62 | [ 63 | -104.05217069691822, 64 | 40.99944977889005 65 | ], 66 | [ 67 | -102.05294158231935, 68 | 40.99892470978733 69 | ], 70 | [ 71 | -102.03858446120913, 72 | 36.99999842346288 73 | ], 74 | [ 75 | -103.00051157559423, 76 | 36.99999842346288 77 | ] 78 | ] 79 | ] 80 | }, 81 | "properties": { 82 | "state_name": "Colorado", 83 | "avg_temp": 34 84 | }, 85 | "id": "CO" 86 | }, 87 | { 88 | "type": "Feature", 89 | "geometry": { 90 | "type": "Polygon", 91 | "coordinates": [ 92 | [ 93 | [ 94 | -109.04485956299908, 95 | 36.99999842346288 96 | ], 97 | [ 98 | -106.40314927871762, 99 | 36.99999842346288 100 | ], 101 | [ 102 | -103.00051157559423, 103 | 36.99999842346288 104 | ], 105 | [ 106 | -103.0435829389249, 107 | 35.89420289313209 108 | ], 109 | [ 110 | -103.06511862059024, 111 | 32.00186563466003 112 | ], 113 | [ 114 | -106.66157745870169, 115 | 32.000290427351864 116 | ], 117 | [ 118 | -108.21573581888357, 119 | 31.777661127798083 120 | ], 121 | [ 122 | -108.21573581888357, 123 | 31.327151837663315 124 | ], 125 | [ 126 | -109.04844884327663, 127 | 31.326626768560594 128 | ], 129 | [ 130 | -109.04485956299908, 131 | 36.99999842346288 132 | ] 133 | ] 134 | ] 135 | }, 136 | "properties": { 137 | "state_name": "New Mexico", 138 | "avg_temp": 45 139 | }, 140 | "id": "NM" 141 | }, 142 | { 143 | "type": "Feature", 144 | "geometry": { 145 | "type": "Polygon", 146 | "coordinates": [ 147 | [ 148 | [ 149 | -111.05126723815307, 150 | 41.9997064195739 151 | ], 152 | [ 153 | -111.05126723815307, 154 | 40.99944977889005 155 | ], 156 | [ 157 | -109.04485956299908, 158 | 40.99944977889005 159 | ], 160 | [ 161 | -109.04485956299908, 162 | 36.99999842346288 163 | ], 164 | [ 165 | -114.04113770935749, 166 | 37.003148838079206 167 | ], 168 | [ 169 | -114.04113770935749, 170 | 42.00023148867662 171 | ], 172 | [ 173 | -111.05126723815307, 174 | 41.9997064195739 175 | ] 176 | ] 177 | ] 178 | }, 179 | "properties": { 180 | "state_name": "Utah", 181 | "avg_temp": 72 182 | }, 183 | "id": "UT" 184 | }, 185 | { 186 | "type": "Feature", 187 | "geometry": { 188 | "type": "Polygon", 189 | "coordinates": [ 190 | [ 191 | [ 192 | -104.05217069691822, 193 | 40.99944977889005 194 | ], 195 | [ 196 | -109.04485956299908, 197 | 40.99944977889005 198 | ], 199 | [ 200 | -111.05126723815307, 201 | 40.99944977889005 202 | ], 203 | [ 204 | -111.05126723815307, 205 | 41.9997064195739 206 | ], 207 | [ 208 | -111.05126723815307, 209 | 44.99995127252268 210 | ], 211 | [ 212 | -108.82591346606814, 213 | 44.99995127252268 214 | ], 215 | [ 216 | -104.05575997719579, 217 | 44.99995127252268 218 | ], 219 | [ 220 | -104.05217069691822, 221 | 40.99944977889005 222 | ] 223 | ] 224 | ] 225 | }, 226 | "properties": { 227 | "name": "Wyoming", 228 | "state_name": null, 229 | "avg_temp": null 230 | }, 231 | "id": "WY" 232 | } 233 | ] 234 | }, 235 | "report": { 236 | "diff": { 237 | "a": [ 238 | "CO", 239 | "NM", 240 | "UT", 241 | "WY" 242 | ], 243 | "b": [ 244 | "CO", 245 | "NM", 246 | "UT" 247 | ], 248 | "a_and_b": [ 249 | "CO", 250 | "NM", 251 | "UT" 252 | ], 253 | "a_not_in_b": [ 254 | "WY" 255 | ], 256 | "b_not_in_a": [] 257 | }, 258 | "prose": { 259 | "summary": "3 rows matched in A and B. 1 row in A not in B. All 3 rows in B in A.", 260 | "full": "Matches in A and B: CO, NM, UT. A not in B: WY." 261 | } 262 | } 263 | } 264 | */ 265 | -------------------------------------------------------------------------------- /examples/joinGeoJson-prop.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | var joiner = require('../src/index.js') 3 | 4 | var geoData = JSON.parse(fs.readFileSync('examples/data/us-states.geojson')) 5 | var newData = JSON.parse(fs.readFileSync('examples/data/new-geo-data.json')) 6 | /* 7 | [ 8 | { 9 | "state_abbr": "CO", 10 | "state_name": "Colorado", 11 | "avg_temp": 34 12 | }, 13 | { 14 | "state_abbr": "UT", 15 | "state_name": "Utah", 16 | "avg_temp": 72 17 | }, 18 | { 19 | "state_abbr": "NM", 20 | "state_name": "New Mexico", 21 | "avg_temp": 45 22 | } 23 | ] 24 | */ 25 | 26 | var joinedData = joiner({ 27 | leftData: geoData, 28 | leftDataKey: 'properties.name', 29 | rightData: newData, 30 | rightDataKey: 'state_name', 31 | nestKey: 'properties', 32 | geoJson: true 33 | }) 34 | 35 | console.log(JSON.stringify(joinedData)) 36 | 37 | /* 38 | { 39 | "data": { 40 | "type": "FeatureCollection", 41 | "features": [ 42 | { 43 | "type": "Feature", 44 | "geometry": { 45 | "type": "Polygon", 46 | "coordinates": [ 47 | [ 48 | [ 49 | -103.00051157559423, 50 | 36.99999842346288 51 | ], 52 | [ 53 | -106.40314927871762, 54 | 36.99999842346288 55 | ], 56 | [ 57 | -109.04485956299908, 58 | 36.99999842346288 59 | ], 60 | [ 61 | -109.04485956299908, 62 | 40.99944977889005 63 | ], 64 | [ 65 | -104.05217069691822, 66 | 40.99944977889005 67 | ], 68 | [ 69 | -102.05294158231935, 70 | 40.99892470978733 71 | ], 72 | [ 73 | -102.03858446120913, 74 | 36.99999842346288 75 | ], 76 | [ 77 | -103.00051157559423, 78 | 36.99999842346288 79 | ] 80 | ] 81 | ] 82 | }, 83 | "properties": { 84 | "name": "Colorado", 85 | "state_abbr": "CO", 86 | "avg_temp": 34 87 | }, 88 | "id": "CO" 89 | }, 90 | { 91 | "type": "Feature", 92 | "geometry": { 93 | "type": "Polygon", 94 | "coordinates": [ 95 | [ 96 | [ 97 | -109.04485956299908, 98 | 36.99999842346288 99 | ], 100 | [ 101 | -106.40314927871762, 102 | 36.99999842346288 103 | ], 104 | [ 105 | -103.00051157559423, 106 | 36.99999842346288 107 | ], 108 | [ 109 | -103.0435829389249, 110 | 35.89420289313209 111 | ], 112 | [ 113 | -103.06511862059024, 114 | 32.00186563466003 115 | ], 116 | [ 117 | -106.66157745870169, 118 | 32.000290427351864 119 | ], 120 | [ 121 | -108.21573581888357, 122 | 31.777661127798083 123 | ], 124 | [ 125 | -108.21573581888357, 126 | 31.327151837663315 127 | ], 128 | [ 129 | -109.04844884327663, 130 | 31.326626768560594 131 | ], 132 | [ 133 | -109.04485956299908, 134 | 36.99999842346288 135 | ] 136 | ] 137 | ] 138 | }, 139 | "properties": { 140 | "name": "New Mexico", 141 | "state_abbr": "NM", 142 | "avg_temp": 45 143 | }, 144 | "id": "NM" 145 | }, 146 | { 147 | "type": "Feature", 148 | "geometry": { 149 | "type": "Polygon", 150 | "coordinates": [ 151 | [ 152 | [ 153 | -111.05126723815307, 154 | 41.9997064195739 155 | ], 156 | [ 157 | -111.05126723815307, 158 | 40.99944977889005 159 | ], 160 | [ 161 | -109.04485956299908, 162 | 40.99944977889005 163 | ], 164 | [ 165 | -109.04485956299908, 166 | 36.99999842346288 167 | ], 168 | [ 169 | -114.04113770935749, 170 | 37.003148838079206 171 | ], 172 | [ 173 | -114.04113770935749, 174 | 42.00023148867662 175 | ], 176 | [ 177 | -111.05126723815307, 178 | 41.9997064195739 179 | ] 180 | ] 181 | ] 182 | }, 183 | "properties": { 184 | "name": "Utah", 185 | "state_abbr": "UT", 186 | "avg_temp": 72 187 | }, 188 | "id": "UT" 189 | }, 190 | { 191 | "type": "Feature", 192 | "geometry": { 193 | "type": "Polygon", 194 | "coordinates": [ 195 | [ 196 | [ 197 | -104.05217069691822, 198 | 40.99944977889005 199 | ], 200 | [ 201 | -109.04485956299908, 202 | 40.99944977889005 203 | ], 204 | [ 205 | -111.05126723815307, 206 | 40.99944977889005 207 | ], 208 | [ 209 | -111.05126723815307, 210 | 41.9997064195739 211 | ], 212 | [ 213 | -111.05126723815307, 214 | 44.99995127252268 215 | ], 216 | [ 217 | -108.82591346606814, 218 | 44.99995127252268 219 | ], 220 | [ 221 | -104.05575997719579, 222 | 44.99995127252268 223 | ], 224 | [ 225 | -104.05217069691822, 226 | 40.99944977889005 227 | ] 228 | ] 229 | ] 230 | }, 231 | "properties": { 232 | "name": "Wyoming", 233 | "state_abbr": null, 234 | "avg_temp": null 235 | }, 236 | "id": "WY" 237 | } 238 | ] 239 | }, 240 | "report": { 241 | "diff": { 242 | "a": [ 243 | "Colorado", 244 | "New Mexico", 245 | "Utah", 246 | "Wyoming" 247 | ], 248 | "b": [ 249 | "Colorado", 250 | "New Mexico", 251 | "Utah" 252 | ], 253 | "a_and_b": [ 254 | "Colorado", 255 | "New Mexico", 256 | "Utah" 257 | ], 258 | "a_not_in_b": [ 259 | "Wyoming" 260 | ], 261 | "b_not_in_a": [] 262 | }, 263 | "prose": { 264 | "summary": "3 rows matched in A and B. 1 row in A not in B. All 3 rows in B in A.", 265 | "full": "Matches in A and B: Colorado, New Mexico, Utah. A not in B: Wyoming." 266 | } 267 | } 268 | } 269 | */ 270 | -------------------------------------------------------------------------------- /dist/joiner.min.js: -------------------------------------------------------------------------------- 1 | // https://github.com/mhkeller/joiner Version 2.1.3. Copyright 2021 Michael Keller. 2 | !function(t,r){"object"==typeof exports&&"undefined"!=typeof module?module.exports=r():"function"==typeof define&&define.amd?define(r):t.joiner=r()}(this,function(){"use strict";var t=function(t,r){return t===r||t!=t&&r!=r},e=t;var r=function(t,r){for(var n=t.length;n--;)if(e(t[n][0],r))return n;return-1},n=r,o=Array.prototype.splice;var a=r;var u=r;var i=r;var c=function(t){var r=this.__data__;return!((t=n(r,t))<0)&&(t==r.length-1?r.pop():o.call(r,t,1),--this.size,!0)},f=function(t){var r=this.__data__;return(t=a(r,t))<0?void 0:r[t][1]},s=function(t){return-1>>0,r>>>=0;for(var a=Array(o);++e