├── Build.js
├── README.md
├── dangit.js
├── package.json
└── types
    └── geojson.js

/Build.js:
--------------------------------------------------------------------------------
var Geojson = require('./types/geojson.js');

// Dispatch a build based on the dataset type declared in dangit.json
module.exports = function(config) {
  console.log('Building dataset ' + config.name + '...');

  if (config.type === 'geojson') {
    Geojson.build(config);
  } else {
    console.log('This dataset\'s dangit.json has an unsupported type');
  }
};

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# dangit

DAta Nudged into GIT - File-based datasets that use git for version control of individual records

# Overview

DANGIT is an experimental way to do version control on datasets. By storing each row/record of a dataset as a file in a GitHub repository, it is possible to track changes easily and to let anyone (yes, anyone... the feedback loop is open!) submit changes using the same workflows as open source software development. The files for the individual rows/records are then built into a single dataset file. [Here's the gist](https://gist.github.com/chriswhong/8efd249a58abfa8b39b68bca198e1072) with a braindump of the idea.

In the future, a simple UI with GitHub single sign-on would allow non-technical users to perform the entire fork/edit/build/pull request workflow without using the command line or editing text files.

## How to Use

- Clone this repo
- Install dependencies:
  `npm install`
- Clone the sample dataset [nyc-pizzashops](https://github.com/chriswhong/nyc-pizzashops)
- Edit or add data to the sample dataset by editing files in `/rows`
- Use DANGIT to build the dataset with your new changes:
  `node dangit build ../nyc-pizzashops`
- Create a pull request to submit your changes to the source repo

# How it works

## Dataset storage

A dataset is maintained in its own GitHub repository with a file structure like this:
```
/build - the build directory, where dangit writes the built dataset file (a geojson FeatureCollection, a CSV, or a JSON array of objects). The build filename should be the same as the dataset's name, with the appropriate file extension
/rows - the rows directory, where individual rows are stored as geojson features or flat JSON objects
dangit.json - the dangit configuration file, which includes the dataset's name, type, uid field, etc.
```
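
For illustration, a minimal `dangit.json` for the sample dataset might look like the sketch below. The `name` and `type` fields are the ones the current build code reads; the `uid` entry is an assumption based on the description above and is not yet used by the build.

```json
{
  "name": "nyc-pizzashops",
  "type": "geojson",
  "uid": "uid"
}
```

The build writes its output to `/build/<name>.geojson`, so `name` should match the dataset's filename-friendly name.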

## Editing data

Edits are made to the files in `/rows`; new data are added by creating new files. (For now, increment the uid manually. Someday the build process should validate unique ids, data types, etc.) A sketch of a row file is shown below.
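
For a `geojson` dataset, each row file is a single geojson Feature. The property names in this sketch (`uid`, `name`, `address`) are only assumptions for illustration; use whatever schema the dataset actually defines.

```json
{
  "type": "Feature",
  "properties": {
    "uid": 42,
    "name": "Example Pizza Shop",
    "address": "123 Example St, New York, NY"
  },
  "geometry": {
    "type": "Point",
    "coordinates": [-73.985, 40.748]
  }
}
```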

## Building data

Run the DANGIT build with node, passing in the path of the dataset you would like to build:
`node dangit build ../nyc-pizzashops`
DANGIT looks for a `dangit.json` file in the root of the directory you pass in and starts the build based on `type`. For type `geojson`, it expects each file in `/rows` to be a valid geojson Feature and writes a geojson FeatureCollection into `/build`.

## Sample Dataset

You can participate in our early experimentation by adding, editing, or deleting rows in the dataset [nyc-pizzashops](https://github.com/chriswhong/nyc-pizzashops). Fork the repo, make your changes to the rows, build the distribution file, and open a pull request back to the source repo.

## Commit Messages

Commit messages should include as much info as possible about the rows that were edited/added/removed.

## Pull Requests

Pull requests on dataset repos should include a successful build of the data. (How should we validate this?)

--------------------------------------------------------------------------------
/dangit.js:
--------------------------------------------------------------------------------
var fs = require('fs'),
    Build = require('./Build.js');

// get the command that the user passed in
var command = process.argv[2];

// get the path of the dataset that the user passed in
var path = process.argv[3];

// get the config file for this dataset
var config = require(path + '/dangit.json');

// add the path to config so lower-level modules can access it
config.path = path;

// build, but allow for other commands in the future
if (command === 'build') {
  Build(config);
} else {
  console.log('I don\'t recognize that command...');
}

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "dangit",
  "version": "1.0.0",
  "description": "DAta Nudged into GIT - File-based datasets that use git for version control of individual records",
  "main": "dangit.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/chriswhong/dangit.git"
  },
  "author": "",
  "license": "ISC",
  "bugs": {
    "url": "https://github.com/chriswhong/dangit/issues"
  },
  "homepage": "https://github.com/chriswhong/dangit#readme"
}

--------------------------------------------------------------------------------
/types/geojson.js:
--------------------------------------------------------------------------------
var fs = require('fs');

module.exports = {
  build: function(config) {
    console.log('Combining geojson Features into FeatureCollection...');
    var rowCount = 0;

    // create the build file stream
    var buildFile = fs.createWriteStream(config.path + '/build/' + config.name + '.geojson');

    // get an array of filenames in path/rows
    var rows = fs.readdirSync(config.path + '/rows');

    // write the opening string for the FeatureCollection
    buildFile.write('{"type": "FeatureCollection","features":[');

    rows.forEach(function(row, i) {
      console.log('Handling file ' + row + '...');
      var rowText = fs.readFileSync(config.path + '/rows/' + row, 'utf-8');

      // get rid of line breaks in rowText
      rowText = rowText.replace(/(\r\n|\n|\r)/gm, ' ');
      // TODO get rid of spaces too, but not spaces inside strings

      // TODO add some logic to make sure UIDs are unique

      // separate features with commas, but don't add one after the last feature
      if (i < rows.length - 1) {
        rowText += ',';
      }

      buildFile.write(rowText);
      rowCount++;
    });

    // write the closing string for the FeatureCollection
    buildFile.write(']}');

    console.log('Successfully built ' + config.path + '/build/' + config.name + '.geojson with ' + rowCount + ' rows of data.');
  },

  validate: function(config) {
    // validate the repo here
  }
};
--------------------------------------------------------------------------------
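
The `validate` stub and the TODOs above leave the uid check unimplemented. As a sketch of what that validation could look like (not part of the repo, and the `uid` property name is an assumption to be adjusted to the dataset's actual schema):

```js
var fs = require('fs');

// Sketch: parse each row file and check that no two rows share a uid.
// Assumes the row files are geojson Features with a `uid` property.
function checkUniqueUids(config) {
  var seen = {};
  var ok = true;
  var rows = fs.readdirSync(config.path + '/rows');

  rows.forEach(function(row) {
    var feature = JSON.parse(fs.readFileSync(config.path + '/rows/' + row, 'utf-8'));
    var uid = (feature.properties || {}).uid;

    if (seen[uid]) {
      console.log('Duplicate uid ' + uid + ' in ' + row + ' and ' + seen[uid]);
      ok = false;
    }
    seen[uid] = row;
  });

  return ok;
}
```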