├── .gitignore ├── index.js ├── lib ├── require-expression.js └── formats.js ├── package.json ├── cli.js ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .*.sw? 2 | .DS_Store 3 | node_modules 4 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var formats = require('./lib/formats'); 2 | module.exports = { 3 | version: require('./package.json').version, 4 | formats: formats, 5 | createReadStream: formats.createReadStream, 6 | createWriteStream: formats.createWriteStream 7 | }; 8 | -------------------------------------------------------------------------------- /lib/require-expression.js: -------------------------------------------------------------------------------- 1 | var relative = require('require-relative'); 2 | var fs = require('fs'); 3 | 4 | module.exports = function resolveExpression(expressionOrFilename) { 5 | var filename = expressionOrFilename; 6 | if (filename.match(/\.js(on)?$/)) { 7 | return relative(filename, process.cwd()); 8 | } 9 | return expressionOrFilename; 10 | }; 11 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tito", 3 | "version": "0.6.1", 4 | "description": "", 5 | "main": "index.js", 6 | "bin": { 7 | "tito": "cli.js" 8 | }, 9 | "scripts": { 10 | "test": "cd test && make" 11 | }, 12 | "author": { 13 | "name": "Shawn Allen", 14 | "url": "https://github.com/shawnbot" 15 | }, 16 | "repository": { 17 | "type": "git", 18 | "url": "https://github.com/shawnbot/tito.git" 19 | }, 20 | "license": "CC0-1.0", 21 | "dependencies": { 22 | "JSONStream": "^0.10.0", 23 | "epipebomb": "^0.1.1", 24 | "extend": "^2.0.0", 25 | "fast-csv": "^0.6.0", 26 | "fof": "^1.2.0", 27 | "html-table-stream": "^0.4.0", 28 | "multiline": "^1.0.2", 29 | "ndjson": "^1.3.0", 30 | "require-relative": "^0.8.7", 31 | "yargs": "^3.5.4" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var $0 = 'tito'; 3 | var tito = require('./'); 4 | var formats = tito.formats; 5 | 6 | var yargs = require('yargs') 7 | .usage($0 + ' [options] [input] [output]') 8 | .describe('read', 'the input format (see below)') 9 | .default('read', 'ndjson') 10 | .alias('r', 'read') 11 | .describe('write', 'the output format (see below)') 12 | .default('write', 'ndjson') 13 | .alias('w', 'write') 14 | .describe('in', 'the input filename') 15 | .alias('i', 'in') 16 | .describe('out', 'the output filename') 17 | .alias('o', 'out') 18 | .describe('filter', 'filter input by this data expression') 19 | .alias('f', 'filter') 20 | .describe('map', 'map input to this data expression') 21 | .alias('m', 'map') 22 | .describe('multiple', 'Allow one-to-many array transforms') 23 | .boolean('multiple') 24 | .describe('help', 'Show this help message.') 25 | .describe('version', 'Print the version and exit') 26 | .alias('v', 'version') 27 | .alias('h', 'help') 28 | .wrap(72); 29 | 30 | var options = yargs.argv; 31 | var args = options._; 32 | 33 | if (options.version) { 34 | console.log($0, 'v' + tito.version); 35 | return process.exit(0); 36 | } 37 | 38 | if (options.help) { 39 | yargs.showHelp(); 40 | formats.showHelp(options.help, null, $0); 41 | return process.exit(1); 42 | } 43 | 44 | var fs = require('fs'); 45 | 46 | // hush EPIPE errors 47 | require('epipebomb')(); 48 | 49 | delete options._; 50 | delete options.$0; 51 | 52 | var input = options.in || args[0] || '/dev/stdin'; 53 | var output = options.out || args[1] || '/dev/stdout'; 54 | var parse = formats.createReadStream(options.read); 55 | var format = formats.createWriteStream(options.write); 56 | 57 | var stream = fs.createReadStream(input) 58 | .pipe(parse); 59 | 60 | if (options.map || options.filter) { 61 | var fof = require('fof'); 62 | var expr = require('./lib/require-expression'); 63 | 64 | var map = options.map ? expr(options.map) : null; 65 | var filter = options.filter ? expr(options.filter) : null; 66 | var transform = fof.stream(map, { 67 | filter: filter, 68 | multiple: options.multiple 69 | }); 70 | 71 | stream = stream.pipe(transform); 72 | } 73 | 74 | stream = stream 75 | .pipe(format) 76 | .pipe(fs.createWriteStream(output)); 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tito 2 | tito is a Node.js module and command-line utility for translating 3 | between tabular text streams in formats such as CSV, TSV, JSON 4 | and HTML tables. It stands for **T**ables **I**n, **T**ables **O**ut. 5 | 6 | ## Formats 7 | * JSON: structured with [JSONPath] queries or 8 | [newline-delimited](http://ndjson.org) (the default for input and output). 9 | * Comma-, tab-, and otherwise-delimited text, with support for custom 10 | column and row delimiters. 11 | * HTML tables, with support for targeted parsing with CSS selectors 12 | and formatted output. 13 | 14 | ## Installation 15 | Install it with [npm](https://www.npmjs.com/package/tito): 16 | 17 | ``` 18 | npm install -g tito 19 | ``` 20 | 21 | ## Examples 22 | Here are some examples of what tito can do: 23 | 24 | ##### Convert CSV to TSV 25 | Use the `--read` and `--write` options to set the read and write 26 | formats: 27 | 28 | ```sh 29 | tito --read csv data.csv --write tsv data.tsv 30 | ``` 31 | 32 | Or pipe data into and out of tito via stdio: 33 | 34 | ```sh 35 | cat data.csv | tito --read csv --write tsv > data.tsv 36 | ``` 37 | 38 | ##### Turn HTML tables into CSV 39 | tito's `html` reader uses a [streaming HTML parser] and can target 40 | tables with CSS selectors: 41 | 42 | ```sh 43 | curl -s "http://www.federalreserve.gov/releases/h15/current/" \ 44 | | tito --read.format html --read.selector 'table.statistics' --write csv \ 45 | > interest-rates.csv 46 | ``` 47 | 48 | ##### Import structured JSON data from a URL into dat 49 | tito can take structured JSON like this: 50 | 51 | ```js 52 | { 53 | "results": [ 54 | { /* ... */ }, 55 | // etc. 56 | ] 57 | } 58 | ``` 59 | 60 | and turn it into [newline-delimited JSON]. Just set `--read.format` 61 | to `json` and `--read.path` to the [JSONPath] expression of your data 62 | elements. For the structure above, which is common to many REST APIs, 63 | you would use `results.*`. You could then use the following to import 64 | data from one such API into [dat]: 65 | 66 | ```sh 67 | curl -s http://api.data.gov/some-data \ 68 | | tito --read.format json --read.path 'results.*' \ 69 | | dat import 70 | ``` 71 | 72 | ##### Map and filter your data 73 | The tito `--map` and `--filter` options allow you to perform streaming 74 | transformations on your data. Both options can either be specified as 75 | [fof-compatible expressions](https://github.com/shawnbot/fof#api) or filenames. 76 | 77 | ```sh 78 | tito --filter 'd => d.Year > 2000' \ 79 | --map 'd => {{year: d.Year, region: d.Region, revenue: +d.Revenue}}' \ 80 | --read csv data.csv 81 | ``` 82 | 83 | If you specify an existing filename for either `--map` or `--filter`, it will 84 | be `require()`d and its value passed to `fof()`. This means that you can 85 | specify map and filter transformations in JSON or JavaScript, e.g.: 86 | 87 | ```js 88 | { 89 | year: 'd => +d.Year', 90 | region: 'Region', 91 | revenue: 'd => +d.Revenue' 92 | } 93 | ``` 94 | 95 | then, you could use this transformation with: 96 | 97 | ```sh 98 | tito --map ./transform.json \ 99 | --read csv --write json input.csv > output.json 100 | ``` 101 | 102 | ## Usage 103 | This is the output of `tito --help formats`: 104 | ``` 105 | tito [options] [input] [output] 106 | 107 | Options: 108 | --read, -r the input format (see below) [default: "ndjson"] 109 | --write, -w the output format (see below) [default: "ndjson"] 110 | --in, -i the input filename 111 | --out, -o the output filename 112 | --filter, -f filter input by this data expression [string] 113 | --map, -m map input to this data expression [string] 114 | --help, -h Show this help message. 115 | --version, -v Print the version and exit 116 | 117 | Formats: 118 | 119 | The following values may be used for the input and output format 120 | options, --read/-r or --write/-w: 121 | 122 | tito --read csv --write tsv 123 | tito -r csv -w tsv 124 | 125 | If you wish to specify format options, you must use the dot notation: 126 | 127 | tito --read.format csv --read.delim=, data.csv 128 | tito -r.format json -r.path='results.*' data.json 129 | tito data.ndjson | tito -w.format html -w.indent=' ' 130 | 131 | "csv": Read and write comma-separated (or otherwise-delimted) text 132 | Options: 133 | - "delimiter", "delim", "d": The field delimiter 134 | - "newline", "line", "n": The row delimiter 135 | - "quote", "q": The quote character 136 | 137 | "tsv": Read and write tab-separated values 138 | Options: 139 | - "headers": 140 | - "newline", "line", "n": The line separator character sequence 141 | 142 | "ndjson": Read and write newline-delimted JSON 143 | Options: 144 | 145 | "json": Read and write arrays from streaming JSON 146 | Options: 147 | - "path", "p": The JSONPath selector containing the data (read-only) 148 | - "open", "o": Output this string before streaming items (write-only) 149 | - "separator", "sep", "s": Output this string between items (write-only) 150 | - "close", "c": Output this string after writing all items (write-only) 151 | 152 | "html": Read and write data from HTML tables 153 | Options: 154 | - "selector", "s": the CSS selector of the table to target (read-only) 155 | - "indent", "i": indent HTML with this string (write-only) 156 | ``` 157 | 158 | [dat]: http://dat-data.com/ 159 | [newline-delimited JSON]: http://ndjson.org/ 160 | [JSONPath]: http://jsonpath.curiousconcept.com/ 161 | [streaming HTML parser]: https://www.npmjs.com/package/htmlparser2 162 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | 118 | -------------------------------------------------------------------------------- /lib/formats.js: -------------------------------------------------------------------------------- 1 | var extend = require('extend'), 2 | multiline = require('multiline'), 3 | csv = require('fast-csv'), 4 | ndjson = require('ndjson'), 5 | jsonStream = require('JSONStream'), 6 | table = require('html-table-stream'); 7 | // datex = require('data-expression'); 8 | 9 | var formats = { 10 | names: [], 11 | showHelp: function(format, log, $0) { 12 | if (!log) log = console.log.bind(console); 13 | 14 | log('Formats:'); 15 | log(multiline(function(){/* 16 | 17 | The following values may be used for the input and output format 18 | options, --read/-r or --write/-w: 19 | 20 | $0 --read csv --write tsv 21 | $0 -r csv -w tsv 22 | 23 | If you wish to specify format options, you must use the dot notation: 24 | 25 | $0 --read.format csv --read.delim=, data.csv 26 | $0 -r.format json -r.path='results.*' data.json 27 | $0 data.ndjson | tito -w.format html -w.indent=' ' 28 | */}).replace(/\$0/g, $0)); 29 | 30 | if (format === true) { 31 | log(multiline(function(){/* 32 | 33 | Use "--help formats" to see more about formats. 34 | 35 | */})); 36 | } else { 37 | formats.names.forEach(function(name, i) { 38 | if (format !== 'formats' && format !== name) return; 39 | log(''); 40 | formats[name].showHelp(log); 41 | }); 42 | } 43 | } 44 | }; 45 | 46 | // CSV 47 | formats.csv = createFormat('csv', { 48 | description: 'Read and write comma-separated (or otherwise-delimted) text', 49 | headers: { 50 | description: 'Whether to parse headers in the first row of input', 51 | default: true, 52 | hide: true 53 | }, 54 | delimiter: { 55 | description: 'The field delimiter', 56 | alias: ['delim', 'd'], 57 | default: ',' 58 | }, 59 | newline: { 60 | description: 'The row delimiter', 61 | alias: ['line', 'n'], 62 | default: '\n' 63 | }, 64 | quote: { 65 | description: 'The quote character', 66 | alias: ['q'], 67 | default: '"' 68 | } 69 | }, csv.parse, csv.format); 70 | 71 | // TSV 72 | formats.tsv = createFormat('tsv', { 73 | description: 'Read and write tab-separated values', 74 | headers: { 75 | description: '', 76 | default: true 77 | }, 78 | delimiter: { 79 | description: '', 80 | alias: ['delim', 'd'], 81 | default: '\t', 82 | hide: true 83 | }, 84 | newline: { 85 | description: 'The line separator character sequence', 86 | alias: ['line', 'n'], 87 | default: '\n' 88 | } 89 | // TODO: escape character? 90 | }, csv.parse, csv.format); 91 | 92 | // Newline-Delimited JSON 93 | formats.ndjson = createFormat('ndjson', { 94 | description: 'Read and write newline-delimted JSON', 95 | // no options 96 | }, ndjson.parse, ndjson.stringify); 97 | 98 | // Newline-Delimited JSON 99 | formats.json = createFormat('json', { 100 | description: 'Read and write arrays from streaming JSON', 101 | path: { 102 | description: 'The JSONPath selector containing the data', 103 | alias: ['p'], 104 | readonly: true, 105 | default: '.*' 106 | }, 107 | /* 108 | map: { 109 | alias: ['m'], 110 | default: null 111 | }, 112 | */ 113 | open: { 114 | description: 'Output this string before streaming items', 115 | alias: ['o'], 116 | writeonly: true, 117 | default: '[\n ' 118 | }, 119 | separator: { 120 | description: 'Output this string between items', 121 | alias: ['sep', 's'], 122 | writeonly: true, 123 | default: ',\n ' 124 | }, 125 | close: { 126 | description: 'Output this string after writing all items', 127 | alias: ['c'], 128 | writeonly: true, 129 | default: '\n]\n' 130 | } 131 | }, function jsonReader(options) { 132 | return jsonStream.parse(options.path); 133 | }, function jsonWriter(options) { 134 | return jsonStream.stringify( 135 | options.open, 136 | options.separator, 137 | options.close 138 | ); 139 | }); 140 | 141 | formats.html = createFormat('html', { 142 | description: 'Read and write data from HTML tables', 143 | selector: { 144 | description: 'the CSS selector of the table to target', 145 | alias: ['s'], 146 | readonly: true, 147 | default: 'table' 148 | }, 149 | indent: { 150 | description: 'indent HTML with this string', 151 | alias: ['i'], 152 | writeonly: true, 153 | default: '', 154 | } 155 | }, function htmlReader(options) { 156 | return table.parse(options); 157 | }, function htmlWriter(options) { 158 | if (options.format === 'html') { 159 | delete options.format; 160 | } 161 | return table.format(options); 162 | }); 163 | 164 | module.exports = formats; 165 | module.exports.createReadStream = createReadStream; 166 | module.exports.createWriteStream = createWriteStream; 167 | module.exports.resolve = resolveFormat; 168 | 169 | function resolveFormat(name, options) { 170 | if (typeof name === 'object') { 171 | options = name; 172 | name = name.format || name.name; 173 | } 174 | if (!formats.hasOwnProperty(name)) { 175 | throw new Error('No such format: "' + name + '"'); 176 | } 177 | return { 178 | format: formats[name], 179 | options: options || {} 180 | }; 181 | } 182 | 183 | function createReadStream(name, options) { 184 | var resolved = resolveFormat(name, options); 185 | return resolved.format.createReadStream(resolved.options); 186 | } 187 | 188 | function createWriteStream(name, options) { 189 | var resolved = resolveFormat(name, options); 190 | return resolved.format.createWriteStream(resolved.options); 191 | } 192 | 193 | function createFormat(name, opts, reader, writer) { 194 | formats.names.push(name); 195 | 196 | var defaults = {}; 197 | for (var key in opts) { 198 | if (opts[key].default) { 199 | defaults[key] = opts[key].default; 200 | } 201 | } 202 | 203 | function parseOptions(options) { 204 | for (var dest in opts) { 205 | var opt = opts[dest]; 206 | if (!options[dest] && opt.alias) { 207 | opt.alias.forEach(function(key) { 208 | if (options[key]) { 209 | options[dest] = options[key]; 210 | delete options[key]; 211 | } 212 | }); 213 | } 214 | } 215 | return extend({}, defaults, options); 216 | } 217 | 218 | return { 219 | name: name, 220 | options: opts, 221 | defaults: defaults, 222 | parseOptions: parseOptions, 223 | showHelp: function(log) { 224 | if (!log) log = console.log.bind(console); 225 | log('"%s": %s', name, opts.description || ''); 226 | if (Object.keys(opts).length) { 227 | log(' Options:'); 228 | for (var key in opts) { 229 | var opt = opts[key]; 230 | if (key === 'description' || opt.hide) continue; 231 | var alias = (opt.alias || []).map(function(k) { 232 | return ', "' + k + '"'; 233 | }).join(''); 234 | var flag = opt.readonly 235 | ? ' (read-only)' 236 | : opt.writeonly 237 | ? ' (write-only)' 238 | : ''; 239 | log(' - "%s"%s: %s%s', key, alias, opts[key].description || '', flag); 240 | } 241 | } 242 | }, 243 | createReadStream: function(options) { 244 | options = parseOptions(options); 245 | // console.warn('creating %s reader with options:', name, options); 246 | return reader(options); 247 | }, 248 | createWriteStream: function(options) { 249 | options = parseOptions(options); 250 | // console.warn('creating %s writer with options:', name, options); 251 | return writer(options); 252 | } 253 | }; 254 | } 255 | --------------------------------------------------------------------------------