├── .dir-locals.el
├── .gitignore
├── .travis.yml
├── LICENSE.md
├── Procfile
├── README.md
├── app.js
├── bin
└── datapipes
├── doc
├── cli.md
├── dev.md
├── index.md
├── op-csv.md
├── op-cut.md
├── op-delete.md
├── op-grep.md
├── op-head.md
├── op-html.md
├── op-map.md
├── op-none.md
├── op-replace.md
├── op-strip.md
└── op-tail.md
├── lib
├── fixedqueue.js
├── index.js
├── operators.js
├── shorthandlist.js
├── stdout.js
├── tools.js
├── transform.js
└── util.js
├── package-lock.json
├── package.json
├── public
├── css
│ └── style.css
├── favicon.ico
├── img
│ ├── bg.jpg
│ └── example.png
└── js
│ ├── jquery.tablesorter.js
│ └── table.js
├── routes
└── index.js
├── test
├── data
│ └── gla.csv
├── system.js
├── transform.js
├── unit.js
├── unit
│ ├── cut.js
│ ├── none.js
│ └── strip.js
└── util.js
└── views
├── docs.html
├── interactive.html
└── layout.html
/.dir-locals.el:
--------------------------------------------------------------------------------
1 | ;;; Set project level defaults for coding standards - Emacs Edition.
2 | ((nil . ((tab-width . 2)
3 | (js-indent-level . 2))))
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | .DS_Store
3 |
4 | # Logs
5 | logs
6 | *.log
7 | npm-debug.log*
8 | yarn-debug.log*
9 | yarn-error.log*
10 |
11 | # Runtime data
12 | pids
13 | *.pid
14 | *.seed
15 | *.pid.lock
16 |
17 | # Directory for instrumented libs generated by jscoverage/JSCover
18 | lib-cov
19 |
20 | # Coverage directory used by tools like istanbul
21 | coverage
22 |
23 | # nyc test coverage
24 | .nyc_output
25 |
26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
27 | .grunt
28 |
29 | # Bower dependency directory (https://bower.io/)
30 | bower_components
31 |
32 | # node-waf configuration
33 | .lock-wscript
34 |
35 | # Compiled binary addons (https://nodejs.org/api/addons.html)
36 | build/Release
37 |
38 | # Dependency directories
39 | node_modules/
40 | jspm_packages/
41 |
42 | # TypeScript v1 declaration files
43 | typings/
44 |
45 | # Optional npm cache directory
46 | .npm
47 |
48 | # Optional eslint cache
49 | .eslintcache
50 |
51 | # Optional REPL history
52 | .node_repl_history
53 |
54 | # Output of 'npm pack'
55 | *.tgz
56 |
57 | # Yarn Integrity file
58 | .yarn-integrity
59 |
60 | # dotenv environment variables file
61 | .env
62 |
63 | # next.js build output
64 | .next
65 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | - "9"
4 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | ---------------------
3 |
4 | Copyright (c) 2013 Open Knowledge Foundation
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7 |
8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
9 |
10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: node app.js
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## :warning: Deprecation notice
2 |
3 | **The datapipes website is now archived and read-only.** The `gh-pages` branch hosts the content of the static version of the website, which is now available at [datapipes.datopian.com](https://datapipes.datopian.com/).
4 |
5 | ## datapipes
6 |
7 | A node library, command line tool and webapp to provide "pipe-able" Unix-Style
8 | data transformations on row-based data like CSVs.
9 |
10 | DataPipes offers unix-style `cut`, `grep`, `sed` operations on row-based data
11 | like CSVs in a streaming, connectable "pipe-like" manner.
12 |
13 | DataPipes can be used:
14 |
15 | * Online at <https://datapipes.datopian.com/> (archived, read-only)
16 | * Via a command line interface - see below
17 | * As a Node JS library - see below
18 |
19 | [Build Status](https://travis-ci.org/okfn/datapipes)
21 |
22 | ## Install
23 |
24 | ```
25 | npm install -g datapipes
26 | ```
27 |
28 | ## Usage - Command line
29 |
30 | Once installed, `datapipes` will be available on the command line:
31 |
32 | datapipes -h
33 |
34 | See the help for usage instructions, but to give a quick taster:
35 |
36 | # head (first 10 rows) of this file
37 | datapipes https://raw.githubusercontent.com/datasets/browser-stats/c2709fe7/data.csv head
38 |
39 | # search for occurrences of London (ignore case) and show first 10 results
40 | datapipes https://raw.githubusercontent.com/rgrp/dataset-gla/75b56891/data/all.csv "grep -i london" head
41 |
42 | ## Usage - Library
43 |
44 | See the [Developer
45 | Docs](https://github.com/okfn/datapipes/blob/master/doc/dev.md).
46 |
47 | ----
48 |
49 | ## Developers
50 |
51 | ### Installation
52 |
53 | This is a Node Express application. To install and run do the following.
54 |
55 | 1. Clone this repo
56 | 2. Change into the repository base directory
57 | 3. Run:
58 |
59 | ```bash
60 | $ npm install
61 | ```
62 |
63 | ### Testing
64 |
65 | Once installed, you can run the tests locally with:
66 |
67 | ```bash
68 | $ npm test
69 | ```
70 |
71 | ### Running
72 |
73 | To start the app locally, it’s:
74 |
75 | ```bash
76 | $ node app.js
77 | ```
78 |
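79 | You can then access it from <http://localhost:5000/> (port 5000 is the default; set the `PORT` environment variable to change it).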
80 |
81 | ### Deployment
82 |
83 | For deployment we use Heroku.
84 |
85 | The primary app is called `datapipes` on Heroku. To add it as a git remote, do:
86 |
87 | ```bash
88 | $ heroku git:remote -a datapipes
89 | ```
90 |
91 | Then to deploy:
92 |
93 | ```bash
94 | $ git push datapipes
95 | ```
96 |
97 | ## Inspirations and Related
98 |
99 | * https://github.com/substack/dnode dnode is an asynchronous rpc system for
100 | node.js that lets you call remote functions. You can pass callbacks to remote
101 | functions, and the remote end can call the functions you passed in with
102 | callbacks of its own and so on. It's callbacks all the way down!
103 |
104 | ## Copyright and License
105 |
106 | Copyright 2013-2014 Open Knowledge Foundation and Contributors.
107 |
108 | Licensed under the MIT license:
109 |
110 | Permission is hereby granted, free of charge, to any person obtaining a copy
111 | of this software and associated documentation files (the "Software"), to deal
112 | in the Software without restriction, including without limitation the rights
113 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
114 | copies of the Software, and to permit persons to whom the Software is
115 | furnished to do so, subject to the following conditions:
116 |
117 | The above copyright notice and this permission notice shall be included in
118 | all copies or substantial portions of the Software.
119 |
120 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
121 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
122 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
123 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
124 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
125 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
126 | THE SOFTWARE.
127 |
128 |
129 |
--------------------------------------------------------------------------------
/app.js:
--------------------------------------------------------------------------------
1 | var fs = require('fs'),
2 | express = require('express'),
3 | path = require('path'),
4 | nunjucks = require('nunjucks'),
5 | request = require('request'),
6 | marked = require('marked'),
7 | _ = require('underscore');
8 |
9 | var util = require('./lib/util'),
10 | TransformOMatic = require('./lib/transform'),
11 | routes = require('./routes/index');
12 |
/**
 * CORS middleware.
 *
 * Adds permissive cross-origin headers to every response so the service can
 * be called from browser-side JavaScript on any origin. Preflight (OPTIONS)
 * requests are answered directly with a 200 and are not passed further down
 * the middleware chain; every other request continues via next().
 *
 * @param {Object} req - incoming request; only `req.method` is read.
 * @param {Object} res - response; `res.header()` and `res.send()` are used.
 * @param {Function} next - continuation for non-OPTIONS requests.
 */
var CORSSupport = function(req, res, next) {
  res.header('Access-Control-Allow-Origin', '*');
  res.header('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE');
  res.header('Access-Control-Allow-Headers', 'Content-Type');

  // intercept OPTIONS method: the preflight only needs the headers above
  if (req.method === 'OPTIONS') {
    res.send(200);
    return;
  }

  next();
};
26 |
/**
 * Middleware that rewrites `%20` in the path part of the URL for Chrome on
 * Windows / Macintosh.
 *
 * When the user agent matches and the path (everything before the first `?`)
 * contains `%20`, the client is redirected to the same URL with each `%20`
 * in the path replaced by a non-breaking space (U+00A0). The query string is
 * left untouched. All other requests fall through to next().
 *
 * The replacement character is written as an explicit escape: a plain space
 * would be percent-encoded back to `%20`, which would match again and
 * redirect forever.
 *
 * @param {Object} req - request; `req.headers['user-agent']` and `req.url` are read.
 * @param {Object} res - response; `res.redirect()` is called when rewriting.
 * @param {Function} next - continuation when no rewrite is needed.
 */
var chromeSpaceReplace = function(req, res, next) {
  var re = /(?:Windows|Macintosh).*?Chrome/;
  var agent = req.headers['user-agent'] || '';
  if (re.test(agent)) {
    var parts = req.url.split('?');
    var datapipe = parts.shift();
    if (datapipe.indexOf('%20') !== -1) {
      // replace %20s with nbsps
      datapipe = datapipe.replace(/%20/g, '\u00A0');
      parts.unshift(datapipe);
      // re-join with '?' so a query string containing '?' survives intact
      res.redirect(parts.join('?'));
      return;
    }
  }

  next();
};
44 |
/**
 * Express-style error middleware (four-argument signature): answers with
 * HTTP 500 and renders the 'error' view, passing the error as `error`.
 *
 * NOTE(review): the views directory listing shows docs.html,
 * interactive.html and layout.html only - confirm an 'error' template exists.
 *
 * @param {*} err - the error being handled.
 * @param {Object} req - request (unused).
 * @param {Object} res - response used to set the status and render.
 * @param {Function} next - continuation (unused).
 */
function errorHandler(err, req, res, next) {
  res.status(500).render('error', { error: err });
}
49 |
/**
 * Read a Markdown file as UTF-8 and hand its rendered form to a callback.
 *
 * @param {string} filepath - file to read.
 * @param {Function} cb - node-style callback: `cb(err, null)` when the file
 *   cannot be read, otherwise `cb(null, rendered)` where `rendered` is the
 *   result of `marked(text, {gfm: false})`.
 */
function getMarkdownContent(filepath, cb) {
  fs.readFile(filepath, 'utf8', function(readErr, markdown) {
    if (readErr) {
      return cb(readErr, null);
    }
    cb(null, marked(markdown, {gfm: false}));
  });
}
59 |
/**
 * Run a transformation pipeline over a remote source and stream the result
 * into the HTTP response.
 *
 * The pipeline spec is parsed from the URL path and query, the response
 * Content-Type is taken from the last transformer (when it offers a
 * `contentType()` method, plain UTF-8 text otherwise), and the body of
 * `query.url` is fetched and fed through the transformers.
 *
 * NOTE(review): the 'error' callback calls res.send() even though output may
 * already be streaming into `res` - confirm that is safe.
 *
 * @param {string} path - pipeline part of the request path.
 * @param {Object} query - parsed query string; `query.url` is the data source.
 * @param {Object} res - HTTP response the output is written to.
 */
function datapipe(path, query, res) {
  var transformers = TransformOMatic.pipeline(util.parseUrl(path, query), res);
  var finalStage = _.last(transformers);

  // the last stage decides the output format; default to plain text
  var contentType = finalStage.contentType
    ? finalStage.contentType()
    : "text/plain; charset=utf-8";
  res.setHeader("Content-Type", contentType);

  var url = query.url;

  function reportUpstreamFailure(err) {
    var errStr = 'Error with upstream URL: ' + url;
    console.log(errStr);
    res.send(500, errStr);
  }

  var stream = request(url).on('error', reportUpstreamFailure);
  TransformOMatic.transform(res, transformers, stream);
}
80 |
// ---------------------------------------------------------------------------
// Application wiring: middleware, template engine, routes, server start-up.
// ---------------------------------------------------------------------------
var app = express();
app.set('port', process.env.PORT || 5000);
// NOTE(review): this points at '<app dir>/templates' while the nunjucks
// loader below reads from 'views' - confirm which setting is actually used.
app.set('views', __dirname + '/templates');
app.use(chromeSpaceReplace);
app.use(CORSSupport);
// NOTE(review): errorHandler is registered before the routes; Express error
// middleware is normally registered last - confirm this ordering is intended.
app.use(errorHandler);
app.use(express.static(path.join(__dirname, 'public')));

// The loader path is relative to the process working directory.
var env = new nunjucks.Environment(new nunjucks.FileSystemLoader('views'));
env.express(app);

app.get(/\/interactive(\/.*)?/, routes.wizard);
app.get(/\/wizard(\/.*)?/, routes.wizard);

// Catch-all route. Without a `url` query parameter the first path segment
// selects a documentation page; with one, the path is run as a data pipeline.
app.get('/*', function(req, res) {
  var mdFilename;
  // named pipelinePath (not `path`) so the `path` module is not shadowed
  var pipelinePath = req.params[0];

  if (!req.query.url) {
    // if there's no url parameter,
    // attempt to serve a doc page
    var page = pipelinePath.split('/')[0];
    if (page === '') {
      mdFilename = 'doc/index.md';
    } else {
      mdFilename = 'doc/op-' + page + '.md';
    }
    getMarkdownContent(mdFilename, function(err, content) {
      if (err) {
        console.log(err);
        // The message echoes the (decoded) request path, so it is sent as
        // plain text: res.send() of a string would otherwise default to
        // text/html and let markup in the URL run in the browser.
        res.status(404)
          .type('text/plain')
          .send('Page not found: ' + req.params[0]);
      } else {
        res.render('docs.html', {
          content: content
        });
      }
    });
  } else {
    datapipe(pipelinePath, req.query, res);
  }
});

var server = app.listen(app.get('port'), function() {
  console.log("Listening on " + app.get('port'));
});
126 |
127 | module.exports = server;
128 |
--------------------------------------------------------------------------------
/bin/datapipes:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | var TransformOMatic = require('../lib/transform'),
3 | stdout = require('../lib/stdout'),
4 | stdin = process.stdin,
5 | fs = require('fs'),
6 | request = require('request'),
7 | path = require('path'),
8 | util = require('../lib/util');
9 |
10 | var docdir = path.join(path.dirname(__dirname), 'doc'),
11 | filepath = path.join(docdir, 'cli.md'),
12 | usage = fs.readFileSync(filepath, 'utf8'),
13 | opInfo = '';
14 |
15 | var opList = fs.readdirSync(docdir).filter(function(fn) {
16 | return fn.indexOf('op-') === 0;
17 | }).map(function(fn) {
18 | var opName = fn.replace('op-', '').replace('.md', '');
19 | return opName;
20 | });
21 |
22 | opInfo = opList.join('\n');
23 |
24 | opInfo += '\n You can get more info on each command by doing:\n\n';
25 | opInfo += ' $0 OPERATION -h\n\n';
26 |
27 | usage = usage.replace('{{Operations}}', opInfo);
28 |
29 | var yargs = require('optimist')
30 | .options({
31 | s: {alias: 'share', boolean: true, describe: 'Generate a URL to share this.'},
32 | h: {alias: 'help', boolean: false, describe: 'Show help (generic or specific operation)'}
33 | })
34 | .usage(usage)
35 | .demand(1, 'Please provide a data source and at least one transform')
36 | ;
37 | var argv = yargs.argv;
38 |
39 | if (argv.h) {
40 | if (argv._.length > 0) {
41 | var opName = argv._[0];
42 | if (opList.indexOf(opName) === -1) {
43 | console.log(opName + ' operation is unknown');
44 | } else {
45 | var fp = path.join(docdir, 'op-' + opName + '.md'),
46 | info = fs.readFileSync(fp, 'utf8');
47 |
48 | info = 'Operation: ' + opName + '\n' + info.split('\n').slice(1).join('\n');
49 | console.log(info);
50 | }
51 | } else {
52 | console.log(yargs.help());
53 | }
54 | // exit
55 | return;
56 | }
57 |
58 | var stream,
59 | pathOrUrl = argv._[0];
60 |
61 | if (pathOrUrl.indexOf('http') === 0) {
62 | stream = request(pathOrUrl);
63 | } else if (pathOrUrl === '_') {
64 | stream = stdin;
65 | } else {
66 | stream = fs.createReadStream(pathOrUrl);
67 | }
68 |
69 | // cli mode
70 | var transformStr = argv._.slice(1)
71 | .map(function(item) {
72 | return item.trim(' ');
73 | })
74 | .join('/');
75 |
76 | if (argv.share) {
77 | var transformUrl = 'https://datapipes.okfnlabs.org/';
78 | transformUrl += encodeURI(transformStr);
79 | transformUrl += '?url=';
80 | transformUrl += argv.url;
81 | var stars = Array(transformUrl.length+1).join('*');
82 |
83 | console.log('URL to share:');
84 | console.log(stars);
85 | console.log(transformUrl);
86 | console.log(stars);
87 |
88 | return;
89 | }
90 |
91 |
92 | var pipelineSpec = util.parseUrl(transformStr);
93 | var transformers = TransformOMatic.pipeline(pipelineSpec);
94 | TransformOMatic.transform(stdout(), transformers, stream);
95 |
--------------------------------------------------------------------------------
/doc/cli.md:
--------------------------------------------------------------------------------
1 | Perform streaming data transformations on local and online csv files.
2 |
3 | Usage: $0 [-s] DATA [PIPELINE OF OPERATIONS ...]
4 |
5 | DATA the file path or URL to the data you want to pass through the
6 | data pipeline. If you want to use stdin use '_' (underscore)
7 |
8 | PIPELINE is the series of operations which will be applied to the input data.
9 | The PIPELINE can be specified in 2 ways on the command line.
10 |
11 | A. in the form like that used online: a single string with
12 | operations separated by '/' e.g.
13 |
14 | "/delete 1/grep abc/head"
15 |
16 | B. as individual operations separated by spaces (ie. classic
17 | positional arguments)
18 |
19 | "delete 1" "grep abc" head
20 |
21 | Available operations are listed below.
22 |
23 | Operations
24 | ==========
25 |
26 | {{Operations}}
27 |
28 | Examples
29 | ========
30 |
31 | $0 data.csv head
32 | $0 data.csv "delete 2" head
33 | $0 data.csv "/delete 2/head/"
34 |
35 |
--------------------------------------------------------------------------------
/doc/dev.md:
--------------------------------------------------------------------------------
1 | # Using as a Library
2 |
3 | ```
4 | var dp = require('datapipes');
5 |
6 | // load data from inUrl, write to outFile after applying the sequence of transformations
7 | dp.transform(inUrl, outFile, [
8 | {
9 | operator: 'head',
10 | options: {
11 | number: 10 // number of rows
12 | }
13 | },
14 | {
15 | operator: 'grep',
16 | options: {
17 | regex: 'london',
18 | ignorecase: true
19 | }
20 | },
21 | {
22 | operator: 'delete',
23 | options: {
24 | range: '3,5:10'
25 | }
26 | }
27 | ]);
28 | ```
29 |
30 | ## do it by hand
31 |
32 | ```
33 | // create a head operator
34 | // this is a stream transform - see node docs
35 | var headOp = dp.operators.head(args, options);
36 |
37 | // create a CSV
38 | var csv = csv().from(...)
39 |
40 | var outFile = fs.createWriteStream('tmp.txt');
41 |
42 | csv.pipe(headOp).pipe(outFile);
43 | ```
44 |
45 | ## Make Your Own Operators
46 |
47 | We have a helpful `mapToTransform` function that turns a map function into an operator.
48 |
49 | Suppose you have a map function
50 |
51 | ```
52 | function helloMap(row, idx) {
53 | row[0] = 'hello'
54 | return row;
55 | }
56 |
57 | operators['hello'] = mapToTransform(helloMap);
58 | ```
59 |
60 | ## How It Works
61 |
62 | Operators are Transform streams - i.e. a readable and writable stream
63 | i.e. a readable stream pipes data into it, and a writable stream is piped
64 | from it. The data piped in is a series of JSON objects of the form:
65 |
66 | {
67 | row: ['row', 'data', 'goes', 'here'],
68 | index: 7
69 | }
70 |
71 | The data pushed out should be of the same form.
72 |
73 | Questions:
74 |
75 | * How do I skip a row (just don't push it on ...)
76 | * How do I tell an upstream part of the pipeline to halt
77 |
78 | Most of the operators here inherit the node Transform class. As such, they
79 | define a _transform() method, and optionally a _flush() method. More
80 | details here:
81 |
82 | http://nodejs.org/api/stream.html#stream_class_stream_transform_1
83 |
84 |
--------------------------------------------------------------------------------
/doc/index.md:
--------------------------------------------------------------------------------
1 | # Shareable Simple Data Transformations
2 |
3 | Data Pipes is an online service for doing **simple data transformations** on tabular
4 | data – deleting rows and columns, find and replace, filtering, viewing as HTML.
5 |
6 | Even better you can **connect these transformations** together Unix pipes style
7 | to make more complex transformations (for example, first delete a column, then
8 | do a find and replace).
9 |
10 | You can do all of this in your browser **without having to install anything**
11 | and to **share your data and pipeline** all you need to do is copy and paste a
12 | URL.
13 |
14 | ### Quick start
15 |
16 | * [View a CSV][html] – turn a CSV into a nice online HTML table in seconds
17 | * [Pipeline Wizard][wizard] – create your own Data Pipeline interactively
18 | * [Find out more](#doc) – including full docs of the API
19 |
20 | [wizard]: /wizard/
21 |
22 |
23 |
24 | ## Example
25 |
26 | To illustrate here's an example which shows the power of DataPipes. It shows
27 | DataPipes being used to clean up and display a raw spending data CSV file from
28 | the Greater London Authority.
29 |
30 | [https://datapipes.okfnlabs.org/csv/head -n 50/cut 0/delete 1:7/grep -i London/html?url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv][ex]
31 |
32 | [ex]: /csv/head%20-n%2050/cut%200/delete%201:7/grep%20-i%20London/html?url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv
33 |
34 | This does the following:
35 |
36 | * parses the incoming url as CSV
37 | * slices out the first 50 rows (using [head][])
38 | * deletes the first column (using [cut][])
39 | * deletes rows 1-7 (using [delete][])
40 | * then selects those rows with London (case-insensitive) in them (using [grep][])
41 | * finally transforms the output to an HTML table (using [html][])
42 |
43 | Here's what the output looks like:
44 |
45 | [![Example output](/img/example.png)][ex]
46 |
47 | <h2 id="doc">API</h2>
48 |
49 | The basic API is of the form:
50 |
51 | /csv/{transform} {args}/?url={source-url}
52 |
53 | For example, here is a head operation which shows the first n rows of a file (the default case with no arguments shows the first 10 rows):
54 |
55 | /csv/head/?url={source-url}
56 |
57 | With arguments (showing first 20 rows):
58 |
59 | /csv/head -n 20/?url={source-url}
60 |
61 | ### Piping
62 |
63 | You can also do **piping**, that is pass output of one transformation as input to another:
64 |
65 | /csv/{trans1} {args}/{trans2} {args}/.../?url={source-url}
66 |
67 | ### Input Formats
68 |
69 | At present we only support CSV but we are considering support for JSON, plain text and RSS.
70 |
71 | *If you are interested in [JSON support then vote here][json-issue]*
72 |
73 | [json-issue]: https://github.com/okfn/datapipes/issues/16
74 |
75 | ### Query string substitution
76 |
77 | Some characters can’t be used in a URL path because of [restrictions][ietf]. If this is a limitation (for instance if you need to use backslashes in your `grep` regex) variables can be defined in the query string and substituted in. E.g.:
78 |
79 | /csv/grep $dt/html/?dt=\d{2}-\d{2}-\d{4}&url={source-url}
80 |
81 | [ietf]: https://tools.ietf.org/html/rfc3986
82 |
83 | ### CORS and JS web apps
84 |
85 | CORS is supported so you can use this from pure JS web apps.
86 |
87 | ## Transform Operations
88 |
89 | The basic operations are inspired by unix-style commands such as `head`, `cut`, `grep`, `sed` but really anything a map function can do could be supported. ([Suggest new operations here][suggest]).
90 |
91 | [suggest]: https://github.com/okfn/datapipes/issues
92 |
93 | * [none][] (aka `raw`) = no transform but file parsed (useful with CORS)
94 | * [csv][] = parse / render csv
95 | * [head][] = take only first X rows
96 | * [tail][] = take only last X rows
97 | * [delete][] = delete rows
98 | * [strip][] = delete all blank rows
99 | * [grep][] = filter rows based on pattern matching
100 | * [cut][] = select / delete columns
101 | * [replace][] = find and replace (not yet implemented)
102 | * [html][] = render as viewable HTML table
103 |
104 | [none]: /none/
105 | [csv]: /csv/
106 | [head]: /head/
107 | [tail]: /tail/
108 | [delete]: /delete/
109 | [strip]: /strip/
110 | [grep]: /grep/
111 | [cut]: /cut/
112 | [replace]: /replace/
113 | [html]: /html/
114 |
115 | ## Contributing
116 |
117 | Under the hood Data Pipes is a simple open-source node.js webapp living [here on github][source].
118 |
119 | It's super easy to contribute and here are some of the [current issues][issues].
120 |
121 | [source]: https://github.com/okfn/datapipes
122 | [issues]: https://github.com/okfn/datapipes/issues
123 |
124 |
--------------------------------------------------------------------------------
/doc/op-csv.md:
--------------------------------------------------------------------------------
1 | ## csv
2 |
3 | Parse or render csv
4 |
5 | ### Usage
6 |
7 | csv [-tHS] [-d DELIMITER] [-p ESCAPECHAR] [-q QUOTECHAR]
8 |
9 | -t, --tabs
10 | Indicate the data being parsed or rendered is
11 | tab-delimited.
12 |
13 | -H, --no-header-row
14 | On parse, indicate that the data does not contain a
15 | header row.
16 | On render, this switch is not valid and has no effect.
17 |
18 | -S, --skipinitialspace
19 | Ignore whitespace immediately following the delimiter.
20 |
21 | -d DELIMITER, --delimiter DELIMITER
22 | Delimiting character for the data. Defaults to comma.
23 |
24 | -p ESCAPECHAR, --escapechar ESCAPECHAR
25 | Character used to escape the quote character. Defaults
26 | to backslash.
27 |
28 | -q QUOTECHAR, --quotechar QUOTECHAR
29 | Character used to quote strings.
30 |
31 | ### Examples
32 |
33 | Turn comma-delimited data (csv) to tab-delimited (tsv).
34 |
35 | /csv/csv -t/?url=http://static.london.gov.uk/gla/expenditure/docs/2012-13-P12-250.csv
36 |
--------------------------------------------------------------------------------
/doc/op-cut.md:
--------------------------------------------------------------------------------
1 | ## cut
2 |
3 | Remove specified columns from data.
4 |
5 | ### Usage
6 |
7 | cut [--complement] [RANGE]
8 |
9 | --complement
10 | Keep the specified columns, and delete the rest.
11 |
12 | RANGE
13 | Comma separated column indices (0 based). Ranges can
14 | also be specified with a colon, e.g. `5:10`.
15 |
16 | ### Examples
17 |
18 | Delete the columns 0 and 3 of a CSV file:
19 |
20 | /csv/cut 0,3/?url=http://static.london.gov.uk/gla/expenditure/docs/2012-13-P12-250.csv
21 |
22 | Delete the columns 1,3 and 5 to 10 of a CSV file:
23 |
24 | /csv/cut 1,3,5:10/?url=http://static.london.gov.uk/gla/expenditure/docs/2012-13-P12-250.csv
25 |
--------------------------------------------------------------------------------
/doc/op-delete.md:
--------------------------------------------------------------------------------
1 | ## delete
2 |
3 | Remove specified rows from data.
4 |
5 | ### Usage
6 |
7 | delete [RANGE]
8 |
9 | RANGE
10 | Comma separated row indices (0 based). Ranges can
11 | also be specified with a colon.
12 |
13 | range = comma separated range of row indices (0 based) e.g. `1:5,10:15`
14 |
15 | ### Examples
16 |
17 | Delete the rows 1-5 and 10-15 (inclusive) of a CSV file:
18 |
19 | /csv/delete 1:5,10:15/?url=http://static.london.gov.uk/gla/expenditure/docs/2012-13-P12-250.csv
20 |
--------------------------------------------------------------------------------
/doc/op-grep.md:
--------------------------------------------------------------------------------
1 | ## grep
2 |
3 | Filter data to only those rows where certain columns match a pattern.
4 |
5 | ### Usage
6 |
7 | grep [-iv] [-e pattern] [-c columns] [PATTERN]
8 |
9 | -c COLUMNS, --columns COLUMNS
10 | comma-separated list of columns to search.
11 |
12 | -e PATTERN, --regexp PATTERN
13 | The regular expression to search for.
14 |
15 | -i, --ignore-case
16 | Perform case-insensitive pattern matching.
17 |
18 | -v, --invert-match
19 | Return the rows that do __not__ match the regular
20 | expression.
21 |
22 | ### Examples
23 |
24 | Return only those rows containing LONDON:
25 |
26 | /csv/grep LONDON?url=http://static.london.gov.uk/gla/expenditure/docs/2012-13-P12-250.csv
27 |
28 | Return only those rows that do __not__ mention LONDON (piped through html):
29 |
30 | /csv/grep -v LONDON/html/?url=http://static.london.gov.uk/gla/expenditure/docs/2012-13-P12-250.csv
31 |
--------------------------------------------------------------------------------
/doc/op-head.md:
--------------------------------------------------------------------------------
1 | ## head
2 |
3 | Truncate dataset to its first rows.
4 |
5 | ### Usage
6 |
7 | head [[-n] COUNT]
8 |
9 | COUNT
10 | Number of rows to truncate to. If this option is
11 | omitted, it defaults to 10.
12 |
13 | Note we allow you to prefix `COUNT` with `-n` to ensure compatibility with standard unix `head`.
14 |
15 | ### Examples
16 |
17 | Return the first 10 rows.
18 |
19 | [/csv/head/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv](/csv/head/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv)
20 |
21 | Return the first 20 rows.
22 |
23 | [/csv/head 20/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv](/csv/head%2020/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv)
24 |
25 |
--------------------------------------------------------------------------------
/doc/op-html.md:
--------------------------------------------------------------------------------
1 | ## html
2 |
3 | Convert the data to an elegant HTML table (with line numbers!).
4 |
5 |
9 |
10 |
11 | ### Usage
12 |
13 | html
14 |
15 | You can also highlight lines by their line numbers:
16 |
17 | html/?url=...#L10
18 |
19 | ### Examples
20 |
21 | S&P 500 companies:
22 |
23 | [/csv/html/?url=https://raw.githubusercontent.com/datasets/s-and-p-companies-financials/c9f83a9c/data/constituents-financials.csv](/csv/html/?url=https://raw.githubusercontent.com/datasets/s-and-p-companies-financials/c9f83a9c/data/constituents-financials.csv)
24 |
25 | Highlight a line:
26 |
27 | [/csv/html/?url=https://raw.githubusercontent.com/datasets/s-and-p-companies-financials/c9f83a9c/data/constituents-financials.csv#L110](/csv/html/?url=https://raw.githubusercontent.com/datasets/s-and-p-companies-financials/c9f83a9c/data/constituents-financials.csv#L110)
28 |
29 |
--------------------------------------------------------------------------------
/doc/op-map.md:
--------------------------------------------------------------------------------
1 | ## map
2 |
3 | Apply a user-defined operator
4 |
5 | ### Usage
6 |
7 | map [URL]
8 |
9 | URL
10 | Location of the code of the operator.
11 |
12 | ### Operator definition
13 |
14 | Operators consist of two functions:
15 |
16 | * `transform(input)`, which accepts the current line of data, and returns the processed line.
17 | * `flush()`, which is called after all the data has been passed through.
18 |
19 | This [uppercase operator](https://gist.github.com/andylolz/7794290) serves as an example.
20 |
21 | ### Examples
22 |
23 | [/csv/map $map/html?map=https://gist.github.com/andylolz/7794290/raw/8e88a5daac9a6496a8397dad99e14f18ed5ab378/uppercase.js&url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv](/csv/map%20$map/html?map=https://gist.github.com/andylolz/7794290/raw/8e88a5daac9a6496a8397dad99e14f18ed5ab378/uppercase.js&url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv)
24 |
--------------------------------------------------------------------------------
/doc/op-none.md:
--------------------------------------------------------------------------------
1 | ## none (aka raw)
2 |
3 | No transform of the data. Still useful as:
4 |
5 | * CORS support (you can access a random CSV file from JS)
6 | * Simple plain text style view (quick and dirty view of the file)
7 |
8 | ### Usage
9 |
10 | none
11 |
12 | ### Examples
13 |
14 | [/none/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv](/none/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv)
15 |
--------------------------------------------------------------------------------
/doc/op-replace.md:
--------------------------------------------------------------------------------
1 | ## replace
2 |
3 | Find and replace.
4 |
5 | ### Usage
6 |
7 | replace [-r] [-c columns] [FIND] [REPLACE]
8 |
9 | -r, --regex
10 | Find argument is a regular expression.
11 |
12 | -c COLUMNS, --columns COLUMNS
13 | comma-separated list of columns to search.
14 |
15 | FIND
16 | Text to search for.
17 |
18 | REPLACE
19 | Text to replace the found text with. Defaults
20 | to the empty string.
21 |
22 | ### Examples
23 |
24 | Turn multiple consecutive spaces into a single space.
25 |
26 | [/csv/head/replace -r $f $r?f=\s\s*&r=%20&url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv](/csv/head/replace%20-r%20$f%20$r?f=\s\s*&r=%20&url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv)
27 |
--------------------------------------------------------------------------------
/doc/op-strip.md:
--------------------------------------------------------------------------------
1 | ## strip
2 |
3 | Discard empty rows.
4 |
5 | ### Usage
6 |
7 | strip
8 |
9 | ### Examples
10 |
11 | [/csv/head/strip?url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv](/csv/head/strip?url=https://raw.githubusercontent.com/okfn/datapipes/master/test/data/gla.csv)
12 |
--------------------------------------------------------------------------------
/doc/op-tail.md:
--------------------------------------------------------------------------------
1 | ## tail
2 |
3 | Truncate dataset to its last rows.
4 |
5 | ### Usage
6 |
7 | tail [-n COUNT]
8 |
9 | -n COUNT
10 | Number of rows to truncate to. If this option is
11 | omitted, it defaults to 10.
12 |
13 | A leading + sign means this number is relative to the
14 | first row. Otherwise it is relative to the last row.
15 |
16 | ### Examples
17 |
18 | Return the last 10 rows.
19 |
20 | [/csv/tail/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv](/csv/tail/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv)
21 |
22 | Return all rows after the first 5.
23 |
24 | [/csv/tail -n +5/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv](/csv/tail%20-n%20+5/?url=https://raw.githubusercontent.com/datasets/bond-yields-uk-10y/9e921283/data/annual.csv)
25 |
--------------------------------------------------------------------------------
/lib/fixedqueue.js:
--------------------------------------------------------------------------------
/**
 * A first-in, first-out buffer that retains at most `maxsize` items.
 * Pushing onto a full queue evicts (and returns) the oldest item.
 *
 * May be called with or without `new`.
 *
 * @param {number} maxsize - Maximum number of items kept in the queue.
 */
function FixedQueue(maxsize) {
  // Forward the caller's limit when invoked as a plain function.
  if (!(this instanceof FixedQueue)) return new FixedQueue(maxsize);
  this._maxsize = maxsize;
  this._queue = [];
}

/**
 * Append an item to the queue.
 *
 * @param {*} item - Value to enqueue.
 * @returns {*} The evicted oldest item when the queue overflowed,
 *   otherwise undefined.
 */
FixedQueue.prototype.push = function(item) {
  this._queue.push(item);
  if (this._queue.length > this._maxsize) {
    return this._queue.shift();
  }
  return undefined;
};

/**
 * Remove and return the oldest item.
 *
 * @returns {*} The oldest item, or undefined when the queue is empty.
 */
FixedQueue.prototype.shift = function() {
  return this._queue.shift();
};

/**
 * Expose the backing array (oldest item first). The array is returned by
 * reference, not copied.
 *
 * @returns {Array} The queued items.
 */
FixedQueue.prototype.get = function() {
  return this._queue;
};
22 |
23 | module.exports = FixedQueue;
24 |
--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
1 | var ops = require('./operators.js'),
2 | transform = require('./transform');
3 |
4 | module.exports = {
5 | operators: ops,
6 | pipeline: transform.pipeline,
7 | transform: transform.transform
8 | };
9 |
--------------------------------------------------------------------------------
/lib/operators.js:
--------------------------------------------------------------------------------
1 | var _ = require('underscore');
2 | var csv = require('csv');
3 | var inherits = require('util').inherits;
4 | var PassThrough = require('stream').PassThrough;
5 | var request = require('request');
6 | var Sandbox = require('sandbox');
7 | var Transform = require('stream').Transform;
8 | var FixedQueue = require('./fixedqueue');
9 | var optimist = require('./tools').optimist;
10 | var ShorthandList = require('./shorthandlist');
11 |
12 | var headerrow = false;
13 |
14 | // For a given pipeline, each operator is constructed by calling:
15 | //
16 | // ```new operators[operator name](args)```
17 | //
18 | // ...where operator name is the name of the operator, and args is a string
19 | // array of the input arguments to the operator.
20 | //
21 | // The constructed operator is then treated as a readable and writable stream
22 | // i.e. a readable stream pipes data into it, and a writable stream is piped
23 | // from it. The data piped in is a series of stringified JSON objects of the
24 | // form:
25 | //
26 | // {row: ['row', 'data', 'goes', 'here'], index: 7}
27 | //
28 | // The data pushed out should be of the same form.
29 | //
30 | // Most of the operators here inherit the node Transform class. As such, they
31 | // define a _transform() method, and optionally a _flush() method. More
32 | // details here:
33 | //
34 | // http://nodejs.org/api/stream.html#stream_class_stream_transform_1
35 |
36 | var operators = {
37 | incsv: function(args) {
38 | var argv = optimist()
39 | .options({
40 | d: {alias: 'delimiter', default: ',', string: true},
41 | p: {alias: 'escapechar', default: '\\', string: true},
42 | q: {alias: 'quotechar', default: '\"', string: true},
43 | t: {alias: 'tabs', boolean: true},
44 | H: {alias: 'no-header-row', boolean: true},
45 | S: {alias: 'skipinitialspace', boolean: true},
46 | })
47 | .string(['delimiter', 'escapechar', 'quotechar'])
48 | .parse(args)
49 | ;
50 |
51 | // by default, assume there is a header row
52 | headerrow = true;
53 | if (argv.H || argv['header-row'] === false) {
54 | headerrow = false;
55 | }
56 |
57 | if (argv.t) {
58 | // tabs switch overrides delimiter opt
59 | argv.d = '\t';
60 | }
61 | return csv()
62 | .from.options({
63 | delimiter: argv.d,
64 | escape: argv.p,
65 | quote: argv.q,
66 | ltrim: argv.S,
67 | })
68 | .transform(function(row, index) {
69 | return JSON.stringify({'row': row, 'index': index});
70 | });
71 | },
72 |
73 | // none transformation
74 | none: function(args) {
75 | PassThrough.call(this);
76 | },
77 |
78 | // return our HEAD transformation
79 | head: function(args) {
80 | Transform.call(this, {objectMode: true});
81 |
82 | var argv = optimist()
83 | .options('n', {
84 | default: 10,
85 | })
86 | .parse(args)
87 | ;
88 |
89 | this._number = argv.n;
90 | if (argv._[0]) {
91 | try {
92 | this._number = parseInt(argv._[0]);
93 | } catch(e) {}
94 | }
95 | this._lines = 0;
96 |
97 | this._pushheaderrow = headerrow;
98 | },
99 |
100 | // return our tail transformation
101 | tail: function(args) {
102 | Transform.call(this, {objectMode: true});
103 |
104 | var argv = optimist()
105 | .options('n', {
106 | default: '10',
107 | string: true
108 | })
109 | .parse(args)
110 | ;
111 |
112 | var number = argv.n;
113 |
114 | if (number.charAt(0) == '+') {
115 | // relative to the beginning of the stream
116 | this._rel = 'beginning';
117 | this._lines = 0;
118 | this._number = number;
119 | } else {
120 | // relative to the end of the stream
121 | this._rel = 'end';
122 | if (number.charAt(0) == '-') {
123 | number = -number;
124 | }
125 | this._fixedqueue = new FixedQueue(number);
126 | }
127 |
128 | this._pushheaderrow = headerrow;
129 | },
130 |
131 | // return a transformation that will cut columns.
132 | // Accepts a comma separated list of column positions to remove.
133 | // This is 0-indexed.
134 | cut: function(args) {
135 | Transform.call(this, {objectMode: true});
136 |
137 | var argv = optimist()
138 | .boolean('complement')
139 | .parse(args)
140 | ;
141 |
142 | if (argv._.length < 1) {
143 | throw 'Error: cut requires at least 1 argument.';
144 | }
145 |
146 | var columns = argv._[0].toString();
147 |
148 | this._complement = argv.complement;
149 |
150 | this._cols = ShorthandList(columns);
151 | },
152 |
153 | // transformation that will grep for a pattern
154 | grep: function(args) {
155 | Transform.call(this, {objectMode: true});
156 |
157 | var argv = optimist()
158 | .options({
159 | c: {alias: 'columns', string: true},
160 | i: {alias: 'case-insensitive', boolean: true},
161 | e: {alias: 'regexp', string: true},
162 | v: {alias: 'invert-match', boolean: true},
163 | })
164 | .string(['columns', 'regexp'])
165 | .parse(args)
166 | ;
167 |
168 | if (argv.c !== undefined) {
169 | this._cols = ShorthandList(argv.c);
170 | }
171 |
172 | var flags = (argv.i) ? 'i' : '';
173 | var regex = argv.e || argv._[0];
174 | this._pattern = new RegExp(regex, flags);
175 |
176 | this._invert = argv.v;
177 |
178 | this._pushheaderrow = headerrow;
179 | },
180 |
181 | // return a transformation that will find and replace all
182 | // occurrences of a string
183 | replace: function(args) {
184 | Transform.call(this, {objectMode: true});
185 |
186 | var argv = optimist()
187 | .options({
188 | c: {alias: 'columns', string: true},
189 | r: {alias: 'regexp', boolean: true},
190 | })
191 | .string('columns')
192 | .parse(args)
193 | ;
194 |
195 | if (argv._.length < 1) {
196 | throw 'Error: replace requires at least 1 argument.';
197 | }
198 |
199 | if (argv.c !== undefined) {
200 | this._cols = ShorthandList(argv.c);
201 | }
202 |
203 | this._find = (argv.r) ? new RegExp(argv._[0], 'g') : argv._[0];
204 | this._replace = (argv._.length > 1) ? argv._[1] : '';
205 |
206 | this._pushheaderrow = headerrow;
207 | },
208 |
209 | // return a transformation that will delete empty rows
210 | strip: function(args) {
211 | Transform.call(this, {objectMode: true});
212 |
213 | this._pushheaderrow = headerrow;
214 | },
215 |
216 | // transformation that will delete rows
217 | delete: function(args) {
218 | Transform.call(this, {objectMode: true});
219 |
220 | var argv = optimist()
221 | .parse(args)
222 | ;
223 |
224 | if (argv._.length < 1) {
225 | throw 'Error: delete requires at least 1 argument.';
226 | }
227 |
228 | var shorthand = argv._[0].toString();
229 | this._shorthandlist = ShorthandList(shorthand);
230 | this._index = 0;
231 |
232 | this._pushheaderrow = headerrow;
233 | },
234 |
235 | map: function(args) {
236 | Transform.call(this, {objectMode: true});
237 |
238 | var argv = optimist()
239 | .parse(args)
240 | ;
241 |
242 | if (argv._.length < 1) {
243 | throw 'Error: map requires at least 1 argument.';
244 | }
245 |
246 | this._map_url = argv._[0];
247 | this._sandbox = new Sandbox();
248 |
249 | this._pushheaderrow = headerrow;
250 | },
251 |
252 | outcsv: function(args) {
253 | Transform.call(this, {objectMode: true});
254 |
255 | var argv = optimist()
256 | .options({
257 | d: {alias: 'delimiter', default: ',', string: true},
258 | p: {alias: 'escapechar', default: '\\', string: true},
259 | q: {alias: 'quotechar', default: '\"', string: true},
260 | t: {alias: 'tabs', boolean: true},
261 | S: {alias: 'skipinitialspace', boolean: true},
262 | })
263 | .string(['delimiter', 'escapechar', 'quotechar'])
264 | .parse(args)
265 | ;
266 | if (argv.t) {
267 | // tabs switch overrides delimiter opt
268 | argv.d = '\t';
269 | }
270 | this._options = {
271 | delimiter: argv.d,
272 | escape: argv.p,
273 | quote: argv.q,
274 | };
275 | this._trimInitialSpace = argv.skipinitialspace;
276 | },
277 |
278 | // return our HTML transformation
279 | outhtml: function(args) {
280 | Transform.call(this, {objectMode: true});
281 |
282 | this._initialHtmlSent = false;
283 | },
284 | };
285 |
inherits(operators.none, PassThrough);

inherits(operators.head, Transform);

// Forward the header row (when one is expected) followed by the first
// `_number` data rows. Every later row signals end-of-stream.
operators.head.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  var limitReached = !(this._lines < this._number);
  if (limitReached) {
    this.push(null);
  } else {
    this._lines += 1;
    this.push(chunk);
  }

  done();
};
306 |
inherits(operators.tail, Transform);

// In 'end' mode rows are buffered in the fixed-size queue and emitted on
// flush. In 'beginning' mode the first `_number` rows are skipped and the
// rest pass straight through.
operators.tail.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  if (this._rel == 'end') {
    this._fixedqueue.push(chunk);
    return done();
  }

  var stillSkipping = this._lines < this._number;
  if (stillSkipping) {
    this._lines += 1;
  } else {
    this.push(chunk);
  }
  done();
};
327 |
// At end of input, drain the buffered rows (oldest first) when running in
// 'end' mode. 'beginning' mode has nothing left to emit.
operators.tail.prototype._flush = function(done) {
  if (this._rel == 'end') {
    var pending;
    while ((pending = this._fixedqueue.shift()) !== undefined) {
      this.push(pending);
    }
  }
  done();
};
338 |
inherits(operators.cut, Transform);

// Remove the selected columns from the row and re-emit it.
operators.cut.prototype._transform = function(chunk, encoding, done) {
  var record = JSON.parse(chunk);

  // The column spec is expanded once, against the width of the first row.
  if (this._expandedCols === undefined) {
    var width = record.row.length;
    var selected = this._cols.expand(width);
    this._expandedCols = this._complement
      ? _.difference(_.range(width), selected)
      : selected;
  }

  // Punch holes at the unwanted positions, then compact the row.
  function punch(position) {
    delete record.row[position];
  }
  _.each(this._expandedCols, punch);
  record.row = _.without(record.row, undefined);

  this.push(JSON.stringify(record));

  done();
};
361 |
inherits(operators.grep, Transform);

// Emit the row when any searched cell matches the pattern, or when none
// does if the match is inverted.
operators.grep.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  var record = JSON.parse(chunk);
  var pattern = this._pattern;

  // Search the configured columns, or every column when none were given.
  var columns;
  if (this._cols !== undefined) {
    columns = this._cols.expand(record.row.length);
  } else {
    columns = _.range(record.row.length);
  }

  var matched = _.any(columns, function(idx) {
    return pattern.test(record.row[idx]);
  });

  var keep = this._invert ? !matched : matched;
  if (keep) {
    this.push(chunk);
  }

  done();
};
386 |
inherits(operators.replace, Transform);

// Apply the find/replace to each selected cell and re-emit the row.
operators.replace.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  var record = JSON.parse(chunk);
  var find = this._find;
  var replacement = this._replace;

  // Operate on the configured columns, or every column when none were given.
  var columns = (this._cols !== undefined)
    ? this._cols.expand(record.row.length)
    : _.range(record.row.length);

  _.each(columns, function(idx) {
    record.row[idx] = record.row[idx].replace(find, replacement);
  });

  this.push(JSON.stringify(record));

  done();
};
415 |
inherits(operators.strip, Transform);

// Drop any row whose cells are all empty strings.
operators.strip.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  var cells = JSON.parse(chunk).row;

  var allEmpty = _.every(cells, function(val) {
    return val === '';
  });

  if (!allEmpty) {
    this.push(chunk);
  }

  done();
};
435 |
inherits(operators.delete, Transform);

// Emit every data row except those whose running position is in the list.
operators.delete.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  var list = this._shorthandlist;
  var dropped = list !== undefined && list.includes(this._index);
  if (!dropped) {
    this.push(chunk);
  }

  this._index += 1;
  done();
};
452 |
inherits(operators.map, Transform);

// Pass each data row through the user's `transform` function inside the
// sandbox. The user code is fetched from `_map_url` on first use and cached
// in `_map_fn`. A sandbox result of 'null' drops the row.
operators.map.prototype._transform = function(chunk, encoding, done) {
  if (this._pushheaderrow) {
    this.push(chunk);
    this._pushheaderrow = false;
    return done();
  }

  var self = this;

  function sandbox_run() {
    // Embed the row as a properly escaped string literal. Splicing it
    // between bare single quotes breaks (or injects code) as soon as the
    // data contains an apostrophe or backslash, and it corrupts JSON
    // escapes. U+2028/U+2029 are escaped too because older engines treat
    // them as line terminators inside string literals.
    var literal = JSON.stringify(String(chunk))
      .replace(/\u2028/g, '\\u2028')
      .replace(/\u2029/g, '\\u2029');

    self._sandbox.run(self._map_fn + 'transform(' + literal + ');', function(output) {
      var result = output.result;
      if (result === 'null') {
        return done();
      }

      // Strip the quotes the sandbox wraps around a string result.
      result = result.slice(1, -1);
      try {
        JSON.parse(result);
      } catch (e) {
        self.emit('error', new Error('Error performing map transform.'));
        return;
      }
      self.push(result);
      done();
    });
  }

  if (this._map_fn === undefined) {
    request(this._map_url, function(error, response) {
      if (error) {
        self.emit('error', new Error('Error opening map URL.'));
        return;
      }
      self._map_fn = response.body;
      sandbox_run();
    });
  } else {
    sandbox_run();
  }
};
495 |
// At end of input, call the user's `flush` function inside the sandbox and
// emit its result, unless that result is 'null'. The user code is fetched
// first if no data row has triggered the fetch yet.
operators.map.prototype._flush = function(done) {
  var self = this;

  function sandbox_run() {
    self._sandbox.run(self._map_fn + "flush();", function(output) {
      var result = output.result;
      if (result === 'null') {
        return done();
      }

      // Strip the quotes the sandbox wraps around a string result.
      result = result.slice(1, -1);
      try {
        JSON.parse(result);
      } catch (e) {
        self.emit('error', new Error('Error performing map flush.'));
        return;
      }
      self.push(result);
      done();
    });
  }

  if (this._map_fn === undefined) {
    request(this._map_url, function(error, response) {
      // Without this check a failed request would dereference an undefined
      // response and throw instead of reporting a stream error.
      if (error) {
        self.emit('error', new Error('Error opening map URL.'));
        return;
      }
      self._map_fn = response.body;
      sandbox_run();
    });
  } else {
    sandbox_run();
  }
};
526 |
inherits(operators.outcsv, Transform);

// Serialise one row to a line of CSV text using the configured dialect.
operators.outcsv.prototype._transform = function(chunk, encoding, done) {
  var self = this;
  var row = JSON.parse(chunk).row;

  if (this._trimInitialSpace) {
    // Map over the row itself. Calling _.each with only a callback iterates
    // nothing and overwrites `row` with a non-array value.
    row = _.map(row, function(item) {
      return (typeof item === 'string') ? item.trimLeft() : item;
    });
  }

  csv()
    .from.array([row])
    .to.string(function(data) {
      self.push(data + '\n');
      done();
    }, self._options)
  ;
};
547 |
// Content type reported for CSV output.
operators.outcsv.prototype.contentType = function() {
  var mimeType = "text/plain; charset=utf-8";
  return mimeType;
};
551 |
552 | inherits(operators.outhtml, Transform);
553 |
554 | operators.outhtml.prototype._transform = function(chunk, encoding, done) {
555 | var json = JSON.parse(chunk);
556 | var self = this;
557 |
558 | if (!this._initialHtmlSent) {
559 | this.push('');
560 | this.push('');
561 | this.push('');
562 | this.push('');
563 | this.push('');
564 | this.push('