├── test ├── fixtures │ ├── sample.csv │ └── finance-vix │ │ ├── .datahub │ │ └── flow.yaml │ │ ├── data │ │ └── vix-daily.csv │ │ ├── datapackage.json │ │ └── README.md ├── authorization.test.js ├── push │ └── push.test.js └── cli.test.js ├── lib └── utils │ ├── logo.js │ ├── output │ ├── info.js │ ├── error.js │ └── wait.js │ ├── tools.js │ ├── update.js │ └── error.js ├── .gitmodules ├── .gitignore ├── docs ├── login.md ├── init.md ├── push-flow.md ├── cat.md ├── get.md ├── validate.md ├── info.md ├── help.md └── push.md ├── bin ├── data-help.js ├── data-init.js ├── data-login.js ├── data-cat.js ├── data-push-flow.js ├── data-info.js ├── data-validate.js ├── data.js ├── data-get.js └── data-push.js ├── test-script.sh ├── .travis.yml ├── package.json ├── README.md └── DESIGN.md /test/fixtures/sample.csv: -------------------------------------------------------------------------------- 1 | number,string,boolean 2 | 1,two,true 3 | 3,four,false 4 | -------------------------------------------------------------------------------- /lib/utils/logo.js: -------------------------------------------------------------------------------- 1 | module.exports.box = '📦' 2 | module.exports.elephant = '🐘' 3 | module.exports.square = '❒' 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "test/fixtures/test-data"] 2 | path = test/fixtures/test-data 3 | url = https://github.com/frictionlessdata/test-data 4 | -------------------------------------------------------------------------------- /lib/utils/output/info.js: -------------------------------------------------------------------------------- 1 | const chalk = require('chalk') 2 | 3 | // Prints an informational message 4 | module.exports = msg => { 5 | console.log(`${chalk.gray('>')} ${msg}`) 6 | } 7 | -------------------------------------------------------------------------------- /lib/utils/output/error.js: -------------------------------------------------------------------------------- 1 | const chalk = require('chalk') 2 | 3 | // Prints an error message 4 | module.exports = msg => { 5 | if (msg.message) { 6 | msg = msg.message 7 | } 8 | console.log(`${chalk.red('> Error!')} ${msg}`) 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | lib-cov 4 | *.seed 5 | *.log 6 | *.dat 7 | *.out 8 | *.pid 9 | *.gz 10 | 11 | pids/ 12 | logs/ 13 | results/ 14 | node_modules/ 15 | .idea/ 16 | 17 | npm-debug.log 18 | package-lock.json 19 | 20 | sandbox/* 21 | packed/* 22 | -------------------------------------------------------------------------------- /docs/login.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ■ data login 3 | ``` 4 | 5 | Login to DataHub system using your Google/GitHub account. 
6 | 7 | ## Options: 8 | 9 | ``` 10 | -i, --interactive Displays the authentication URL 11 | -h, --help Outputs usage information 12 | ``` 13 | -------------------------------------------------------------------------------- /bin/data-help.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const path = require('path') 3 | 4 | const {customMarked} = require('../lib/utils/tools.js') 5 | 6 | const helpMarkdown = fs.readFileSync(path.join(__dirname, '../docs/help.md'), 'utf8') 7 | 8 | console.log('\n' + customMarked(helpMarkdown)) 9 | -------------------------------------------------------------------------------- /lib/utils/output/wait.js: -------------------------------------------------------------------------------- 1 | const ora = require('ora') 2 | const chalk = require('chalk') 3 | const {eraseLine} = require('ansi-escapes') 4 | 5 | // Prints a spinner followed by the given text 6 | module.exports = msg => { 7 | const spinner = ora(chalk.gray(msg)) 8 | spinner.color = 'gray' 9 | spinner.start() 10 | 11 | return () => { 12 | spinner.stop() 13 | process.stdout.write(eraseLine) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /test-script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | echo '>>> Now running shell script...' 4 | npm i -g git+https://github.com/datahq/data-cli.git 5 | data --version 6 | data help 7 | data info https://datahub.io/core/finance-vix 8 | 9 | echo '>>> Installing data-cli with yarn...' 10 | npm uninstall -g data-cli 11 | 12 | yarn global add git+https://github.com/datahq/data-cli.git 13 | data --version 14 | data info https://datahub.io/core/finance-vix 15 | -------------------------------------------------------------------------------- /docs/init.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ■ data init 3 | ``` 4 | 5 | Initialize a Data Package in the current working directory. 6 | It will scan the current working directory and nested directories for the files and generate a `datapackage.json`. 7 | 8 | ## Options: 9 | 10 | ``` 11 | -h, --help Output usage information 12 | -i, --interactive Run init in interactive mode 13 | ``` 14 | 15 | ## Example: 16 | 17 | ``` 18 | # Initialize Data Package in current working directory: 19 | ■ data init 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/push-flow.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ■ data push-flow [PATH] 3 | ``` 4 | `PATH` (optional) is the path to the data package. 
5 | 6 | ## Options: 7 | 8 | -h, --help Output usage information 9 | 10 | ## Examples: 11 | 12 | \- Uploads the Data Package in the current working directory to the DataHub: 13 | 14 | ■ data push-flow 15 | 16 | The data package should contain .datahub/flow.yaml 17 | 18 | \- Uploads the Data Package at the given path to the DataHub: 19 | 20 | ■ data push-flow core/finance-vix/ 21 | 22 | core/finance-vix/ should contain datapackage.json and .datahub/flow.yaml 23 | -------------------------------------------------------------------------------- /test/fixtures/finance-vix/.datahub/flow.yaml: -------------------------------------------------------------------------------- 1 | meta: 2 | dataset: finance-vix 3 | findability: published 4 | owner: test 5 | ownerid: testid 6 | version: 1 7 | inputs: 8 | - kind: datapackage 9 | parameters: 10 | resource-mapping: 11 | vix-daily: http:/testing.com/vixcurrent.csv 12 | url: http:/testing.com/.datahub/datapackage.json 13 | processing: 14 | - 15 | input: vix-daily 16 | tabulator: 17 | skip_rows: 2 18 | headers: 19 | - Date 20 | - VIXOpen 21 | - VIXHigh 22 | - VIXLow 23 | - VIXClose 24 | output: vix-daily 25 | schedule: 'every 1d' 26 | -------------------------------------------------------------------------------- /docs/cat.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ■ data cat [OPTIONS] PATH-OR-URL [OUT-PATH] 3 | ``` 4 | Read a data file and write its output to stdout, or to `OUT-PATH` if `OUT-PATH` is provided. 5 | 6 | Input data formats supported: 7 | 8 | * csv 9 | * excel 10 | 11 | Output formats supported: 12 | 13 | * ascii table (default - if no format specified) 14 | * csv 15 | * excel (.xlsx) 16 | * markdown (.md) 17 | 18 | ## Options 19 | 20 | --format Explicitly provide the input file format, e.g., if it does not have a conventional name 21 | 22 | ## Examples 23 | 24 | Reading from stdin: 25 | 26 | ``` 27 | ■ cat PATH | data cat _ [OUT-PATH] 28 | 29 | ■ curl URL | data cat _ [OUT-PATH] 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/get.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ■ data get 3 | ``` 4 | 5 | Get a dataset from the given URL. 6 | 7 | URL can be one of: 8 | 9 | * dataset in DataHub (e.g., https://datahub.io/core/co2-ppm) 10 | * dataset in GitHub (e.g., https://github.com/datasets/co2-ppm) 11 | * direct URL to dataset 12 | 13 | ## Options: 14 | 15 | ``` 16 | -h, --help Outputs usage information 17 | ``` 18 | 19 | ## Example: 20 | 21 | ``` 22 | # Get dataset from DataHub 23 | # The following dataset will be saved in core/co2-ppm 24 | ■ data get https://datahub.io/core/co2-ppm 25 | 26 | # From GitHub 27 | # The following dataset will be saved in datasets/co2-ppm 28 | ■ data get https://github.com/datasets/co2-ppm 29 | ``` 30 | -------------------------------------------------------------------------------- /docs/validate.md: -------------------------------------------------------------------------------- 1 | 2 | # Validate a descriptor 3 | 4 | ## Usage: 5 | 6 | ``` 7 | # Validates datapackage.json in the given path/URL, or in the cwd if not given: 8 | ■ data validate [path | URL] 9 | 10 | # If a descriptor is invalid, it will print out validation errors.
11 | ``` 12 | 13 | ## Options: 14 | 15 | ``` 16 | -h, --help Output usage information 17 | ``` 18 | 19 | ## Example: 20 | 21 | ``` 22 | # Validate descriptor in current working directory: 23 | ■ data validate 24 | 25 | # Validate descriptor from local path: 26 | ■ data validate test/fixtures/datapackage.json 27 | 28 | # Validate descriptor from URL: 29 | ■ data validate https://bits-staging.datapackaged.com/metadata/core/gdp/_v/latest/datapackage.json 30 | ``` 31 | -------------------------------------------------------------------------------- /test/fixtures/finance-vix/data/vix-daily.csv: -------------------------------------------------------------------------------- 1 | Date,VIXOpen,VIXHigh,VIXLow,VIXClose 2 | 2004-01-02,17.96,18.68,17.54,18.22 3 | 2004-01-05,18.45,18.49,17.44,17.49 4 | 2004-01-06,17.66,17.67,16.19,16.73 5 | 2004-01-07,16.72,16.75,15.05,15.05 6 | 2004-01-08,15.42,15.68,15.32,15.61 7 | 2004-01-09,16.15,16.88,15.57,16.75 8 | 2004-01-12,17.32,17.46,16.79,16.82 9 | 2004-01-13,16.06,18.33,16.53,18.04 10 | 2004-01-14,17.29,17.03,16.04,16.75 11 | 2004-01-15,17.07,17.31,15.49,15.56 12 | 2004-01-16,15.04,15.44,14.09,15 13 | 2004-01-20,15.77,16.13,15.09,15.21 14 | 2004-01-21,15.63,15.63,14.24,14.34 15 | 2004-01-22,14.02,14.87,14.01,14.71 16 | 2004-01-23,14.73,15.05,14.56,14.84 17 | 2004-01-26,15.78,15.78,14.52,14.55 18 | 2004-01-27,15.28,15.44,14.74,15.35 19 | 2004-01-28,15.37,17.06,15.29,16.78 20 | 2004-01-29,16.88,17.66,16.79,17.14 21 | -------------------------------------------------------------------------------- /docs/info.md: -------------------------------------------------------------------------------- 1 | 2 | # Preview a Dataset 3 | 4 | ## Usage: 5 | 6 | ``` 7 | # Get information about Dataset: 8 | ■ data info [path] 9 | ``` 10 | 11 | ## Options: 12 | 13 | ``` 14 | -h, --help Output usage information 15 | --format Explicitly provide input file format, e.g., if it does not have conventional name 16 | ``` 17 | 18 | ## Example: 19 | 20 | ``` 21 | # Get information about Dataset in current working directory: 22 | ■ data info 23 | 24 | # Get information about Dataset providing local path: 25 | ■ data info dir/finance-vix 26 | 27 | # Or you can get info about remote dataset: 28 | ■ data info https://raw.githubusercontent.com/datasets/gdp/master/datapackage.json 29 | 30 | # Additionally, you can preview local or remote tabular data file: 31 | ■ data info https://raw.githubusercontent.com/datahq/core-datasets-tools/master/examples.csv 32 | -------------------------------------------------------------------------------- /lib/utils/tools.js: -------------------------------------------------------------------------------- 1 | // Markdown 2 | const marked = require('marked') 3 | const TerminalRenderer = require('marked-terminal') 4 | // Global packages 5 | const globalPackages = require('global-packages') 6 | 7 | const {elephant} = require('./logo') 8 | 9 | marked.setOptions({ 10 | renderer: new TerminalRenderer() 11 | }) 12 | module.exports.customMarked = marked 13 | 14 | const installedWithNPM = async () => { 15 | let packages 16 | 17 | try { 18 | packages = await globalPackages() 19 | } catch (err) { 20 | console.log(err) 21 | return false 22 | } 23 | 24 | if (!Array.isArray(packages)) { 25 | return false 26 | } 27 | 28 | const related = packages.find(item => item.name === 'now') 29 | 30 | if (!related || related.linked === true) { 31 | return false 32 | } 33 | 34 | if (related.linked === false) { 35 | return true 36 | } 37 | 38 | return false 39 | } 40 | 
module.exports.installedWithNPM = installedWithNPM 41 | -------------------------------------------------------------------------------- /docs/help.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ❒ data [options] 3 | ``` 4 | ## Commands: 5 | 6 | DataHub: 7 | 8 | push [path] Push data at `path` to the DataHub 9 | 10 | General: 11 | 12 | get [url] Retrieve data at `url` to local disk 13 | info [path/url] Get info on data (file or dataset) at path or url 14 | cat path [out] Read data at path and write to out (or stdout) 15 | 16 | Data Package specific: 17 | 18 | init Create a Data Package 19 | validate Validate Data Package structure 20 | 21 | Administrative: 22 | 23 | help [cmd] Show help on cmd 24 | login Login or signup to the DataHub 25 | 26 | ## Options: 27 | 28 | -h, --help Output usage information 29 | -v, --version Output the version 30 | 31 | ## Examples 32 | 33 | Push a Data Package (in the current directory) 34 | 35 | ■ data push 36 | 37 | Get a Data Package from the DataHub owned by `core` and with name `finance-vix` 38 | 39 | ■ data get https://datahub.io/core/finance-vix 40 | 41 | Get a Data Package on github 42 | 43 | ■ data get https://github.com/datasets/gdp 44 | 45 | -------------------------------------------------------------------------------- /test/fixtures/finance-vix/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "finance-vix", 3 | "title": "VIX - CBOE Volatility Index", 4 | "homepage": "http://www.cboe.com/micro/VIX/", 5 | "version": "0.1.0", 6 | "license": "PDDL-1.0", 7 | "sources": [{ 8 | "title": "CBOE VIX Page", 9 | "web": "http://www.cboe.com/micro/vix/historical.aspx" 10 | }], 11 | "resources": [ 12 | { 13 | "name": "vix-daily", 14 | "path": "data/vix-daily.csv", 15 | "format": "csv", 16 | "mediatype": "text/csv", 17 | "schema": { 18 | "fields": [ 19 | { 20 | "name": "Date", 21 | "type": "date", 22 | "description": "" 23 | }, 24 | { 25 | "name": "VIXOpen", 26 | "type": "number", 27 | "description": "" 28 | }, 29 | { 30 | "name": "VIXHigh", 31 | "type": "number", 32 | "description": "" 33 | }, 34 | { 35 | "name": "VIXLow", 36 | "type": "number", 37 | "description": "" 38 | }, 39 | { 40 | "name": "VIXClose", 41 | "type": "number", 42 | "description": "" 43 | } 44 | ], 45 | "primaryKey": "Date" 46 | } 47 | } 48 | ], 49 | "views": [ 50 | { 51 | "id": "Graph", 52 | "type": "Graph", 53 | "state": { 54 | "graphType": "lines", 55 | "group": "Date", 56 | "series": [ "VIXClose" ] 57 | } 58 | } 59 | ] 60 | } 61 | -------------------------------------------------------------------------------- /lib/utils/update.js: -------------------------------------------------------------------------------- 1 | const pkg = require('../../package.json') 2 | const updateNotifier = require('update-notifier') 3 | const boxen = require('boxen') 4 | 5 | module.exports = () => { 6 | const notifier = updateNotifier({ 7 | pkg, 8 | updateCheckInterval: 1000 9 | }) 10 | 11 | if (!notifier.update) { 12 | return 13 | } 14 | 15 | // Depending on running OS show appropriate instructions: 16 | const introduction = 'If you\'ve installed data tool using our executable binary then follow instructions below:\n' 17 | const instructions = { 18 | 'darwin': `\ncurl -L https://github.com/datahq/data-cli/releases/download/v${notifier.update.latest}/data-macos.gz -o ./data.gz 19 | gunzip -f data.gz && chmod +x data && sudo mv data /usr/local/bin/data`, 20 | 'linux': `\nwget 
https://github.com/datahq/data-cli/releases/download/v${notifier.update.latest}/data-linux.gz 21 | gunzip -f data-linux.gz && chmod +x data-linux && sudo mv data-linux /usr/local/bin/data`, 22 | 'win32': `\nDepending on your Windows distribution and configurations, you may need to use different path when moving the executable.\n 23 | You need to run 'move' command as administrator: 24 | curl -k --insecure -L https://github.com/datahq/data-cli/releases/download/v${notifier.update.latest}/data-win.exe.gz -o ./data.gz 25 | gzip -d data.gz && move data "C:\\Windows\\System32\\data.exe"` 26 | } 27 | const summary = `\ndata -v # should print ${notifier.update.latest}` 28 | 29 | if (notifier.update) { 30 | notifier.notify({ 31 | defer: false, 32 | isGlobal: true 33 | }) 34 | console.log(introduction + instructions[process.platform] + summary) 35 | } else { 36 | return 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /bin/data-init.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Packages 4 | const fs = require('fs') 5 | const path = require('path') 6 | const minimist = require('minimist') 7 | const {Init} = require('datahub-client') 8 | 9 | // Ours 10 | const {customMarked} = require('../lib/utils/tools.js') 11 | const info = require('../lib/utils/output/info.js') 12 | 13 | const argv = minimist(process.argv.slice(2), { 14 | string: ['init'], 15 | boolean: ['help', 'interactive'], 16 | alias: { 17 | help: 'h', 18 | interactive: 'i' 19 | } 20 | }) 21 | 22 | const initMarkdown = fs.readFileSync(path.join(__dirname, '../docs/init.md'), 'utf8') 23 | const help = () => { 24 | console.log('\n' + customMarked(initMarkdown)) 25 | } 26 | 27 | if (argv.help) { 28 | help() 29 | process.exit(0) 30 | } 31 | 32 | 33 | const checkDpIsThere = (path_ = process.cwd()) => { 34 | const files = fs.readdirSync(path_) 35 | return files.indexOf('datapackage.json') > -1 36 | } 37 | 38 | 39 | (async() => { 40 | 41 | const initializer = new Init({interactive: argv.interactive, path_: argv._[0]}) 42 | // Listen for events: 43 | initializer 44 | .on('message', (message) => { 45 | info(message) 46 | }) 47 | .on('exit', (message) => { 48 | info(message) 49 | process.exit(0) 50 | }) 51 | 52 | // Get a descriptor generated: 53 | let descriptor = {} 54 | if (checkDpIsThere(argv._[0])) { 55 | descriptor = await initializer.updateDataset() 56 | } else { 57 | descriptor = await initializer.createDataset() 58 | } 59 | // Now save the generated descriptor: 60 | const content = JSON.stringify(descriptor, null, 2) 61 | const dest = path.join(argv._[0] || '', 'datapackage.json') 62 | fs.writeFile(dest, content, 'utf8', err => { 63 | if (err) { 64 | throw new Error(err) 65 | } else { 66 | info(`\n💾 Descriptor is saved in "${dest}"`) 67 | } 68 | }) 69 | 70 | })() 71 | -------------------------------------------------------------------------------- /lib/utils/error.js: -------------------------------------------------------------------------------- 1 | // Packages: 2 | const Raven = require('raven') 3 | 4 | // Ours: 5 | const error = require('./output/error') 6 | const info = require('./output/info') 7 | const {version} = require('../../package.json') 8 | const {installedWithNPM} = require('./tools') 9 | 10 | 11 | async function handleError(err, {debug = false} = {}) { 12 | if (process.env.datahub !== 'dev') { // Send report to Sentry if not dev env 13 | // Setup Sentry: 14 | 
Raven.config('https://e29902aa81ed414d867f51bd0d1ab91a:2b18fef80e954ba68d8f4351aab99672@sentry.io/305079', { 15 | release: version, 16 | extra: { 17 | args: process.argv, 18 | nodejsOrBin: installedWithNPM ? process.version : 'bin', 19 | os: process.platform 20 | } 21 | }) 22 | 23 | await new Promise((resolve, reject) => { 24 | // Capture errors: 25 | Raven.captureException(err, (sendErr, eventId) => { 26 | // Once report is sent resolve the promise. However, we resolve it even 27 | // if it failed to send a report: 28 | resolve() 29 | }) 30 | }) 31 | } 32 | 33 | // Coerce Strings to Error instances 34 | if (typeof err === 'string') { 35 | err = new Error(err) 36 | } 37 | 38 | if (debug) { 39 | console.log(`> [debug] handling error: ${err.stack}`) 40 | } 41 | 42 | if (err.code === 'ECONNREFUSED' || err.code === 'ENOTFOUND') { 43 | error(`Connection error: ${err.message}`) 44 | } else { 45 | if (err.constructor.name === 'Array') { 46 | err.forEach(err => error(err.message)) 47 | } else { 48 | error(err) 49 | // Check if error is due to `xdg-open` module on Linux and print instructions: 50 | if (err.message && err.message.includes('xdg-open')) { 51 | info('Run following command and try again, please:\ncp /usr/bin/xdg-open /usr/local/bin/xdg-open') 52 | } 53 | } 54 | } 55 | } 56 | 57 | module.exports = { 58 | handleError, 59 | error 60 | } 61 | -------------------------------------------------------------------------------- /test/fixtures/finance-vix/README.md: -------------------------------------------------------------------------------- 1 | CBOE Volatility Index (VIX) time-series dataset including daily open, close, 2 | high and low. The CBOE Volatility Index (VIX) is a key measure of market 3 | expectations of near-term volatility conveyed by S&P 500 stock index option 4 | prices introduced in 1993. 5 | 6 | ## Data 7 | 8 | From the [VIX FAQ][faq]: 9 | 10 | > In 1993, the Chicago Board Options Exchange® (CBOE®) introduced the CBOE 11 | > Volatility Index®, VIX®, and it quickly became the benchmark for stock market 12 | > volatility. It is widely followed and has been cited in hundreds of news 13 | > articles in the Wall Street Journal, Barron's and other leading financial 14 | > publications. Since volatility often signifies financial turmoil, VIX is 15 | > often referred to as the "investor fear gauge". 16 | > 17 | > VIX measures market expectation of near term volatility conveyed by stock 18 | > index option prices. The original VIX was constructed using the implied 19 | > volatilities of eight different OEX option series so that, at any given time, 20 | > it represented the implied volatility of a hypothetical at-the-money OEX 21 | > option with exactly 30 days to expiration. 22 | > 23 | > The New VIX still measures the market's expectation of 30-day volatility, but 24 | > in a way that conforms to the latest thinking and research among industry 25 | > practitioners. The New VIX is based on S&P 500 index option prices and 26 | > incorporates information from the volatility "skew" by using a wider range of 27 | > strike prices rather than just at-the-money series. 28 | 29 | [faq]: http://www.cboe.com/micro/vix/faq.aspx 30 | 31 | ## Preparation 32 | 33 | Run the shell script: 34 | 35 | . scripts/process.sh 36 | 37 | Output data is in `data/`. 38 | 39 | ### TODO 40 | 41 | * Incorporate computed historical data (1990-2003) 42 | * Consider incorporating VOX data 43 | 44 | ## License 45 | 46 | No obvious statement on [historical data page][historical]. 
Given size and 47 | factual nature of the data and its source from a US company would imagine this 48 | was public domain and as such have licensed the Data Package under the Public 49 | Domain Dedication and License (PDDL). 50 | 51 | [historical]: http://www.cboe.com/micro/vix/historical.aspx 52 | -------------------------------------------------------------------------------- /test/authorization.test.js: -------------------------------------------------------------------------------- 1 | const test = require('ava') 2 | const {Agent} = require('datahub-client') 3 | 4 | // ========================== 5 | // USER RIGHTS & RESTRICTIONS 6 | 7 | const mainPath = '/anuveyatsu/finance-vix' 8 | const dpJsonPath = mainPath + '/datapackage.json' 9 | const resourceCsvPath = mainPath + '/r/vix-daily.csv' 10 | const resourceJsonPath = mainPath + '/r/vix-daily.json' 11 | const zipPath = mainPath + '/r/finance-vix_zip.zip' 12 | 13 | const agent = new Agent('https://datahub.io') 14 | 15 | test('Access private dataset as unauthorized user', async t => { 16 | let response = await agent.fetch(mainPath) 17 | t.is(response.status, 404) 18 | response = await agent.fetch(dpJsonPath) 19 | t.is(response.status, 404) 20 | response = await agent.fetch(resourceCsvPath) 21 | t.is(response.status, 404) 22 | response = await agent.fetch(resourceJsonPath) 23 | t.is(response.status, 404) 24 | response = await agent.fetch(zipPath) 25 | t.is(response.status, 404) 26 | }) 27 | 28 | test('Access private dataset as non-owner user', async t => { 29 | // Token for 'test' user (Travis knows it): 30 | const token = process.env.token 31 | let response = await agent.fetch(mainPath + `?jwt=${token}`) 32 | t.is(response.status, 404) 33 | response = await agent.fetch(dpJsonPath + `?jwt=${token}`) 34 | t.is(response.status, 404) 35 | response = await agent.fetch(resourceCsvPath + `?jwt=${token}`) 36 | t.is(response.status, 404) 37 | response = await agent.fetch(resourceJsonPath + `?jwt=${token}`) 38 | t.is(response.status, 404) 39 | response = await agent.fetch(zipPath + `?jwt=${token}`) 40 | t.is(response.status, 404) 41 | }) 42 | 43 | test('Access private dataset as owner', async t => { 44 | // Owner's token is stored as secret env var on Travis 45 | const token = process.env.SECRET_OWNER_TOKEN 46 | let response = await agent.fetch(mainPath + `?jwt=${token}`) 47 | t.is(response.status, 200) 48 | response = await agent.fetch(dpJsonPath + `?jwt=${token}`) 49 | t.is(response.status, 200) 50 | response = await agent.fetch(resourceCsvPath + `?jwt=${token}`) 51 | t.is(response.status, 200) 52 | response = await agent.fetch(resourceJsonPath + `?jwt=${token}`) 53 | t.is(response.status, 200) 54 | response = await agent.fetch(zipPath + `?jwt=${token}`) 55 | t.is(response.status, 200) 56 | }) 57 | -------------------------------------------------------------------------------- /bin/data-login.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const fs = require('fs') 4 | const path = require('path') 5 | const minimist = require('minimist') 6 | const inquirer = require('inquirer') 7 | 8 | const {customMarked} = require('../lib/utils/tools.js') 9 | const {config} = require('datahub-client') 10 | const {handleError} = require('../lib/utils/error') 11 | const info = require('../lib/utils/output/info.js') 12 | const {login, authenticate} = require('datahub-client') 13 | const wait = require('../lib/utils/output/wait') 14 | 15 | const argv = minimist(process.argv.slice(2), { 16 | 
string: ['login'], 17 | boolean: ['help', 'interactive'], 18 | alias: {help: 'h', interactive: 'i'} 19 | }) 20 | 21 | const configMarkdown = fs.readFileSync(path.join(__dirname, '../docs/login.md'), 'utf8') 22 | const help = () => { 23 | console.log('\n' + customMarked(configMarkdown)) 24 | } 25 | 26 | if (argv.help) { 27 | help() 28 | process.exit(0) 29 | } 30 | 31 | Promise.resolve().then(async () => { 32 | const stopSpinner = wait('Logging in ...') 33 | const apiUrl = config.get('api') 34 | const token = config.get('token') 35 | let out 36 | 37 | try { 38 | out = await authenticate(apiUrl, token) 39 | } catch (err) { 40 | await handleError(err) 41 | process.exit(1) 42 | } 43 | if (out.authenticated) { 44 | stopSpinner() 45 | info('You are already logged in.') 46 | process.exit(0) 47 | } 48 | // Signup or signin 49 | stopSpinner() 50 | 51 | // Do choosing login method here 52 | const loginChoices = Object.keys(out.providers).map(provider => { 53 | return provider.charAt(0).toUpperCase() + provider.slice(1) 54 | }) 55 | const result = await inquirer.prompt([ 56 | { 57 | type: 'list', 58 | name: 'loginProvider', 59 | message: 'Login with...', 60 | choices: loginChoices, 61 | filter: val => { 62 | return val.toLowerCase() 63 | } 64 | } 65 | ]) 66 | const authUrl = out.providers[result.loginProvider].url 67 | info('Opening browser and waiting for you to authenticate online') 68 | if (argv.interactive) { 69 | info('Please, copy and paste following URL in your browser:\n' + authUrl) 70 | } else { 71 | info('Note: If nothing is loaded in browser please run `data login -i`') 72 | } 73 | 74 | try { 75 | await login(apiUrl, authUrl, config.get('domain')) 76 | } catch (err) { 77 | await handleError(err) 78 | process.exit(1) 79 | } 80 | info('You are logged in!') 81 | process.exit(0) 82 | }) 83 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | matrix: 3 | include: 4 | - os: linux 5 | node_js: '8' 6 | env: TEST=true 7 | sudo: required 8 | - os: linux 9 | node_js: '7' 10 | env: TEST=false 11 | sudo: required 12 | - os: linux 13 | node_js: '9' 14 | env: TEST=false 15 | sudo: required 16 | - os: osx 17 | node_js: '7' 18 | env: TEST=false 19 | - os: osx 20 | node_js: '8' 21 | env: TEST=true DEPLOY=true 22 | - os: osx 23 | node_js: '9' 24 | env: TEST=false 25 | install: if $TEST; then npm install; else echo 'skipping this script...'; fi 26 | script: if $TEST; then npm test; else echo 'skipping this script...'; fi 27 | before_deploy: if $DEPLOY; then git submodule init && git submodule update && npm run push:test && npm run pack; else echo 'skipping this script...'; fi 28 | deploy: 29 | - provider: releases 30 | api_key: 31 | secure: "Ids51LgFBEoOR1q1DrmxNRP/ryPHPEAuf7vsO7aBVy+qFwcaRzwmvxvjPVa5907Sv7zkccg+qbFW9+W1HQ7IYi4nyaX/4o8WaxQgev2aki2KD5ztQlsWeGRcquAZfZplLrUbXJkEW8cnXcGIE8QKc08yiQa25+3s8fsBi797z8pj04iLAvkK600lFbKls90rQH7FsIlIrc0LarvNNFqEi/qtUoPeMQ6ksWoE1emvRZfLaIh8IIgzfN00MxX/hZb+50v2eZaypjfODZZL5GDvnCM+fofqCyEnJwo0JLxRHz7rHKN4/nAN6Rbvqx2bGQu7WtWCX3nT+u7rdgg3gjqR4oeHHpNqhalwXpfPWU/3G9HiyCSODqa/bps8FODM3hm8+LENQrsWYWZ6/mLxY76F97MqHTQHSz4rB74aKDc+nZ1Teqw8oykC0T47JwguH+ZIfKt7bOW9awWFkEbPSOrWNGQV7ZVjhxKlr4bazeYUyHFhTLjkawH8P1jRSt3H2a+AZES5cDuXEwKER7I7hZU7FCNNbFxUHJo0rSG/dynFk9jmiSmUELAKkkj/C0UTP3r55Zkq7LleOlfhXELmk6cEr+uQIAm/OzQ/CKL+5Vq1uhSNqmdxFGURHQNUQrv2tmysyNEzo+N9mwtOQH43JRjT7PnhuTWI/BOrqi8kk7csUaw=" 32 | file_glob: true 33 | file: "packed/*" 34 | 
skip_cleanup: true 35 | on: 36 | tags: true 37 | condition: $DEPLOY = true 38 | - provider: npm 39 | email: datahub@datopian.com 40 | api_key: 41 | secure: "riGRy8fqyJPXeTJulBENhxLLktvVimqTlyrjckdifenI8q0vxbTvw4fRep3fKQqgNFrh0dccIagtPO8RSuf0Se9dK+M6mBM2dc8W6t84i0jg+EDoavvhgHlVfXFNEx50lWz2H1EZH4I9MvFbAiXQm7svhaXSMwudzdlHFq/K+0xjDkVgnv4AWOnkezf8XqyOmBfVPcS6mvfEMZgtQPR41eaFM7GZ1hAwOZnOwLSTRMljBiDlBSKp89ahNsmoDua3JMZ8/5s5pp1fBzlHJx6knNF9lSTjXQtJEd1ZGZljdyjIawwCdohzcR37P6iRlCLVAOGKrbeFMUnprUk23HFg3eD86cUtly+jdZd7YqBTSBQ4m9r+3G5YKbUdCbavC0pnc3/cKwP3tYRnLN5PPxo9pTypHwvVzADgG4XBnvXsE07k8F+QBdIJce7JpM1QjDi5xiqJyOEW1YIpVnxOBtO6qc/w+cmlZzcBcdbOfss0+mEU0WHJFj+FE8jDGtt3TJK9PvkV5EKo0KtDGmZVrOeW63CJ3SE47jOcS0GwtxdlSzTsKK9Ic3b9s1pHAoqHy0n/PJEXAJ6NcS0MyeHPPwg1NWWQMIXEt1JQf6iXdcWVVCZBBXShdX4KUUQLkOHXSNv1vIKgE/5UP3bjJ+FHazLQjPjquMIRuGmy6yAwD/GaaLI=" 42 | on: 43 | tags: true 44 | condition: $DEPLOY = true 45 | after_script: 46 | - chmod ugo+x test-script.sh 47 | - ./test-script.sh 48 | -------------------------------------------------------------------------------- /bin/data-cat.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const fs = require('fs') 3 | const path = require('path') 4 | 5 | const minimist = require('minimist') 6 | const {File, isUrl} = require('data.js') 7 | const {writers} = require('datahub-client').cat 8 | 9 | // Ours 10 | const {customMarked} = require('../lib/utils/tools.js') 11 | const info = require('../lib/utils/output/info.js') 12 | const {handleError, error} = require('../lib/utils/error') 13 | 14 | const argv = minimist(process.argv.slice(2), { 15 | string: ['cat'], 16 | boolean: ['help'], 17 | alias: {help: 'h'} 18 | }) 19 | 20 | const getMarkdown = fs.readFileSync(path.join(__dirname, '../docs/cat.md'), 'utf8') 21 | const help = () => { 22 | console.log('\n' + customMarked(getMarkdown)) 23 | } 24 | 25 | if (argv.help) { 26 | help() 27 | process.exit(0) 28 | } 29 | 30 | const pathParts = argv._[0] ? path.parse(argv._[0]) : {name: null} 31 | 32 | let outFileExt, outFormat 33 | if (argv._[1] && argv._[1] !== 'stdout') { 34 | outFileExt = path.extname(argv._[1]) || '.noext' 35 | outFormat = outFileExt.slice(1) 36 | } else { 37 | outFormat = 'ascii' 38 | } 39 | 40 | const writersDatabase = { 41 | ascii: writers.ascii, 42 | csv: writers.csv, 43 | xlsx: writers.xlsx, 44 | md: writers.md, 45 | html: writers.html 46 | } 47 | 48 | const dumpIt = async (res, {sheet}={}) => { 49 | let stream 50 | if (outFormat in writersDatabase) { 51 | try { 52 | stream = await writersDatabase[outFormat](res, {sheet}) 53 | } catch (err) { 54 | if (isUrl(argv._[0])) { 55 | error('Provided URL is invalid') 56 | } 57 | await handleError(err) 58 | process.exit(1) 59 | } 60 | 61 | if (outFormat === 'ascii') { // Write to stdout 62 | stream.pipe(process.stdout) 63 | } else { // Write to file 64 | const writeStream = fs.createWriteStream(argv._[1], {flags : 'w'}) 65 | stream.pipe(writeStream) 66 | writeStream.on('close', () => { 67 | info(`All done! 
Your data is saved in "${argv._[1]}"`) 68 | }) 69 | } 70 | } else { 71 | info(`Sorry, provided output format is not supported.`) 72 | } 73 | } 74 | 75 | if (pathParts.name === '_' || (!pathParts.name && process.stdin.constructor.name === 'Socket')) { 76 | dumpIt(process.stdin) 77 | } else if (pathParts.name) { 78 | // Check both 'sheet' and 'sheets' args as users can use both of them: 79 | let sheet = argv.sheet || argv.sheets 80 | // Check if it can be coerced to integer, if so we assume it's sheet index: 81 | sheet = !!parseInt(sheet) ? parseInt(sheet) - 1 : sheet 82 | const res = File.load(argv._[0], {format: argv.format}) 83 | dumpIt(res, {sheet}) 84 | } else { 85 | info('No input is provided. Please, run "data cat --help" for usage information.') 86 | } 87 | -------------------------------------------------------------------------------- /docs/push.md: -------------------------------------------------------------------------------- 1 | 2 | ■ data push [PATH] 3 | 4 | `PATH` (optional) is the path to the data file or data package. 5 | 6 | ## Options: 7 | 8 | -h, --help Output usage information. 9 | 10 | --format Explicitly set the format for a file. Useful when a file does not follow conventional 11 | naming. E.g., `--format=csv` 12 | 13 | -i, --interactive Enable interactive mode. Useful when pushing a single file. 14 | 15 | --schedule Set up a schedule so the DataHub will automatically re-import the remote file on 16 | a regular basis. E.g., `every 90s`, `every 5m`, `every 2d`. The number is always 17 | an integer, the selector is `s/m/h/d/w` (second -> week) and you can’t schedule for 18 | less than 60 seconds. 19 | 20 | --sheets Set which sheets should be processed when pushing Excel files. By default, only 21 | the first sheet is processed. You can use the `--sheets=all` option to push "all" sheets. 22 | You can also list sheet numbers, e.g., `--sheets=1,2`. If you wanted to push only 23 | the second sheet, you would do `--sheets=2`. Sheet numbering starts from 1. 24 | 25 | --name Set the name of the dataset without interaction when pushing a single file. E.g., `--name=my-dataset` 26 | 27 | ### findability options: 28 | 29 | These options define the dataset visibility on the DataHub.io site: 30 | 31 | --public (default) Everybody can see the dataset in the search results. 32 | Everybody can access the dataset by the URL link. 33 | 34 | --unlisted Other users will not see the dataset in the search results. 35 | You will see the dataset in the search results. 36 | Everybody can access the dataset by the URL link. 37 | 38 | --private Other users cannot access the dataset. 39 | Other users will not see the dataset in the search results. 40 | You will see the dataset in the search results. 41 | 42 | ## Examples: 43 | 44 | Uploads the Data Package in the current working directory to the DataHub: 45 | 46 | ■ data push 47 | 48 | Uploads the Data Package at the given path to the DataHub (core/finance-vix/ should contain datapackage.json): 49 | 50 | ■ data push core/finance-vix/ 51 | 52 | By default, all pushed datasets are public.
To make them unlisted: 53 | 54 | ■ data push core/finance-vix/ --unlisted 55 | 56 | Uploads a file from URL to DataHub on weekly basis and sets file format as CSV: 57 | 58 | ■ data push URL --schedule="every 1w" --format=csv 59 | 60 | Uploads a Excel file and processes only the second sheet: 61 | 62 | ■ data push myExcel.xlsx --sheets=2 63 | -------------------------------------------------------------------------------- /bin/data-push-flow.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const fs = require('fs') 3 | const path = require('path') 4 | 5 | const minimist = require('minimist') 6 | const urljoin = require('url-join') 7 | const {DataHub} = require('datahub-client') 8 | const {authenticate} = require('datahub-client') 9 | const {config} = require('datahub-client') 10 | const { write: copyToClipboard } = require('clipboardy') 11 | 12 | // Ours 13 | const {customMarked} = require('../lib/utils/tools.js') 14 | const {handleError} = require('../lib/utils/error') 15 | const wait = require('../lib/utils/output/wait') 16 | const info = require('../lib/utils/output/info.js') 17 | 18 | 19 | const argv = minimist(process.argv.slice(2), { 20 | string: ['push-flow'], 21 | boolean: ['help', 'debug', 'interactive'], 22 | alias: {help: 'h', interactive: 'i'} 23 | }) 24 | 25 | const pushMarkdown = fs.readFileSync(path.join(__dirname, '../docs/push-flow.md'), 'utf8') 26 | const help = () => { 27 | console.log('\n' + customMarked(pushMarkdown)) 28 | } 29 | 30 | if (argv.help) { 31 | help() 32 | process.exit(0) 33 | } 34 | 35 | Promise.resolve().then(async () => { 36 | let stopSpinner = () => {} 37 | // First check if user is authenticated 38 | const apiUrl = config.get('api') 39 | const token = config.get('token') 40 | let out 41 | try { 42 | out = await authenticate(apiUrl, token) 43 | } catch (err) { 44 | await handleError(err) 45 | process.exit(1) 46 | } 47 | if (!out.authenticated) { 48 | info('You need to login in order to push your data. Please, use `data login` command.') 49 | process.exit(0) 50 | } 51 | try { 52 | const datasetPath = argv._[0] || process.cwd() 53 | stopSpinner = wait('Commencing push ...') 54 | 55 | const datahubConfigs = { 56 | apiUrl: config.get('api'), 57 | token: config.get('token'), 58 | debug: argv.debug, 59 | ownerid: config.get('profile') ? config.get('profile').id : config.get('id'), 60 | owner: config.get('profile') ? 
config.get('profile').username : config.get('username') 61 | } 62 | const datahub = new DataHub(datahubConfigs) 63 | const res = await datahub.pushFlow( 64 | path.join(datasetPath ,'.datahub/flow.yaml'), 65 | path.join(datasetPath ,'.datahub/datapackage.json') 66 | ) 67 | const revisionId = res.flow_id.split('/').pop() 68 | const datasetName = res.dataset_id.split('/').pop() 69 | stopSpinner() 70 | const message = '🙌 your data is published!\n' 71 | const url = urljoin(config.get('domain'), datahubConfigs.owner, datasetName,'v',revisionId) 72 | await copyToClipboard(url) 73 | console.log(message + '🔗 ' + url + ' (copied to clipboard)') 74 | } catch (err) { 75 | stopSpinner() 76 | if (argv.debug) { 77 | console.log('> [debug]\n' + err.stack) 78 | } 79 | await handleError(err) 80 | process.exit(1) 81 | } 82 | }) 83 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "data-cli", 3 | "version": "0.10.1", 4 | "description": "CLI for working with data packages", 5 | "main": "./lib/index.js", 6 | "bin": { 7 | "data": "./bin/data.js" 8 | }, 9 | "directories": { 10 | "test": "test" 11 | }, 12 | "scripts": { 13 | "test": "ava -v", 14 | "push:test": "ava -v test/push/ -m 'push*'", 15 | "watch:test": "npm t -- --watch", 16 | "lint": "xo --quiet", 17 | "data": "node bin/data.js", 18 | "gzip": "rm -rf packed/*.gz && ls packed/data* | xargs gzip -k", 19 | "pack": "pkg bin/data.js -c package.json -o packed/data --options no-warnings && npm run gzip", 20 | "precommit": "npm run lint && npm run build" 21 | }, 22 | "xo": { 23 | "space": true, 24 | "semicolon": false, 25 | "rules": { 26 | "no-var": "warn", 27 | "no-use-before-define": 1, 28 | "no-await-in-loop": 1, 29 | "import/prefer-default-export": 1, 30 | "no-negated-condition": 1, 31 | "guard-for-in": 1 32 | }, 33 | "ignores": [ 34 | "test/fixtures/*/**" 35 | ] 36 | }, 37 | "ava": { 38 | "failFast": false, 39 | "files": [ 40 | "test/*test.js" 41 | ] 42 | }, 43 | "pkg": { 44 | "scripts": [ 45 | "bin/*", 46 | "lib/**/*" 47 | ], 48 | "targets": [ 49 | "node7-linux-x64", 50 | "node7-macos-x64", 51 | "node7-win-x64" 52 | ] 53 | }, 54 | "repository": { 55 | "type": "git", 56 | "url": "git+https://github.com/datahq/data-cli.git" 57 | }, 58 | "keywords": [ 59 | "data", 60 | "data package", 61 | "datahub", 62 | "datapackage" 63 | ], 64 | "author": "Rufus Pollock and DataHQ", 65 | "license": "ISC", 66 | "bugs": { 67 | "url": "https://github.com/datahq/data-cli/issues" 68 | }, 69 | "homepage": "https://datahub.io/docs", 70 | "dependencies": { 71 | "ansi-escapes": "^3.0.0", 72 | "boxen": "^1.3.0", 73 | "chalk": "^2.3.0", 74 | "clipboardy": "^1.2.3", 75 | "data.js": "^0.11.5", 76 | "datahub-client": "^0.5.8", 77 | "first-run": "^1.2.0", 78 | "global-packages": "^1.0.2", 79 | "human-readable-ids": "^1.0.3", 80 | "inquirer": "^5.1.0", 81 | "inquirer-test": "^2.0.1", 82 | "jsonlint": "^1.6.2", 83 | "marked": "^0.3.6", 84 | "marked-terminal": "^2.0.0", 85 | "minimist": "^1.2.0", 86 | "mkdirp": "^0.5.1", 87 | "ora": "^1.3.0", 88 | "pkg": "4.2.5", 89 | "progress": "^2.0.0", 90 | "raven": "^2.4.2", 91 | "universal-analytics": "^0.4.16", 92 | "unzip": "^0.1.11", 93 | "update-notifier": "^2.3.0", 94 | "url-join": "^2.0.2", 95 | "xlsx": "^0.17.0" 96 | }, 97 | "devDependencies": { 98 | "ava": "^0.25.0", 99 | "cross-spawn": "^5.1.0", 100 | "stream-to-array": "^2.3.0", 101 | "xo": "^0.18.2" 102 | }, 103 | "resolutions": { 104 | 
"graceful-fs": "^4.2.4" 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /bin/data-info.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const fs = require('fs') 3 | const path = require('path') 4 | 5 | const minimist = require('minimist') 6 | const data = require('data.js') 7 | const {info, Agent} = require('datahub-client') 8 | 9 | const {customMarked} = require('../lib/utils/tools.js') 10 | const {handleError} = require('../lib/utils/error') 11 | const printInfo = require('../lib/utils/output/info') 12 | 13 | const argv = minimist(process.argv.slice(2), { 14 | string: ['info'], 15 | boolean: ['help'], 16 | alias: {help: 'h'} 17 | }) 18 | 19 | const infoMarkdown = fs.readFileSync(path.join(__dirname, '../docs/info.md'), 'utf8') 20 | const help = () => { 21 | console.log('\n' + customMarked(infoMarkdown)) 22 | } 23 | 24 | if (argv.help) { 25 | help() 26 | process.exit(0) 27 | } 28 | 29 | const fileOrDatasetIdentifier = argv._[0] ? argv._[0] : './' 30 | 31 | Promise.resolve().then(async () => { 32 | // If given path is a URL then fetch headers and check if status is OK: 33 | const agent = new Agent(fileOrDatasetIdentifier, {debug: argv.debug}) 34 | if (data.isUrl(fileOrDatasetIdentifier)) { 35 | const response = await agent.fetch('/') 36 | if (response.status >= 400) { 37 | throw new Error(`Provided URL returns ${response.status} status code.`) 38 | } 39 | } 40 | 41 | try { 42 | const parsedIdentifier = await data.parseDatasetIdentifier(fileOrDatasetIdentifier) 43 | const isdataset = data.isDataset(fileOrDatasetIdentifier) 44 | const githubDataset = parsedIdentifier.type === 'github' && parsedIdentifier.name.slice((parsedIdentifier.name.lastIndexOf('.') - 1 >>> 0) + 2) === '' 45 | if (isdataset || parsedIdentifier.type === "datahub" || githubDataset) { 46 | const dataset = await data.Dataset.load(fileOrDatasetIdentifier) 47 | const out = info.infoPackage(dataset) 48 | console.log(customMarked(out)) 49 | } else { 50 | const file = data.File.load(fileOrDatasetIdentifier, {format: argv.format}) 51 | const knownTabularFormats = ['csv', 'tsv', 'dsv'] 52 | if (knownTabularFormats.includes(file.descriptor.format)) { 53 | await file.addSchema() 54 | } 55 | // Only print table if resource is tabular: 56 | let table 57 | let tabularFormatsAndExcel = knownTabularFormats.concat(['xls', 'xlsx']) 58 | if (tabularFormatsAndExcel.includes(file.descriptor.format)) { 59 | table = await info.infoResource(file) 60 | } 61 | console.log(customMarked('**File descriptor:**')) 62 | console.log(JSON.stringify(file.descriptor, null, 2)) 63 | if (table) { 64 | console.log(table) 65 | console.log(customMarked('*Only showing first 10 lines. 
There might be more data.*')) 66 | } 67 | } 68 | } catch (err) { 69 | if (!argv._[0]) { 70 | printInfo('Running `data info` without an argument will search a `datapackage.json` file in the current working directory.') 71 | } 72 | await handleError(err) 73 | process.exit(1) 74 | } 75 | agent.close() 76 | }) 77 | -------------------------------------------------------------------------------- /bin/data-validate.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Packages 4 | const fs = require('fs') 5 | const path = require('path') 6 | const minimist = require('minimist') 7 | const jsonlint = require('jsonlint') 8 | const {Validator} = require('datahub-client') 9 | const {Dataset} = require('data.js') 10 | const {eraseLines} = require('ansi-escapes') 11 | 12 | // Ours 13 | const {customMarked} = require('../lib/utils/tools') 14 | const {error} = require('../lib/utils/error') 15 | const wait = require('../lib/utils/output/wait') 16 | const info = require('../lib/utils/output/info') 17 | 18 | const argv = minimist(process.argv.slice(2), { 19 | string: ['validate'], 20 | boolean: ['help'], 21 | alias: {help: 'h'} 22 | }) 23 | 24 | const validateMarkdown = fs.readFileSync(path.join(__dirname, '../docs/validate.md'), 'utf8') 25 | const help = () => { 26 | console.log('\n' + customMarked(validateMarkdown)) 27 | } 28 | 29 | if (argv.help) { 30 | help() 31 | process.exit(0) 32 | } 33 | 34 | let path_ = argv._[0] 35 | 36 | if (!path_) { 37 | path_ = process.cwd() 38 | } 39 | 40 | const validator = new Validator({identifier: path_}) 41 | const stopSpinner = wait('') 42 | 43 | validator.on('message', (message) => { 44 | if (message.constructor.name === 'String') { 45 | process.stdout.write(eraseLines(1)) 46 | info(message) 47 | } else { 48 | process.stdout.write(eraseLines(2)) 49 | info(message.name + ': ' + message.status) 50 | } 51 | }) 52 | 53 | validator.validate().then(result => { 54 | if (result === true) { 55 | stopSpinner() 56 | process.stdout.write(eraseLines(2)) 57 | info('Your Data Package is valid!') 58 | } else { 59 | stopSpinner() 60 | process.stdout.write(eraseLines(2)) 61 | // result is a TableSchemaError with attributes: message, rowNumber, and errors 62 | // each error in errors is of form { message, rowNumber, columnNumber } 63 | 64 | // HACK: strip out confusing "(see 'error.errors')" in error message 65 | if (result.message) { 66 | error(`Validation has failed for "${result.resource}"`) 67 | const msg = result.message.replace(" (see 'error.errors')", '') + ' on line ' + result.rowNumber 68 | error(msg) 69 | result.errors.forEach(err => { 70 | error(err.message) 71 | }) 72 | } 73 | else { 74 | if (result.constructor.name === 'Array') { 75 | result.forEach(err => error(err.message)) 76 | } else { 77 | error(result) 78 | } 79 | } 80 | } 81 | }).catch(err => { 82 | stopSpinner() 83 | process.stdout.write(eraseLines(2)) 84 | error(err.message) 85 | if (err.resource) { 86 | error(`Resource: ${err.resource}`) 87 | error(`Path: ${err.path}`) 88 | } 89 | // Get path to datapackage.json 90 | if (fs.lstatSync(path_).isDirectory()) { 91 | // Check datapackage.json in this dir and if doesn't exist then throw error: 92 | path_ = path.join(path_, 'datapackage.json') 93 | if (!fs.existsSync(path_)) { 94 | error('datapackage.json not found in the given directory') 95 | } 96 | } 97 | // Read given path 98 | let content 99 | try { 100 | content = fs.readFileSync(path_) 101 | } catch (err) { 102 | error(err.message) 103 | 
process.exit(1) 104 | } 105 | 106 | var lint = jsonlint.parse(content.toString()) 107 | if (lint.error) { 108 | error(`Invalid JSON: on line ${lint.line}, character ${lint.character}\n\n ${lint.error}\n\n${lint.evidence}`) 109 | process.exit(1) 110 | } 111 | }) 112 | -------------------------------------------------------------------------------- /bin/data.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | // Native 3 | const {resolve} = require('path') 4 | 5 | // Packages 6 | const ua = require('universal-analytics') 7 | const {config} = require('datahub-client') 8 | const firstRun = require('first-run') 9 | 10 | const {version} = require('../package.json') 11 | 12 | // Ours 13 | const {error, handleError} = require('../lib/utils/error') 14 | const updateNotifier = require('../lib/utils/update') 15 | 16 | // Increase MaxListenersExceededWarning level for cases when the remote dataset has a lot of resources, 17 | // to avoid: Warning: Possible EventEmitter memory leak detected. X end listeners added. 18 | // ~11 requests is required to validate remote 1 tabular resource, so I set a limit to match a dataset with 10 files. 19 | require('events').EventEmitter.defaultMaxListeners = 120; 20 | 21 | // Handle all uncaught exceptions and unhandled rejections 22 | process.on('uncaughtException', async (err) => { 23 | await handleError(err) 24 | process.exit(1) 25 | }) 26 | 27 | process.on('unhandledRejection', async (err) => { 28 | await handleError(err) 29 | process.exit(1) 30 | }) 31 | 32 | // Check and notify if any updates are available: 33 | updateNotifier() 34 | 35 | // Check if the current path exists and throw and error 36 | // if the user is trying to deploy a non-existing path! 37 | // This needs to be done exactly in this place, because 38 | // the utility imports are taking advantage of it 39 | try { 40 | process.cwd() 41 | } catch (err) { 42 | if (err.code === 'ENOENT' && err.syscall === 'uv_cwd') { 43 | console.log(`Current path doesn't exist!`) 44 | } else { 45 | console.log(err) 46 | } 47 | process.exit(1) 48 | } 49 | 50 | const commands = new Set([ 51 | 'help', 52 | 'get', 53 | 'push', 54 | 'push-flow', 55 | 'validate', 56 | 'info', 57 | 'init', 58 | 'cat', 59 | 'login' 60 | ]) 61 | 62 | // Parse args and dispatch to relevant command 63 | let args = process.argv.slice(2) 64 | 65 | if (args[0] === '-v' || args[0] === '--version') { 66 | console.log(`${version}`) 67 | process.exit() 68 | } 69 | 70 | // Default command 71 | let cmd = 'help' 72 | const index = args.findIndex(a => commands.has(a)) 73 | 74 | if (index > -1) { 75 | cmd = args[index] 76 | args.splice(index, 1) 77 | 78 | // Dispatch to the underlying command and help will be called there 79 | if (cmd === 'help' && index < args.length && commands.has(args[index])) { 80 | cmd = args[index] 81 | args.splice(index, 1) 82 | args.unshift('--help') 83 | } 84 | if (cmd.includes(' ')) { 85 | const parts = cmd.split(' ') 86 | cmd = parts.shift() 87 | args = [].concat(parts, args) 88 | } 89 | } else if (args[0] === '-h' || args[0] === '--help') { 90 | cmd = 'help' 91 | } else if (args.length === 0) { // One final option is no command in which case show help 92 | cmd = 'help' 93 | } else { 94 | error(`Command does not exist "` + args[0] + '"') 95 | console.error(`\nTo see a list of available commands run:`) 96 | console.error(`\n data help\n`) 97 | process.exit(1) 98 | } 99 | 100 | const bin = resolve(__dirname, 'data-' + cmd + '.js') 101 | 102 | // Track events using GA: 
103 | // Developers should set 'datahub' env var to 'dev' so their usage doesn't get tracked: 104 | if (process.env.datahub !== 'dev') { 105 | const visitor = ua('UA-80458846-4') 106 | // If user is logged in then use the datahub userid with GA - it allows us to 107 | // track a user activity cross-platform, eg, connect activity on CLI and website: 108 | const userid = config.get('profile') ? config.get('profile').id : config.get('id') 109 | if (userid) { 110 | visitor.set('uid', userid) 111 | } 112 | // If this is the first run of the app, then track it in GA: 113 | if (firstRun()) { 114 | visitor.event('cli', 'first-run', process.platform).send() 115 | } 116 | // Track which version is run and on which OS: 117 | visitor.event('cli-usage-by-os-and-version', process.platform, version).send() 118 | // Event category is 'cli', action is the command and label is all arguments: 119 | const commandToTrack = args.length === 0 ? 'noArgs' : cmd 120 | visitor.event('cli', commandToTrack, process.argv.slice(3, process.argv.length).toString()).send() 121 | } 122 | 123 | // Prepare process.argv for subcommand 124 | process.argv = process.argv.slice(0, 2).concat(args) 125 | 126 | // Load sub command 127 | // With custom parameter to make "pkg" happy 128 | require(bin, 'may-exclude') 129 | -------------------------------------------------------------------------------- /bin/data-get.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // Packages 4 | const fs = require('fs') 5 | const path = require('path') 6 | const url = require('url') 7 | const mkdirp = require('mkdirp') 8 | const minimist = require('minimist') 9 | const {Dataset, File, isDataset, parseDatasetIdentifier} = require('data.js') 10 | const {get, config} = require('datahub-client') 11 | const unzip = require('unzip') 12 | 13 | // Ours 14 | const {customMarked} = require('../lib/utils/tools.js') 15 | const wait = require('../lib/utils/output/wait') 16 | const {handleError} = require('../lib/utils/error') 17 | 18 | const argv = minimist(process.argv.slice(2), { 19 | string: ['get'], 20 | boolean: ['help', 'debug'], 21 | alias: {help: 'h'} 22 | }) 23 | 24 | const getMarkdown = fs.readFileSync(path.join(__dirname, '../docs/get.md'), 'utf8') 25 | const help = () => { 26 | console.log('\n' + customMarked(getMarkdown)) 27 | } 28 | 29 | if (argv.help || !argv._[0]) { 30 | help() 31 | process.exit(0) 32 | } 33 | 34 | let identifier = argv._[0] 35 | 36 | const run = async () => { 37 | const stopSpinner = wait('Loading...') 38 | try { 39 | const start = new Date() 40 | let pathToSave 41 | const parsedIdentifier = await parseDatasetIdentifier(identifier) 42 | const itIsDataset = isDataset(identifier) 43 | const githubDataset = parsedIdentifier.type === 'github' && parsedIdentifier.name.slice((parsedIdentifier.name.lastIndexOf('.') - 1 >>> 0) + 2) === '' 44 | 45 | if (itIsDataset || githubDataset) { 46 | const dataset = await Dataset.load(identifier) 47 | const owner = dataset.identifier.owner || '' 48 | const name = dataset.identifier.name 49 | 50 | pathToSave = path.join(owner, name) 51 | 52 | if (!checkDestIsEmpty(owner, name)) { 53 | throw new Error(`${owner}/${name} is not empty!`) 54 | } 55 | 56 | /** usual dataset download */ 57 | const allResources = await get(dataset) 58 | // Save all files on disk 59 | const myPromises = allResources.map(async resource => { 60 | return saveIt(owner, name, resource) 61 | }) 62 | await Promise.all(myPromises) 63 | 64 | } else if 
(parsedIdentifier.type === "datahub") { 65 | // Remove trailing slash: 66 | if(identifier.substr(-1) === '/' && identifier.length > 1) { 67 | identifier = identifier.slice(0, identifier.length - 1) 68 | } 69 | // We assume that if /r/ is in identifier then it's r link. 70 | if (identifier.includes('/r/')) { 71 | pathToSave = await saveFileFromUrl(identifier, argv.format) 72 | } else { 73 | // Try to guess owner and dataset name here. We're not loading Dataset object 74 | // because we want to handle private datasets as well: 75 | const idParts = identifier.split('/') 76 | const owner = idParts[idParts.length - 2] 77 | const name = idParts[idParts.length - 1] 78 | const token = config.get('token') 79 | pathToSave = path.join(owner, name) 80 | 81 | if (!checkDestIsEmpty(owner, name)) { 82 | throw new Error(`${owner}/${name} is not empty!`) 83 | } 84 | 85 | /** For datasets from the datahub we get zipped version and unzip it. 86 | - less traffic 87 | - zipped version has a fancy file structure 88 | #issue: https://github.com/datahq/datahub-qa/issues/86 */ 89 | const zipped_dataset_url = `https://datahub.io/${owner}/${name}/r/${name}_zip.zip?jwt=${token}` 90 | const archive_path = await saveFileFromUrl(zipped_dataset_url, 'zip') 91 | // unzip archive into destination folder 92 | fs.createReadStream(archive_path) 93 | .pipe(unzip.Extract({ path: pathToSave })) 94 | // removing the archive file once we extracted all the dataset files 95 | .on('finish', () => {fs.unlinkSync(archive_path)}) 96 | } 97 | } else { // If it is not a dataset - download the file 98 | if (parsedIdentifier.type === 'github' && !githubDataset) { 99 | identifier += `?raw=true` 100 | } 101 | pathToSave = await saveFileFromUrl(identifier, argv.format) 102 | } 103 | 104 | // show time statistic & success message 105 | stopSpinner() 106 | const end = new Date() - start 107 | console.log(`Time elapsed: ${(end / 1000).toFixed(2)} s`) 108 | console.log(`Dataset/file is saved in "${pathToSave}"`) 109 | 110 | } catch (err) { 111 | stopSpinner() 112 | if (argv.debug) { 113 | console.log('> [debug]\n' + err.stack) 114 | } 115 | await handleError(err) 116 | process.exit(1) 117 | } 118 | } 119 | 120 | run() 121 | 122 | /** 123 | * Download file from url and save it locally using data.js 'File' object. 124 | * returns path, where the file was saved ( ${filename}.${fileformat} ) 125 | * Using: let savedPath = await saveFileFromUrl(url, format) 126 | * @param url: url to get the file 127 | * @param format: csv, json, zip, etc 128 | * @returns {Promise} 129 | */ 130 | const saveFileFromUrl = (url, format) => { 131 | return new Promise(async (resolve, reject) =>{ 132 | const file = await File.load(url, {format: format}) 133 | const destPath = [file.descriptor.name, file.descriptor.format].join('.') 134 | let stream 135 | try { 136 | stream = await file.stream() 137 | } catch (err) { 138 | if (err.message === 'Not Found') { 139 | err.message += ' or Forbidden.' 
140 | } 141 | await handleError(err) 142 | process.exit(1) 143 | } 144 | stream.pipe(fs.createWriteStream(destPath)).on('finish', () => { 145 | resolve(destPath) 146 | }) 147 | }) 148 | } 149 | 150 | const saveIt = (owner, name, resource) => { 151 | return new Promise(async (resolve, reject) => { 152 | // We can only save if the path is defined 153 | if (resource.descriptor.path) { 154 | const pathParts = url.parse(resource.descriptor.path) 155 | let destPath 156 | if (pathParts.protocol === 'http:' || pathParts.protocol === 'https:') { 157 | const relativePath = resource.descriptor.path.split('/').slice(5).join('/') 158 | destPath = path.join(owner, name, relativePath) 159 | } else { 160 | destPath = path.join(owner, name, resource.descriptor.path) 161 | } 162 | mkdirp.sync(path.dirname(destPath)) 163 | const stream = await resource.stream() 164 | stream.pipe(fs.createWriteStream(destPath)).on('finish', () => { 165 | resolve() 166 | }) 167 | } 168 | }) 169 | } 170 | 171 | // TODO: Move this somewhere to utils 172 | const checkDestIsEmpty = (owner, name) => { 173 | const dest = path.join(owner, name) 174 | return !fs.existsSync(dest) || fs.readdirSync(dest).length === 0; 175 | } 176 | 177 | module.exports = { 178 | checkDestIsEmpty 179 | } 180 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | **"Data-cli"** is an important part of the [DataHub](https://datahub.io/docs/about) project. It is a command-line tool that helps you manage your data, much as `git` helps you manage code. 4 | 5 | For example, suppose your work has produced a few data files and a description, and you want to share them with your colleagues. With **"data-cli"** you just need to: 6 | 7 | ```shell 8 | cd data-folder 9 | data init # convert your data files into a data package 10 | > "Answer a few questions here, e.g. dataset name, files to include, etc" 11 | data push # upload the dataset to the DataHub 12 | > "As a result you'll get a link to share: 13 | http://datahub.io/user-name/data-package-name 14 | ``` 15 | 16 | That's it! Your data is online. You can make your data unlisted or private, add some pretty graphics, and more. Please read http://datahub.io/docs for details. 17 | 18 | With `data-cli` you can also: 19 | 20 | * Get data from online sources 21 | * Get info about data files and datasets (local and remote) 22 | * Validate your data to ensure its quality 23 | * Initialize a new dataset (as a Data Package) 24 | 25 | ## Usage examples: 26 | 27 | Here we show usage examples for common `data` commands. To see the full documentation for a command, click on the command name or proceed to the [help pages](https://github.com/datahq/data-cli/tree/master/docs). 28 | 29 | ### data login 30 | 31 | You should log in the first time you use data-cli: 32 | ```bash 33 | $ data login 34 | ? Login with... Github 35 | > Opening browser and waiting for you to authenticate online 36 | > You are logged in! 37 | ``` 38 | 39 | ### [data push](https://github.com/datahq/data-cli/blob/master/docs/push.md) 40 | 41 | Upload a dataset or a single file to the DataHub: 42 | ```bash 43 | $ data push mydata.csv 44 | ? Please, confirm name for this dataset: 45 | 0-selfish-cougar-7 mydataset 46 | ? Please, confirm title for this dataset: 47 | Mydataset Mydataset 48 | Uploading [******************************] 100% (0.0s left) 49 | your data is published!
50 | 🔗 https://datahub.io/myname/mydataset/v/1 (copied to clipboard) 51 | ``` 52 | 53 | Alternatively, you can set the name without interaction: 54 | ```bash 55 | $ data push mydata.csv --name=mydataset 56 | Uploading [******************************] 100% (0.0s left) 57 | your data is published! 58 | 🔗 https://datahub.io/myname/mydataset/v/1 (copied to clipboard) 59 | ``` 60 | 61 | **Note:** by default, your dataset's findability is public (the `--public` flag). Use the `--unlisted` flag if you do not want it to appear in the search results. 62 | 63 | ### [data get](https://github.com/datahq/data-cli/blob/master/docs/get.md) 64 | 65 | Get a dataset from the DataHub or GitHub: 66 | ```bash 67 | $ data get http://datahub.io/core/gold-prices 68 | Time elapsed: 1.72 s 69 | Dataset/file is saved in "core/gold-prices" 70 | ``` 71 | 72 | ### [data info](https://github.com/datahq/data-cli/blob/master/docs/info.md) 73 | 74 | Shows info about a dataset (local or remote): 75 | ```bash 76 | $ data info http://datahub.io/core/gold-prices 77 | # Gold Prices (Monthly in USD) 78 | 79 | Monthly gold prices since 1950 in USD (London market). Data is sourced from the Bundesbank. 80 | 81 | ## Data 82 | * [Bundesbank statistic ... [see more below] 83 | 84 | ## RESOURCES 85 | ┌───────────────────┬────────┬───────┬───────┐ 86 | │ Name │ Format │ Size │ Title │ 87 | ├───────────────────┼────────┼───────┼───────┤ 88 | │ data_csv │ csv │ 16172 │ │ 89 | ├───────────────────┼────────┼───────┼───────┤ 90 | │ data_json │ json │ 32956 │ │ 91 | ├───────────────────┼────────┼───────┼───────┤ 92 | │ gold-prices_zip │ zip │ 17755 │ │ 93 | ├───────────────────┼────────┼───────┼───────┤ 94 | │ data │ csv │ 16170 │ │ 95 | └───────────────────┴────────┴───────┴───────┘ 96 | 97 | ## README 98 | Monthly gold prices since 1950 in USD (London market). Data is sourced from the Bundesbank. 99 | ... 100 | 101 | ### Licence 102 | ... 103 | ``` 104 | 105 | ### [data cat](https://github.com/datahq/data-cli/blob/master/docs/cat.md) 106 | 107 | Works similarly to the Unix `cat` command, but it also handles remote resources and can convert tabular data into different formats: 108 | ```bash 109 | $ data cat http://datahub.io/core/gold-prices/r/0.csv 110 | ┌──────────────────────────────────────┬──────────────────────────────────────┐ 111 | │ date │ price │ 112 | ├──────────────────────────────────────┼──────────────────────────────────────┤ 113 | │ 1950-02-01 │ 34.730 │ 114 | ├──────────────────────────────────────┼──────────────────────────────────────┤ 115 | │ 1950-03-01 │ 34.730 │ 116 | 117 | ........... 118 | ``` 119 | You can also convert tabular data into different formats (the source can be remote as well): 120 | ```bash 121 | $ data cat prices.csv prices.md 122 | > All done! Your data is saved in "prices.md" 123 | user@pc:~/Downloads$ cat prices.md 124 | | date | price | 125 | | ---------- | -------- | 126 | | 1950-02-01 | 34.730 | 127 | | 1950-03-01 | 34.730 | 128 | ``` 129 | 130 | ### [data init](https://github.com/datahq/data-cli/blob/master/docs/init.md) 131 | 132 | Data-cli has an `init` command that automatically generates Data Package metadata: it scans the current directory for data files and infers a [table schema] for tabular files: 133 | ```bash 134 | $ data init 135 | This process initializes a new datapackage.json file. 136 | Once there is a datapackage.json file, you can still run `data init` 137 | to update/extend it. 138 | Press ^C at any time to quit. 139 | 140 | ? Enter Data Package name prices 141 | ?
Enter Data Package title prices 142 | ? Do you want to add following file as a resource "prices.csv" - y/n? y 143 | prices.csv is just added to resources 144 | ? Do you want to add following file as a resource "prices.xls" - y/n? y 145 | prices.xls is just added to resources 146 | 147 | ? Going to write to /home/user/Downloads/datapackage.json: 148 | { 149 | "name": "prices", 150 | "title": "prices", 151 | "resources": [ 152 | { 153 | "path": "prices.csv", 154 | "name": "prices", 155 | "format": "csv", 156 | .... 157 | }, 158 | "schema": { 159 | "fields": [ 160 | { 161 | "name": "date", 162 | "type": "date", 163 | "format": "default" 164 | }, 165 | { 166 | ........ 167 | { 168 | "path": "prices.xls", 169 | "pathType": "local", 170 | "name": "prices", 171 | "format": "xls", 172 | "mediatype": "application/vnd.ms-excel", 173 | "encoding": "windows-1250" 174 | } 175 | ] 176 | } 177 | 178 | 179 | Is that OK - y/n? y 180 | datapackage.json file is saved in /home/user/Downloads/datapackage.json 181 | ``` 182 | 183 | ### [data validate](https://github.com/datahq/data-cli/blob/master/docs/validate.md) 184 | 185 | ```bash 186 | $ data validate path/to/correct/datapackage 187 | > Your Data Package is valid! 188 | ``` 189 | ```bash 190 | $ data validate path/to/invalid-data 191 | > Error! Validation has failed for "missing-column" 192 | > Error! The column header names do not match the field names in the schema on line 2 193 | 194 | ``` 195 | 196 | ### data help 197 | 198 | You can also run the "help" command in your terminal to see the command docs: 199 | ```shell 200 | $ data help 201 | 'General description' 202 | $ data help push 203 | > 'push command description' 204 | 205 | # data help get 206 | # data help init 207 | # etc ... 208 | ``` 209 | 210 | ## Installation 211 | 212 | ``` 213 | npm install data-cli --global 214 | ``` 215 | After installation you can run `data-cli` using the name `data`: 216 | ``` 217 | data --version 218 | > 0.8.9 219 | ``` 220 | 221 | If you're not using NPM you can install the `data-cli` binaries by following [these instructions](https://datahub.io/docs/getting-started/installing-data#installing-binaries). 222 | 223 | # For developers 224 | 225 | [![Build Status](https://travis-ci.org/datahq/data-cli.svg?branch=master)](https://travis-ci.org/datahq/data-cli) 226 | [![XO code style](https://img.shields.io/badge/code_style-XO-5ed9c7.svg)](https://github.com/sindresorhus/xo) 227 | [![Issues](https://img.shields.io/badge/issue-tracker-orange.svg)](https://github.com/datahq/data-cli/issues) 228 | 229 | ## Configuration 230 | 231 | Configuration is in `~/.config/datahub/config.json`. In general, you should not need to edit this by hand. You can also override any of the variables in there using environment variables or on the command line by using the same name.
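For illustration only, here is a rough sketch of what `config.json` might contain. This is not an authoritative reference: the keys are inferred from what the CLI reads via `config.get()` elsewhere in this repo (`api`, `domain`, `token`, `profile`), and every value below is a placeholder.

```json
{
  "api": "<DataHub API endpoint>",
  "domain": "https://datahub.io",
  "token": "<JWT token obtained via data login>",
  "profile": {
    "id": "<your user id>",
    "username": "<your username>"
  }
}
```

For example, to point the CLI at a different API endpoint for a single command: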
232 | 233 | ``` 234 | $ data login --api https://api-testing.datahub.io 235 | ``` 236 | 237 | NB: you can set a custom location for the `config.json` config file using the `DATAHUB_JSON` environment variable e.g.: 238 | 239 | ``` 240 | export DATAHUB_JSON=~/.config/datahub/my-special-config.json 241 | ``` 242 | 243 | ## Environment 244 | 245 | *You need to have Node.js version >7.6* 246 | 247 | **NOTE:** if you're a developer, you need to set `datahub=dev` environment variable so your usage of the CLI isn't tracked in the analytics: 248 | 249 | It is recommended that you set this up permanently, e.g., MacOS users need to edit `~/.bash_profile` file - add this script in your `~/.bash_profile`: 250 | 251 | ```bash 252 | # The next line sets 'datahub' env var so data-cli doesn't send tracking data to Analytics 253 | export datahub=dev 254 | ``` 255 | 256 | and then restart your terminal. 257 | 258 | ## Install 259 | 260 | ``` 261 | $ npm install 262 | ``` 263 | 264 | ## Running tests 265 | 266 | We use Ava for our tests. For running tests use: 267 | 268 | ``` 269 | $ [sudo] npm test 270 | ``` 271 | 272 | To run tests in watch mode: 273 | 274 | ``` 275 | $ [sudo] npm run watch:test 276 | ``` 277 | 278 | We also have tests for `push` command that publishes some of test datasets to DataHub. While Travis runs all tests on every commit, the `push` tests are run only on tagged commits. To run these tests locally you need to have credentials for 'test' user and use following command: 279 | 280 | ``` 281 | $ [sudo] npm test test/push/push.test.js 282 | ``` 283 | 284 | ## Lint 285 | 286 | We use XO for checking our code for JS standard/convention/style: 287 | 288 | ```bash 289 | # When you run tests, it first runs lint: 290 | $ npm test 291 | 292 | # To run lint separately: 293 | $ npm run lint # shows errors only 294 | 295 | # Fixing erros automatically: 296 | $ xo --fix 297 | ``` 298 | -------------------------------------------------------------------------------- /bin/data-push.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const fs = require('fs') 3 | const path = require('path') 4 | 5 | const minimist = require('minimist') 6 | const urljoin = require('url-join') 7 | const inquirer = require('inquirer') 8 | const hri = require('human-readable-ids').hri 9 | const {Dataset, File, xlsxParser, isDataset, isUrl} = require('data.js') 10 | const { write: copyToClipboard } = require('clipboardy') 11 | const toArray = require('stream-to-array') 12 | const {DataHub, Validator, authenticate, config, Agent} = require('datahub-client') 13 | const ua = require('universal-analytics') 14 | const ProgressBar = require('progress') 15 | 16 | // Ours 17 | const {customMarked} = require('../lib/utils/tools.js') 18 | const {error, handleError} = require('../lib/utils/error') 19 | const wait = require('../lib/utils/output/wait') 20 | const info = require('../lib/utils/output/info.js') 21 | 22 | 23 | const argv = minimist(process.argv.slice(2), { 24 | string: ['push', 'sheets'], 25 | boolean: ['help', 'test', 'debug', 'interactive', 'unlisted', 'private', 'zip', 'sqlite'], 26 | alias: {help: 'h', interactive: 'i', sheets: 'sheet'} 27 | }) 28 | 29 | const pushMarkdown = fs.readFileSync(path.join(__dirname, '../docs/push.md'), 'utf8') 30 | const help = () => { 31 | console.log('\n' + customMarked(pushMarkdown)) 32 | } 33 | 34 | if (argv.help) { 35 | help() 36 | process.exit(0) 37 | } 38 | 39 | Promise.resolve().then(async () => { 40 | let stopSpinner = () 
=> {} 41 | // First check if user is authenticated 42 | const apiUrl = config.get('api') 43 | const token = config.get('token') 44 | let out = {authenticated: true} 45 | if (!argv.test) { 46 | try { 47 | out = await authenticate(apiUrl, token) 48 | } catch (err) { 49 | await handleError(err) 50 | process.exit(1) 51 | } 52 | } 53 | if (!out.authenticated) { 54 | info('You need to login in order to push your data. Please, use `data login` command.') 55 | process.exit(0) 56 | } 57 | try { 58 | const filePath = argv._[0] || process.cwd() 59 | let dataset 60 | if (isDataset(filePath)) { 61 | if (isUrl(filePath)) { 62 | console.log('Error: You can push only local datasets.') 63 | process.exit(0) 64 | } 65 | try { 66 | dataset = await Dataset.load(filePath) 67 | } catch(err){ 68 | info("You can run:") 69 | info("'data validate' to check your data.") 70 | info("'data init' to create a datapackage.") 71 | info("'data help push' to get more info.") 72 | await handleError(err) 73 | process.exit(1) 74 | } 75 | } else { 76 | dataset = await prepareDatasetFromFile(filePath) 77 | } 78 | 79 | dataset.resources.forEach(resource => { 80 | if (resource.constructor.name === 'FileInline') { 81 | throw new Error('We do not support dataset with inlined data') 82 | } 83 | }) 84 | 85 | stopSpinner = wait('Commencing push ...') 86 | 87 | const datahubConfigs = { 88 | apiUrl: config.get('api'), 89 | token: config.get('token'), 90 | debug: argv.debug, 91 | ownerid: config.get('profile') ? config.get('profile').id : config.get('id'), 92 | owner: config.get('profile') ? config.get('profile').username : config.get('username') 93 | } 94 | let findability = 'published' 95 | if (argv.unlisted) { 96 | findability = 'unlisted' 97 | } 98 | if (argv.private) { 99 | findability = 'private' 100 | } 101 | const datahub = new DataHub(datahubConfigs) 102 | const options = { 103 | findability: findability, 104 | sheets: argv.sheets ? 
argv.sheets.toString() : undefined, 105 | outputs: { 106 | zip: argv.zip, 107 | sqlite: argv.sqlite 108 | }, 109 | schedule: argv.schedule 110 | } 111 | 112 | // Validate metadata prior to pushing: 113 | // Let's normalize resource names as it is common when they're capitalized 114 | // or have spaces - they're generated from file names: 115 | for (const idx in dataset.descriptor.resources) { 116 | if (!dataset.descriptor.resources[idx].name.match(validationPatterns['nameValidation'])) { 117 | dataset.descriptor.resources[idx].name = dataset.descriptor.resources[idx].name.replace(/\s+/g, '-').toLowerCase() 118 | dataset.resources[idx].descriptor.name = dataset.resources[idx].descriptor.name.replace(/\s+/g, '-').toLowerCase() 119 | } 120 | } 121 | const validator = new Validator() 122 | await validator.validateMetadata(dataset.descriptor) 123 | stopSpinner() 124 | // Show the progress bars for each file being uploaded: 125 | const progressBars = [] 126 | // Listen for 'upload' events being emited from the DataHub class: 127 | datahub.on('upload', (message) => { 128 | // Check if a bar is already initiated: 129 | const barItem = progressBars.find(item => item.file === message.file) 130 | if (barItem) { 131 | try { 132 | if (message.completed) { 133 | if (process.platform !== 'win32') { 134 | barItem.bar.interrupt('Completed: ' + message.file) 135 | } else { 136 | info('Completed: ' + message.file) 137 | } 138 | } else { 139 | barItem.bar.tick(message.chunk.length) 140 | } 141 | } catch (err) { 142 | info(err.message) 143 | } 144 | } else { // If a bar doesn't exist initiate one: 145 | progressBars.push({ 146 | file: message.file, 147 | bar: new ProgressBar(` Uploading [:bar] :percent (:etas left) ${message.file}`, { 148 | complete: '*', 149 | incomplete: ' ', 150 | width: 30, 151 | total: message.total, 152 | clear: process.platform === 'win32' ? 
false : true 153 | }) 154 | }) 155 | } 156 | }) 157 | 158 | const res = await datahub.push(dataset, options) 159 | // Analytics: 160 | if (process.env.datahub !== 'dev') { 161 | const visitor = ua('UA-80458846-4') 162 | visitor.set('uid', datahubConfigs.ownerid) 163 | // Check if it's the first push: 164 | const agent = new Agent(datahubConfigs.apiUrl, {debug: argv.debug}) 165 | let response = await agent.fetch( 166 | `/metastore/search/events?owner="${datahubConfigs.owner}"&size=0`, 167 | {headers: {'Auth-Token': token}} 168 | ) 169 | if (response.ok) { 170 | response = await response.json() 171 | if (response.summary && response.summary.total === 0) { // It's the first push 172 | visitor.event('cli', 'push-first').send() 173 | } 174 | // Count sucessful pushes: 175 | visitor.event('cli', 'push-success').send() 176 | } 177 | } 178 | // Print success message and provide URL to showcase page: 179 | let revisionId = res.flow_id.split('/').pop() 180 | const message = '\n🙌 your data is published!\n' 181 | const url = urljoin(config.get('domain'), datahubConfigs.owner, dataset.descriptor.name, 'v', revisionId) 182 | let copied = ' (copied to clipboard)' 183 | try { 184 | await copyToClipboard(url) 185 | } catch (err) { 186 | copied = '' 187 | console.log(`Warning: Failed to copy to clipboard - ${err.message}`) 188 | } 189 | console.log(message + '🔗 ' + url + copied) 190 | } catch (err) { 191 | stopSpinner() 192 | if (argv.debug) { 193 | console.log('> [debug]\n' + err.stack) 194 | } 195 | await handleError(err) 196 | process.exit(1) 197 | } 198 | }) 199 | 200 | const prepareDatasetFromFile = async filePath => { 201 | let file 202 | if (isUrl(filePath)) { 203 | file = await File.load(filePath, {format: argv.format}) 204 | } else { 205 | const pathParts = path.parse(filePath) 206 | file = await File.load(pathParts.base, {basePath: pathParts.dir, format: argv.format}) 207 | } 208 | // List of formats that are known as tabular 209 | const knownTabularFormats = ['csv', 'tsv', 'dsv'] 210 | if (knownTabularFormats.includes(file.descriptor.format)) { 211 | try { 212 | await file.addSchema() 213 | } catch(err){ 214 | error("tabular file is invalid: " + file.path) 215 | error(err.message) 216 | if (argv.debug){ 217 | console.log('> [debug]\n' + err.stack) 218 | } 219 | process.exit(1) 220 | } 221 | 222 | if (argv.interactive) { 223 | // Prompt user with headers and fieldTypes 224 | const headers = file.descriptor.schema.fields.map(field => field.name) 225 | const fieldTypes = file.descriptor.schema.fields.map(field => field.type) 226 | const questions = [ 227 | ask('headers', headers, 'y', 'yesOrNo'), 228 | ask('types', fieldTypes, 'y', 'yesOrNo') 229 | ] 230 | const answers = await inquirer.prompt(questions) 231 | 232 | if (answers.headers === 'n' || answers.types === 'n') { 233 | // Maybe nicer exit - user has chosen not to proceed for now ... 
234 | throw new Error('Please, generate datapackage.json (you can use "data init") and push.') 235 | } 236 | } 237 | } 238 | 239 | let dpName, dpTitle 240 | if (argv.name) { // If name is provided in args then no user prompting: 241 | dpName = argv.name.toString() 242 | // Make unslugifies version for title: 243 | dpTitle = dpName.replace(/-+/g, ' ') 244 | dpTitle = dpTitle.charAt(0).toUpperCase() + dpTitle.slice(1) 245 | } else { 246 | dpName = file.descriptor.name.replace(/\s+/g, '-').toLowerCase() 247 | // Add human readable id so that this packge does not conflict with other 248 | // packages (name is coming from the file name which could just be 249 | // data.csv) 250 | dpName += '-' + hri.random() 251 | // Confirm dpName with user: 252 | let answer = await inquirer.prompt([ask('name', dpName, dpName, 'nameValidation')]) 253 | dpName = answer.name 254 | // Make unslugifies version for title: 255 | dpTitle = dpName.replace(/-+/g, ' ') 256 | dpTitle = dpTitle.charAt(0).toUpperCase() + dpTitle.slice(1) 257 | // Confirm title with user: 258 | answer = await inquirer.prompt([ask('title', dpTitle, dpTitle)]) 259 | dpTitle = answer.title 260 | } 261 | 262 | const metadata = { 263 | name: dpName, 264 | title: dpTitle, 265 | resources: [] 266 | } 267 | const dataset = await Dataset.load(metadata) 268 | dataset.addResource(file) 269 | return dataset 270 | } 271 | 272 | const validationPatterns = { 273 | yesOrNo: /^[y,n]+$/, 274 | nameValidation: /^([-a-z0-9._\/])+$/ 275 | } 276 | 277 | const ask = (property, data, defaultValue, validation) => { 278 | const inquirerObj = { 279 | type: 'input', 280 | name: property, 281 | message: `Please, confirm ${property} for this dataset:\n${data}`, 282 | default: () => { 283 | return defaultValue 284 | } 285 | } 286 | if (validation) { 287 | inquirerObj.validate = value => { 288 | const pass = value.match(validationPatterns[validation]) 289 | if (pass) { 290 | return true 291 | } 292 | return `Provided value must match following pattern: ${validationPatterns[validation]}` 293 | } 294 | } 295 | return inquirerObj 296 | } 297 | -------------------------------------------------------------------------------- /DESIGN.md: -------------------------------------------------------------------------------- 1 | # Design of `data` 2 | 3 | **Note: this material is from 2017-2018** 4 | 5 | # What is the `data` tool 6 | 7 | The `data` tool performs 3 complementary tasks: 8 | 9 | * Data Publishing: it is the **DataHub command line interface** with support for pushing and getting data from the DataHub 10 | * Data Packaging: it is a command line **Data Package manager** for creating, inspecting, validating and accessing data packages and their resources 11 | * Data Wrangling: it is a lightweight command line **Data Wrangler tool** performing tasks like inspecting and cat'ing data files. 
12 | 13 | Illustrative set of commands 14 | 15 | ```bash 16 | # ===== 17 | # Publishing 18 | 19 | # data publishing 20 | data push FILE 21 | data push DIRECTORY 22 | 23 | # get a file (from DataHub) 24 | data get FILE / URL 25 | 26 | # delete a published dataset 27 | data delete 28 | 29 | # ===== 30 | # Packaging 31 | 32 | # create a data package 33 | data init 34 | 35 | # validate a data package 36 | data validate 37 | 38 | # ===== 39 | # Wrangling 40 | 41 | # data (pre)viewing and conversion 42 | data cat FILE 43 | # data conversion 44 | data cat FILE OUTFILE 45 | 46 | # DIR: data package summary (assumes it is a data package) 47 | # FIlE: print out meta and stream a summary of data (can turn off maybe in future with --no-preview) 48 | data info {FILE-or-DIR} 49 | ``` 50 | 51 | **Overview diagram** 52 | 53 | State of feature is indicated: 54 | 55 | * light green = working well 56 | * pink = working but needs work (and is priority) 57 | * pink dashed = does not exist and priority 58 | * green = exists and needs work but not priority 59 | * grey = does not exist and not a priority 60 | 61 | ```mermaid 62 | graph LR 63 | 64 | datahub["DataHub CLI tool
publish (and get)"] 65 | dpm[Data Package Manager
works with DPs] 66 | wrangler[Data Wrangler
] 67 | wrangle2["Complex wrangling"] 68 | 69 | data[data cli] 70 | 71 | data --> datahub 72 | data --> dpm 73 | data --> wrangler 74 | 75 | datahub --> push 76 | datahub --> login 77 | datahub --> get 78 | datahub --> delete 79 | 80 | dpm --> init 81 | dpm --> validate 82 | dpm --> inspect 83 | 84 | wrangler --> info 85 | wrangler --> cat 86 | wrangler --> wrangle2 87 | 88 | classDef done fill:lightgreen,stroke:#333,stroke-width:2px; 89 | classDef ok fill:green,stroke:#333,stroke-width:2px; 90 | classDef priority fill:pink,stroke:#333,stroke-width:2px; 91 | classDef prioritynotstarted fill:pink,stroke:#333,stroke-width:2px,stroke-dasharray: 5, 5; 92 | classDef prioritylow fill:grey,stroke:#333,stroke-width:1px,stroke-dasharray; 93 | class login done; 94 | class push,validate priority; 95 | class get,wrangler,delete prioritylow; 96 | class init,info,cat ok; 97 | ``` 98 | 99 | Why do we have 3 features in one: 100 | 101 | * Doing push requires a lot of other stuff including the data package lib and (some) data wrangling (see diagram in next section) 102 | * To encourage people to use the data tool for itself not just for the DataHub. (But: people have lots of other tools - do they need another one?) 103 | 104 | Question: should we focus the tool just on the DataHub part (plus some Data Packages)? 105 | 106 | * In general we focus on MVP right now (remove what is not essential!) 107 | * => drop the data wrangling stuff (?) 108 | * However, we actually need most of this and it is useful to have some of this to hand (but be careful about feature creep) 109 | 110 | ## Doing `push` requires lots of the other stuff 111 | 112 | This diagram shows what is involved with doing a push 113 | 114 | ```mermaid 115 | graph TD 116 | 117 | push[push] 118 | show[show/cat/convert] 119 | validate 120 | 121 | push --> hubapi 122 | 123 | subgraph data.js 124 | infer 125 | caststream[cast stream] 126 | objstream[object stream] 127 | tableschema[tableschema] 128 | descriptor["descriptor
(pkg/resource)"] 129 | stream[raw stream] 130 | parseid["parse identifier
(Infer filetype)"] 131 | userprovided[User
Provided] 132 | ui 133 | end 134 | 135 | subgraph datahub-client 136 | hubapi --> login 137 | hubapi --> pushlib[push] 138 | end 139 | 140 | push --> descriptor 141 | push --> stream 142 | descriptor --> tableschema 143 | 144 | show --> caststream 145 | 146 | tableschema --> infer 147 | tableschema -.-> userprovided 148 | infer --> objstream 149 | infer -.-> ui 150 | 151 | objstream --> stream 152 | stream --> parseid 153 | caststream --> objstream 154 | caststream --> tableschema 155 | 156 | hubapi[hubapi

DataHub API wrapper] 157 | ``` 158 | 159 | For more on data.js Library - See https://hackmd.io/CwZgnOCMDs0LQEMAckBMdSsnMAjaIcADKgKYBmArGOSACbRJhA==?both 160 | 161 | 162 | 163 | # What is wanted (user perspective) 164 | 165 | [DataHub] *As a Publisher I want to* 166 | 167 | 1. Push / publish a local file: data push {file} 168 | 2. Push / publish a remote url: data push {url} 169 | 3. Push / publish a data package: ... 170 | 4. Get a data package, modify and republish (?) 171 | 5. Add a view 172 | 173 | These are in priority order. 174 | 175 | [Data Packager] I want to 176 | 177 | * create a package with data files 178 | * validate a data package or file 179 | * inspect data packages 180 | * read a resource (cast) 181 | 182 | [Wrangler] I want to 183 | 184 | * inspect data files quickly (size, type etc) 185 | * convert them ... e.g. xls -> csv, csv -> json 186 | 187 | ## An imagined session 188 | 189 | ```bash 190 | # search around for datasets 191 | 192 | # I found some csv or excel or even pdf - great! I want to see if useful 193 | data cat URL 194 | 195 | # or maybe just get some info on it e.g. its size, last updated 196 | data info URL 197 | 198 | # let's download it to inspect more ... 199 | data get url 200 | 201 | data cat file 202 | 203 | # hmm - it will need some cleaning up. 204 | 205 | # time to make some notes ... 206 | vi README.md 207 | 208 | # or maybe i just want to save this file online now ... 209 | data push file 210 | 211 | # more expert users may want to creata data package first ... 212 | data init 213 | data push 214 | ``` 215 | 216 | # Operations 217 | 218 | ## `push` 219 | 220 | 221 | ```bash 222 | # create a dataset with this file (auto-generate) 223 | data push myfile.csv 224 | 225 | # what about specifying the dataset target 226 | data push myfile.csv my-existing-dataset 227 | 228 | # or with username 229 | data push {myfile.csv} @myname/xyz 230 | 231 | # or prompt for the dataset name 232 | data push myfile.csv 233 | 234 | > Dataset name: my-ram-xyz 235 | > Dataset title: ... 236 | 237 | If a dataset already exists 238 | # TODO: what if we prompt for file name from user and it is the same as an existing dataset 239 | 240 | # we can store this somewhere ... 241 | .datahub/config 242 | default=myname/mydataset 243 | 244 | data push --dataset=xyz/name myfile.csv -- replaces the file 245 | 246 | # replaces the schema 247 | data push --dataset=name --schema myschema.yaml 248 | 249 | # updates the schema 250 | data push --dataset=name --schema myschema.json 251 | 252 | # fetches the schema 253 | data get --dataset=name --schema --format=yaml 254 | 255 | 256 | data push x [to y] 257 | data push myfile.csv [{dataset}] 258 | 259 | # what could be a problem 260 | 261 | data push myfile1.csv myfile2.csv [{dataset}] 262 | 263 | dataset = [user/]name 264 | ``` 265 | 266 | 267 | As a Publisher i want to publish a file and create a new dataset 268 | 269 | ``` 270 | data push myfile.csv [dataset-name] 271 | ``` 272 | 273 | As a Publisher I want to replace a file in an existing dataset 274 | 275 | As a Publisher I want to add a file to an existing dataset 276 | 277 | ``` 278 | # if existing file with path myfile.csv or name myfile then we replace 279 | data push myfile.csv dataset-name 280 | 281 | # if existing file with name existing-file then we replace, otherwise we add as `existing-file` 282 | data push myfile.csv dataset-name:existing-file 283 | 284 | # if existing file exists so this would replace WARN the user ... 
285 | data push myfile.csv dataset-name 286 | ``` 287 | 288 | As a Publisher I want to delete a file from an existing dataset 289 | 290 | ``` 291 | data strip dataset-name[:file-name] 292 | ``` 293 | 294 | 295 | 296 | ### data push {file} - Push / publish a local file: 297 | 298 | ``` 299 | ✓ data push some.csv 300 | ✓ data push some.xls 301 | ✓? data push some.xlsx 302 | ✓? data push a-random-file.xxx 303 | ✓? data push some.pdf // like a random file ... 304 | ✓? data push some.png // does it get viewed ... 305 | 306 | ✓? data push some.zip // inside are files => should act like pushing a directory? 307 | 308 | => should create data package with just README and no resources 309 | data push README.md (?) 310 | 311 | data push datapackage.json (?) 312 | 313 | data push file1.csv file2.csv (?) 314 | data push somefile.parquet 315 | ``` 316 | 317 | * Do we guess media type 318 | * Do we add hash 319 | * Do we prompt for file title 320 | * Do we validate before pushing 321 | * Do we add file size 322 | 323 | ### Issues 324 | 325 | * No progress bar on upload 326 | * No updates about what is happening on a push (we could update on each step successfully completed) 327 | * We should skip re-uploading a file to rawstore if already uploaded 328 | * TODO: does rawstore tell us this atm? 329 | * no support for data push and complex flows and flows involving automation 330 | * Could we automate creating the flow.yml? 331 | 332 | 2017-12-26 pushing to https://datahub.io/rufuspollock1/world-wealth-and-income-database/v/1 says at the top "Your data is safely stored and is getting processed - it will be here soon!" BUT I can already see a page (after a scan i do realise no files are there but that's sort of weird) and there is no other update information. Looking in JS debug i find: 333 | 334 | ``` 335 | Uncaught (in promise) TypeError: Cannot read property 'forEach' of undefined 336 | at https://datahub.io/static/dpr-js/dist/bundle.js:69:10210 337 | at r (https://datahub.io/static/dpr-js/dist/bundle.js:43:6760) 338 | at Generator._invoke (https://datahub.io/static/dpr-js/dist/bundle.js:43:7805) 339 | at Generator.e.(anonymous function) [as next] (https://datahub.io/static/dpr-js/dist/bundle.js:43:6939) 340 | at r (https://datahub.io/static/dpr-js/dist/bundle.js:50:5810) 341 | at https://datahub.io/static/dpr-js/dist/bundle.js:50:5912 342 | at 343 | ``` 344 | 345 | 346 | 2017-12-23 try pushing a data package with just a readme (not even a datapackage.json) and check whether it works (why? I'd like to push research datasets where i'm still in the process of digging stuff up) 347 | 348 | ## `get` 349 | 350 | ### Issues 351 | 352 | 2017-12-23 data get command hung on me and I can't debug ... (no debug flag) => I looked into adding this but introducing debug per command is a pain (we do it once by hand atm for push) => it should be systematic => refactoring dispatch code in the cli (and maybe therefore into the datahub-lib code ...) 353 | 354 | data get command should have option to pull "remote" resources to local paths ... 355 | 356 | 2017-12-23 (?) data get command should pretty print the datapackage.json 357 | 358 | ## `login` 359 | 360 | Login is working 361 | 362 | ## `cat` 363 | 364 | ... 365 | 366 | ## `init` 367 | 368 | ### Issues 369 | 370 | data init guesses types wrong e.g. for world 371 | incomes database. 
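(A possible workaround, sketched rather than verified: after `data init`, hand-edit the inferred `type` values in the generated `datapackage.json`. The field names below are invented for illustration; the structure follows the init example in the README.)

```json
"schema": {
  "fields": [
    {"name": "country", "type": "string"},
    {"name": "year", "type": "integer"},
    {"name": "income_share", "type": "number"}
  ]
}
```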
372 | 373 | 2017-12-26 data init does not add a license field by default 374 | 375 | ## `validate` 376 | 377 | ### Issues with data validation 378 | 379 | * Does not run offline as errors on failure to access a profile (who cares about profiles by default - 99% of what i want to check is the data ...) 380 | 381 | ## `info` 382 | 383 | ``` 384 | For FILE: 385 | 386 | {file-path} 387 | size: .. 388 | md5: ... 389 | format: ... 390 | encoding: ... 391 | 392 | {PREVIEW} 393 | 394 | data info 395 | 396 | ``` 397 | 398 | ## `delete` / `purge` / `hide` 399 | 400 | As Publisher I want to delete a dataset permanently so it does not exist 401 | 402 | ``` 403 | data purge dataset-name 404 | 405 | # prompt user to type out dataset name to 406 | 407 | 408 | # TODO: since people look for delete do we want to explain you hide the dataset 409 | ?? 410 | data hide 411 | data unpublish 412 | data delete # respond with use data push --findability ... 413 | ``` 414 | 415 | # Push File - Detailed Analysis 416 | 417 | Levels: 418 | 419 | 0. Already have Data Package (?) 420 | 1. Good CSV 421 | 2. Good Excel 422 | 3. Bad data (i.e. has ...) 423 | 3. Something else 424 | 425 | ``` 426 | data push {file-or-directory} 427 | ``` 428 | 429 | How does data push work? 430 | 431 | ``` 432 | # you are pushing the raw file 433 | # and the extraction to get one or more data tables ... 434 | # in the background we are creating a data package + pipeline 435 | data push {file} 436 | 437 | Algorithm: 438 | 439 | 1. Detect type / format 440 | 2. Choose the data (e.g. sheet from excel) 441 | 3. Review the headers 442 | 4. Infer data-types and review 443 | 5. [Add constraints] 444 | 6. Data validation 445 | 7. Upload 446 | 8. Get back a link - view page (or the raw url) e.g. http://datapackaged.com/core/finance-vix 447 | * You can view, share, publish, [fork] 448 | 449 | 1. Detect file type 450 | => file extension 451 | 1. Offer guess 452 | 2. Probable guess (options?) 453 | 3. Unknown - tell us 454 | 455 | 1B. Detect encoding (for CSV) 456 | 457 | 2. Choose the data 458 | 1. 1 sheet => ok 459 | 2. Multiple sheets guess and offer 460 | 3. Multiple sheets - ask them (which to include) 461 | 462 | 2B: bad data case - e.g. selecting within table 463 | 464 | 3. Review the headers 465 | * Here is what we found 466 | * More than one option for headers - try to reconcile 467 | * 468 | 469 | 470 | ### Upload: 471 | 472 | * raw file with name a function of the md5 hash 473 | * Pros: efficient on space (e.g. same file stored once but means you need to worry about garbage collection?) 474 | * the pipeline description: description of data and everything else we did [into database] 475 | 476 | Then pipeline runs e.g. load into a database or into a data package 477 | 478 | * stores output somewhere ... 479 | 480 | Viewable online ... 481 | 482 | Note: 483 | data push url # does not store file 484 | data push file # store in rawstore 485 | 486 | ### BitStore 487 | 488 | /rawstore/ - content addressed storage (md5 or sha hashed) 489 | /packages/{owner}/{name}/{tag-or-pipeline} 490 | ``` 491 | 492 | 493 | Try this for a CSV file 494 | 495 | ``` 496 | data push mydata.csv 497 | 498 | # review headers 499 | 500 | # data types ... 501 | 502 | 503 | Upload 504 | 505 | * csv file gets stored as blob md5 ... 506 | * output of the pipeline stored ... 507 | * canonical CSV gets generated ... 508 | ``` 509 | 510 | 511 | 512 | Data Push directory 513 | 514 | ``` 515 | data push {directory} 516 | 517 | # could just do data push file for each file but ... 
518 | # that could be tedious 519 | # once I've mapped one file you try reusing that mapping for others ... 520 | # .data directory that stores the pipeline and the datapackage.json 521 | ``` 522 | 523 | 524 | ## Push File - Sequence Diagram 525 | 526 | CLI architecture 527 | 528 | ```mermaid 529 | sequenceDiagram 530 | 531 | participant bin/push.js 532 | participant datahub 533 | participant data 534 | 535 | bin/push.js ->> data: new Package(path) 536 | data -->> bin/push.js: pkg obj 537 | bin/push.js ->> data: pkg.load() 538 | data ->> data: load 539 | data -->> bin/push.js: loaded pkg 540 | bin/push.js ->> datahub: new DataHub({...}) 541 | datahub -->> bin/push.js: datahub obj 542 | bin/push.js ->> datahub: datahub.push(pkg) 543 | datahub ->> data: pkg.resources 544 | data -->> datahub: loaded resources 545 | datahub ->> data: Resource.load(datapackage.json) 546 | datahub ->> datahub: authorize 547 | datahub ->> datahub: upload 548 | datahub ->> datahub: makeSourceSpec 549 | datahub ->> datahub: upload source spec 550 | datahub -->> bin/push.js: 0 or 1 551 | ``` 552 | 553 | ### Analysis 554 | 555 | What are the components involved ...? 556 | 557 | ```mermaid 558 | graph TD 559 | 560 | cli[data push myfile.csv] 561 | 562 | cli --> parseid["parsePath(myfile.csv)"

path=myfile.csv, sourceType=local, format=csv, mediaType=text/csv] 563 | 564 | parseid --> getstr["getStream(descriptor) - depending on url or local"

node stream] 565 | 566 | getstr --> parsestr["parseStream(rawStream)"

object iterator/node obj stream] 567 | 568 | parsestr --> infer["infer(objstream)

tableschema.json"] 569 | 570 | infer --> ui[show user infer and check] 571 | ``` 572 | 573 | # Appendix: Notes on NodeJS streams 574 | 575 | https://www.bennadel.com/blog/2692-you-have-to-explicitly-end-streams-after-pipes-break-in-node-js.htm 576 | 577 | https://gist.github.com/spion/ecdc92bc5de5b381da30 578 | 579 | https://github.com/maxogden/mississippi <-- recommended for managing node streams in consistent ways 580 | -------------------------------------------------------------------------------- /test/push/push.test.js: -------------------------------------------------------------------------------- 1 | // These tests are run only on tagged commits 2 | 3 | const test = require('ava') 4 | const clipboardy = require('clipboardy') 5 | 6 | const {runcli} = require('../cli.test.js') 7 | 8 | 9 | // ===================== 10 | // DATA-CLI PUSH correct 11 | 12 | // QA tests [pushing valid CSV file] 13 | 14 | test.serial('push command succeeds with regular CSV file', async t => { 15 | const path_ = 'test/fixtures/test-data/files/csv/separators/comma.csv' 16 | const args = '--name=comma-separated' 17 | const result = await runcli('push', path_, args) 18 | const stdout = result.stdout.split('\n') 19 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 20 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/comma-separated/v/')) 21 | t.truthy(hasPublishedMessage) 22 | t.truthy(hasURLtoShowcase) 23 | const whatsInClipboard = await clipboardy.read() 24 | t.true(whatsInClipboard.includes('https://datahub.io/test/comma-separated/v/')) 25 | }) 26 | 27 | // end of [pushing valid CSV file] 28 | 29 | test.serial('push --public', async t => { 30 | const path_ = 'test/fixtures/test-data/files/csv/separators/comma.csv' 31 | const args = ['--name=public-test', '--public', '--debug'] 32 | const result = await runcli('push', path_, ...args) 33 | 34 | t.truthy(result.stdout.includes('"findability": "published"')) 35 | }) 36 | 37 | // QA tests [pushing valid dataset from path] 38 | 39 | test.serial('push command succeeds for valid dataset', async t => { 40 | const path_ = 'test/fixtures/test-data/packages/basic-csv' 41 | const result = await runcli('push', path_) 42 | const stdout = result.stdout.split('\n') 43 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 44 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/basic-csv/v/')) 45 | t.truthy(hasPublishedMessage) 46 | t.truthy(hasURLtoShowcase) 47 | const whatsInClipboard = await clipboardy.read() 48 | t.true(whatsInClipboard.includes('https://datahub.io/test/basic-csv/v/')) 49 | }) 50 | 51 | // end of [pushing valid dataset from path] 52 | 53 | // QA tests [pushing valid dataset from working directory] 54 | test.serial('pushing valid dataset from working directory', async t =>{ 55 | const path_ = 'test/fixtures/test-data/packages/basic-csv' 56 | const usualWorkingDir = process.cwd() 57 | process.chdir(path_) 58 | console.log('Working directory changed: ' + process.cwd()) 59 | // push test 60 | const result = await runcli('push') 61 | const stdout = result.stdout.split('\n') 62 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 63 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/basic-csv/v/')) 64 | t.truthy(hasPublishedMessage) 65 | t.truthy(hasURLtoShowcase) 66 | const whatsInClipboard = await clipboardy.read() 67 | 
t.true(whatsInClipboard.includes('https://datahub.io/test/basic-csv/v/')) 68 | // change working dir to default, so other tests will not fail 69 | process.chdir(usualWorkingDir) 70 | console.log('Working directory restored: ' + process.cwd()) 71 | }) 72 | 73 | // QA tests [pushing multiple CSV files together] - should push only one file and show a WARNING message 74 | // https://datahub.io/test/zero/v/87 75 | test.serial.failing('pushing multiple CSV files Warning message', async t => { 76 | const path_ = 'test/fixtures/test-data/files/csv/separators/comma.csv' 77 | const path2_ = 'test/fixtures/test-data/files/csv/separators/colon.csv' 78 | const args = '--name=comma-separated' 79 | const result = await runcli('push', path_, path2_, args) 80 | const stdout = result.stdout.split('\n') 81 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 82 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/comma-separated/v/')) 83 | const hasWarningMessage = stdout.find(item => item.includes(`Warning: pushing only the ${path_} file.`)) 84 | t.truthy(hasPublishedMessage) 85 | t.truthy(hasURLtoShowcase) 86 | t.truthy(hasWarningMessage) 87 | const whatsInClipboard = await clipboardy.read() 88 | t.true(whatsInClipboard.includes('https://datahub.io/test/comma-separated/v/')) 89 | }) 90 | 91 | 92 | // QA tests [pushing valid dataset with path to datapackage.json] 93 | 94 | test.serial('push command succeeds for valid dataset with path to dp.json', async t => { 95 | const path_ = 'test/fixtures/test-data/packages/basic-csv/datapackage.json' 96 | const result = await runcli('push', path_) 97 | const stdout = result.stdout.split('\n') 98 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 99 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/basic-csv/v/')) 100 | t.truthy(hasPublishedMessage) 101 | t.truthy(hasURLtoShowcase) 102 | const whatsInClipboard = await clipboardy.read() 103 | t.true(whatsInClipboard.includes('https://datahub.io/test/basic-csv/v/')) 104 | }) 105 | 106 | // end of [pushing valid dataset with path to datapackage.json] 107 | 108 | // QA tests [pushing valid CSV from URL] 109 | 110 | test.serial('push command succeeds with regular CSV file from URL', async t => { 111 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/separators/comma.csv' 112 | const args = '--name=comma-separated' 113 | const result = await runcli('push', url_, args) 114 | const stdout = result.stdout.split('\n') 115 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 116 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/comma-separated/v/')) 117 | t.truthy(hasPublishedMessage) 118 | t.truthy(hasURLtoShowcase) 119 | const whatsInClipboard = await clipboardy.read() 120 | t.true(whatsInClipboard.includes('https://datahub.io/test/comma-separated/v/')) 121 | }) 122 | 123 | // end of [pushing valid CSV from URL] 124 | 125 | 126 | // ======================== 127 | // Invalid metadata or data 128 | 129 | // QA tests [Push: Invalid datapackage.json] 130 | 131 | test('push command fails with invalid JSON descriptor', async t => { 132 | let path_ = 'test/fixtures/test-data/packages/invalid-json-single-quotes' 133 | let result = await runcli('push', path_) 134 | let stdout = result.stdout.split('\n') 135 | let hasErrorMsg = stdout.find(item => item.includes('> Error! 
Unexpected token \' in JSON at position 27')) 136 | t.truthy(hasErrorMsg) 137 | // Suggests running validate command: 138 | const hasSuggestionMsg = stdout.find(item => item.includes('> \'data validate\' to check your data.')) 139 | t.truthy(hasSuggestionMsg) 140 | 141 | path_ = 'test/fixtures/test-data/packages/invalid-json-missing-comma' 142 | result = await runcli('push', path_) 143 | stdout = result.stdout.split('\n') 144 | hasErrorMsg = stdout.find(item => item.includes('> Error! Unexpected string in JSON at position 113')) 145 | t.truthy(hasErrorMsg) 146 | }) 147 | 148 | // end of [Push: Invalid datapackage.json] 149 | 150 | // QA tests [Push: Invalid descriptor metadata] 151 | 152 | test('push command fails with descriptor validation error', async t => { 153 | let path_ = 'test/fixtures/test-data/packages/invalid-descriptor' 154 | let result = await runcli('push', path_) 155 | let stdout = result.stdout.split('\n') 156 | const hasErrorMsg = stdout.find(item => item.includes('Descriptor validation error:')) 157 | t.truthy(hasErrorMsg) 158 | let hasErrorDetails = stdout.find(item => item.includes('String does not match pattern: ^([-a-z0-9._/])+$')) 159 | t.truthy(hasErrorDetails) 160 | hasErrorDetails = stdout.find(item => item.includes('at \"/name\" in descriptor')) 161 | t.truthy(hasErrorDetails) 162 | }) 163 | 164 | // end of [Push: Invalid descriptor metadata] 165 | 166 | // QA tests [Push: Missing descriptor] 167 | 168 | test('push command fails if descriptor is missing', async t => { 169 | let path_ = 'test/fixtures/test-data/packages' 170 | let result = await runcli('push', path_) 171 | let stdout = result.stdout.split('\n') 172 | const hasErrorMsg = stdout.find(item => item.includes('> Error! No datapackage.json at destination.')) 173 | t.truthy(hasErrorMsg) 174 | let suggestsToDoValidate = stdout.find(item => item.includes('data validate')) 175 | let suggestsToDoInit = stdout.find(item => item.includes('data init')) 176 | t.truthy(suggestsToDoValidate) 177 | t.truthy(suggestsToDoInit) 178 | }) 179 | 180 | // end of [Push: Missing descriptor] 181 | 182 | // QA tests [Push: pushing remote data package] 183 | 184 | test('push command fails for remote datasets', async t => { 185 | let path_ = 'https://github.com/frictionlessdata/test-data/blob/master/packages/basic-csv/datapackage.json' 186 | let result = await runcli('push', path_) 187 | let stdout = result.stdout.split('\n') 188 | const hasErrorMsg = stdout.find(item => item.includes('Error: You can push only local datasets.')) 189 | t.truthy(hasErrorMsg) 190 | }) 191 | 192 | // end of [Push: pushing remote data package] 193 | 194 | // QA tests [Push: pushing valid dataset with remote resource] 195 | 196 | test('push command succeeds for valid dataset with remote resource', async t => { 197 | let path_ = 'test/fixtures/test-data/packages/remote-csv' 198 | let result = await runcli('push', path_) 199 | let stdout = result.stdout.split('\n') 200 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 201 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/remote-resource/v/')) 202 | t.truthy(hasPublishedMessage) 203 | t.truthy(hasURLtoShowcase) 204 | const whatsInClipboard = await clipboardy.read() 205 | t.true(whatsInClipboard.includes('https://datahub.io/test/remote-resource/v/')) 206 | }) 207 | 208 | // end of [Push: pushing valid dataset with remote resource] 209 | 210 | // QA tests [Pushing invalid CSV file (irrespective of schema)] 211 | // Also includes 
[pushing invalid CSV from URL ] 212 | 213 | test.failing('push command fails for invalid local CSV file', async t => { 214 | const path_ = 'test/fixtures/test-data/packages/invalid-data/extra-column.csv' 215 | const args = '--name=extra-column' 216 | const result = await runcli('push', path_, args) 217 | const stdout = result.stdout.split('\n') 218 | const hasErrorMsg = stdout.find(item => item.includes('> Error! Number of columns is inconsistent on line 2')) 219 | t.truthy(hasErrorMsg) 220 | }) 221 | 222 | // end of [Pushing invalid CSV file (irrespective of schema)] 223 | 224 | // QA tests [Pushing packaged invalid CSV file (irrespective of schema)] 225 | 226 | test.serial('push command succeeds with packaged invalid CSV', async t => { 227 | const path_ = 'test/fixtures/test-data/packages/invalid-data' 228 | const result = await runcli('push', path_) 229 | const stdout = result.stdout.split('\n') 230 | const hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 231 | const hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/basic-csv/v/')) 232 | t.truthy(hasPublishedMessage) 233 | t.truthy(hasURLtoShowcase) 234 | const whatsInClipboard = await clipboardy.read() 235 | t.true(whatsInClipboard.includes('https://datahub.io/test/basic-csv/v/')) 236 | }) 237 | 238 | // end of [Pushing packaged invalid CSV file (irrespective of schema)] 239 | 240 | // QA tests [Push non existing file] 241 | 242 | test('push command fails for non-existing file', async t => { 243 | let path_ = 'non-existing.csv' 244 | let result = await runcli('push', path_) 245 | let stdout = result.stdout.split('\n') 246 | const hasErrorMsg = stdout.find(item => item.includes('> Error! ENOENT: no such file or directory, lstat \'non-existing.csv\'')) 247 | t.truthy(hasErrorMsg) 248 | }) 249 | 250 | // end of [Push non existing file] 251 | 252 | // QA tests [pushing empty but correct files] 253 | 254 | test('push command for empty files: no ext, html, txt, json', async t => { 255 | let path_ = 'test/fixtures/test-data/files/empty-files/empty' 256 | let args = '--name=empty-no-extension' 257 | let result = await runcli('push', path_, args) 258 | let stdout = result.stdout.split('\n') 259 | let hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 260 | let hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/empty-no-extension/v/')) 261 | t.truthy(hasPublishedMessage) 262 | t.truthy(hasURLtoShowcase) 263 | 264 | path_ = 'test/fixtures/test-data/files/empty-files/empty.html' 265 | args = '--name=empty-html' 266 | result = await runcli('push', path_, args) 267 | stdout = result.stdout.split('\n') 268 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 269 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/empty-html/v/')) 270 | t.truthy(hasPublishedMessage) 271 | t.truthy(hasURLtoShowcase) 272 | let whatsInClipboard = await clipboardy.read() 273 | t.true(whatsInClipboard.includes('https://datahub.io/test/empty-html/v/')) 274 | 275 | path_ = 'test/fixtures/test-data/files/empty-files/empty.txt' 276 | args = '--name=empty-txt' 277 | result = await runcli('push', path_, args) 278 | stdout = result.stdout.split('\n') 279 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 280 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/empty-txt/v/')) 281 | t.truthy(hasPublishedMessage) 282 | 
t.truthy(hasURLtoShowcase) 283 | 284 | path_ = 'test/fixtures/test-data/files/empty-files/empty.json' 285 | args = '--name=empty-json' 286 | result = await runcli('push', path_, args) 287 | stdout = result.stdout.split('\n') 288 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 289 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/empty-json/v/')) 290 | t.truthy(hasPublishedMessage) 291 | t.truthy(hasURLtoShowcase) 292 | whatsInClipboard = await clipboardy.read() 293 | t.true(whatsInClipboard.includes('https://datahub.io/test/empty-json/v/')) 294 | }) 295 | 296 | test('push command fails for empty files tabular files such as csv,xls', async t => { 297 | let path_ = 'test/fixtures/test-data/files/empty-files/empty.csv' 298 | let args = '--name=empty-csv' 299 | let result = await runcli('push', path_, args) 300 | let stdout = result.stdout.split('\n') 301 | let hasErrorMsg = stdout.find(item => item.includes('tabular file is invalid: test/fixtures/test-data/files/empty-files/empty.csv')) 302 | t.truthy(hasErrorMsg) 303 | 304 | path_ = 'test/fixtures/test-data/files/empty-files/empty.xls' 305 | result = await runcli('push', path_, args) 306 | args = '--name=empty-xls' 307 | stdout = result.stdout.split('\n') 308 | hasErrorMsg = stdout.find(item => item.includes('You cannot push an empty sheet. Please, add some data and try again.')) 309 | t.truthy(hasErrorMsg) 310 | }) 311 | 312 | // end of [pushing empty but correct files] 313 | 314 | // QA tests [pushing 0 bytes files] 315 | 316 | test('push command fails for zero byte files', async t => { 317 | let path_ = 'test/fixtures/test-data/files/zero-files/zero' 318 | let args = '--name=zero' 319 | let result = await runcli('push', path_, args) 320 | let stdout = result.stdout.split('\n') 321 | let hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 322 | let hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/zero/v/')) 323 | t.truthy(hasPublishedMessage) 324 | t.truthy(hasURLtoShowcase) 325 | 326 | path_ = 'test/fixtures/test-data/files/zero-files/zero.csv' 327 | result = await runcli('push', path_, args) 328 | stdout = result.stdout.split('\n') 329 | t.true(stdout[0].includes('> Error! 
tabular file is invalid:')) 330 | 331 | path_ = 'test/fixtures/test-data/files/zero-files/zero.html' 332 | result = await runcli('push', path_, args) 333 | stdout = result.stdout.split('\n') 334 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 335 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/zero/v/')) 336 | t.truthy(hasPublishedMessage) 337 | t.truthy(hasURLtoShowcase) 338 | 339 | path_ = 'test/fixtures/test-data/files/zero-files/zero.txt' 340 | result = await runcli('push', path_, args) 341 | stdout = result.stdout.split('\n') 342 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 343 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/zero/v/')) 344 | t.truthy(hasPublishedMessage) 345 | t.truthy(hasURLtoShowcase) 346 | 347 | path_ = 'test/fixtures/test-data/files/zero-files/zero.json' 348 | result = await runcli('push', path_, args) 349 | stdout = result.stdout.split('\n') 350 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 351 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/zero/v/')) 352 | t.truthy(hasPublishedMessage) 353 | t.truthy(hasURLtoShowcase) 354 | 355 | path_ = 'test/fixtures/test-data/files/zero-files/zero.xls' 356 | result = await runcli('push', path_, args) 357 | stdout = result.stdout.split('\n') 358 | let hasErrorMsg = stdout.find(item => item.includes('You can not push empty files, please add some data and try again')) 359 | t.truthy(hasErrorMsg) 360 | }) 361 | 362 | // end of [pushing 0 bytes files] 363 | 364 | 365 | // ========== 366 | // Formatting 367 | 368 | // QA tests [pushing valid CSV with force formatting wrong extention (from path and URl)] 369 | 370 | test.serial('push command succeeds for CSV with wrong ext but force formatting', async t => { 371 | const path_ = 'test/fixtures/test-data/files/wrong-extension-files/comma.txt' 372 | let argName = '--name=comma-separated' 373 | let argFormat = '--format=csv' 374 | let result = await runcli('push', path_, argName, argFormat) 375 | let stdout = result.stdout.split('\n') 376 | let hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 377 | let hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/comma-separated/v/')) 378 | t.truthy(hasPublishedMessage) 379 | t.truthy(hasURLtoShowcase) 380 | let whatsInClipboard = await clipboardy.read() 381 | t.true(whatsInClipboard.includes('https://datahub.io/test/comma-separated/v/')) 382 | 383 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/wrong-extension-files/comma.txt' 384 | result = await runcli('push', path_, argName, argFormat) 385 | stdout = result.stdout.split('\n') 386 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 387 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/comma-separated/v/')) 388 | t.truthy(hasPublishedMessage) 389 | t.truthy(hasURLtoShowcase) 390 | whatsInClipboard = await clipboardy.read() 391 | t.true(whatsInClipboard.includes('https://datahub.io/test/comma-separated/v/')) 392 | }) 393 | 394 | // end of [pushing valid CSV with force formatting wrong extention (from path and URl)] 395 | 396 | // QA tests [pushing valid XLS and XLSX with force formatting] 397 | 398 | test('push command succeeds for Excel with wrong ext but force formatting', async t => { 399 | let path_ = 
'test/fixtures/test-data/files/wrong-extension-files/sample-1-sheet.txt' 400 | let argName = '--name=sample-excel-with-force-formatting' 401 | let argFormat = '--format=xls' 402 | let result = await runcli('push', path_, argName, argFormat) 403 | let stdout = result.stdout.split('\n') 404 | let hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 405 | let hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/sample-excel-with-force-formatting/v/')) 406 | t.truthy(hasPublishedMessage) 407 | t.truthy(hasURLtoShowcase) 408 | let whatsInClipboard = await clipboardy.read() 409 | t.true(whatsInClipboard.includes('https://datahub.io/test/sample-excel-with-force-formatting/v/')) 410 | 411 | path_ = 'test/fixtures/test-data/files/wrong-extension-files/sample-1-sheet.pdf' 412 | argFormat = '--format=xlsx' 413 | result = await runcli('push', path_, argName, argFormat) 414 | stdout = result.stdout.split('\n') 415 | hasPublishedMessage = stdout.find(item => item.includes('your data is published!')) 416 | hasURLtoShowcase = stdout.find(item => item.includes('https://datahub.io/test/sample-excel-with-force-formatting/v/')) 417 | t.truthy(hasPublishedMessage) 418 | t.truthy(hasURLtoShowcase) 419 | whatsInClipboard = await clipboardy.read() 420 | t.true(whatsInClipboard.includes('https://datahub.io/test/sample-excel-with-force-formatting/v/')) 421 | }) 422 | 423 | // end of [pushing valid XLS and XLSX with force formatting] 424 | 425 | // QA test [pushing non-CSV with force formatting] 426 | 427 | test('push command fails for non-CSV with force formatting', async t => { 428 | let path_ = 'test/fixtures/test-data/files/excel/sample-1-sheet.xls' 429 | const argName = '--name=not-csv-as-csv' 430 | const argFormat = '--format=csv' 431 | let result = await runcli('push', path_, argName, argFormat) 432 | let stdout = result.stdout.split('\n') 433 | let hasExpectedErrorMsg = stdout.find(item => item.includes('> Error! tabular file is invalid')) 434 | t.truthy(hasExpectedErrorMsg) 435 | 436 | let url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/excel/sample-1-sheet.xls' 437 | result = await runcli('push', url_, argName, argFormat) 438 | stdout = result.stdout.split('\n') 439 | hasExpectedErrorMsg = stdout.find(item => item.includes('> Error! tabular file is invalid')) 440 | t.truthy(hasExpectedErrorMsg) 441 | 442 | path_ = 'test/fixtures/test-data/files/excel/sample-1-sheet.xlsx' 443 | result = await runcli('push', path_, argName, argFormat) 444 | stdout = result.stdout.split('\n') 445 | hasExpectedErrorMsg = stdout.find(item => item.includes('> Error! tabular file is invalid')) 446 | t.truthy(hasExpectedErrorMsg) 447 | 448 | url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/excel/sample-1-sheet.xlsx' 449 | result = await runcli('push', url_, argName, argFormat) 450 | stdout = result.stdout.split('\n') 451 | hasExpectedErrorMsg = stdout.find(item => item.includes('> Error!
tabular file is invalid')) 452 | t.truthy(hasExpectedErrorMsg) 453 | }) 454 | 455 | // end of [pushing non-CSV with force formatting] 456 | 457 | // QA test [pushing non-CSV with force formatting (non-tabular)] 458 | 459 | test('push command fails for non-CSV (non-tabular) files with force formatting', async t => { 460 | let path_ = 'test/fixtures/test-data/files/other/sample.json' 461 | const argName = '--name=not-csv-as-csv' 462 | const argFormat = '--format=csv' 463 | let result = await runcli('push', path_, argName, argFormat) 464 | let stdout = result.stdout.split('\n') 465 | let hasExpectedErrorMsg = stdout.find(item => item.includes('> Error! tabular file is invalid')) 466 | t.truthy(hasExpectedErrorMsg) 467 | 468 | let url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/other/sample.json' 469 | result = await runcli('push', url_, argName, argFormat) 470 | stdout = result.stdout.split('\n') 471 | hasExpectedErrorMsg = stdout.find(item => item.includes('> Error! tabular file is invalid')) 472 | t.truthy(hasExpectedErrorMsg) 473 | }) 474 | 475 | // end of [pushing non-CSV with force formatting (non-tabular)] 476 | 477 | 478 | // =========== 479 | // Excel files 480 | 481 | // QA test [pushing excel file with 1 sheet] 482 | 483 | test.serial('push command succeeds for simple Excel with 1 sheet', async t => { 484 | let path_ = 'test/fixtures/test-data/files/excel/sample-1-sheet.xls' 485 | const argName = '--name=test-excel-1-sheet' 486 | let result = await runcli('push', path_, argName, '--debug') 487 | // Check what's printed in console while in debug mode, e.g., if schema is included: 488 | let hasSchemaForFirstSheet = result.stdout.includes('"name": "number"') 489 | t.truthy(hasSchemaForFirstSheet) 490 | let hasPublishedMessage = result.stdout.includes('your data is published!') 491 | let hasURLtoShowcase = result.stdout.includes('https://datahub.io/test/test-excel-1-sheet/v/') 492 | t.truthy(hasPublishedMessage) 493 | t.truthy(hasURLtoShowcase) 494 | let whatsInClipboard = await clipboardy.read() 495 | t.true(whatsInClipboard.includes('https://datahub.io/test/test-excel-1-sheet/v/')) 496 | 497 | path_ = 'test/fixtures/test-data/files/excel/sample-1-sheet.xlsx' 498 | result = await runcli('push', path_, argName, '--debug') 499 | hasSchemaForFirstSheet = result.stdout.includes('"name": "number"') 500 | t.truthy(hasSchemaForFirstSheet) 501 | hasPublishedMessage = result.stdout.includes('your data is published!') 502 | hasURLtoShowcase = result.stdout.includes('https://datahub.io/test/test-excel-1-sheet/v/') 503 | t.truthy(hasPublishedMessage) 504 | t.truthy(hasURLtoShowcase) 505 | whatsInClipboard = await clipboardy.read() 506 | t.true(whatsInClipboard.includes('https://datahub.io/test/test-excel-1-sheet/v/')) 507 | }) 508 | 509 | // end of [pushing excel file with 1 sheet] 510 | 511 | // QA test [pushing excel file with selected sheets] 512 | // also includes: 513 | // [pushing excel file with selected non-existing sheet] 514 | // [pushing excel file with all sheets] 515 | // [pushing excel file with list of sheets] 516 | 517 | test.serial('push command succeeds for Excel with selected sheet', async t => { 518 | let path_ = 'test/fixtures/test-data/files/excel/sample-2-sheets.xls' 519 | const argName = '--name=test-excel-2-sheets' 520 | let argSheets = '--sheets=2' 521 | let result = await runcli('push', path_, argName, argSheets, '--debug') 522 | // Check what's printed in console while in debug mode, e.g., if schema is included: 523 | let
hasSchemaForSecondSheet = result.stdout.includes('"name": "header4"') 524 | t.truthy(hasSchemaForSecondSheet) 525 | let hasPublishedMessage = result.stdout.includes('your data is published!') 526 | let hasURLtoShowcase = result.stdout.includes('https://datahub.io/test/test-excel-2-sheets/v/') 527 | t.truthy(hasPublishedMessage) 528 | t.truthy(hasURLtoShowcase) 529 | let whatsInClipboard = await clipboardy.read() 530 | t.true(whatsInClipboard.includes('https://datahub.io/test/test-excel-2-sheets/v/')) 531 | 532 | path_ = 'test/fixtures/test-data/files/excel/sample-2-sheets.xlsx' 533 | result = await runcli('push', path_, argName, argSheets, '--debug') 534 | // Check what's printed in console while in debug mode, e.g., if schema is included: 535 | hasSchemaForSecondSheet = result.stdout.includes('"name": "header4"') 536 | t.truthy(hasSchemaForSecondSheet) 537 | hasPublishedMessage = result.stdout.includes('your data is published!') 538 | hasURLtoShowcase = result.stdout.includes('https://datahub.io/test/test-excel-2-sheets/v/') 539 | t.truthy(hasPublishedMessage) 540 | t.truthy(hasURLtoShowcase) 541 | whatsInClipboard = await clipboardy.read() 542 | t.true(whatsInClipboard.includes('https://datahub.io/test/test-excel-2-sheets/v/')) 543 | 544 | argSheets = '--sheets=5' 545 | result = await runcli('push', path_, argName, argSheets, '--debug') 546 | let hasErrorMsg = result.stdout.includes('Error! sheet index 5 is out of range') 547 | t.truthy(hasErrorMsg) 548 | 549 | argSheets = '--sheets=all' 550 | result = await runcli('push', path_, argName, argSheets, '--debug') 551 | let hasSchemaForFirstSheet = result.stdout.includes('"name": "header1"') 552 | hasSchemaForSecondSheet = result.stdout.includes('"name": "header4"') 553 | t.truthy(hasSchemaForFirstSheet) 554 | t.truthy(hasSchemaForSecondSheet) 555 | 556 | argSheets = '--sheets=1,2' 557 | result = await runcli('push', path_, argName, argSheets, '--debug') 558 | hasSchemaForFirstSheet = result.stdout.includes('"name": "header1"') 559 | hasSchemaForSecondSheet = result.stdout.includes('"name": "header4"') 560 | t.truthy(hasSchemaForFirstSheet) 561 | t.truthy(hasSchemaForSecondSheet) 562 | }) 563 | 564 | // end of [pushing excel file with selected sheets] 565 | 566 | 567 | test('push command fails for resources with invalid URL as path', async t => { 568 | const url_ = 'https://github.com/datasets/testtest' 569 | const argName = '--name=test' 570 | let result = await runcli('push', url_, argName) 571 | let stdout = result.stdout.split('\n') 572 | let hasErrorMsg = stdout.find(item => item.includes('> Error! Invalid URL. 404 Not Found: https://github.com/datasets/testtest')) 573 | t.truthy(hasErrorMsg) 574 | 575 | // Pushing a dataset with remote resource: 576 | const path_ = 'test/fixtures/test-data/packages/invalid-remote-path/' 577 | result = await runcli('push', path_, argName) 578 | stdout = result.stdout.split('\n') 579 | hasErrorMsg = stdout.find(item => item.includes('> Error! 
')) 580 | t.truthy(hasErrorMsg) 581 | }) 582 | -------------------------------------------------------------------------------- /test/cli.test.js: -------------------------------------------------------------------------------- 1 | // Test the CLI directly 2 | const fs = require('fs') 3 | const path = require('path') 4 | 5 | const test = require('ava') 6 | const {spawn} = require('cross-spawn') 7 | const run = require('inquirer-test') 8 | const {ENTER} = require('inquirer-test') 9 | 10 | const {version} = require('../package.json') 11 | 12 | const runcli = (...args) => { 13 | return new Promise((resolve, reject) => { 14 | const command = path.resolve(__dirname, '../bin/data.js') 15 | args.push('--test') 16 | const data = spawn(command, args) 17 | 18 | let stdout = '' 19 | data.stdout.on('data', data => { 20 | stdout += data 21 | }) 22 | 23 | data.on('error', err => { 24 | reject(err) 25 | }) 26 | 27 | data.on('close', code => { 28 | resolve({ 29 | code, 30 | stdout 31 | }) 32 | }) 33 | }) 34 | } 35 | 36 | // ========== 37 | // The basics 38 | 39 | 40 | 41 | test.after.always('cleanup', t => { 42 | let deleteFolderRecursive = (path) => { 43 | if (fs.existsSync(path)) { 44 | fs.readdirSync(path).forEach((file, index) => { 45 | let curPath = path + "/" + file; 46 | if (fs.lstatSync(curPath).isDirectory()) { // recurse 47 | deleteFolderRecursive(curPath); 48 | } else { // delete file 49 | fs.unlinkSync(curPath); 50 | } 51 | }) 52 | fs.rmdirSync(path); 53 | } 54 | } 55 | deleteFolderRecursive('finance-vix') 56 | deleteFolderRecursive('test/small-dataset-100kb') 57 | deleteFolderRecursive('test/medium-dataset-1mb') 58 | deleteFolderRecursive('test/big-dataset-10mb') 59 | deleteFolderRecursive('test/private-cli-test') 60 | try { 61 | fs.unlinkSync('test/fixtures/test-data/files/geo/datapackage.json') 62 | fs.unlinkSync('sample.csv') 63 | fs.unlinkSync('sample-1-sheet.xls') 64 | fs.unlinkSync('0.csv') 65 | } catch (err) { 66 | console.log('Finished cleanup without deleting some files.') 67 | } 68 | 69 | }) 70 | 71 | test('"data -v --version" prints version', async t => { 72 | let result = await runcli('-v') 73 | 74 | t.is(result.code, 0) 75 | let stdout = result.stdout.split('\n') 76 | t.true(stdout.length > 1) 77 | t.true(stdout[0].includes(`${version}`)) 78 | 79 | result = await runcli('--version') 80 | 81 | t.is(result.code, 0) 82 | stdout = result.stdout.split('\n') 83 | t.true(stdout.length > 1) 84 | t.true(stdout[0].includes(`${version}`)) 85 | }) 86 | 87 | test('"data help" prints help message', async t => { 88 | const result = await runcli('help') 89 | 90 | t.is(result.code, 0) 91 | const stdout = result.stdout.split('\n') 92 | t.true(stdout.length > 1) 93 | const hasExpectedMsg = stdout.find(item => item.includes('❒ data [options] ')) 94 | t.truthy(hasExpectedMsg) 95 | }) 96 | 97 | 98 | // ======================================= 99 | // DATA-CLI GET 100 | 101 | test('get command with local dataset', async t => { 102 | const identifier = 'test/fixtures/finance-vix' 103 | const result = await runcli('get', identifier) 104 | const stdout = result.stdout.split('\n') 105 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 106 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "finance-vix"')) 107 | t.truthy(hasTimeElapsedMsg) 108 | t.truthy(hasSuccessMsg) 109 | }) 110 | 111 | test('get command with local file', async t => { 112 | const identifier = 'test/fixtures/sample.csv' 113 | const result = await runcli('get', identifier) 114 | 
const stdout = result.stdout.split('\n') 115 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 116 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "sample.csv"')) 117 | t.truthy(hasTimeElapsedMsg) 118 | t.truthy(hasSuccessMsg) 119 | }) 120 | 121 | // QA tests [Get: r links from DataHub] 122 | 123 | test('get command with r links from DataHub', async t => { 124 | const identifier = 'https://datahub.io/test/small-dataset-100kb/r/0.csv' 125 | const result = await runcli('get', identifier) 126 | const stdout = result.stdout.split('\n') 127 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 128 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "0.csv"')) 129 | t.truthy(hasTimeElapsedMsg) 130 | t.truthy(hasSuccessMsg) 131 | }) 132 | 133 | // end of [Get: r links from DataHub] 134 | 135 | // QA tests [Get: Small dataset from DataHub] 136 | 137 | test('get command with small dataset from DataHub', async t => { 138 | const identifier = 'https://datahub.io/test/small-dataset-100kb/' 139 | const result = await runcli('get', identifier) 140 | const stdout = result.stdout.split('\n') 141 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 142 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "test/small-dataset-100kb"')) 143 | t.truthy(hasTimeElapsedMsg) 144 | t.truthy(hasSuccessMsg) 145 | }) 146 | 147 | // end of [Get: Small dataset from DataHub] 148 | 149 | // QA tests [Get: Medium dataset from DataHub] 150 | 151 | test('get command with medium dataset from DataHub', async t => { 152 | const identifier = 'https://datahub.io/test/medium-dataset-1mb' 153 | const result = await runcli('get', identifier) 154 | const stdout = result.stdout.split('\n') 155 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 156 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "test/medium-dataset-1mb"')) 157 | t.truthy(hasTimeElapsedMsg) 158 | t.truthy(hasSuccessMsg) 159 | }) 160 | 161 | // end of [Get: Meduim dataset from DataHub] 162 | 163 | // QA tests [Get: Big dataset from DataHub] 164 | 165 | test('get command with big dataset from DataHub', async t => { 166 | const identifier = 'https://datahub.io/test/big-dataset-10mb' 167 | const result = await runcli('get', identifier) 168 | const stdout = result.stdout.split('\n') 169 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 170 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "test/big-dataset-10mb"')) 171 | t.truthy(hasTimeElapsedMsg) 172 | t.truthy(hasSuccessMsg) 173 | }) 174 | 175 | // end of [Get: Big dataset from DataHub] 176 | 177 | // QA tests [Get: get excel file] 178 | 179 | test('get command with excel file', async t => { 180 | const identifier = 'https://github.com/frictionlessdata/test-data/blob/master/files/excel/sample-1-sheet.xls' 181 | const result = await runcli('get', identifier) 182 | const stdout = result.stdout.split('\n') 183 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 184 | const hasSuccessMsg = stdout.find(item => item.includes('Dataset/file is saved in "sample-1-sheet.xls"')) 185 | t.truthy(hasTimeElapsedMsg) 186 | t.truthy(hasSuccessMsg) 187 | }) 188 | 189 | // end of [Get: get excel file] 190 | 191 | // QA tests [Get: get private dataset] 192 | 193 | test('get command with private dataset', 
async t => { 194 | const identifier = 'https://datahub.io/test/private-cli-test' 195 | // Note that token for test user is set in env var. First we pass wrong token 196 | // as an argument and expect 404 or 403: 197 | const token = 'non-owner-token' 198 | let result = await runcli('get', identifier, `--token=${token}`) 199 | let stdout = result.stdout.split('\n') 200 | const hasErrorMsg = stdout.find(item => item.includes('> Error! 404: Not Found. Requested URL')) 201 | t.truthy(hasErrorMsg) 202 | 203 | // Now use correct token from env var: 204 | result = await runcli('get', identifier) 205 | stdout = result.stdout.split('\n') 206 | const hasTimeElapsedMsg = stdout.find(item => item.includes('Time elapsed:')) 207 | t.truthy(hasTimeElapsedMsg) 208 | t.true(fs.existsSync('test/private-cli-test/datapackage.json')) 209 | }) 210 | 211 | // end of QA tests [Get: get private dataset] 212 | 213 | 214 | // ======================================= 215 | // CLI commands: validate, cat, info, init 216 | 217 | test('Init command in non-interactive mode', async t => { 218 | const result = await runcli('init', 'test/fixtures/test-data/files/geo/') 219 | t.true(result.stdout.includes('This process initializes a new datapackage.json file')) 220 | t.true(result.stdout.includes('Descriptor is saved in')) 221 | }) 222 | 223 | // QA tests [Info: basic dataset] 224 | 225 | test('Info: basic dataset', async t => { 226 | let identifier = 'test/fixtures/test-data/packages/basic-csv' 227 | let result = await runcli('info', identifier) 228 | let stdout = result.stdout.split('\n') 229 | let hasMsg = stdout.find(item => item.includes('# basic-csv')) 230 | t.truthy(hasMsg) 231 | hasMsg = stdout.find(item => item.includes('comma-separated')) 232 | t.truthy(hasMsg) 233 | 234 | identifier = 'https://github.com/frictionlessdata/test-data/tree/master/packages/basic-csv' 235 | result = await runcli('info', identifier) 236 | stdout = result.stdout.split('\n') 237 | hasMsg = stdout.find(item => item.includes('# basic-csv')) 238 | t.truthy(hasMsg) 239 | hasMsg = stdout.find(item => item.includes('comma-separated')) 240 | t.truthy(hasMsg) 241 | }) 242 | 243 | // end of [Info: basic dataset] 244 | 245 | // QA tests [Info: dataset with multiple resources] 246 | 247 | test('Info: dataset with multiple resources', async t => { 248 | let identifier = 'test/fixtures/test-data/packages/different-separators' 249 | let result = await runcli('info', identifier) 250 | let stdout = result.stdout.split('\n') 251 | let hasCaretsResource = stdout.find(item => item.includes('carets')) 252 | let hasCommaResource = stdout.find(item => item.includes('comma')) 253 | t.truthy(hasCaretsResource) 254 | t.truthy(hasCommaResource) 255 | 256 | identifier = 'https://github.com/frictionlessdata/test-data/tree/master/packages/different-separators' 257 | result = await runcli('info', identifier) 258 | stdout = result.stdout.split('\n') 259 | hasCaretsResource = stdout.find(item => item.includes('carets')) 260 | hasCommaResource = stdout.find(item => item.includes('comma')) 261 | t.truthy(hasCaretsResource) 262 | t.truthy(hasCommaResource) 263 | }) 264 | 265 | // end if [Info: dataset with multiple resources] 266 | 267 | // QA tests [Info: basic CSV] 268 | 269 | test('Info: basic CSV', async t => { 270 | let identifier = 'test/fixtures/test-data/files/csv/100kb.csv' 271 | let result = await runcli('info', identifier) 272 | let stdout = result.stdout.split('\n') 273 | let hasDialect = stdout.find(item => item.includes('dialect')) 274 | let hasSchema = 
stdout.find(item => item.includes('schema')) 275 | let hasEncodings = stdout.find(item => item.includes('encoding')) 276 | let hasCreatedDate = stdout.find(item => item.includes('created')) 277 | let hasValueInTheTenthRow = stdout.find(item => item.includes('Sharlene')) 278 | let hasValueInTheEleventhRow = stdout.find(item => item.includes('Misti')) 279 | t.truthy(hasDialect) 280 | t.truthy(hasSchema) 281 | t.truthy(hasEncodings) 282 | t.falsy(hasCreatedDate) 283 | t.truthy(hasValueInTheTenthRow) 284 | t.falsy(hasValueInTheEleventhRow) 285 | }) 286 | 287 | // end of [Info: basic CSV] 288 | 289 | // QA tests [Info: non-tabular file] 290 | 291 | test('Info: non-tabular file', async t => { 292 | let identifier = 'test/fixtures/test-data/files/other/sample.pdf' 293 | let result = await runcli('info', identifier) 294 | let stdout = result.stdout.split('\n') 295 | let hasName = stdout.find(item => item.includes('name')) 296 | let hasFormat = stdout.find(item => item.includes('format')) 297 | let hasPath = stdout.find(item => item.includes('path')) 298 | let hasDialect = stdout.find(item => item.includes('dialect')) 299 | t.truthy(hasName) 300 | t.truthy(hasFormat) 301 | t.truthy(hasPath) 302 | t.falsy(hasDialect) 303 | 304 | identifier = 'https://github.com/frictionlessdata/test-data/raw/master/files/other/sample.pdf' 305 | result = await runcli('info', identifier) 306 | stdout = result.stdout.split('\n') 307 | hasName = stdout.find(item => item.includes('name')) 308 | hasFormat = stdout.find(item => item.includes('format')) 309 | hasPath = stdout.find(item => item.includes('path')) 310 | hasDialect = stdout.find(item => item.includes('dialect')) 311 | t.truthy(hasName) 312 | t.truthy(hasFormat) 313 | t.truthy(hasPath) 314 | t.falsy(hasDialect) 315 | }) 316 | 317 | // QA tests [Info: from datahub and github] 318 | 319 | test('info command with a dataset from GitHub', async t => { 320 | const identifier = 'https://github.com/datasets/finance-vix' 321 | const result = await runcli('info', identifier) 322 | const stdout = result.stdout.split('\n') 323 | const hasReadme = stdout.find(item => item.includes('CBOE Volatility Index (VIX) time-series dataset including')) 324 | const hasResource = stdout.find(item => item.includes('vix-daily')) 325 | t.truthy(hasReadme) 326 | t.truthy(hasResource) 327 | }) 328 | 329 | test('info command with a dataset from DataHub', async t => { 330 | const identifier = 'https://datahub.io/core/finance-vix' 331 | const result = await runcli('info', identifier) 332 | const stdout = result.stdout.split('\n') 333 | const hasReadme = stdout.find(item => item.includes('CBOE Volatility Index (VIX) time-series dataset including')) 334 | const hasResource = stdout.find(item => item.includes('vix-daily')) 335 | t.truthy(hasReadme) 336 | t.truthy(hasResource) 337 | }) 338 | 339 | // end of [Info: from datahub and github] 340 | 341 | // QA tests [Proper error messages] 342 | 343 | test('info command - no dataset or descriptor at URL', async t => { 344 | const url_ = 'https://datahub.io' 345 | const result = await runcli('info', url_) 346 | const stdout = result.stdout.split('\n') 347 | const hasErrorMsg = stdout.find(item => item.includes('Expected URL to a dataset or descriptor.')) 348 | t.truthy(hasErrorMsg) 349 | }) 350 | 351 | test('get command - no dataset or descriptor at URL', async t => { 352 | const url_ = 'https://datahub.io' 353 | const result = await runcli('get', url_) 354 | const stdout = result.stdout.split('\n') 355 | const hasErrorMsg = stdout.find(item => 
item.includes('Expected URL to a dataset or descriptor.')) 356 | t.truthy(hasErrorMsg) 357 | }) 358 | 359 | // end of [Proper error messages] 360 | 361 | // QA tests [Validate: basic csv resource] 362 | 363 | test('validate command - basic dataset', async t => { 364 | const path_ = 'test/fixtures/test-data/packages/basic-csv' 365 | const result = await runcli('validate', path_) 366 | const stdout = result.stdout.split('\n') 367 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 368 | t.truthy(hasValidMessage) 369 | }) 370 | 371 | test.serial('validate command - remote basic dataset', async t => { 372 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/basic-csv' 373 | const result = await runcli('validate', url_) 374 | const stdout = result.stdout.split('\n') 375 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 376 | t.truthy(hasValidMessage) 377 | }) 378 | 379 | // end of [Validate: basic csv resource] 380 | 381 | // QA tests [Validate: non-tabular resource LOCALLY] 382 | 383 | test('validate command - non-tabular resource', async t => { 384 | const path_ = 'test/fixtures/test-data/packages/non-tabular-resource' 385 | const result = await runcli('validate', path_) 386 | const stdout = result.stdout.split('\n') 387 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 388 | t.truthy(hasValidMessage) 389 | }) 390 | 391 | test.serial('validate command - remote dataset with non-tabular resource', async t => { 392 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/non-tabular-resource' 393 | const result = await runcli('validate', url_) 394 | const stdout = result.stdout.split('\n') 395 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 396 | t.truthy(hasValidMessage) 397 | }) 398 | 399 | // end of [Validate: non-tabular resource LOCALLY] 400 | 401 | // QA tests [Validate: remote resource] 402 | 403 | test('validate command - remote resource', async t => { 404 | const path_ = 'test/fixtures/test-data/packages/remote-csv' 405 | const result = await runcli('validate', path_) 406 | const stdout = result.stdout.split('\n') 407 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 408 | t.truthy(hasValidMessage) 409 | }) 410 | 411 | test.serial('validate command - remote dataset with remote resource', async t => { 412 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/remote-csv' 413 | const result = await runcli('validate', url_) 414 | const stdout = result.stdout.split('\n') 415 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 416 | t.truthy(hasValidMessage) 417 | }) 418 | 419 | // end of [Validate: remote resource] 420 | 421 | // QA tests [Validate: csv with different separators] 422 | 423 | test('validate command - csv with different separators', async t => { 424 | const path_ = 'test/fixtures/test-data/packages/different-separators' 425 | const result = await runcli('validate', path_) 426 | const stdout = result.stdout.split('\n') 427 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 428 | t.truthy(hasValidMessage) 429 | }) 430 | 431 | test.serial('validate command - remote dataset with csv with different separators', async t => { 432 | const url_ = 
'https://github.com/frictionlessdata/test-data/tree/master/packages/different-separators' 433 | const result = await runcli('validate', url_) 434 | const stdout = result.stdout.split('\n') 435 | const hasValidMessage = stdout.find(item => item.includes('Your Data Package is valid!')) 436 | t.truthy(hasValidMessage) 437 | }) 438 | 439 | // end of [Validate: csv with different separators] 440 | 441 | // QA tests [Validate: invalid path to resource] 442 | 443 | test('validate command - invalid local path', async t => { 444 | const path_ = 'test/fixtures/test-data/packages/invalid-local-path' 445 | const result = await runcli('validate', path_) 446 | const stdout = result.stdout.split('\n') 447 | const hasErrorMsg = stdout.find(item => item.includes('> Error! ENOENT: no such file or directory')) 448 | t.truthy(hasErrorMsg) 449 | }) 450 | 451 | // end of [Validate: invalid path to resource] 452 | 453 | // QA tests [Validate: invalid remote path to resource] 454 | 455 | test.serial('validate command - invalid remote path for resource', async t => { 456 | const path_ = 'test/fixtures/test-data/packages/invalid-remote-path' 457 | const result = await runcli('validate', path_) 458 | const stdout = result.stdout.split('\n') 459 | const hasErrorMsg = stdout.find(item => item.includes('> Error! Request failed with status code 404')) 460 | const hasResourceName = stdout.find(item => item.includes('> Error! Resource: invalid-remote-path')) 461 | const hasResourcePath = stdout.find(item => item.includes('> Error! Path: https://raw.githubusercontent.com/frictionlessdata/there/is/no/such/file.csv')) 462 | t.truthy(hasErrorMsg) 463 | t.truthy(hasResourceName) 464 | t.truthy(hasResourcePath) 465 | }) 466 | 467 | test.serial('validate command - remote dataset with invalid remote path for resource', async t => { 468 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/invalid-remote-path' 469 | const result = await runcli('validate', url_) 470 | const stdout = result.stdout.split('\n') 471 | const hasErrorMsg = stdout.find(item => item.includes('> Error! Request failed with status code 404')) 472 | const hasResourceName = stdout.find(item => item.includes('> Error! Resource: invalid-remote-path')) 473 | const hasResourcePath = stdout.find(item => item.includes('> Error! Path: https://raw.githubusercontent.com/frictionlessdata/there/is/no/such/file.csv')) 474 | t.truthy(hasErrorMsg) 475 | t.truthy(hasResourceName) 476 | t.truthy(hasResourcePath) 477 | }) 478 | 479 | // end of [Validate: invalid remote path to resource] 480 | 481 | // QA tests [Validate: csv with different field types, formats and constraints] 482 | 483 | test.serial('validate command - wrong constraints', async t => { 484 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/constraints' 485 | let result = await runcli('validate', path_) 486 | let stdout = result.stdout.split('\n') 487 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 7 type and format mismatch errors on line 3')) 488 | t.truthy(hasErrorMsg) 489 | 490 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/constraints' 491 | result = await runcli('validate', url_) 492 | stdout = result.stdout.split('\n') 493 | hasErrorMsg = stdout.find(item => item.includes('> Error! 
There are 7 type and format mismatch errors on line 3')) 494 | t.truthy(hasErrorMsg) 495 | }) 496 | 497 | test.serial('validate command - wrong "date" type/format', async t => { 498 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/date' 499 | let result = await runcli('validate', path_) 500 | let stdout = result.stdout.split('\n') 501 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 2 type and format mismatch errors on line 3')) 502 | t.truthy(hasErrorMsg) 503 | 504 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/date' 505 | result = await runcli('validate', url_) 506 | stdout = result.stdout.split('\n') 507 | hasErrorMsg = stdout.find(item => item.includes('> Error! There are 2 type and format mismatch errors on line 3')) 508 | t.truthy(hasErrorMsg) 509 | }) 510 | 511 | test.serial('validate command - wrong "datetime" type/format', async t => { 512 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/datetime' 513 | let result = await runcli('validate', path_) 514 | let stdout = result.stdout.split('\n') 515 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 3 type and format mismatch errors on line 3')) 516 | t.truthy(hasErrorMsg) 517 | 518 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/datetime' 519 | result = await runcli('validate', url_) 520 | stdout = result.stdout.split('\n') 521 | hasErrorMsg = stdout.find(item => item.includes('> Error! There are 3 type and format mismatch errors on line 3')) 522 | t.truthy(hasErrorMsg) 523 | }) 524 | 525 | test.serial('validate command - wrong "string" type/format', async t => { 526 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/string' 527 | let result = await runcli('validate', path_) 528 | let stdout = result.stdout.split('\n') 529 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 3 type and format mismatch errors on line 3')) 530 | t.truthy(hasErrorMsg) 531 | 532 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/string' 533 | result = await runcli('validate', url_) 534 | stdout = result.stdout.split('\n') 535 | hasErrorMsg = stdout.find(item => item.includes('> Error! There are 3 type and format mismatch errors on line 3')) 536 | t.truthy(hasErrorMsg) 537 | }) 538 | 539 | test.serial('validate command - wrong "time" type/format', async t => { 540 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/time' 541 | let result = await runcli('validate', path_) 542 | let stdout = result.stdout.split('\n') 543 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 3 type and format mismatch errors on line 3')) 544 | t.truthy(hasErrorMsg) 545 | 546 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/time' 547 | result = await runcli('validate', url_) 548 | stdout = result.stdout.split('\n') 549 | hasErrorMsg = stdout.find(item => item.includes('> Error! 
There are 3 type and format mismatch errors on line 3')) 550 | t.truthy(hasErrorMsg) 551 | }) 552 | 553 | test.serial('validate command - wrong "year" type/format', async t => { 554 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/year' 555 | let result = await runcli('validate', path_) 556 | let stdout = result.stdout.split('\n') 557 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 1 type and format mismatch errors on line 2')) 558 | t.truthy(hasErrorMsg) 559 | 560 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/year' 561 | result = await runcli('validate', url_) 562 | stdout = result.stdout.split('\n') 563 | hasErrorMsg = stdout.find(item => item.includes('> Error! There are 1 type and format mismatch errors on line 2')) 564 | t.truthy(hasErrorMsg) 565 | }) 566 | 567 | test.serial('validate command - wrong "yearmonth" type/format', async t => { 568 | const path_ = 'test/fixtures/test-data/packages/types-formats-and-constraints/yearmonth' 569 | let result = await runcli('validate', path_) 570 | let stdout = result.stdout.split('\n') 571 | let hasErrorMsg = stdout.find(item => item.includes('> Error! There are 1 type and format mismatch errors on line 2')) 572 | t.truthy(hasErrorMsg) 573 | 574 | const url_ = 'https://github.com/frictionlessdata/test-data/tree/master/packages/types-formats-and-constraints/yearmonth' 575 | result = await runcli('validate', url_) 576 | stdout = result.stdout.split('\n') 577 | hasErrorMsg = stdout.find(item => item.includes('> Error! There are 1 type and format mismatch errors on line 2')) 578 | t.truthy(hasErrorMsg) 579 | }) 580 | 581 | // end of [Validate: csv with different field types, formats and constraints] 582 | 583 | // QA tests [Cat: basic csv] 584 | 585 | test('cat command - basic behaviour', async t => { 586 | const path_ = 'test/fixtures/test-data/files/csv/all-schema-types.csv' 587 | const results = await runcli('cat', path_) 588 | const stdout = results.stdout.split('\n') 589 | const hasMsg = stdout.find(item => item.includes('│ 1.0 │')) 590 | t.truthy(hasMsg) 591 | }) 592 | 593 | test('cat command - remote csv file', async t => { 594 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/all-schema-types.csv' 595 | const results = await runcli('cat', url_) 596 | const stdout = results.stdout.split('\n') 597 | const hasMsg = stdout.find(item => item.includes('│ 1.0 │')) 598 | t.truthy(hasMsg) 599 | }) 600 | 601 | test('cat command - remote non tabular file', async t => { 602 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/other/sample.txt' 603 | const results = await runcli('cat', url_) 604 | const stdout = results.stdout.split('\n') 605 | const hasErrorMsg = stdout.find(item => item.includes('> Error! We do not have a parser for that format: txt')) 606 | t.truthy(hasErrorMsg) 607 | }) 608 | 609 | test('cat command - non-existing path', async t => { 610 | const path_ = 'non/existing/path' 611 | const results = await runcli('cat', path_) 612 | const stdout = results.stdout.split('\n') 613 | const hasErrorMsg = stdout.find(item => item.includes('> Error! 
ENOENT: no such file or directory')) 614 | t.truthy(hasErrorMsg) 615 | }) 616 | 617 | test('cat command - URL that returns 404', async t => { 618 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/other/sampl.csv' 619 | const results = await runcli('cat', url_) 620 | const stdout = results.stdout.split('\n') 621 | let hasErrorMsg = stdout.find(item => item.includes('> Error! Provided URL is invalid')) 622 | t.truthy(hasErrorMsg) 623 | hasErrorMsg = stdout.find(item => item.includes('> Error! 404: Not Found. Requested URL: https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/other/sampl.csv')) 624 | t.truthy(hasErrorMsg) 625 | }) 626 | 627 | // end of [Cat: basic csv] 628 | 629 | // QA tests [Cat: different separators] 630 | 631 | test('cat command - files with different separator', async t => { 632 | // Local files: 633 | let path_ = 'test/fixtures/test-data/files/csv/separators/semicolon.csv' 634 | let results = await runcli('cat', path_) 635 | let stdout = results.stdout.split('\n') 636 | let delimiterWasntRecognized = stdout.find(item => item.includes(';')) 637 | t.falsy(delimiterWasntRecognized) 638 | let hasCorrectPrint = stdout.find(item => item.includes('number')) 639 | t.truthy(hasCorrectPrint) 640 | 641 | path_ = 'test/fixtures/test-data/files/csv/separators/carets.csv' 642 | results = await runcli('cat', path_) 643 | stdout = results.stdout.split('\n') 644 | delimiterWasntRecognized = stdout.find(item => item.includes('^')) 645 | t.falsy(delimiterWasntRecognized) 646 | hasCorrectPrint = stdout.find(item => item.includes('number')) 647 | t.truthy(hasCorrectPrint) 648 | 649 | // Remote files: 650 | let url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/separators/semicolon.csv' 651 | results = await runcli('cat', url_) 652 | stdout = results.stdout.split('\n') 653 | delimiterWasntRecognized = stdout.find(item => item.includes(';')) 654 | t.falsy(delimiterWasntRecognized) 655 | hasCorrectPrint = stdout.find(item => item.includes('number')) 656 | t.truthy(hasCorrectPrint) 657 | 658 | url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/separators/carets.csv' 659 | results = await runcli('cat', url_) 660 | stdout = results.stdout.split('\n') 661 | delimiterWasntRecognized = stdout.find(item => item.includes('^')) 662 | t.falsy(delimiterWasntRecognized) 663 | hasCorrectPrint = stdout.find(item => item.includes('number')) 664 | t.truthy(hasCorrectPrint) 665 | }) 666 | 667 | // end of [Cat: different separators] 668 | 669 | // QA test [Cat: different encodings] 670 | 671 | test.failing('cat command - different encodings', async t => { 672 | const path_ = 'test/fixtures/test-data/files/csv/encodings/iso8859.csv' 673 | let results = await runcli('cat', path_) 674 | let stdout = results.stdout.split('\n') 675 | let hasCorrectPrint = stdout.find(item => item.includes('Réunion')) 676 | t.truthy(hasCorrectPrint) 677 | 678 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/encodings/western-macos-roman.csv' 679 | results = await runcli('cat', url_) 680 | stdout = results.stdout.split('\n') 681 | hasCorrectPrint = stdout.find(item => item.includes('Réunion')) 682 | t.truthy(hasCorrectPrint) 683 | }) 684 | 685 | // end of [Cat: different encodings] 686 | 687 | test('cat command - local tsv file', async t => { 688 | const path_= 'test/fixtures/test-data/files/csv/separators/tab.tsv' 689 | const results = await runcli('cat', 
path_) 690 | const stdout = results.stdout.split('\n') 691 | const hasCorrectPrint = stdout.find(item => item.includes('number')) 692 | t.truthy(hasCorrectPrint) 693 | }) 694 | 695 | test('cat command - remote tsv file', async t => { 696 | const url_ = 'https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/separators/tab.tsv' 697 | const results = await runcli('cat', url_) 698 | const stdout = results.stdout.split('\n') 699 | const hasCorrectPrint = stdout.find(item => item.includes('number')) 700 | t.truthy(hasCorrectPrint) 701 | }) 702 | 703 | test('cat command - inconsistent columns', async t => { 704 | const path_ = 'test/fixtures/test-data/files/csv/inconsistent-column-number.csv' 705 | const results = await runcli('cat', path_) 706 | const stdout = results.stdout.split('\n') 707 | const hasErrorMsg = stdout.find(item => item.includes('> Error! Number of columns is inconsistent on line 3')) 708 | t.truthy(hasErrorMsg) 709 | }) 710 | 711 | test('cat command - remote excel file', async t => { 712 | const url_ = 'https://github.com/frictionlessdata/test-data/raw/master/files/excel/sample-1-sheet.xls' 713 | const results = await runcli('cat', url_) 714 | const stdout = results.stdout.split('\n') 715 | const hasCorrectPrint = stdout.find(item => item.includes('number')) 716 | t.truthy(hasCorrectPrint) 717 | }) 718 | 719 | test('cat command - specific excel sheet', async t => { 720 | const path_ = 'test/fixtures/test-data/files/excel/sample-2-sheets.xlsx' 721 | // With sheet name: 722 | let results = await runcli('cat', path_, '--sheet=Sheet2') 723 | let stdout = results.stdout.split('\n') 724 | let hasHeaderFrom2ndSheet = stdout.find(item => item.includes('header4')) 725 | t.truthy(hasHeaderFrom2ndSheet) 726 | // With sheet index: 727 | results = await runcli('cat', path_, '--sheet=2') 728 | stdout = results.stdout.split('\n') 729 | hasHeaderFrom2ndSheet = stdout.find(item => item.includes('header4')) 730 | t.truthy(hasHeaderFrom2ndSheet) 731 | // When sheet doesn't exist: 732 | results = await runcli('cat', path_, '--sheet=3') 733 | stdout = results.stdout.split('\n') 734 | t.is(stdout[0], '> Error! Input source is empty or doesn\'t exist.') 735 | }) 736 | 737 | module.exports = { 738 | runcli 739 | } 740 | --------------------------------------------------------------------------------