├── .dockerignore
├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .npmignore
├── Dockerfile
├── Jenkinsfile
├── LICENSE
├── Makefile
├── README.md
├── bin
│   └── canvasDataCli
├── build.sh
├── index.js
├── package-lock.json
├── package.json
├── sonar-project.properties
├── src
│   ├── Api.js
│   ├── ConfigTask.js
│   ├── Fetch.js
│   ├── FileDownloader.js
│   ├── GetApi.js
│   ├── Grab.js
│   ├── HistoricalRequests.js
│   ├── List.js
│   ├── Sync.js
│   ├── Unpack.js
│   ├── apiAuth.js
│   ├── cli.js
│   └── logger.js
└── test
    ├── ApiTest.js
    ├── ConfigTaskTest.js
    ├── FetchTest.js
    ├── FileDownloadTest.js
    ├── GetApiTest.js
    ├── GrabTest.js
    ├── HistoricalRequestsTest.js
    ├── ListTest.js
    ├── SyncTest.js
    ├── UnpackTest.js
    ├── cliTest.js
    └── fixtures
        ├── mockApiObjects.js
        ├── mockDump
        │   ├── one
        │   │   └── file.gz
        │   ├── three
        │   │   └── file.gz
        │   └── two
        │       └── file.gz
        ├── mockLogger.js
        └── mockSchema.json

/.dockerignore:
--------------------------------------------------------------------------------
node_modules/
lib/
report/
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
Please read the following carefully before opening a new issue.
Your issue may be closed if it does not provide the information required by this template.

We use GitHub Issues, as well as our help email at canvasdatahelp@instructure.com, for tracking issues.

- If you have a question about how to use the CLI, please ask your CSM or email canvasdatahelp@instructure.com
- If you have a feature request, it should be posted on the community site: [HERE][community_link]

- Make sure your issue reproduces on the latest version!

--- Delete everything above this line ---

### Description ###

Explain what you did, what you expected to happen, and what actually happens.

### Additional Information ###

* Node Version: [FILL THIS OUT: Can be grabbed with `node --version` on your CLI.]
* Platform: [FILL THIS OUT: Windows, Mac, or Linux? Which version?]
* Logs: (If you can, please run the CLI with `-l debug` and provide us the debug logs.)

[community_link]: https://community.canvaslms.com/community/answers/data
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
Thanks for submitting a PR! We want to make contributing to the Canvas Data CLI as easy as possible.
Please read these instructions carefully:

- [ ] Explain the **motivation** for making this change.
- [ ] Provide a **test plan** demonstrating that the code is solid.
- [ ] Match the **code formatting** of the rest of the codebase.
- [ ] Make sure to **add tests** to help keep code coverage up.

## Motivation (required) ##

What existing problem does the pull request solve?

## Test Plan (required) ##

A good test plan has the exact commands you ran and their output.

If you have added code that should be tested, add tests.

## Next Steps ##

- Small pull requests are much easier to review and more likely to get merged. Make sure the PR does only one thing; otherwise, please split it.
- Make sure all **tests pass**; we will run them on Jenkins, but you can also run them yourself with the `build.sh` script.

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules/
lib/
config.js
downloads/
state.json
report/
.nyc_output/
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
.git
src/
node_modules/
config.js
downloads/
state.json
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM instructure/node:10

USER root
RUN mkdir -p /usr/src/app/report && chown -R docker /usr/src/app
USER docker

ADD package.json package.json
RUN npm install . --ignore-scripts && rm package.json
ADD . /usr/src/app

CMD npm test
--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
#! /usr/bin/env groovy

def sendSlack(success) {
  status = success ? "passed" : "failed"
  // Slack attachment colors must be "good", "warning", "danger", or a hex value
  color = success ? "good" : "danger"
  short_job_name = env.JOB_NAME.replaceAll(/.*\//, "");
  message = "[$short_job_name] <$env.GERRIT_CHANGE_URL|$env.GERRIT_CHANGE_SUBJECT> *$status* <$env.BUILD_URL|New Jenkins>."
  slackSend channel: '#oreo-builds', color: color, message: message
}

pipeline {
  agent { label "docker" }

  options {
    ansiColor('xterm')
  }

  stages {
    stage('Tests') {
      parallel {
        stage('Unit Tests') {
          steps {
            sh "./build.sh"
          }
        }
      }
    }
  }
  post {
    failure {
      sendSlack(false)
    }
    success {
      sendSlack(true)
    }
  }
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Instructure, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
compile:
	npm install .
	./node_modules/.bin/babel src --out-dir lib/
publish: compile
	npm publish
installLocal: compile
	npm install -g --progress=false .
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Canvas Data CLI
A small CLI tool for syncing data from the Canvas Data API.

NOTE: this is currently in beta, please report any bugs or issues you find!

## Installing
### Prerequisites
This tool should work on Linux, OSX, and Windows. The tool uses the Node.js runtime, which you will need to install before you can use it.
1. Install Node.js - Any version newer than 0.12.0 should work; your best bet is to follow the instructions [here](https://nodejs.org/en/download/package-manager/)
### Install via npm
`npm install -g canvas-data-cli`
### OR Install from github
`git clone https://github.com/instructure/canvas-data-cli.git && cd canvas-data-cli && make installLocal`
### Configuring
The Canvas Data CLI requires a configuration file with a few fields set. Canvas Data CLI uses a small JavaScript file as its configuration file.
To generate a stub of this configuration, run `canvasDataCli sampleConfig`, which will create a `config.js.sample` file. Rename this to something like `config.js`.

Edit the file to point to where you want to save the files, as well as the file used to track the state of which data exports you have already downloaded. By default, the sample config file
tries to pull your API key and secret from the environment variables `CD_API_KEY` and `CD_API_SECRET`, which is more secure; however, you can also hardcode the credentials in the config file.

#### Configuring an HTTP Proxy

canvas-data-cli has support for HTTP proxies, both with and without basic authentication. To use one, there
are three extra options you can add to your config file: `httpsProxy`, `proxyUsername`, and `proxyPassword`.

| Config Option | Value                                                                                    |
|:--------------|:-----------------------------------------------------------------------------------------|
| httpsProxy    | the `host:port` of the https proxy. Ideally it'd look like: `https_proxy_stuff.com:443`  |
| proxyUsername | the basic auth username for the https proxy.                                             |
| proxyPassword | the basic auth password for the https proxy.                                             |
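
For illustration, a filled-in config using a proxy might look like the following sketch. The proxy host, port, and credential environment variables here are placeholders, not real values:

```javascript
// config.js -- example only; proxy host and credentials are placeholders
module.exports = {
  saveLocation: './dataFiles',
  unpackLocation: './unpackedFiles',
  apiUrl: 'https://api.inshosteddata.com/api',
  key: process.env.CD_API_KEY,
  secret: process.env.CD_API_SECRET,
  httpsProxy: 'proxy.example.com:443',
  proxyUsername: process.env.PROXY_USER, // omit both of these if your proxy is unauthenticated
  proxyPassword: process.env.PROXY_PASS
}
```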

## Usage

### Syncing

If you want to simply download all the data from Canvas Data, the `sync` command can be used to keep an up-to-date copy locally.

```Shell
canvasDataCli sync -c path/to/config.js
```

This will start the sync process. The sync process uses the `sync` API endpoint to get a list of all the files. If a file does not exist locally, it will download it; otherwise, it will skip the file. After downloading all files, it will delete any unexpected files in the directory to remove old data.

On subsequent executions, it will only download the files it doesn't have.

This process is also resumable: if for whatever reason you have issues, it should restart and download only the files that previously failed. One of the things that makes this safer is that each file is downloaded under a temporary name and renamed once the download is finished. This may leave `.gz.tmp` files around, but they should get deleted automatically once you have a successful run.

If you run this daily, you should keep all of your data from Canvas Data up to date.

### Fetch

Fetches the most up-to-date data for a single table from the API. This ignores any previously downloaded files and will redownload all the files associated with that table.

```Shell
canvasDataCli fetch -c path/to/config.js -t user_dim
```

This will start the fetch process and download what is needed to get the most recent data for that table (in this case, `user_dim`).

On subsequent executions, this will redownload all the data for that table, ignoring any previous days' data.

### Unpack

*NOTE*: This only works after properly running a `sync` command.

This command will unpack the gzipped files, concatenate any partitioned files, and add a header to the output file.

```Shell
canvasDataCli unpack -c path/to/config.js -f user_dim account_dim
```

This command will unpack the user_dim and account_dim tables to a directory. Currently, you explicitly have to name the tables you want to unpack,
as this has the potential to create very large files.

### API

This subcommand is designed to allow users to make API calls directly. The main
use case is debugging and development.

```Shell
canvasDataCli api -c config.js -r /account/self/dump
```
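
The `-p`/`--params` flag accepts query parameters as a JSON string, which is parsed and appended to the route (see `src/GetApi.js`). For example, assuming the dump endpoint accepts a `limit` parameter, this would request only the five most recent dumps:

```Shell
canvasDataCli api -c config.js -r /account/self/dump -p '{"limit": 5}'
```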

### Historical Requests

Periodically, requests data is regrouped into collections that span more than
just a single day. In this case, the date that the files were generated differs
from the time that the included requests were made. To make it easier to
identify which files contain the requests made during a particular time range,
we have the `historical-requests` subcommand.

```Shell
canvasDataCli historical-requests -c config.js
```

Its output takes the form:

```json
{
  "dumpId": "...",
  "ranges": {
    "20180315_20180330": [
      {
        "url": "...",
        "filename": "..."
      },
      {
        "url": "...",
        "filename": "..."
      }
    ],
    "20180331_20180414": [
      {
        "url": "...",
        "filename": "..."
      }
    ]
  }
}
```


## Developing

Process:
1. Write some code
2. Write tests
3. Open a pull request

### Running tests

#### In Docker

If you use Docker, you can run the tests inside a Docker container:
```Shell
./build.sh
```

#### Native

```Shell
npm install .
npm test
```
--------------------------------------------------------------------------------
/bin/canvasDataCli:
--------------------------------------------------------------------------------
#!/usr/bin/env node
var cli = require('../').cli
var argv = cli.cli.parse(process.argv.slice(2))
cli.run(argv)
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
#!/bin/sh

set -e
mkdir -p report
chmod -R 777 report || true

docker build -t canvas-data-cli:ci .
docker run -v $(pwd)/report:/usr/src/app/report --rm canvas-data-cli:ci
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
module.exports = {
  Api: require('./lib/Api'),
  Sync: require('./lib/Sync'),
  cli: require('./lib/cli'),
  Fetch: require('./lib/Fetch'),
  Unpack: require('./lib/Unpack')
}
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "canvas-data-cli",
  "version": "0.6.6",
  "description": "A CLI tool for interacting with the Canvas Data API",
  "main": "index.js",
  "scripts": {
    "test": "nyc mocha --compilers js:babel/register test/*Test.js",
    "prepublishOnly": "babel src --out-dir lib/"
  },
  "nyc": {
    "reporter": [
      "lcov",
      "text-summary"
    ],
    "include": [
      "src/*.js",
      "src/**/*.js"
    ],
    "report-dir": "./report"
  },
  "bin": {
    "canvasDataCli": "./bin/canvasDataCli"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/instructure/canvas-data-cli.git"
  },
  "engines": {
    "node": ">=0.12.0"
  },
  "author": "",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/instructure/canvas-data-cli/issues"
  },
  "homepage": "https://github.com/instructure/canvas-data-cli#readme",
  "dependencies": {
    "async": "^1.5.0",
    "glob": "^7.0.3",
    "end-of-stream": "1.4.1",
    "lodash": "^3.10.1",
    "map-stream": "0.0.6",
    "mkdirp": "^0.5.1",
    "multistream": "^2.1.0",
    "proxyquire": "^1.7.10",
    "pump": "^1.0.1",
    "re": "^0.1.4",
    "request": "^2.65.0",
    "split": "^1.0.0",
    "yargs": "^3.29.0"
  },
  "devDependencies": {
    "babel": "^5.8.29",
    "chai": "^3.5.0",
    "chai-fs": "^0.1.0",
    "mocha": "^2.3.3",
    "mocha-sinon": "^1.1.5",
    "nyc": "^10.0.0",
    "rimraf": "^2.4.3",
    "sinon": "^1.17.3",
    "touch": "^1.0.0"
  }
}
--------------------------------------------------------------------------------
/sonar-project.properties:
--------------------------------------------------------------------------------
sonar.projectKey=canvas-data-cli
sonar.projectName=canvas-data-cli
sonar.projectVersion=0.0.1

sonar.sources=src
sonar.tests=test
sonar.language=js

sonar.dynamicAnalysis=reuseReports

sonar.javascript.jstest.reportsPath=report
sonar.javascript.lcov.reportPath=report/lcov.info
--------------------------------------------------------------------------------
/src/Api.js:
--------------------------------------------------------------------------------
var apiAuth = require('./apiAuth')
var url = require('url')
var path = require('path')
var request = require('request')
var _ = require('lodash')
const GET = 'GET'
class ApiError extends Error {
  constructor(msg, errorCode, resp) {
    super(msg)
    this.errorCode = errorCode
    this.resp = resp
  }
}

class Api {
  constructor(config) {
    this.apiUrl = config.apiUrl
    this.apiKey = config.key
    this.apiSecret = config.secret
    this.proxyUrl = this.buildProxyUrl(config)
  }
  buildProxyUrl(config) {
    if (config.httpsProxy) {
      if (config.proxyUsername) {
        return `https://${config.proxyUsername}:${config.proxyPassword}@${config.httpsProxy}`
      } else {
        return `https://${config.httpsProxy}`
      }
    }
  }
  buildUrl(route, query) {
    var urlInfo = url.parse(this.apiUrl)
    urlInfo.pathname = path.posix.join(urlInfo.pathname, route)
    urlInfo.query = query
    return url.format(urlInfo)
  }
  makeRequest(method, route, query, cb) {
    if (typeof query === 'function') {
      cb = query
      query = null
    }
    var reqOpts = {
      method: method,
      url: this.buildUrl(route, query),
      json: true
    }
    if (this.proxyUrl) {
      reqOpts.proxy = this.proxyUrl
    }
    request(apiAuth.signRequest(this.apiKey, this.apiSecret, reqOpts), (err, resp, body) => {
      if (err) return cb(err)
      if (resp.statusCode !== 200) {
        var message = body
        if (typeof body === 'object') {
          message = JSON.stringify(body, null, 2)
        }
        return cb(new ApiError(`invalid status code, got ${resp.statusCode}: ${message}`, resp.statusCode, body))
      }
      cb(null, body)
    })
  }
  getDumps(params, cb) {
    if (typeof params === 'function') {
      cb = params
      params = {}
    }
    this.makeRequest(GET, 'account/self/dump', params, cb)
  }
  getLatestFiles(cb) {
    this.makeRequest(GET, 'account/self/file/latest', cb)
  }
  getFilesForDump(dumpId, cb) {
    this.makeRequest(GET, `account/self/file/byDump/${dumpId}`, cb)
  }
  getSync(cb) {
    this.makeRequest(GET, `account/self/file/sync`, cb)
  }
  getFilesForTable(tableName, params, cb) {
    if (typeof params === 'function') {
      cb = params
      params = {}
    }
    this.makeRequest(GET, `account/self/file/byTable/${tableName}`, params, cb)
  }
  getSchemas(cb) {
    this.makeRequest(GET, `schema`, cb)
  }
  // TODO: kill this, keep just in case people are using the API
  getLastestSchema(cb) {
    this.getLatestSchema(cb)
  }
  getLatestSchema(cb) {
    this.makeRequest(GET, 'schema/latest', cb)
  }
  getSchemaVersion(version, cb) {
    this.makeRequest(GET, `schema/${version}`, cb)
  }
}

module.exports = Api
--------------------------------------------------------------------------------
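The classes exported from `index.js` can also be used programmatically, not just through the CLI. A minimal sketch, assuming the package is installed and the compiled `lib/` output exists; the `limit` parameter and the `dumpId` field are illustrative of the dump-listing API:

```javascript
// sketch only: list recent dumps without going through the CLI
var Api = require('canvas-data-cli').Api

var api = new Api({
  apiUrl: 'https://api.inshosteddata.com/api',
  key: process.env.CD_API_KEY,
  secret: process.env.CD_API_SECRET
})

// callback style, matching the rest of the codebase
api.getDumps({limit: 5}, function(err, dumps) {
  if (err) throw err
  console.log(dumps.map(function(d) { return d.dumpId }))
})
```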
/src/ConfigTask.js:
--------------------------------------------------------------------------------

var fs = require('fs')
var sampleConfig = `
module.exports = {
  saveLocation: './dataFiles',
  unpackLocation: './unpackedFiles', // if not defined, defaults to 'unpackedFiles'
  apiUrl: 'https://api.inshosteddata.com/api',
  key: process.env.CD_API_KEY, // don't hardcode creds, keep them in environment variables ideally!
  secret: process.env.CD_API_SECRET,
  maxConnections: 200, // The maximum number of files allowed to be downloading simultaneously
}
`
class ConfigTask {
  constructor(opts, config, logger) {
    this.logger = logger
  }
  run(cb) {
    this.logger.info(sampleConfig)
    this.logger.info('was written to config.js.sample')
    fs.writeFile('config.js.sample', sampleConfig, cb)
  }
  static validate(config) {
    var fields = [
      'saveLocation',
      'apiUrl',
      'key',
      'secret'
    ]
    var missing = []
    for (var field of fields) {
      if (!config[field]) missing.push(field)
    }
    if (missing.length) return `missing ${missing.join(', ')} fields in config`
    return null
  }
}
module.exports = ConfigTask
--------------------------------------------------------------------------------
/src/Fetch.js:
--------------------------------------------------------------------------------
var Api = require('./Api')
var path = require('path')
var FileDownloader = require('./FileDownloader')
var async = require('async')
var fs = require('fs')
var _ = require('lodash')
var mkdirp = require('mkdirp')
class Fetch {
  constructor(opts, config, logger) {
    this.opts = opts
    this.logger = logger
    this.api = new Api(config)
    this.table = opts.table
    this.saveLocation = path.resolve(process.cwd(), config.saveLocation)
    this.fileDownloader = new FileDownloader(logger)
  }
  getNewest(files) {
    let toDownload = []
    // We can get multiple entries in files.history for a single sequence / dumpId.
    // So group these entries up by sequence ID.
    let groups = _.groupBy(files.history, 'sequence')
    // Walk through the groups in the same sequence order as provided by the API
    let sequences = _.map(files.history, 'sequence')
    for (let sequence of sequences) {
      let group = groups[sequence]
      // If /any/ entry in the group is marked as partial, we'll consider that sequence
      // to be completely partial
      let partial = _.some(group, 'partial')
      let groupFiles = _.flatten(_.map(group, 'files')).map((file) => {
        file.sequence = sequence
        return file
      })

      toDownload.push(...groupFiles)

      // Stop as soon as we hit a complete (non-partial) dump; everything older
      // is already covered by it.
      if (!partial) {
        break
      }

    }
    return toDownload
  }
  run(cb) {
    let saveFolder = path.join(this.saveLocation, this.table)
    mkdirp(saveFolder, (err) => {
      if (err) return cb(err)
      this.api.getFilesForTable(this.table, (err, files) => {
        if (err) return cb(err)

        let toDownload = this.getNewest(files)
        this.logger.info(`Files (${toDownload.length})`, toDownload)

        async.map(toDownload, (file, innerCb) => {
          this.fileDownloader.downloadToFile(
            file,
            {tableName: this.table, sequence: file.sequence},
            path.join(saveFolder, `${file.sequence.toString()}-${file.filename}`),
            innerCb
          )
        }, cb)
      })
    })
  }
}
module.exports = Fetch
--------------------------------------------------------------------------------
/src/FileDownloader.js:
--------------------------------------------------------------------------------
var fs = require('fs')
var request = require('request')
var pump = require('pump')
var Re = require('re')

class FileDownloader {
  constructor(logger, reOpts) {
    this.logger = logger
    this.reOpts = reOpts || {
      retries: 5,
      strategy: {
        "type": Re.STRATEGIES.EXPONENTIAL,
        "initial": 100,
        "base": 2
      }
    }
    this.re = new Re(this.reOpts)
  }
  downloadToFile(downloadLink, artifact, dest, cb) {
    this.re.try((retryCount, done) => {
      this.logger.debug(`downloading ${downloadLink.filename} for artifact ${artifact.tableName}, attempt ${retryCount + 1}`)
      var r = request({method: 'GET', url: downloadLink.url})
      var badStatusCode = false
      r.on('response', (resp) => {
        if (resp.statusCode !== 200) {
          this.logger.debug(`got non 200 status code (actual ${resp.statusCode}) from ${downloadLink.url}`)
          badStatusCode = true
        }
      })
      pump(r, fs.createWriteStream(dest), (err) => {
        if (err || badStatusCode) {
          this.logger.debug(`failed attempt ${retryCount + 1} for ${downloadLink.filename}, err: ${err || badStatusCode}`)
          return done(new Error("Failed Attempt."), retryCount)
        }
        this.logger.debug(`finished downloading ${downloadLink.filename} for artifact ${artifact.tableName}`)
        done(null, retryCount)
      })
    }, (err, retryCount) => {
      cb(err ? new Error(`max number of retries reached for ${downloadLink.filename}, aborting`) : null)
    })
  }

}
module.exports = FileDownloader
--------------------------------------------------------------------------------
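The retry behavior above comes from the `re` package: with the default options, a failed download is retried up to 5 times with exponential backoff starting at 100ms. Since the constructor takes the retry options as its second argument, a caller can tune this. A sketch with illustrative values; the file URL and destination path are placeholders:

```javascript
// sketch: a more patient downloader -- 10 attempts, backoff starting at 500ms
var Re = require('re')
var FileDownloader = require('./FileDownloader')
var logger = require('./logger')

var downloader = new FileDownloader(logger, {
  retries: 10,
  strategy: {
    type: Re.STRATEGIES.EXPONENTIAL,
    initial: 500,
    base: 2
  }
})

// the downloadLink/artifact shapes match what Sync.js and Fetch.js pass in
downloader.downloadToFile(
  {url: 'https://example.com/file.gz', filename: 'file.gz'},
  {tableName: 'user_dim'},
  '/tmp/file.gz',
  function(err) {
    if (err) return console.error(err)
    console.log('done')
  }
)
```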
/src/GetApi.js:
--------------------------------------------------------------------------------
var Api = require('./Api')

class GetApi {
  constructor(opts, config, logger) {
    this.opts = opts
    this.logger = logger
    this.api = new Api(config)
  }
  run(cb) {
    this.api.makeRequest('GET', this.opts.route, JSON.parse(this.opts.params || '{}'), (err, response) => {
      if (err) return cb(err)
      this.logger.info(JSON.stringify(response, null, 2))
      return cb(null, response)
    })
  }
}
module.exports = GetApi
--------------------------------------------------------------------------------
/src/Grab.js:
--------------------------------------------------------------------------------
var Api = require('./Api')
var path = require('path')
var FileDownloader = require('./FileDownloader')
var async = require('async')
var fs = require('fs')
var _ = require('lodash')
var mkdirp = require('mkdirp')

class Grab {
  constructor(opts, config, logger) {
    this.opts = opts
    this.logger = logger
    this.api = new Api(config)
    this.dump = opts.dump
    this.saveLocation = path.resolve(process.cwd(), config.saveLocation)
    this.fileDownloader = new FileDownloader(logger)
    this.maxConnections = config.maxConnections || 200
  }
  formatResult(files) {
    let finalResult = []
    Object.keys(files.artifactsByTable).forEach((currentValue) => {
      const artifact = files.artifactsByTable[currentValue]
      artifact.files.forEach((currentFile) => {
        finalResult.push({
          tableName: currentValue,
          sequence: files.sequence,
          filename: currentFile.filename,
          url: currentFile.url
        })
      })
    })
    return finalResult
  }
  run(cb) {
    const saveFolder = path.join(this.saveLocation, this.dump)
    mkdirp(saveFolder, (err) => {
      if (err) return cb(err)
      this.api.getFilesForDump(this.dump, (err, files) => {
        if (err) return cb(err)

        const formattedTables = this.formatResult(files)

        async.mapLimit(formattedTables, this.maxConnections, (file, innerCb) => {
          this.fileDownloader.downloadToFile(
            file,
            {tableName: file.tableName, sequence: file.sequence},
            path.join(saveFolder, `${file.sequence.toString()}-${file.filename}`),
            innerCb
          )
        }, cb)
      })
    })
  }
}
module.exports = Grab
--------------------------------------------------------------------------------
/src/HistoricalRequests.js:
--------------------------------------------------------------------------------
var Api = require('./Api')
var lodash = require('lodash')

const customizer = (a, b) => {
  if (lodash.isArray(a)) {
    return a.concat(b);
  }
}

class HistoricalRequests {
  constructor(opts, config, logger) {
    this.opts = opts
    this.logger = logger
    this.api = new Api(config)
  }
  getRangeForFile(url) {
    // The URL format is:
    // https://<bucket>.s3.amazonaws.com/<account>/requests_split_historical/<dumpId>/requests/<range>/<filename>
    // so the range (e.g. '20180315_20180330') is the eighth slash-separated segment.
    return url.split('/')[7]
  }
  run(cb) {
    this.api.getSync((err, response) => {
      if (err) return cb(err)

      // Get all the not-partial files and figure out their range
      const files = response.files
        .filter((file) => file.table == 'requests')
        .filter((file) => !file.partial);
      const results = lodash.merge({}, ...files
        .map((file) => {
          return {
            [this.getRangeForFile(file.url)]: [file]
          }
        }), customizer)

      this.logger.info(JSON.stringify(results, null, 2))
      cb(null, results)
    })
  }
}
module.exports = HistoricalRequests
--------------------------------------------------------------------------------
/src/List.js:
--------------------------------------------------------------------------------
var Api = require('./Api')
var async = require('async')

class List {
  constructor(opts, config, logger) {
    this.opts = opts
    this.logger = logger
    this.api = new Api(config)
    if (opts.json == null) {
      opts.json = false
    }
    this.jsonOut = opts.json
  }
  run(cb) {
    this.api.getDumps((err, dumps) => {
      if (err) return cb(err)
      let shouldShowCompletedMessage = true
      if (!this.jsonOut) {
        dumps.map((dump) => {
          this.logger.info(`- Dump ID: [ ${dump.dumpId} ]
  Sequence: [ ${dump.sequence} ]
  Account ID: [ ${dump.accountId} ]
  Number of Files: [ ${dump.numFiles} ]
  Finished: [ ${dump.finished} ]
  Expires At: [ ${dump.expires} ]
  Created At: [ ${dump.createdAt} ]`)
        })
      } else {
        this.logger.info(JSON.stringify(dumps))
        shouldShowCompletedMessage = false
      }
      cb(null, shouldShowCompletedMessage)
    })
  }
}
module.exports = List
--------------------------------------------------------------------------------
/src/Sync.js:
--------------------------------------------------------------------------------
var path = require('path')
var fs = require('fs')
var Api = require('./Api')
var FileDownloader = require('./FileDownloader')
var async = require('async')
var mkdirp = require('mkdirp')
var glob = require('glob')
const DEFAULT_LIMIT = 50
const CONCURRENCY_LIMIT = 5
class Sync {
  constructor(opts, config, logger) {
    this.opts = opts
    this.logger = logger
    this.api = new Api(config)
    this.fileDownloader = new FileDownloader(logger)
    this.saveLocation = path.resolve(process.cwd(), config.saveLocation)
  }
  run(cb) {
    this.getSync((err, toSync) => {
      if (err) return cb(err)
      this.downloadSchema(toSync.schemaVersion, (err) => {
        if (err) return cb(err)
        async.mapLimit(toSync.files, CONCURRENCY_LIMIT, this.processFile.bind(this), (err, results) => {
          if (err) return cb(err)

          var splitResults = this.splitResults(results)
          this.logResults(splitResults)

          if (splitResults.erroredFiles.length) {
            this.logger.warn(`${splitResults.erroredFiles.length} files failed to download; please try running the sync again. If this error persists, open a ticket. No files will be cleaned up`)
            this.logger.warn(splitResults.erroredFiles)
            return cb(new Error('failed to download some files, try running sync again'))
          }

          this.cleanupFiles(results, (err) => {
            if (err) return cb(err)
            this.logger.info('finished cleanup, done!')
            cb()
          })
        })
      })
    })
  }
  splitResults(results) {
    var erroredFiles = results.filter((res) => res.error)
    var newDownloaded = results.filter((res) => res.didDownload).map((res) => res.filename)
    var cached = results.filter((res) => !res.error && !res.didDownload).map((res) => res.filename)
    return {erroredFiles, newDownloaded, cached, results}
  }
  logResults(splitResults) {
    if (splitResults.cached.length === splitResults.results.length) {
      this.logger.info('no new files were downloaded')
    } else {
      this.logger.info(`downloaded ${splitResults.newDownloaded.length} new files out of ${splitResults.results.length} total files`)
      this.logger.debug('new files downloaded', splitResults.newDownloaded)
      this.logger.debug('cached files', splitResults.cached)
    }
  }
  getSync(cb) {
    this.logger.info('fetching current list of files from API...')
    this.api.getSync((err, toSync) => {
      if (err && err.errorCode === 404) {
        this.logger.error('no files exist for account, cannot sync')
        err.silence = true
        return cb(err)
      }
      if (err) return cb(err)
      if (toSync.incomplete) this.logger.warn(`Could not retrieve a full list of files!
Some incremental data will be missing!`)

      this.logger.info(`total number of files: ${toSync.files.length} files`)
      cb(null, toSync)
    })
  }
  downloadSchema(schemaVersion, cb) {
    mkdirp(this.saveLocation, (err) => {
      if (err) return cb(err)
      this.api.getSchemaVersion(schemaVersion, (err, schema) => {
        if (err) return cb(err)
        fs.writeFile(path.join(this.saveLocation, 'schema.json'), JSON.stringify(schema, null, 2), cb)
      })
    })
  }
  buildDir(fileInfo) {
    return path.join(this.saveLocation, fileInfo.table)
  }
  buildTempPath(fileInfo) {
    return this.buildRealPath(fileInfo) + '.tmp'
  }
  buildRealPath(fileInfo) {
    return path.join(this.buildDir(fileInfo), fileInfo.filename)
  }
  processFile(fileInfo, cb) {
    var filename = this.buildRealPath(fileInfo)

    this.logger.info(`checking for existence of ${fileInfo.filename}`)
    this.fileExists(filename, (err, exists) => {
      if (err) return cb(err)
      if (!exists) return this.downloadFile(fileInfo, cb)
      this.logger.info(`already have ${fileInfo.filename}, no need to redownload`)
      return cb(null, {error: null, table: fileInfo.table, filename: fileInfo.filename, savedTo: filename, didDownload: false})
    })
  }
  fileExists(filename, cb) {
    fs.stat(filename, (err, stat) => {
      if (err && err.code !== 'ENOENT') return cb(err)
      if (err && err.code === 'ENOENT') return cb(null, false)
      cb(null, true)
    })
  }
  downloadFile(fileInfo, cb) {
    var filename = this.buildRealPath(fileInfo)
    var tmpFilename = this.buildTempPath(fileInfo)

    this.logger.info(`${filename} does not exist, downloading`)
    mkdirp(this.buildDir(fileInfo), (err) => {
      if (err) return cb(err)
      this.fileDownloader.downloadToFile(fileInfo, {tableName: fileInfo.table}, tmpFilename, (err) => {
        this.logger.info(`${filename} finished`)
        if (err) return cb(null, {error: err, table: fileInfo.table, filename: fileInfo.filename})
        this.logger.debug(`rename ${tmpFilename} to ${filename}`)
        fs.rename(tmpFilename, filename, (err) => {
          if (err) return cb(err)
          cb(null, {error: null, table: fileInfo.table, filename: fileInfo.filename, savedTo: filename, didDownload: true})
        })
      })
    })
  }
  cleanupFiles(downloadedFiles, cb) {
    var byFilename = {}
    for (var file of downloadedFiles) {
      byFilename[path.relative(this.saveLocation, file.savedTo)] = true
    }
    this.logger.info('searching for old files to remove')
    glob('**/*', {cwd: this.saveLocation, nodir: true}, (err, files) => {
      if (err) return cb(err)
      // rewrite paths because glob returns improper path separators on windows (/ instead of \)
      var toRemove = files.map((f) => f.split('/').join(path.sep)).filter((f) => {
        return f !== 'schema.json' && !byFilename[f]
      })
      this.logger.debug('will remove files', toRemove)
      async.map(toRemove.map((name) => path.join(this.saveLocation, name)), fs.unlink, cb)
    })
  }
}
module.exports = Sync
--------------------------------------------------------------------------------
/src/Unpack.js:
--------------------------------------------------------------------------------
const zlib = require("zlib")
const fs = require("fs")
const path = require("path")
const stream = require("stream")
const mkdirp = require("mkdirp")
const Multistream = require("multistream")
const pump = require('pump')
const async = require('async')
const split = require('split')
const mapS = require('map-stream')

class Unpack {
  constructor(opts, config, logger) {
    this.logger = logger
    this.tableFilter = opts.filter
    this.sourceLocation = path.resolve(process.cwd(), config.saveLocation)
    this.outputLocation = path.resolve(process.cwd(), config.unpackLocation || './unpackedFiles')
    this.schemaLocation = path.join(this.sourceLocation, 'schema.json')
  }
  buildTitlesHeader(table) {
    return table.columns.map((c) => {
      return c.name
    }).join("\t") + '\n'
  }
  loadSchema(cb) {
    fs.stat(this.schemaLocation, (err, stat) => {
      if (err) {
        this.logger.error('could not find schema, have you downloaded files yet?')
        return cb(err)
      }
      const schema = require(this.schemaLocation)
      cb(null, schema)
    })
  }
  addTitleAndUnzip(schema, sourceDir, outputDir, cb) {
    const toUnpack = []
    for (let key in schema.schema) {
      let table = schema.schema[key]
      if (this.tableFilter.indexOf(table.tableName) >= 0) {
        toUnpack.push(table)
      }
    }
    this.logger.debug(`will unpack ${toUnpack.map((p) => p.tableName).join(',')}`)
    if (toUnpack.length === 0) {
      this.logger.warn('no files matched filter, nothing will be unpacked')
      return cb()
    }
    async.each(toUnpack, (table, cb) => {
      const inputDir = path.join(sourceDir, table.tableName)
      const outputTableName = path.join(outputDir, table.tableName + '.txt')
      const outputStream = fs.createWriteStream(outputTableName)
      this.logger.info(`outputting ${table.tableName} to ${outputTableName}`)
      this.processTable(table, inputDir, outputStream, (err) => {
        if (err) return cb(err)
        this.logger.info(`finished with ${table.tableName}`)
        cb()
      })
    }, cb)
  }
  processTable(table, inputDir, outputStream, cb) {
    outputStream.write(this.buildTitlesHeader(table))
    fs.readdir(inputDir, (err, files) => {
      if (err) return cb(err)
      const streamCreators = files.map((f) => {
        // return a function so that multistream
        // lazily creates the streams
        return function() {
          const gunzip = zlib.createUnzip()
          return fs.createReadStream(path.join(inputDir, f))
            .pipe(gunzip)
            .pipe(split())
            .pipe(mapS((item, cb) => {
              if (item.trim() === '') return cb()
              // add newlines for each row
              return cb(null, item + '\n')
            }))
        }
      })
      const multi = new Multistream(streamCreators)
      pump(multi, outputStream, cb)
    })
  }
  run(cb) {
    this.loadSchema((err, schema) => {
      if (err) return cb(err)
      mkdirp(this.outputLocation, (err) => {
        if (err) return cb(err)
        this.addTitleAndUnzip(schema, this.sourceLocation, this.outputLocation, cb)
      })
    })
  }
}
module.exports = Unpack
--------------------------------------------------------------------------------
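The unpacked output is a tab-separated text file: one header line of column names taken from the schema, followed by the concatenated, gunzipped rows. A minimal sketch of reading the header back, assuming a `user_dim` table was already unpacked to the default location (reading the whole file into memory is for illustration only; real tables can be very large):

```javascript
// sketch: inspect the header row of an unpacked table
var fs = require('fs')

var firstLine = fs.readFileSync('./unpackedFiles/user_dim.txt', 'utf8').split('\n')[0]
var columns = firstLine.split('\t')
console.log(columns) // the column names depend on your schema version
```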
/src/apiAuth.js:
--------------------------------------------------------------------------------
var crypto = require('crypto')
var url = require('url')
var HMAC_ALG = 'sha256'
var apiAuth = {
  buildMessage: function(secret, timestamp, reqOpts) {
    var urlInfo = url.parse(reqOpts.path, true)
    var sortedParams = Object.keys(urlInfo.query).sort(function(a, b) {
      return a.localeCompare(b)
    })
    var sortedParts = []
    for (var i = 0; i < sortedParams.length; i++) {
      var paramName = sortedParams[i]
      sortedParts.push(paramName + '=' + urlInfo.query[paramName])
    }
    var parts = [
      reqOpts.method.toUpperCase(),
      reqOpts.host || '',
      reqOpts.contentType || '',
      reqOpts.contentMD5 || '',
      urlInfo.pathname,
      sortedParts.join('&') || '',
      timestamp,
      secret
    ]
    return parts.join('\n')
  },
  buildHmacSig: function(secret, timestamp, reqOpts) {
    var message = apiAuth.buildMessage(secret, timestamp, reqOpts)
    var hmac = crypto.createHmac(HMAC_ALG, new Buffer(secret))
    hmac.update(message)
    return hmac.digest('base64')
  },
  signRequest: function(key, secret, requestOpts, opts) {
    opts = opts || {}
    var urlInfo = url.parse(requestOpts.url)
    requestOpts.headers = requestOpts.headers || {}
    var dateVal = requestOpts.headers.Date || opts.date || new Date().toUTCString()
    // ensure the date header exists
    requestOpts.headers.Date = dateVal
    var reqOpts = {
      method: requestOpts.method || 'GET',
      path: urlInfo.path,
      host: opts.host || urlInfo.host,
      contentType: apiAuth.determineContentType(requestOpts, opts),
      contentMD5: opts.contentMD5 || (requestOpts.headers && requestOpts.headers['Content-MD5'] ? requestOpts.headers['Content-MD5'] : null),
    }
    var signature = apiAuth.buildHmacSig(secret, dateVal, reqOpts)
    requestOpts.headers.Authorization = 'HMACAuth ' + key + ':' + signature
    return requestOpts
  },
  determineContentType: function(requestOpts, opts) {
    if (opts && opts.contentType) return opts.contentType
    if (requestOpts.form) return 'application/x-www-form-urlencoded'
    if (requestOpts.formData) return 'multipart/form-data'
    if (requestOpts.json && requestOpts.body) return 'application/json'
    if (requestOpts.json && typeof requestOpts.json === 'object') return 'application/json'
    if (requestOpts.body && typeof requestOpts.body === 'string') return 'text/plain'
    if (requestOpts.body && Buffer.isBuffer(requestOpts.body)) return 'application/octet-stream'
    return null
  }
}

module.exports = apiAuth
--------------------------------------------------------------------------------
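To make the signing scheme concrete: `buildMessage` joins the method, host, content type, content MD5, path, sorted query string, the `Date` header value, and the secret with newlines, and the base64-encoded HMAC-SHA256 of that string becomes the `Authorization` header. A sketch with placeholder credentials:

```javascript
// sketch: signing a GET request by hand with placeholder credentials
var apiAuth = require('./apiAuth')

var signed = apiAuth.signRequest('myKey', 'mySecret', {
  method: 'GET',
  url: 'https://api.inshosteddata.com/api/account/self/dump?limit=5'
})

// The message that gets HMAC'd is, one part per line:
// GET / api.inshosteddata.com / '' / '' / /api/account/self/dump / limit=5 / <Date header> / mySecret
console.log(signed.headers.Date)
console.log(signed.headers.Authorization) // 'HMACAuth myKey:<base64 signature>'
```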
/src/cli.js:
--------------------------------------------------------------------------------
var path = require('path')
var fs = require('fs')
var yargs = require('yargs')
var logger = require('./logger')
var Sync = require('./Sync')
var Config = require('./ConfigTask')
var Unpack = require('./Unpack')
var Fetch = require('./Fetch')
var Grab = require('./Grab')
var List = require('./List')
var GetApi = require('./GetApi')
var HistoricalRequests = require('./HistoricalRequests')

var cli = yargs
  .usage('canvasDataCli <command>')
  .demand(1, 'must provide a valid command')
  .option('level', {
    alias: 'l',
    default: 'info',
    describe: `logging level to use, valid levels are ${logger.levels.join(', ')}`,
    type: 'string'
  })
  .command('sync', 'download the latest files from the API', (yargs) => {
    yargs.option('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use',
      type: 'string'
    })
    .help('help')
  })
  .command('sampleConfig', 'write a sample config file to config.js.sample')
  .command('unpack', 'decompress and merge files into a single file', (yargs) => {
    yargs.option('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use',
      type: 'string'
    })
    .option('filter', {
      alias: 'f',
      describe: 'list of tables to unpack, ex: -f user_dim account_dim',
      demand: true,
      array: true,
      type: 'string'
    })
    .help('help')
  })
  .command('fetch', 'fetch a single table', (yargs) => {
    yargs.options('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use',
      type: 'string'
    })
    .option('table', {
      alias: 't',
      describe: 'the table to fetch',
      demand: true,
      type: 'string'
    })
  })
  .command('grab', 'grab one specific dump', (yargs) => {
    yargs.options('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use',
      type: 'string'
    })
    .option('dump', {
      alias: 'd',
      describe: 'the dump to fetch',
      demand: true,
      type: 'string'
    })
  })
  .command('list', 'list all dumps', (yargs) => {
    yargs.options('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use.',
      type: 'string'
    }).option('json', {
      alias: 'j',
      describe: 'output in json format',
      demand: false,
      type: 'boolean'
    })
  })
  .command('api', 'submit API GET request', (yargs) => {
    yargs.options('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use.',
      type: 'string'
    }).option('route', {
      alias: 'r',
      describe: 'route to request',
      demand: true,
      type: 'string'
    }).option('params', {
      alias: 'p',
      describe: 'params in JSON form',
      demand: false,
      type: 'string'
    })
  })
  .command('historical-requests', 'show historical requests by date ranges', (yargs) => {
    yargs.options('config', {
      alias: 'c',
      demand: true,
      describe: 'the configuration file to use.',
      type: 'string'
    })
  })
  .help('help')
  .alias('v', 'version')
  .version(() => require('../package').version)
  .describe('v', 'show version information')

var runnerMap = {
  sync: {requireConfig: true, class: Sync},
  sampleConfig: {class: Config},
  unpack: {requireConfig: true, class: Unpack},
  fetch: {requireConfig: true, class: Fetch},
  grab: {requireConfig: true, class: Grab},
  list: {requireConfig: true, class: List},
  api: {requireConfig: true, class: GetApi},
  'historical-requests': {requireConfig: true, class: HistoricalRequests}
}
module.exports = {
  cli: cli,
  run(argv) {
    var command = argv._[0]
    var runner = runnerMap[command]
    var logLevel = argv.l || argv.level
    if (logLevel) {
      logger.setLevel(logLevel)
    }
    if (!runner) {
      logger.error('invalid command')
      cli.showHelp()
      process.exit(1)
    }
    var RunnerClass = runner.class
    var config = {}
    if (runner.requireConfig) {
      var configFile = argv.config
      var configPath = path.resolve(process.cwd(), configFile)
      try {
        fs.statSync(configPath)
      } catch (e) {
        logger.error(`config file at ${configPath} does not exist`)
        process.exit(1)
      }

      config = require(configPath)
      var isInvalidConfig = Config.validate(config)
      if (isInvalidConfig) {
        logger.error(`config at ${configPath} is invalid`)
        logger.error(isInvalidConfig)
        process.exit(1)
      }
    }

    var runner = new RunnerClass(argv, config, logger)
    runner.run((err, showComplete = true) => {
      if (err) {
        logger.error('an error occurred')
        logger.error(err)
        if (err.stack && !err.silence) logger.error(err.stack)
        process.exit(1)
      }
      if (showComplete) {
        logger.info(`${command} command completed successfully`)
      }
    })
  }
}
--------------------------------------------------------------------------------
/src/logger.js:
--------------------------------------------------------------------------------

var levels = ['debug', 'info', 'warn', 'log', 'error']
class Logger {
  constructor(level) {
    this._level = level
  }
  setLevel(level) {
    this._level = level
  }
  levelIndex(level) {
    return levels.indexOf(level)
  }
  shouldLog(level) {
    return this.levelIndex(level) >= this.levelIndex(this._level)
  }
  logIt(level, ...args) {
    if (this.shouldLog(level)) {
      if (console[level]) {
        console[level](...args)
      } else {
        console.log(...args)
      }
    }
  }
}

function makeLogger(level) {
  return function(...args) {
    this.logIt(level, ...args)
  }
}
for (var level of levels) {
  Logger.prototype[level] = makeLogger(level)
}
Logger.prototype.levels = levels
module.exports = new Logger('info')
--------------------------------------------------------------------------------
/test/ApiTest.js:
--------------------------------------------------------------------------------
const assert = require('assert')
const proxyquire = require('proxyquire')

let nextReqHandler = null
function buildMockResp(returnBody, statusCode, cb) {
  process.nextTick(() => {
    cb(null, {statusCode: statusCode}, returnBody)
  })
}
function mockRequest(opts, cb) {
  if (nextReqHandler) return nextReqHandler(opts, cb)
  return buildMockResp({mocked: true}, 200, cb)
}

const Api = proxyquire('../src/Api', {
  request: mockRequest,
  apiAuth: {
    signRequest(key, secret, opts) {
      opts.headers = opts.headers || {}
      opts.headers['Authorization'] = `${key}:${secret}`
      return opts
    }
  }
})

function buildTest(apiObj, endpoint, args, expectedMethod, expectedRoute, expectedParams, cb) {
  if (typeof expectedParams === 'function') {
    cb = expectedParams
    expectedParams = {}
  }
  function mockMakeRequest(method, route, maybeParams, cb) {
    if (typeof maybeParams === 'function') {
      cb = maybeParams
      maybeParams = {}
    }
    assert.equal(method, expectedMethod)
    assert.equal(route, expectedRoute)
    assert.deepEqual(maybeParams, expectedParams)
    cb(null, {mocked: true})
  }
  args.push(cb)
  apiObj.makeRequest = mockMakeRequest.bind(apiObj)
  apiObj[endpoint].apply(apiObj, args)
}

describe('ApiTest', () => {
  describe('buildProxyUrl', () => {
    it('should return null if not httpsProxy', () => {
      assert(Api.prototype.buildProxyUrl({httpsProxy: false}) == null)
    })
    it('should return simple url if a proxy', () => {
      assert.equal(Api.prototype.buildProxyUrl({httpsProxy: 'someplace.com'}), 'https://someplace.com')
    })
    it('should return with username and pass if defined', () => {
      const config = {httpsProxy: 'someplace.com', proxyUsername: 'bob', proxyPassword: 'pass'}
      assert.equal(Api.prototype.buildProxyUrl(config), 'https://bob:pass@someplace.com')
    })
  })
  describe('buildUrl', () => {
    it('should join the api url and query', () => {
      const api = new Api({apiUrl: 'http://myapi.com/api'})
      assert.equal(api.buildUrl('my/fancy/route', {foobar: true}), 'http://myapi.com/api/my/fancy/route?foobar=true')
    })
    it('should handle an extra slash', () => {
      const api = new Api({apiUrl: 'http://myapi.com/api/'})
      assert.equal(api.buildUrl('my/fancy/route', {foobar: true}), 'http://myapi.com/api/my/fancy/route?foobar=true')
    })
    it('should handle a preceding slash', () => {
      const api = new Api({apiUrl: 'http://myapi.com/api/'})
      assert.equal(api.buildUrl('/my/fancy/route', {foobar: true}), 'http://myapi.com/api/my/fancy/route?foobar=true')
    })
  })
  describe('makeRequest', () => {
    it('should return the body of a request with query', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      api.makeRequest('get', 'some/route', {foobar: true}, (err, body) => {
        assert.ifError(err)
        assert.deepEqual(body, {mocked: true})
        done()

      })
    })
    it('should return the body of a request without a query', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      api.makeRequest('get', 'some/route', (err, body) => {
        assert.ifError(err)
        assert.deepEqual(body, {mocked: true})
        done()

      })
    })
    it('should throw errors for non 200 status codes', (done) => {

      nextReqHandler = function(opts, cb) {
        cb(null, {statusCode: 500}, {error: true, message: 'you screwed it up'})
      }
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      api.makeRequest('get', 'some/route', (err, body) => {
        assert(err instanceof Error)
        done()
      })
    })
  })
  describe('getDumps', () => {
    it('should work without params to get index of dumps', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getDumps', [], 'GET', 'account/self/dump', done)
    })
    it('should work with params and still get the index of dumps', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getDumps', [{foobar: true}], 'GET', 'account/self/dump', {foobar: true}, done)
    })
  })
  describe('getLatestFiles', () => {
    it('should make get latest call', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getLatestFiles', [], 'GET', 'account/self/file/latest', done)
    })
  })
  describe('getFilesForDump', () => {
    it('make a call to get files for a given dump', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getFilesForDump', [1234], 'GET', 'account/self/file/byDump/1234', done)
    })
  })
  describe('getSync', () => {
    it('should make a sync call', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getSync', [], 'GET', 'account/self/file/sync', done)
    })
  })
  describe('getFilesForTable', () => {
    it('should work without params and get for a tablename', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getFilesForTable', ['someTable'], 'GET', 'account/self/file/byTable/someTable', done)
    })
    it('should work with params and get for a tablename', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(
        api, 'getFilesForTable', ['someTable', {foobar: true}],
        'GET', 'account/self/file/byTable/someTable', {foobar: true}, done
      )
    })
  })
  describe('getSchemas', () => {
    it('should make a call for all schema', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getSchemas', [], 'GET', 'schema', done)
    })
  })
  describe('getLatestSchema', () => {
    it('get the latest schema call', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getLatestSchema', [], 'GET', 'schema/latest', done)
    })
    it('should also work for the typo getLastestSchema', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getLastestSchema', [], 'GET', 'schema/latest', done)
    })
  })
  describe('getSchemaVersion', () => {
    it('should work to get a specific schema version', (done) => {
      const api = new Api({apiUrl: 'http://myapi.com/api', key: 'foo', secret: 'bar'})
      buildTest(api, 'getSchemaVersion', ['v1.0.0'], 'GET', 'schema/v1.0.0', done)
    })
  })
})
--------------------------------------------------------------------------------
/test/ConfigTaskTest.js:
--------------------------------------------------------------------------------
const assert = require('assert')
const fs = require('fs')

const ConfigTask = require('../src/ConfigTask')
const logger = require('./fixtures/mockLogger')

describe('ConfigTask', () => {
  describe('validate', () => {
    it('should return nothing if we got all the required fields', () => {
      const out = ConfigTask.validate({saveLocation: '/tmp', apiUrl: 'http://api.com', key: 'bob', secret: 'secret'})
      assert(out == null)
    })
    it('should return a string of missing fields', () => {
      const out = ConfigTask.validate({saveLocation: '/tmp', apiUrl: 'http://api.com'})
      assert.equal(out, 'missing key, secret fields in config')
    })
  })
  describe('constructor', () => {
    it('should not blow up', () => {
      new ConfigTask({}, {}, logger)
    })
  })
  describe('run', () => {
    it('should write a sample file to current dir', (done) => {
      new ConfigTask({}, {}, logger).run((err) => {
        assert.ifError(err)
        fs.access('./config.js.sample', fs.F_OK, (err) => {
          assert.ifError(err)
          fs.unlink('./config.js.sample', done)
        })
      })
    })
  })
})
--------------------------------------------------------------------------------
/test/FetchTest.js:
--------------------------------------------------------------------------------
require('mocha-sinon')

const os = require('os')
const path = require('path')
const crypto = require('crypto')

const chai = require('chai')
chai.use(require('chai-fs'))
const assert = chai.assert

const _ = require('lodash')
const rimraf = require('rimraf')
const touch = require('touch')
const logger = require('./fixtures/mockLogger')
const mockApi = require('./fixtures/mockApiObjects')

const Fetch = require('../src/Fetch')
function
buildTestFetch(tableName) { 19 | const tmpDir = path.join(os.tmpdir(), crypto.randomBytes(12).toString('hex')) 20 | const config = { 21 | tmpDir: tmpDir, 22 | saveLocation: path.join(tmpDir, 'dataFiles'), 23 | unpackLocation: path.join(tmpDir, 'unpackedFiles'), 24 | apiUrl: 'https://mockApi/api', 25 | key: 'fakeKey', 26 | secret: 'fakeSecret' 27 | } 28 | return {fetch: new Fetch({table: tableName || 'account_dim'}, config, logger), config} 29 | } 30 | 31 | function cleanupFetch(fetch, config, cb) { 32 | rimraf(config.tmpDir, cb) 33 | } 34 | 35 | describe('FetchTest', function() { 36 | describe('getNewest', () => { 37 | it('should return only the most recent dump for a non-partial table', () => { 38 | var {fetch, config} = buildTestFetch('user_dim') 39 | var ret = fetch.getNewest({ 40 | table: 'user_dim', 41 | history: [ 42 | { 43 | dumpId: '1234', 44 | sequence: 3, 45 | partial: false, 46 | files: [ 47 | {url: 'http://url_to_download/file1.tar.gz', filename: 'file1.tar.gz'}, 48 | {url: 'http://url_to_download/file2.tar.gz', filename: 'file2.tar.gz'} 49 | ] 50 | }, 51 | { 52 | dumpId: '1234', 53 | sequence: 2, 54 | partial: true, 55 | files: [ 56 | {url: 'http://url_to_download/file1.tar.gz', filename: 'file1.tar.gz'}, 57 | {url: 'http://url_to_download/file2.tar.gz', filename: 'file2.tar.gz'} 58 | ] 59 | } 60 | ] 61 | }) 62 | assert.equal(ret.length, 2) 63 | assert.equal(ret[0].sequence, 3) 64 | assert.equal(ret[0].filename, 'file1.tar.gz') 65 | assert.equal(ret[1].sequence, 3) 66 | assert.equal(ret[1].filename, 'file2.tar.gz') 67 | }) 68 | it('should return multiple dumps for partial tables', () => { 69 | var {fetch, config} = buildTestFetch('user_dim') 70 | var ret = fetch.getNewest({ 71 | table: 'user_dim', 72 | history: [ 73 | { 74 | dumpId: '1234', 75 | sequence: 3, 76 | partial: true, 77 | files: [ 78 | {url: 'http://url_to_download/file1.tar.gz', filename: 'file1.tar.gz'}, 79 | {url: 'http://url_to_download/file2.tar.gz', filename: 'file2.tar.gz'} 80 | ] 81 | }, 82 | { 83 | dumpId: '1232', 84 | sequence: 2, 85 | partial: false, 86 | files: [ 87 | {url: 'http://url_to_download/file1.tar.gz', filename: 'filef0.tar.gz'}, 88 | {url: 'http://url_to_download/file2.tar.gz', filename: 'filef1.tar.gz'} 89 | ] 90 | }, 91 | { 92 | dumpId: '1231', 93 | sequence: 1, 94 | partial: true, 95 | files: [ 96 | {url: 'http://url_to_download/file1.tar.gz', filename: 'file1.tar.gz'}, 97 | {url: 'http://url_to_download/file2.tar.gz', filename: 'file2.tar.gz'} 98 | ] 99 | } 100 | ] 101 | }) 102 | assert.equal(ret.length, 4) 103 | assert.equal(ret[0].sequence, 3) 104 | assert.equal(ret[0].filename, 'file1.tar.gz') 105 | assert.equal(ret[1].sequence, 3) 106 | assert.equal(ret[1].filename, 'file2.tar.gz') 107 | assert.equal(ret[2].sequence, 2) 108 | assert.equal(ret[2].filename, 'filef0.tar.gz') 109 | assert.equal(ret[3].sequence, 2) 110 | assert.equal(ret[3].filename, 'filef1.tar.gz') 111 | }) 112 | it('should merge duplicate non-partial sequences', () => { 113 | var {fetch, config} = buildTestFetch('requests') 114 | var ret = fetch.getNewest({ 115 | table: 'requests', 116 | history: [ 117 | { 118 | dumpId: '1234', 119 | sequence: 1, 120 | partial: false, 121 | files: [ 122 | {url: 'http://url_to_download/file1.tar.gz', filename: 'file1.tar.gz'}, 123 | {url: 'http://url_to_download/file2.tar.gz', filename: 'file2.tar.gz'} 124 | ] 125 | }, 126 | { 127 | dumpId: '1234', 128 | sequence: 1, 129 | partial: false, 130 | files: [ 131 | {url: 'http://url_to_download/file3.tar.gz', filename: 'file3.tar.gz'}, 132 | {url: 
'http://url_to_download/file4.tar.gz', filename: 'file4.tar.gz'} 133 | ] 134 | } 135 | ] 136 | }) 137 | assert.equal(ret.length, 4) 138 | assert.equal(ret[0].sequence, 1) 139 | assert.equal(ret[0].filename, 'file1.tar.gz') 140 | assert.equal(ret[1].sequence, 1) 141 | assert.equal(ret[1].filename, 'file2.tar.gz') 142 | assert.equal(ret[2].sequence, 1) 143 | assert.equal(ret[2].filename, 'file3.tar.gz') 144 | assert.equal(ret[3].sequence, 1) 145 | assert.equal(ret[3].filename, 'file4.tar.gz') 146 | }) 147 | }) 148 | describe('run', () => { 149 | it('should write files for a non-partial table', function(done) { 150 | const tableName = 'user_dim' 151 | var {fetch, config} = buildTestFetch(tableName) 152 | var apiStub = this.sinon.stub(fetch.api, 'getFilesForTable', (opts, cb) => { 153 | cb(null, mockApi.buildDumpHistory({table: tableName})) 154 | }) 155 | var downloadStub = this.sinon.stub(fetch.fileDownloader, 'downloadToFile', (filename, opts, savePath, cb) => { 156 | touch(savePath, cb) 157 | }) 158 | fetch.run((err, res) => { 159 | assert.ifError(err) 160 | assert.equal(res.length, 2) 161 | assert.equal(downloadStub.callCount, 2) 162 | assert.isFile(path.join(config.saveLocation, tableName, '0-filename-1.tar.gz')) 163 | cleanupFetch(fetch, config, done) 164 | }) 165 | }) 166 | it('should write files for a partial table', function(done) { 167 | const tableName = 'partial_dim' 168 | var {fetch, config} = buildTestFetch(tableName) 169 | var apiStub = this.sinon.stub(fetch.api, 'getFilesForTable', (opts, cb) => { 170 | var entryOpts = [ 171 | {sequence: 3, partial: true}, 172 | {sequence: 2, partial: false}, 173 | {sequence: 1, partial: true} 174 | ] 175 | cb(null, mockApi.buildDumpHistory({table: tableName, numEntries: 3, entryOpts})) 176 | }) 177 | var downloadStub = this.sinon.stub(fetch.fileDownloader, 'downloadToFile', (filename, opts, savePath, cb) => { 178 | touch(savePath, cb) 179 | }) 180 | fetch.run((err, res) => { 181 | assert.ifError(err) 182 | assert.equal(res.length, 4) 183 | assert.equal(downloadStub.callCount, 4) 184 | assert.isFile(path.join(config.saveLocation, tableName, '3-filename-1.tar.gz')) 185 | assert.isFile(path.join(config.saveLocation, tableName, '2-filename-1.tar.gz')) 186 | assert.notPathExists(path.join(config.saveLocation, tableName, '1-filename-1.tar.gz')) 187 | cleanupFetch(fetch, config, done) 188 | }) 189 | }) 190 | }) 191 | }) 192 | -------------------------------------------------------------------------------- /test/FileDownloadTest.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | const os = require('os') 3 | const fs = require('fs') 4 | const path = require('path') 5 | const rimraf = require('rimraf') 6 | const mkdirp = require('mkdirp') 7 | const proxyquire = require('proxyquire') 8 | 9 | const Re = require('re') 10 | 11 | let nextReqHandler = null 12 | const defaultFile = path.join(__dirname, 'fixtures', 'mockDump', 'one', 'file.gz') 13 | function buildMockResp(readStream, statusCode) { 14 | process.nextTick(() => { 15 | readStream.emit('response', {statusCode}) 16 | }) 17 | return readStream 18 | } 19 | function mockRequest(opts) { 20 | if (nextReqHandler) return nextReqHandler(opts) 21 | return buildMockResp(fs.createReadStream(defaultFile), 200) 22 | } 23 | 24 | const logger = require('./fixtures/mockLogger') 25 | const FileDownloader = proxyquire('../src/FileDownloader', {request: mockRequest}) 26 | 27 | function fileExists(filePath, cb) { 28 | fs.stat(filePath, (err, stats) => { 29 | 
if (err && err.code === 'ENOENT') return cb(null, false) 30 | if (err) return cb(err) 31 | if (stats && stats.size === 0) return cb(null, false) 32 | cb(null, true) 33 | }) 34 | } 35 | 36 | 37 | describe('FileDownloader', () => { 38 | const baseDir = path.join(os.tmpdir(), 'fileDownloadTest') 39 | before((done) => { 40 | mkdirp(baseDir, done) 41 | }) 42 | after((done) => { 43 | rimraf(baseDir, done) 44 | }) 45 | describe('downloadToFile', () => { 46 | const fd = new FileDownloader(logger, { 47 | retries: 3, 48 | strategy: { 49 | "type": Re.STRATEGIES.CONSTANT, 50 | "initial": 20 51 | } 52 | }) 53 | const dlLink = { 54 | filename: 'mock', 55 | url: 'mock' 56 | } 57 | const artifact = {tableName: 'fake'} 58 | it('should work to download a file', (done) => { 59 | const dest = path.join(baseDir, 'success') 60 | fd.downloadToFile(dlLink, artifact, dest, (err) => { 61 | assert.ifError(err) 62 | fileExists(dest, (err, exists) => { 63 | assert.ifError(err) 64 | assert(exists) 65 | done() 66 | }) 67 | }) 68 | }) 69 | it('should retry on a failure and work', (done) => { 70 | const dest = path.join(baseDir, 'one_fail') 71 | let handlerCalled = false 72 | nextReqHandler = function(opts) { 73 | handlerCalled = true 74 | nextReqHandler = function() { 75 | return buildMockResp(fs.createReadStream(defaultFile), 200) 76 | } 77 | return buildMockResp(fs.createReadStream(defaultFile), 404) 78 | } 79 | fd.downloadToFile(dlLink, artifact, dest, (err) => { 80 | assert(handlerCalled) 81 | assert.ifError(err) 82 | fileExists(dest, (err, exists) => { 83 | assert.ifError(err) 84 | assert(exists) 85 | done() 86 | }) 87 | }) 88 | }) 89 | it('should error out after a few retries', (done) => { 90 | nextReqHandler = function(opts) { 91 | return buildMockResp(fs.createReadStream(defaultFile), 404) 92 | } 93 | const dest = path.join(baseDir, 'fail') 94 | fd.downloadToFile(dlLink, artifact, dest, (err) => { 95 | assert(err instanceof Error) 96 | done() 97 | }) 98 | }) 99 | }) 100 | }) 101 | -------------------------------------------------------------------------------- /test/GetApiTest.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | 3 | const GetApi = require('../src/GetApi') 4 | const logger = require('./fixtures/mockLogger') 5 | 6 | describe('GetApi', () => { 7 | describe('run', () => { 8 | it('should get successfully', (done) => { 9 | const expected = {success: true} 10 | const get = new GetApi({}, {}, logger) 11 | get.api.makeRequest = (method, path, params, cb) => cb(null, expected) 12 | 13 | get.run((err, response) => { 14 | assert.ifError(err) 15 | assert.deepEqual(response, expected) 16 | done() 17 | }) 18 | }) 19 | 20 | it('should propagate errors', (done) => { 21 | const expected = {success: false} 22 | const get = new GetApi({}, {}, logger) 23 | get.api.makeRequest = (method, path, params, cb) => cb(expected, null) 24 | 25 | get.run((err) => { 26 | assert.deepEqual(err, expected) 27 | done() 28 | }) 29 | }) 30 | }) 31 | }) 32 | -------------------------------------------------------------------------------- /test/GrabTest.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | const fs = require('fs') 3 | const path = require('path') 4 | const os = require('os') 5 | const rimraf = require('rimraf') 6 | 7 | const Grab = require('../src/Grab') 8 | const logger = require('./fixtures/mockLogger') 9 | 10 | function getFilesMock(dumpId, cb) { 11 | cb(null, { 12 | 
12 | sequence: 2, 13 | artifactsByTable: { 14 | table1: { 15 | files: [ 16 | {filename: 'table1-0.gz', url:'http://myapi.com/dl/table1-0.gz'}, 17 | {filename: 'table1-1.gz', url:'http://myapi.com/dl/table1-1.gz'} 18 | ] 19 | }, 20 | table2: { 21 | files: [ 22 | {filename: 'table2-0.gz', url:'http://myapi.com/dl/table2-0.gz'} 23 | ] 24 | } 25 | } 26 | }) 27 | } 28 | 29 | describe('Grab', () => { 30 | describe('run', () => { 31 | it('should be able to download a dump to a given folder', (done) => { 32 | const tmpDir = path.join(os.tmpdir(), 'grab_test') 33 | const grab = new Grab({dump: 'abcd'}, {apiUrl: 'http://myApi', saveLocation: tmpDir}, logger) 34 | grab.api.getFilesForDump = getFilesMock 35 | // just collect the files we were called with, make sure it looks sane instead of validating from FS 36 | const gotFiles = [] 37 | grab.fileDownloader.downloadToFile = function(fileInfo, artifactInfo, dest, cb) { 38 | gotFiles.push(dest.replace(tmpDir, '')) 39 | cb() 40 | } 41 | const expected = ['/abcd/2-table1-0.gz', '/abcd/2-table1-1.gz', '/abcd/2-table2-0.gz'] 42 | // make sure folder is there though 43 | grab.run((err) => { 44 | assert.ifError(err) 45 | assert.deepEqual(gotFiles, expected) 46 | fs.access(tmpDir, fs.F_OK, (err) => { 47 | assert.ifError(err) 48 | rimraf(tmpDir, done) 49 | }) 50 | }) 51 | }) 52 | }) 53 | }) 54 | -------------------------------------------------------------------------------- /test/HistoricalRequestsTest.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | 3 | const HistoricalRequests = require('../src/HistoricalRequests') 4 | const logger = require('./fixtures/mockLogger') 5 | 6 | const assertHistoricalRequests = (input, expected, done) => { 7 | const hr = new HistoricalRequests({}, {}, logger) 8 | hr.api.getSync = (cb) => cb(null, input) 9 | 10 | hr.run((err, response) => { 11 | assert.ifError(err) 12 | assert.deepEqual(response, expected) 13 | done() 14 | }) 15 | } 16 | 17 | describe('HistoricalRequests', () => { 18 | describe('run', () => { 19 | it('ignores daily dumps', (done) => { 20 | const input = { 21 | schemaVersion: 'test', 22 | files: [ 23 | { 24 | url: 'https://s3.amazonaws.com/timestamp/dw_split/account/requests/b%3D1/part-1', 25 | filename: 'requests-1.gz', 26 | table: 'requests', 27 | partial: true 28 | } 29 | ] 30 | } 31 | const expected = {} 32 | 33 | assertHistoricalRequests(input, expected, done) 34 | }) 35 | 36 | it('ignores other tables', (done) => { 37 | const input = { 38 | schemaVersion: 'test', 39 | files: [ 40 | { 41 | url: 'https://s3.amazonaws.com/timestamp/dw_split/account/users/b%3D1/part-1', 42 | filename: 'users-1.gz', 43 | table: 'users', 44 | partial: false 45 | } 46 | ] 47 | } 48 | const expected = {} 49 | 50 | assertHistoricalRequests(input, expected, done) 51 | }) 52 | 53 | it('groups ranges', (done) => { 54 | const input = { 55 | schemaVersion: 'test', 56 | files: [ 57 | { 58 | url: 'https://s3.amazonaws.com/timestamp/requests_split_historical/account/requests/range-1/0/part-1', 59 | filename: 'requests-1.gz', 60 | table: 'requests', 61 | partial: false 62 | }, 63 | { 64 | url: 'https://s3.amazonaws.com/timestamp/requests_split_historical/account/requests/range-1/0/part-2', 65 | filename: 'requests-2.gz', 66 | table: 'requests', 67 | partial: false 68 | }, 69 | { 70 | url: 'https://s3.amazonaws.com/timestamp/requests_split_historical/account/requests/range-2/0/part-1', 71 | filename: 'requests-1.gz', 72 | table: 'requests', 73 | partial: false 74 | } 75 
| ] 76 | } 77 | const expected = { 78 | 'range-1': [ 79 | { 80 | url: 'https://s3.amazonaws.com/timestamp/requests_split_historical/account/requests/range-1/0/part-1', 81 | filename: 'requests-1.gz', 82 | table: 'requests', 83 | partial: false 84 | }, 85 | { 86 | url: 'https://s3.amazonaws.com/timestamp/requests_split_historical/account/requests/range-1/0/part-2', 87 | filename: 'requests-2.gz', 88 | table: 'requests', 89 | partial: false 90 | } 91 | ], 92 | 'range-2': [ 93 | { 94 | url: 'https://s3.amazonaws.com/timestamp/requests_split_historical/account/requests/range-2/0/part-1', 95 | filename: 'requests-1.gz', 96 | table: 'requests', 97 | partial: false 98 | } 99 | ] 100 | } 101 | assertHistoricalRequests(input, expected, done) 102 | }) 103 | 104 | it('should propagate errors', (done) => { 105 | const expected = {success: false} 106 | const hr = new HistoricalRequests({}, {}, logger) 107 | hr.api.getSync = (cb) => cb(expected, null) 108 | 109 | hr.run((err) => { 110 | assert.deepEqual(err, expected) 111 | done() 112 | }) 113 | }) 114 | }) 115 | }) 116 | -------------------------------------------------------------------------------- /test/ListTest.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | 3 | const List = require('../src/List') 4 | const logger = require('./fixtures/mockLogger') 5 | 6 | function listMock(cb) { 7 | cb(null, [ 8 | {dumpId: 'a', sequence: 1, accountId: 'fake', numFiles: 2, finished: true, expires: 'd', createdAt: 'd'}, 9 | {dumpId: 'b', sequence: 1, accountId: 'fake', numFiles: 2, finished: true, expires: 'd', createdAt: 'd'}, 10 | {dumpId: 'c', sequence: 1, accountId: 'fake', numFiles: 2, finished: true, expires: 'd', createdAt: 'd'}, 11 | {dumpId: 'd', sequence: 1, accountId: 'fake', numFiles: 2, finished: true, expires: 'd', createdAt: 'd'} 12 | ]) 13 | } 14 | 15 | describe('List', () => { 16 | describe('run', () => { 17 | it('should list dumps', (done) => { 18 | let callCount = 0 19 | logger.info = function() { 20 | callCount++ 21 | } 22 | const list = new List({}, {}, logger) 23 | list.api.getDumps = listMock 24 | list.run((err) => { 25 | assert.equal(callCount, 4) 26 | done(err) 27 | }) 28 | }) 29 | it('should return an error if that fails', (done) => { 30 | const list = new List({}, {}, logger) 31 | list.api.getDumps = function(cb) { 32 | cb(new Error('bah')) 33 | } 34 | list.run((err) => { 35 | assert(err instanceof Error) 36 | done() 37 | }) 38 | }) 39 | }) 40 | }) 41 | -------------------------------------------------------------------------------- /test/SyncTest.js: -------------------------------------------------------------------------------- 1 | require('mocha-sinon') 2 | 3 | const os = require('os') 4 | const path = require('path') 5 | const crypto = require('crypto') 6 | const fs = require('fs') 7 | 8 | const assert = require('chai').assert 9 | const rimraf = require('rimraf') 10 | const logger = require('./fixtures/mockLogger') 11 | const mockApi = require('./fixtures/mockApiObjects') 12 | const async = require('async') 13 | const mkdirp = require('mkdirp') 14 | 15 | 16 | const Sync = require('../src/Sync') 17 | function buildTestSync() { 18 | const tmpDir = path.join(os.tmpdir(), crypto.randomBytes(12).toString('hex')) 19 | const config = { 20 | tmpDir: tmpDir, 21 | saveLocation: path.join(tmpDir, 'dataFiles'), 22 | unpackLocation: path.join(tmpDir, 'unpackedFiles'), 23 | apiUrl: 'https://mockApi/api', 24 | key: 'fakeKey', 25 | secret: 'fakeSecret' 26 | } 27 | var sync 
= new Sync({}, config, logger) 28 | sync.testConfig = config 29 | return {sync: sync} 30 | } 31 | 32 | function cleanupSync(sync, cb) { 33 | rimraf(sync.testConfig.tmpDir, cb) 34 | } 35 | 36 | function touchFile(filename, cb) { 37 | if (typeof filename === 'object') { 38 | filename = filename.savedTo 39 | } 40 | mkdirp(path.dirname(filename), (err) => { 41 | if (err) return cb(err) 42 | fs.open(filename, 'w', (err, fd) => { 43 | if (err) return cb(err) 44 | fs.close(fd, cb) 45 | }) 46 | }) 47 | } 48 | function fileExists(filename, cb) { 49 | if (typeof filename === 'object') { 50 | filename = filename.savedTo 51 | } 52 | fs.stat(filename, (err, fileStat) => { 53 | if (err && err.code !== 'ENOENT') return cb(err) 54 | if (err && err.code === 'ENOENT') return cb(null, false) 55 | cb(null, true) 56 | }) 57 | } 58 | 59 | describe('SyncTest', function() { 60 | describe('run', () => { 61 | it('should successfully call back when done', (done) => { 62 | const config = {saveLocation: '/tmp'} 63 | const sync = new Sync({}, config, logger) 64 | sync.getSync = function(cb) { 65 | cb(null, {files: [1, 2, 3]}) 66 | } 67 | sync.downloadSchema = function(version, cb) { 68 | cb() 69 | } 70 | sync.processFile = function(fileInfo, cb) { 71 | if (fileInfo === 2) { 72 | return cb(null, {filename: fileInfo}) 73 | } 74 | cb(null, {didDownload: true, filename: fileInfo}) 75 | } 76 | sync.cleanupFiles = function(results, cb) { 77 | cb() 78 | } 79 | sync.run(done) 80 | }) 81 | it('should fail when some files error', (done) => { 82 | const config = {saveLocation: '/tmp'} 83 | const sync = new Sync({}, config, logger) 84 | sync.getSync = function(cb) { 85 | cb(null, {files: [1, 2, 3]}) 86 | } 87 | sync.downloadSchema = function(version, cb) { 88 | cb() 89 | } 90 | sync.processFile = function(fileInfo, cb) { 91 | if (fileInfo === 2) { 92 | return cb(null, {error: true}) 93 | } 94 | cb(null, {didDownload: true, filename: fileInfo}) 95 | } 96 | sync.cleanupFiles = function(results, cb) { 97 | cb() 98 | } 99 | sync.run((err) => { 100 | assert(err instanceof Error) 101 | done() 102 | }) 103 | }) 104 | }) 105 | describe('getSync', () => { 106 | const config = {saveLocation: '/tmp'} 107 | const sync = new Sync({}, config, logger) 108 | it('should callback with a sync if api call worked', (done) => { 109 | sync.api.getSync = function(cb) { 110 | cb(null, {files: [1, 2, 3]}) 111 | } 112 | sync.getSync((err, data) => { 113 | assert.ifError(err) 114 | assert.deepEqual(data, {files: [1, 2, 3]}) 115 | done() 116 | }) 117 | }) 118 | it('should callback with an error if api sync failed', (done) => { 119 | sync.api.getSync = function(cb) { 120 | cb(new Error('bah')) 121 | } 122 | sync.getSync((err, data) => { 123 | assert(err instanceof Error) 124 | done() 125 | }) 126 | }) 127 | it('should silence 404s', (done) => { 128 | sync.api.getSync = function(cb) { 129 | const e = new Error('bah') 130 | e.errorCode = 404 131 | cb(e) 132 | } 133 | sync.getSync((err, data) => { 134 | assert(err instanceof Error) 135 | assert(err.silence) 136 | done() 137 | }) 138 | }) 139 | 140 | }) 141 | describe('dirHandling', () => { 142 | var {sync} = buildTestSync() 143 | after((done) => cleanupSync(sync, done)) 144 | it('should build dirs properly', () => { 145 | assert.include(sync.buildDir({table: 'foobar'}), '/foobar', 'properly joins from a file object to get a directory') 146 | assert.include(sync.buildRealPath({table: 'foobar', filename: 'foobar-1234'}), '/foobar/foobar-1234', 'properly joins from a file object to get a full path') 147 | 
assert.notEqual(sync.buildRealPath({table: 'foobar', filename: 'foobar-1234'}), '/foobar/foobar-1234', 'returns the full save path, not just the bare /table/filename suffix') 148 | }) 149 | }) 150 | describe('fileExists', () => { 151 | var {sync} = buildTestSync() 152 | after((done) => cleanupSync(sync, done)) 153 | it('should return true when a file does exist', (done) => { 154 | var file = path.join(sync.saveLocation, 'shouldExist') 155 | touchFile(file, (err) => { 156 | assert.ifError(err) 157 | sync.fileExists(file, (err, exists) => { 158 | assert.ifError(err) 159 | assert(exists) 160 | done() 161 | }) 162 | }) 163 | }) 164 | it('should return false when a file does not exist', (done) => { 165 | sync.fileExists(path.join(sync.saveLocation, 'fakeFile'), (err, exists) => { 166 | assert.ifError(err) 167 | assert(!exists) 168 | done() 169 | }) 170 | }) 171 | }) 172 | describe('downloadSchema', () => { 173 | var {sync} = buildTestSync() 174 | after((done) => cleanupSync(sync, done)) 175 | it('should ensure the save location exists', function(done) { 176 | var schemaStub = this.sinon.stub(sync.api, 'getSchemaVersion') 177 | schemaStub.onFirstCall().callsArgWith(1, null, {schemaVersion: '1.0.0'}) 178 | fs.stat(sync.saveLocation, (err) => { 179 | assert.equal(err.code, 'ENOENT') 180 | sync.downloadSchema('1.0.0', (err) => { 181 | assert.ifError(err) 182 | fs.stat(path.join(sync.saveLocation, 'schema.json'), (err, schema) => { 183 | assert.ifError(err) 184 | assert(schema.isFile()) 185 | done() 186 | }) 187 | }) 188 | }) 189 | }) 190 | }) 191 | describe('processFile', () => { 192 | var {sync} = buildTestSync() 193 | after((done) => cleanupSync(sync, done)) 194 | it('does not download the file if it exists', function(done) { 195 | var existsStub = this.sinon.stub(sync, 'fileExists') 196 | existsStub.onFirstCall().callsArgWith(1, null, true) 197 | var downloadSpy = this.sinon.spy(sync, 'downloadFile') 198 | sync.processFile({filename: 'existsFile.gz', table: 'exists'}, (err, res) => { 199 | assert.ifError(err) 200 | assert(!res.didDownload) 201 | assert.equal(res.filename, 'existsFile.gz') 202 | assert.equal(res.table, 'exists') 203 | assert.equal(res.savedTo, path.join(sync.saveLocation, 'exists', 'existsFile.gz')) 204 | assert(!downloadSpy.called) 205 | done() 206 | }) 207 | }) 208 | it('does call to download if the file does not exist', function(done) { 209 | var existsStub = this.sinon.stub(sync, 'fileExists') 210 | existsStub.onFirstCall().callsArgWith(1, null, false) 211 | var downloadStub = this.sinon.stub(sync, 'downloadFile') 212 | downloadStub.onFirstCall().callsArgWith(1, null, {didDownload: true}) 213 | 214 | sync.processFile({filename: 'notExistsFile.gz', table: 'notExists'}, (err, res) => { 215 | assert.ifError(err) 216 | assert(res.didDownload) 217 | assert(downloadStub.called) 218 | done() 219 | }) 220 | }) 221 | it('handles if fileExists throws some other error', function(done) { 222 | var existsStub = this.sinon.stub(sync, 'fileExists') 223 | existsStub.onFirstCall().callsArgWith(1, new Error('unexpected'), false) 224 | 225 | sync.processFile({filename: 'notExistsFile.gz', table: 'notExists'}, (err, res) => { 226 | assert(err) 227 | assert.instanceOf(err, Error) 228 | done() 229 | }) 230 | }) 231 | }) 232 | describe('downloadFile', () => { 233 | var {sync} = buildTestSync() 234 | after((done) => cleanupSync(sync, done)) 235 | it('creates the enclosing folder if it does not exist', function(done) { 236 | var fileObj = {filename: 'foobar.gz', table: 'bubbles'} 237 | var downloaderStub = 
this.sinon.stub(sync.fileDownloader, 'downloadToFile', (fileInfo, info, dest, cb) => fs.open(dest, 'w', cb)) 238 | fs.stat(sync.buildDir(fileObj), (err) => { 239 | assert.equal(err.code, 'ENOENT') 240 | sync.downloadFile(fileObj, (err, res) => { 241 | assert.ifError(err) 242 | assert(res.didDownload) 243 | assert.equal(res.table, fileObj.table) 244 | assert.equal(res.filename, fileObj.filename) 245 | assert.include(res.savedTo, fileObj.filename) 246 | fs.stat(res.savedTo, (err, fileStat) => { 247 | assert.ifError(err) 248 | assert(fileStat.isFile()) 249 | fs.stat(sync.buildTempPath(fileObj), (err) => { 250 | assert.equal(err.code, 'ENOENT', 'does not leave a straggling temporary file') 251 | done() 252 | }) 253 | }) 254 | }) 255 | }) 256 | }) 257 | 258 | it('does not return an error if the download fails', function(done) { 259 | var fileObj = {filename: 'error.gz', table: 'error'} 260 | var downloaderStub = this.sinon.stub(sync.fileDownloader, 'downloadToFile') 261 | downloaderStub.onFirstCall().callsArgWith(3, new Error('unexpected')) 262 | sync.downloadFile(fileObj, (err, res) => { 263 | assert.ifError(err) 264 | assert.instanceOf(res.error, Error) 265 | fs.stat(sync.buildRealPath(fileObj), (err) => { 266 | assert.equal(err.code, 'ENOENT', 'does not leave a real file which would break future syncs') 267 | done() 268 | }) 269 | }) 270 | }) 271 | }) 272 | describe('cleanupFiles', () => { 273 | var {sync} = buildTestSync() 274 | after((done) => cleanupSync(sync, done)) 275 | function attachSavedTo(obj) { 276 | obj.savedTo = sync.buildRealPath(obj) 277 | return obj 278 | } 279 | it('should not delete files that are part of the downloaded set', (done) => { 280 | var validFiles = [ 281 | {table: 'foo', filename: '1.gz',}, 282 | {table: 'foo', filename: '2.gz'}, 283 | {table: 'bar', filename: '1.gz'}, 284 | {table: 'with_', filename: 'hello'}, 285 | ].map(attachSavedTo) 286 | var toDelete = [ 287 | {table: 'deleteMe', filename: 'asdfasdfsad.gz'}, 288 | {table: '_reallyDeleteMe', filename: 'boop.gz'}, 289 | {table: 'silylongnamehopefullythisshouldworkprettymucheverywherebutifnotwewillatleasthaveatestforit', filename: 'file.gz'} 290 | ].map(attachSavedTo) 291 | var toCreate = validFiles.slice(0) 292 | toCreate.push(...toDelete) 293 | var schemaPath = path.join(sync.saveLocation, 'schema.json') 294 | toCreate.push(schemaPath) 295 | async.map(toCreate, touchFile, (err) => { 296 | assert.ifError(err) 297 | sync.cleanupFiles(validFiles, (err) => { 298 | assert.ifError(err) 299 | // add this back on for the utility function to check for existence 300 | validFiles.push(schemaPath) 301 | async.map(validFiles, fileExists, (err, res) => { 302 | assert.ifError(err) 303 | assert.equal(res.length, 5) 304 | for (var r of res) { 305 | assert(r) 306 | } 307 | async.map(toDelete, fileExists, (err, res) => { 308 | assert.ifError(err) 309 | assert.equal(res.length, 3) 310 | for (var r of res) { 311 | assert(!r) 312 | } 313 | done() 314 | }) 315 | }) 316 | }) 317 | }) 318 | }) 319 | }) 320 | }) 321 | -------------------------------------------------------------------------------- /test/UnpackTest.js: -------------------------------------------------------------------------------- 1 | const os = require('os') 2 | const fs = require('fs') 3 | const path = require('path') 4 | const gzip = require('zlib') 5 | const {Transform} = require('stream') 6 | 7 | const async = require('async') 8 | const chai = require('chai') 9 | const assert = chai.assert 10 | const rimraf = require('rimraf') 11 | const mkdirp = 
require('mkdirp') 12 | 13 | const logger = require('./fixtures/mockLogger') 14 | 15 | const Unpack = require('../src/Unpack') 16 | 17 | const tmpdir = path.join(os.tmpdir(), 'unpack') 18 | 19 | function buildFile(prefix, rowCount, colCount, lastNewline) { 20 | const f = [...Array(rowCount).keys()].map((i) => { 21 | return [...Array(colCount).keys()].map((j) => prefix + i.toString() + j.toString()).join('\t') 22 | }).join('\n') 23 | if (lastNewline) return f + '\n' 24 | return f 25 | } 26 | 27 | function buildTable(colCount) { 28 | const cols = [...Array(colCount).keys()].map((i) => { 29 | const charCode = 65 + (i % 26) 30 | return {name: String.fromCharCode(charCode)} 31 | }) 32 | return {columns: cols} 33 | } 34 | 35 | function getTableHeader(table) { 36 | return table.columns.map((e) => e.name).join('\t') 37 | } 38 | 39 | function buildFileName(num, max) { 40 | let numLen = max.toString().length 41 | let zeroes = [...Array(numLen).keys()].map(() => '0').join('') 42 | return (zeroes + num.toString()).slice(-numLen) 43 | } 44 | function buildTest(tmpDir, fileCount, rowsPerFile, colCount, lastNewline, cb) { 45 | if (typeof lastNewline === 'function') { 46 | cb = lastNewline 47 | lastNewline = true 48 | } 49 | const toCreate = [...Array(fileCount).keys()].map((el) => { 50 | return {i: el, n: path.join(tmpDir, `${buildFileName(el, fileCount)}.gz`)} 51 | }) 52 | mkdirp(tmpDir, (err) => { 53 | if (err) return cb(err) 54 | async.map(toCreate, (fn, cb) => { 55 | const contents = buildFile(fn.i, rowsPerFile, colCount, lastNewline) 56 | gzip.gzip(contents, (err, res) => { 57 | if (err) return cb(err) 58 | fs.writeFile(fn.n, res, (err) => { 59 | if (err) return cb(err) 60 | cb(null, contents) 61 | }) 62 | }) 63 | }, (err, res) => { 64 | if (err) return cb(err) 65 | const table = buildTable(colCount) 66 | const withNewline = res.map((r) => { 67 | if (r.charAt(r.length - 1) !== '\n') return r + '\n' 68 | return r 69 | }) 70 | const contents = getTableHeader(table) + '\n' + withNewline.join('') 71 | cb(null, {table, contents}) 72 | }) 73 | }) 74 | } 75 | 76 | function assertStream(s, expected, cb) { 77 | let c = '' 78 | s.on('data', (n) => { 79 | c += n 80 | //console.log('hello', n.toString('utf8')) 81 | }) 82 | s.on('end', () => { 83 | assert.equal(c, expected) 84 | cb() 85 | }) 86 | } 87 | 88 | function testStream() { 89 | return new Transform({ 90 | transform(chunk, enc, cb) { 91 | cb(null, chunk) 92 | } 93 | }) 94 | } 95 | 96 | let toDelete = null 97 | describe('Unpack', () => { 98 | afterEach((done) => { 99 | if (!toDelete) return done() 100 | rimraf(toDelete, done) 101 | }) 102 | describe('loadSchema', () => { 103 | let unpack = new Unpack({}, {saveLocation: 'fake', unpackLocation: 'fake2'}, logger) 104 | it('should work to load a schema', (done) => { 105 | unpack.schemaLocation = path.join(__dirname, './fixtures/mockSchema.json') 106 | unpack.loadSchema((err, schema) => { 107 | assert.ifError(err) 108 | assert(schema.fakeSchema) 109 | done() 110 | }) 111 | }) 112 | it('should throw an error if no schema', (done) => { 113 | unpack.schemaLocation = path.join(__dirname, './fixtures/missing') 114 | unpack.loadSchema((err, schema) => { 115 | assert(err instanceof Error) 116 | done() 117 | }) 118 | }) 119 | }) 120 | describe('addTitleAndUnzip', () => { 121 | const schema = { 122 | schema: { 123 | one: { 124 | tableName: 'one', 125 | columns: [ 126 | {name: "id"}, 127 | {name: "type"}, 128 | {name: "name"} 129 | ] 130 | }, 131 | two: { 132 | tableName: 'two', 133 | columns: [ 134 | {name: "letter"}, 
135 | {name: "type"} 136 | ] 137 | }, 138 | three: { 139 | tableName: 'three', 140 | columns: [ 141 | {name: "sound"} 142 | ] 143 | } 144 | } 145 | } 146 | it('should exit early if nothing matches', (done) => { 147 | let unpack = new Unpack({filter: []}, {saveLocation: 'fake', unpackLocation: 'fake2'}, logger) 148 | unpack.addTitleAndUnzip(schema, 'fake', 'fake', (err, res) => { 149 | assert.ifError(err) 150 | assert(res == null) 151 | done() 152 | }) 153 | }) 154 | it('should write out files to where we expect', (done) => { 155 | const d = path.join(tmpdir, 'full') 156 | toDelete = d 157 | const mockDump = path.join(__dirname, './fixtures/mockDump') 158 | let unpack = new Unpack({filter: ['one', 'two']}, {saveLocation: mockDump, unpackLocation: d}, logger) 159 | mkdirp(d, (err) => { 160 | assert.ifError(err) 161 | unpack.addTitleAndUnzip(schema, mockDump, d, (err) => { 162 | assert.ifError(err) 163 | fs.readFile(path.join(d, 'one.txt'), 'utf8', (err, out) => { 164 | assert.ifError(err) 165 | assert.equal(out, 'id\ttype\tname\n1\tfoo\tbob\n2\tbar\tsteve\n') 166 | fs.readFile(path.join(d, 'two.txt'), 'utf8', (err, out) => { 167 | assert.ifError(err) 168 | assert.equal(out, 'letter\ttype\na\tcat\nb\tdog\n') 169 | fs.access(path.join(d, 'three.txt'), fs.F_OK, (err) => { 170 | assert(err instanceof Error) 171 | done() 172 | }) 173 | }) 174 | }) 175 | }) 176 | }) 177 | }) 178 | 179 | }) 180 | describe('processTable', function() { 181 | this.timeout(4000) 182 | let unpack = new Unpack({}, {saveLocation: 'fake', unpackLocation: 'fake2'}, logger) 183 | it('should handle a single file', (done) => { 184 | const d = path.join(tmpdir, 'single') 185 | toDelete = d 186 | buildTest(d, 1, 3, 3, (err, test) => { 187 | if (err) return done(err) 188 | let output = testStream() 189 | assertStream(output, test.contents, done) 190 | unpack.processTable(test.table, d, output, (err) => { 191 | if (err) return done(err) 192 | }) 193 | }) 194 | }) 195 | it('should handle multiple files', (done) => { 196 | const d = path.join(tmpdir, 'multiple') 197 | toDelete = d 198 | buildTest(d, 3, 3, 3, (err, test) => { 199 | if (err) return done(err) 200 | let output = testStream() 201 | assertStream(output, test.contents, done) 202 | unpack.processTable(test.table, d, output, (err) => { 203 | if (err) return done(err) 204 | }) 205 | }) 206 | }) 207 | it('should handle large files', (done) => { 208 | const d = path.join(tmpdir, 'large') 209 | toDelete = d 210 | buildTest(d, 3, 30000, 3, (err, test) => { 211 | if (err) return done(err) 212 | let output = testStream() 213 | assertStream(output, test.contents, done) 214 | unpack.processTable(test.table, d, output, (err) => { 215 | if (err) return done(err) 216 | }) 217 | }) 218 | }) 219 | it('should handle lots of files', (done) => { 220 | const d = path.join(tmpdir, 'many') 221 | toDelete = d 222 | buildTest(d, 100, 3, 3, (err, test) => { 223 | if (err) return done(err) 224 | let output = testStream() 225 | assertStream(output, test.contents, done) 226 | unpack.processTable(test.table, d, output, (err) => { 227 | if (err) return done(err) 228 | }) 229 | }) 230 | }) 231 | it('should handle a wide file', (done) => { 232 | const d = path.join(tmpdir, 'wide') 233 | toDelete = d 234 | buildTest(d, 3, 30, 300, (err, test) => { 235 | if (err) return done(err) 236 | let output = testStream() 237 | assertStream(output, test.contents, done) 238 | unpack.processTable(test.table, d, output, (err) => { 239 | if (err) return done(err) 240 | }) 241 | }) 242 | }) 243 | it('should handle if last 
line of file is not newline terminated', (done) => { 244 | const d = path.join(tmpdir, 'newline') 245 | toDelete = d 246 | buildTest(d, 10, 3, 3, false, (err, test) => { 247 | if (err) return done(err) 248 | let output = testStream() 249 | assertStream(output, test.contents, done) 250 | unpack.processTable(test.table, d, output, (err) => { 251 | if (err) return done(err) 252 | }) 253 | }) 254 | 255 | }) 256 | }) 257 | describe('run', () => { 258 | it('should ensure the unpack location is created', (done) => { 259 | const d = path.join(tmpdir, 'run') 260 | let unpack = new Unpack({}, {saveLocation: 'fake', unpackLocation: d}, logger) 261 | unpack.loadSchema = function(cb) { cb(null, {mocked: true}) } 262 | unpack.addTitleAndUnzip = function(s, sourceDir, outDir, cb) { cb() } 263 | unpack.run((err) => { 264 | assert.ifError(err) 265 | fs.access(d, fs.F_OK, (err) => { 266 | assert.ifError(err) 267 | done() 268 | }) 269 | }) 270 | }) 271 | }) 272 | }) 273 | -------------------------------------------------------------------------------- /test/cliTest.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert') 2 | const path = require('path') 3 | const os = require('os') 4 | const fs = require('fs') 5 | 6 | const proxyquire = require('proxyquire') 7 | const logger = require('./fixtures/mockLogger') 8 | 9 | const toMock = [ 10 | 'Sync', 'ConfigTask', 'Unpack', 'Fetch', 'Grab', 'List' 11 | ] 12 | const mocks = toMock.reduce((all, next) => { 13 | function MockClass() { 14 | this.didRun = false 15 | all.instances[next.toLowerCase()] = this 16 | } 17 | MockClass.prototype.run = function(cb) { 18 | this.didRun = true 19 | cb() 20 | } 21 | MockClass.validate = function() {} 22 | all[`./${next}`] = MockClass 23 | return all 24 | }, {instances: {}}) 25 | mocks.logger = logger 26 | 27 | const cli = proxyquire('../src/cli', mocks) 28 | 29 | describe('cli', () => { 30 | const configFileName = path.join(os.tmpdir(), 'canDataCliConfig.js') 31 | before((done) => { 32 | fs.writeFile(configFileName, '{}', done) 33 | }) 34 | after((done) => { 35 | fs.unlink(configFileName, done) 36 | }) 37 | describe('parseArgs', () => { 38 | it('should expose the cli obj and parse args', () => { 39 | const args = cli.cli.parse('sampleConfig') 40 | assert.equal(args._[0], 'sampleConfig') 41 | }) 42 | }) 43 | describe('run', () => { 44 | it('should require config properly', () => { 45 | const argv = { 46 | _: ['sync'], 47 | level: 'info', 48 | config: configFileName 49 | } 50 | cli.run(argv) 51 | assert(mocks.instances.sync.didRun) 52 | }) 53 | }) 54 | }) 55 | -------------------------------------------------------------------------------- /test/fixtures/mockApiObjects.js: -------------------------------------------------------------------------------- 1 | const _ = require('lodash') 2 | const SIXTY_DAYS = (1000 * 60 * 60 * 24 * 60) 3 | var mockApiObjects = module.exports = { 4 | buildDump(opts) { 5 | return _.defaults(opts, { 6 | dumpId: '1234', 7 | sequence: 0, 8 | accountId: "customer_account_id", 9 | numFiles: 10, 10 | finished: true, 11 | expires: Date.now() - SIXTY_DAYS, 12 | updatedAt: "2015-10-24T00:00:00.000Z", 13 | createdAt: "2015-10-24T00:00:00.000Z", 14 | schemaVersion: "1.0.1" 15 | }) 16 | }, 17 | buildDumpFile(opts) { 18 | opts = opts || {} 19 | opts.tableOpts = opts.tableOpts || {} 20 | const tables = opts.tables || ['account_dim', 'course_dim', 'assignment_fact'] 21 | const artifacts = tables.map((tableName) => 
mockApiObjects.buildDumpArtifact(opts.tableOpts[tableName] || {tableName})) 22 | return _.defaults(opts, { 23 | dumpId: '1234', 24 | sequence: 0, 25 | accountId: "customer_account_id", 26 | numFiles: 10, 27 | finished: true, 28 | expires: Date.now() - SIXTY_DAYS, 29 | updatedAt: "2015-10-24T00:00:00.000Z", 30 | createdAt: "2015-10-24T00:00:00.000Z", 31 | schemaVersion: "1.0.1", 32 | artifactsByTable: _.indexBy(artifacts, 'tableName') 33 | }) 34 | 35 | }, 36 | buildDumpArtifact(opts) { 37 | return _.defaults(opts, { 38 | tableName: 'account_dim', 39 | partial: false, 40 | files: [ 41 | {url: 'http://url_to_download/file1.tar.gz', filename: 'file1.tar.gz'} 42 | ] 43 | }) 44 | }, 45 | buildDumpHistory(opts) { 46 | opts = opts || {} 47 | opts.entryOpts = opts.entryOpts || [] 48 | var numEntries = opts.numEntries || 2 49 | var entries = _.fill(Array(numEntries), 0).map((v, index) => { 50 | var entryOpts = _.defaults(opts.entryOpts[index] || {}, { 51 | sequence: index, 52 | dumpId: '1234' + index 53 | }) 54 | return mockApiObjects.buildDumpHistoryEntry(entryOpts) 55 | }) 56 | return _.defaults(opts, { 57 | table: 'account_dim', 58 | history: entries 59 | }) 60 | }, 61 | buildDumpHistoryEntry(opts) { 62 | opts = opts || {} 63 | var numFiles = opts.numFiles || 2 64 | var files = _.fill(Array(numFiles), 0).map((v, index) => mockApiObjects.buildDumpHistoryFile({filename: `filename-${index}.tar.gz`})) 65 | return _.defaults(opts, { 66 | dumpId: '1234', 67 | sequence: 0, 68 | files: files, 69 | partial: false 70 | }) 71 | }, 72 | buildDumpHistoryFile(opts) { 73 | return _.defaults(opts, { 74 | url: 'http://myapi.com/table/filename-1.tar.gz', filename: 'filename-1.tar.gz' 75 | }) 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /test/fixtures/mockDump/one/file.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructure/canvas-data-cli/b9316559b734f39bc8d3fa4cabd40a4b5d6cb7ee/test/fixtures/mockDump/one/file.gz -------------------------------------------------------------------------------- /test/fixtures/mockDump/three/file.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructure/canvas-data-cli/b9316559b734f39bc8d3fa4cabd40a4b5d6cb7ee/test/fixtures/mockDump/three/file.gz -------------------------------------------------------------------------------- /test/fixtures/mockDump/two/file.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/instructure/canvas-data-cli/b9316559b734f39bc8d3fa4cabd40a4b5d6cb7ee/test/fixtures/mockDump/two/file.gz -------------------------------------------------------------------------------- /test/fixtures/mockLogger.js: -------------------------------------------------------------------------------- 1 | var logger = require('../../src/logger') 2 | logger.logIt = function() {} 3 | module.exports = logger 4 | -------------------------------------------------------------------------------- /test/fixtures/mockSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "fakeSchema": true 3 | } 4 | --------------------------------------------------------------------------------