├── .babelrc ├── .eslintrc ├── .github ├── issue_template.md ├── pull_request_template.md ├── stale.yml └── workflows │ └── general.yml ├── .gitignore ├── .npmignore ├── .nycrc ├── .prettierrc ├── LEAD.md ├── LICENSE.md ├── Makefile ├── README.md ├── data ├── csv-dialect.json ├── data-package-dereference.json ├── data-package-multiple-resources.json ├── data-package.json ├── data-resource-dereference.json ├── data-resource.json ├── data.csv ├── data.csvformat ├── data.dialect.csv ├── dp1 │ ├── data.csv │ └── datapackage.json ├── dp2-tabular │ ├── data.csv │ ├── data2.csv │ └── datapackage.json ├── dp3-zip.zip ├── dp3-zip │ ├── data │ │ └── countries.csv │ └── datapackage.json ├── latin1.csv └── table-schema.json ├── examples ├── .keep ├── datapackage.js ├── resource.js └── validate.js ├── karma.conf.js ├── package.json ├── src ├── config.js ├── errors.js ├── helpers.js ├── index.js ├── infer.js ├── package.js ├── profile.js ├── profiles │ ├── data-package.json │ ├── data-resource.json │ ├── fiscal-data-package.json │ ├── registry.json │ ├── tabular-data-package.json │ └── tabular-data-resource.json ├── resource.js └── validate.js ├── test ├── errors.js ├── helpers.js ├── infer.js ├── karma.opts ├── mocha.opts ├── package.js ├── profile.js ├── resource.js └── validate.js └── webpack.config.js /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "es2015" 4 | ], 5 | "plugins": [ 6 | "transform-es2015-modules-commonjs", 7 | "transform-es2017-object-entries", 8 | "transform-async-to-generator", 9 | "transform-object-rest-spread", 10 | "transform-decorators-legacy", 11 | "transform-export-extensions", 12 | "transform-class-properties" 13 | ], 14 | "env": { 15 | "testing": { 16 | "plugins": ["istanbul"] 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 
| "parser": "babel-eslint", 3 | "extends": [ 4 | "standard", 5 | ], 6 | "env": { 7 | "mocha": true 8 | }, 9 | "rules": { 10 | "no-prototype-builtins": "off", 11 | "space-before-function-paren": "off", 12 | "comma-dangle": ["error", { 13 | "arrays": "always-multiline", 14 | "objects": "always-multiline" 15 | }] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your idea or problem. If it's a bug share as much as possible to reproduce it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @aivuk (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your pull request. Make sure that tests pass before publishing it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @aivuk (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | 4 | # Number of days of inactivity before a stale issue is closed 5 | daysUntilClose: 30 6 | 7 | # Issues with these labels will never be considered stale 8 | exemptLabels: 9 | - feature 10 | - enhancement 11 | - bug 12 | 13 | # Label to use when marking an issue as stale 14 | staleLabel: wontfix 15 | 16 | # Comment to post when marking an issue as stale. Set to `false` to disable 17 | markComment: > 18 | This issue has been automatically marked as stale because it has not had 19 | recent activity. 
It will be closed if no further activity occurs. Thank you 20 | for your contributions. 21 | 22 | # Comment to post when closing a stale issue. Set to `false` to disable 23 | closeComment: false 24 | -------------------------------------------------------------------------------- /.github/workflows/general.yml: -------------------------------------------------------------------------------- 1 | name: general 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - v*.*.* 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | 15 | # Test 16 | 17 | test: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v2 22 | - name: Install Node 23 | uses: actions/setup-node@v1 24 | with: 25 | node-version: 14 26 | - name: Install dependencies 27 | run: npm install 28 | - name: Test software 29 | run: npm test 30 | - name: Report coverage 31 | uses: codecov/codecov-action@v1 32 | 33 | # Release 34 | 35 | release: 36 | if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') 37 | runs-on: ubuntu-latest 38 | needs: [test] 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | - name: Install Node 43 | uses: actions/setup-node@v2 44 | with: 45 | node-version: 14 46 | registry-url: https://registry.npmjs.org/ 47 | - name: Install dependencies 48 | run: npm install 49 | - name: Build project 50 | run: npm run build 51 | - name: Release to NPM 52 | run: npm publish 53 | env: 54 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 55 | - name: Release to GitHub 56 | uses: softprops/action-gh-release@v1 57 | env: 58 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | *.pid.lock 11 | 12 | # Directory for instrumented 
libs generated by jscoverage/JSCover 13 | lib-cov 14 | 15 | # Coverage directory used by tools like istanbul 16 | coverage 17 | 18 | # nyc test coverage 19 | .nyc_output 20 | 21 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 22 | .grunt 23 | 24 | # node-waf configuration 25 | .lock-wscript 26 | 27 | # Compiled binary addons (http://nodejs.org/api/addons.html) 28 | build/Release 29 | 30 | # Dependency directories 31 | node_modules 32 | jspm_packages 33 | 34 | # Optional npm cache directory 35 | .npm 36 | 37 | # Optional eslint cache 38 | .eslintcache 39 | 40 | # Optional REPL history 41 | .node_repl_history 42 | 43 | # Output of 'npm pack' 44 | *.tgz 45 | 46 | # Yarn Integrity file 47 | .yarn-integrity 48 | 49 | # Extra 50 | dist/ 51 | lib/ 52 | package-lock.json 53 | .idea/ 54 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage-js/7467c9ebe68b713fde15a28f4c78fe2b59ad2640/.npmignore -------------------------------------------------------------------------------- /.nycrc: -------------------------------------------------------------------------------- 1 | { 2 | "lines": 70, 3 | "check-coverage": true, 4 | "include": [ 5 | "src/**/*.js" 6 | ], 7 | "extension": [ 8 | ".js" 9 | ], 10 | "reporter": [ 11 | "lcov", 12 | "text" 13 | ], 14 | "sourceMap": true, 15 | "instrument": true 16 | } 17 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "singleQuote": true, 4 | "trailingComma": "es5", 5 | "printWidth": 100 6 | } 7 | -------------------------------------------------------------------------------- /LEAD.md: -------------------------------------------------------------------------------- 1 | 
aivuk 2 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Open Knowledge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all list release templates version 2 | 3 | 4 | VERSION := $(shell node -p -e "require('./package.json').version") 5 | LEAD := $(shell head -n 1 LEAD.md) 6 | 7 | 8 | all: list 9 | 10 | list: 11 | @grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n' 12 | 13 | readme: 14 | npx referencer src README.md --in-place 15 | npx doctoc --maxlevel 3 README.md 16 | 17 | release: 18 | git checkout master && git pull origin && git fetch -p && git diff 19 | @echo "\nContinuing in 10 seconds. Press to abort\n" && sleep 10 20 | @git log --pretty=format:"%C(yellow)%h%Creset %s%Cgreen%d" --reverse -20 21 | @echo "\nReleasing v$(VERSION) in 10 seconds. Press to abort\n" && sleep 10 22 | git commit -a -m 'v$(VERSION)' && git tag -a v$(VERSION) -m 'v$(VERSION)' 23 | git push --follow-tags 24 | 25 | templates: 26 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/issue_template.md 27 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/pull_request_template.md 28 | 29 | version: 30 | @echo $(VERSION) 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datapackage-js 2 | 3 | [![Build](https://img.shields.io/github/workflow/status/frictionlessdata/tableschema-js/general/main)](https://github.com/frictionlessdata/datapackage-js/actions) 4 | [![Coverage](https://img.shields.io/codecov/c/github/frictionlessdata/datapackage-js/main)](https://codecov.io/gh/frictionlessdata/datapackage-js) 5 | [![Registry](https://img.shields.io/npm/v/frictionless-datapackage-js.svg)](https://www.npmjs.com/package/frictionless-datapackage-js) 6 | [![Codebase](https://img.shields.io/badge/github-main-brightgreen)](https://github.com/frictionlessdata/datapackage-js) 7 | 
[![Support](https://img.shields.io/badge/support-discord-brightgreen)](https://discordapp.com/invite/Sewv6av) 8 | 9 | A library for working with [Data Packages](http://specs.frictionlessdata.io/data-package/). 10 | 11 | ## Features 12 | 13 | - `Package` class for working with data packages 14 | - `Resource` class for working with data resources 15 | - `Profile` class for working with profiles 16 | - `validate` function for validating data package descriptors 17 | - `infer` function for inferring data package descriptors 18 | 19 | ## Contents 20 | 21 | 22 | 23 | 24 | 25 | - [Getting Started](#getting-started) 26 | - [Installation](#installation) 27 | - [Documentation](#documentation) 28 | - [Introduction](#introduction) 29 | - [Working with Package](#working-with-package) 30 | - [Working with Resource](#working-with-resource) 31 | - [Working with Profile](#working-with-profile) 32 | - [Working with validate/infer](#working-with-validateinfer) 33 | - [Working with Foreign Keys](#working-with-foreign-keys) 34 | - [API Referencer](#api-referencer) 35 | - [Package](#package) 36 | - [Resource](#resource) 37 | - [Profile](#profile) 38 | - [validate(descriptor) ⇒ `Object`](#validatedescriptor-%E2%87%92-codeobjectcode) 39 | - [infer(pattern) ⇒ `Object`](#inferpattern-%E2%87%92-codeobjectcode) 40 | - [DataPackageError](#datapackageerror) 41 | - [TableSchemaError](#tableschemaerror) 42 | - [Contributing](#contributing) 43 | - [Changelog](#changelog) 44 | 45 | 46 | 47 | ## Getting Started 48 | 49 | ### Installation 50 | 51 | The package use semantic versioning. It means that major versions could include breaking changes. It's highly recommended to specify `datapackage` version range in your `package.json` file e.g. `datapackage: ^1.0` which will be added by default by `npm install --save`. 
52 | 53 | #### NPM 54 | 55 | ```bash 56 | $ npm install datapackage@latest # v1.0 57 | $ npm install datapackage # v0.8 58 | ``` 59 | 60 | #### CDN 61 | 62 | ```html 63 | 64 | ``` 65 | 66 | ## Documentation 67 | 68 | ### Introduction 69 | 70 | Let's start with a simple example for Node.js: 71 | 72 | ```javascript 73 | const {Package} = require('datapackage') 74 | 75 | const descriptor = { 76 | resources: [ 77 | { 78 | name: 'example', 79 | profile: 'tabular-data-resource', 80 | data: [ 81 | ['height', 'age', 'name'], 82 | ['180', '18', 'Tony'], 83 | ['192', '32', 'Jacob'], 84 | ], 85 | schema: { 86 | fields: [ 87 | {name: 'height', type: 'integer'}, 88 | {name: 'age', type: 'integer'}, 89 | {name: 'name', type: 'string'}, 90 | ], 91 | } 92 | } 93 | ] 94 | } 95 | 96 | const dataPackage = await Package.load(descriptor) 97 | const resource = dataPackage.getResource('example') 98 | await resource.read() // [[180, 18, 'Tony'], [192, 32, 'Jacob']] 99 | ``` 100 | 101 | And for browser: 102 | 103 | > https://jsfiddle.net/rollninja/jp60q3zd/ 104 | 105 | After the script registration the library will be available as a global variable `datapackage`: 106 | 107 | ```html 108 | 109 | 110 | 111 | 112 | datapackage-js 113 | 114 | 115 | 116 | 127 | 128 | 129 | ``` 130 | 131 | ### Working with Package 132 | 133 | A class for working with data packages. It provides various capabilities like loading local or remote data package, inferring a data package descriptor, saving a data package descriptor and many more. 134 | 135 | Consider we have some local csv files in a `data` directory. 
Let's create a data package based on this data using a `Package` class: 136 | 137 | > data/cities.csv 138 | 139 | ```csv 140 | city,location 141 | london,"51.50,-0.11" 142 | paris,"48.85,2.30" 143 | rome,"41.89,12.51" 144 | ``` 145 | 146 | > data/population.csv 147 | 148 | ```csv 149 | city,year,population 150 | london,2017,8780000 151 | paris,2017,2240000 152 | rome,2017,2860000 153 | ``` 154 | 155 | First we create a blank data package:: 156 | 157 | ```javascript 158 | const dataPackage = await Package.load() 159 | ``` 160 | 161 | Now we're ready to infer a data package descriptor based on data files we have. Because we have two csv files we use glob pattern `**/*.csv`: 162 | 163 | ```javascript 164 | await dataPackage.infer('**/*.csv') 165 | dataPackage.descriptor 166 | //{ profile: 'tabular-data-package', 167 | // resources: 168 | // [ { path: 'data/cities.csv', 169 | // profile: 'tabular-data-resource', 170 | // encoding: 'utf-8', 171 | // name: 'cities', 172 | // format: 'csv', 173 | // mediatype: 'text/csv', 174 | // schema: [Object] }, 175 | // { path: 'data/population.csv', 176 | // profile: 'tabular-data-resource', 177 | // encoding: 'utf-8', 178 | // name: 'population', 179 | // format: 'csv', 180 | // mediatype: 'text/csv', 181 | // schema: [Object] } ] } 182 | ``` 183 | 184 | An `infer` method has found all our files and inspected it to extract useful metadata like profile, encoding, format, Table Schema etc. 
Let's tweak it a little bit: 185 | 186 | ```javascript 187 | dataPackage.descriptor.resources[1].schema.fields[1].type = 'year' 188 | dataPackage.commit() 189 | dataPackage.valid // true 190 | ``` 191 | 192 | Because our resources are tabular we could read them as tabular data: 193 | 194 | ```javascript 195 | await dataPackage.getResource('population').read({keyed: true}) 196 | 197 | //[ { city: 'london', year: 2017, population: 8780000 }, 198 | // { city: 'paris', year: 2017, population: 2240000 }, 199 | // { city: 'rome', year: 2017, population: 2860000 } ] 200 | ``` 201 | 202 | Let's save our descriptor to disk. After that we could update our `datapackage.json` as we want, make some changes etc: 203 | 204 | ```javascript 205 | await dataPackage.save('datapackage.json') 206 | ``` 207 | 208 | To continue the work with the data package we just load it again, but this time using the local `datapackage.json`: 209 | 210 | ```javascript 211 | const dataPackage = await Package.load('datapackage.json') 212 | // Continue the work 213 | ``` 214 | 215 | It was only a basic introduction to the `Package` class. To learn more, let's take a look at the `Package` class API reference. 216 | 217 | ### Working with Resource 218 | 219 | A class for working with data resources. You can read or iterate tabular resources using the `iter/read` methods and the whole resource as bytes using the `rawIter/rawRead` methods. 220 | 221 | Consider we have some local csv file. It could be inline data or a remote link - all supported by the `Resource` class (except local files for in-browser usage of course). But say it's `data.csv` for now: 222 | 223 | ```csv 224 | city,location 225 | london,"51.50,-0.11" 226 | paris,"48.85,2.30" 227 | rome,N/A 228 | ``` 229 | 230 | Let's create and read a resource. We use the static `Resource.load` method to instantiate a resource. 
Because the resource is tabular we could use the `resource.read` method with a `keyed` option to get an array of keyed rows: 232 | 233 | ```javascript 234 | const resource = await Resource.load({path: 'data.csv'}) 235 | resource.tabular // true 236 | resource.headers // ['city', 'location'] 237 | await resource.read({keyed: true}) 238 | // [ 239 | // {city: 'london', location: '51.50,-0.11'}, 240 | // {city: 'paris', location: '48.85,2.30'}, 241 | // {city: 'rome', location: 'N/A'}, 242 | // ] 243 | ``` 244 | 245 | As we can see, our locations are just strings. But they should be geopoints. Also Rome's location is not available, but it's just a `N/A` string instead of a JavaScript `null`. First we have to infer resource metadata: 246 | 247 | ```javascript 248 | await resource.infer() 249 | resource.descriptor 250 | //{ path: 'data.csv', 251 | // profile: 'tabular-data-resource', 252 | // encoding: 'utf-8', 253 | // name: 'data', 254 | // format: 'csv', 255 | // mediatype: 'text/csv', 256 | // schema: { fields: [ [Object], [Object] ], missingValues: [ '' ] } } 257 | await resource.read({keyed: true}) 258 | // Fails with a data validation error 259 | ``` 260 | 261 | Let's fix the not-available location. There is a `missingValues` property in the Table Schema specification. As a first try we set `missingValues` to `N/A` in `resource.descriptor.schema`. The resource descriptor could be changed in-place, but all changes should be committed by `resource.commit()`: 262 | 263 | ```javascript 264 | resource.descriptor.schema.missingValues = 'N/A' 265 | resource.commit() 266 | resource.valid // false 267 | resource.errors 268 | // Error: Descriptor validation error: 269 | // Invalid type: string (expected array) 270 | // at "/missingValues" in descriptor and 271 | // at "/properties/missingValues/type" in profile 272 | ``` 273 | 274 | As good citizens we've decided to check our resource descriptor's validity. And it's not valid! We should use an array for the `missingValues` property. 
Also don't forget to have an empty string as a missing value: 274 | 275 | ```javascript 276 | resource.descriptor.schema['missingValues'] = ['', 'N/A'] 277 | resource.commit() 278 | resource.valid // true 279 | ``` 280 | 281 | All good. It looks like we're ready to read our data again: 282 | 283 | ```javascript 284 | await resource.read({keyed: true}) 285 | // [ 286 | // {city: 'london', location: [51.50,-0.11]}, 287 | // {city: 'paris', location: [48.85,2.30]}, 288 | // {city: 'rome', location: null}, 289 | // ] 290 | ``` 291 | 292 | Now we see that: 293 | - locations are arrays with numeric latitude and longitude 294 | - Rome's location is a native JavaScript `null` 295 | 296 | And because there are no errors on data reading we could be sure that our data is valid against our schema. Let's save our resource descriptor: 297 | 298 | ```javascript 299 | await resource.save('dataresource.json') 300 | ``` 301 | 302 | Let's check the newly-created `dataresource.json`. It contains the path to our data file, inferred metadata and our `missingValues` tweak: 303 | 304 | ```json 305 | { 306 | "path": "data.csv", 307 | "profile": "tabular-data-resource", 308 | "encoding": "utf-8", 309 | "name": "data", 310 | "format": "csv", 311 | "mediatype": "text/csv", 312 | "schema": { 313 | "fields": [ 314 | { 315 | "name": "city", 316 | "type": "string", 317 | "format": "default" 318 | }, 319 | { 320 | "name": "location", 321 | "type": "geopoint", 322 | "format": "default" 323 | } 324 | ], 325 | "missingValues": [ 326 | "", 327 | "N/A" 328 | ] 329 | } 330 | } 331 | ``` 332 | 333 | If we decide to improve it even more we could update the `dataresource.json` file and then open it again. 
But this time let's read our resource as a byte stream: 334 | 335 | ```javascript 336 | const resource = await Resource.load('dataresource.json') 337 | const stream = await resource.rawIter({stream: true}) 338 | stream.on('data', (data) => { 339 | // handle data chunk as a Buffer 340 | }) 341 | ``` 342 | 343 | It was only a basic introduction to the `Resource` class. To learn more, let's take a look at the `Resource` class API reference. 344 | 345 | ### Working with Profile 346 | 347 | A component to represent JSON Schema profile from [Profiles Registry]( https://specs.frictionlessdata.io/schemas/registry.json): 348 | 349 | ```javascript 350 | const profile = await Profile.load('data-package') 351 | 352 | profile.name // data-package 353 | profile.jsonschema // JSON Schema contents 354 | 355 | const {valid, errors} = profile.validate(descriptor) 356 | for (const error of errors) { 357 | // inspect Error objects 358 | } 359 | ``` 360 | 361 | ### Working with validate/infer 362 | 363 | A standalone function to validate a data package descriptor: 364 | 365 | ```javascript 366 | const {valid, errors} = await validate({name: 'Invalid Datapackage'}) 367 | for (const error of errors) { 368 | // inspect Error objects 369 | } 370 | ``` 371 | 372 | ### Working with Foreign Keys 373 | 374 | The library supports foreign keys described in the [Table Schema](http://specs.frictionlessdata.io/table-schema/#foreign-keys) specification. It means that if your data package descriptor uses the `resources[].schema.foreignKeys` property for some resources, data integrity will be checked on reading operations. 
375 | 376 | Consider we have a data package: 377 | 378 | ```javascript 379 | const DESCRIPTOR = { 380 | 'resources': [ 381 | { 382 | 'name': 'teams', 383 | 'data': [ 384 | ['id', 'name', 'city'], 385 | ['1', 'Arsenal', 'London'], 386 | ['2', 'Real', 'Madrid'], 387 | ['3', 'Bayern', 'Munich'], 388 | ], 389 | 'schema': { 390 | 'fields': [ 391 | {'name': 'id', 'type': 'integer'}, 392 | {'name': 'name', 'type': 'string'}, 393 | {'name': 'city', 'type': 'string'}, 394 | ], 395 | 'foreignKeys': [ 396 | { 397 | 'fields': 'city', 398 | 'reference': {'resource': 'cities', 'fields': 'name'}, 399 | }, 400 | ], 401 | }, 402 | }, { 403 | 'name': 'cities', 404 | 'data': [ 405 | ['name', 'country'], 406 | ['London', 'England'], 407 | ['Madrid', 'Spain'], 408 | ], 409 | }, 410 | ], 411 | } 412 | ``` 413 | 414 | Let's check relations for a `teams` resource: 415 | 416 | ```javascript 417 | const {Package} = require('datapackage') 418 | 419 | const package = await Package.load(DESCRIPTOR) 420 | teams = package.getResource('teams') 421 | await teams.checkRelations() 422 | // tableschema.exceptions.RelationError: Foreign key "['city']" violation in row "4" 423 | ``` 424 | 425 | As we could see there is a foreign key violation. That's because our lookup table `cities` doesn't have a city of `Munich` but we have a team from there. We need to fix it in `cities` resource: 426 | 427 | ```javascript 428 | package.descriptor['resources'][1]['data'].push(['Munich', 'Germany']) 429 | package.commit() 430 | teams = package.getResource('teams') 431 | await teams.checkRelations() 432 | // True 433 | ``` 434 | 435 | Fixed! But not only a check operation is available. 
We could use the `relations` argument for the `resource.iter/read` methods to dereference resource relations: 436 | 437 | ```javascript 438 | await teams.read({keyed: true, relations: true}) 439 | //[{'id': 1, 'name': 'Arsenal', 'city': {'name': 'London', 'country': 'England'}}, 440 | // {'id': 2, 'name': 'Real', 'city': {'name': 'Madrid', 'country': 'Spain'}}, 441 | // {'id': 3, 'name': 'Bayern', 'city': {'name': 'Munich', 'country': 'Germany'}}] 442 | ``` 443 | 444 | Instead of the plain city name we've got a dictionary containing the city data. These `resource.iter/read` methods will fail with the same error as `resource.checkRelations` if there is an integrity issue. But only if the `relations: true` flag is passed. 445 | A standalone function to infer a data package descriptor. 446 | 447 | ```javascript 448 | const descriptor = await infer('**/*.csv') 449 | //{ profile: 'tabular-data-package', 450 | // resources: 451 | // [ { path: 'data/cities.csv', 452 | // profile: 'tabular-data-resource', 453 | // encoding: 'utf-8', 454 | // name: 'cities', 455 | // format: 'csv', 456 | // mediatype: 'text/csv', 457 | // schema: [Object] }, 458 | // { path: 'data/population.csv', 459 | // profile: 'tabular-data-resource', 460 | // encoding: 'utf-8', 461 | // name: 'population', 462 | // format: 'csv', 463 | // mediatype: 'text/csv', 464 | // schema: [Object] } ] } 465 | ``` 466 | 467 | ## API Referencer 468 | 469 | ### Package 470 | Package representation 471 | 472 | 473 | * [Package](#Package) 474 | * _instance_ 475 | * [.valid](#Package+valid) ⇒ `Boolean` 476 | * [.errors](#Package+errors) ⇒ `Array.<Error>` 477 | * [.profile](#Package+profile) ⇒ `Profile` 478 | * [.descriptor](#Package+descriptor) ⇒ `Object` 479 | * [.resources](#Package+resources) ⇒ `Array.<Resource>` 480 | * [.resourceNames](#Package+resourceNames) ⇒ `Array.<string>` 481 | * [.getResource(name)](#Package+getResource) ⇒ `Resource` \| `null` 482 | * [.addResource(descriptor)](#Package+addResource) ⇒ `Resource` 483 | * 
[.removeResource(name)](#Package+removeResource) ⇒ `Resource` \| `null` 484 | * [.infer(pattern)](#Package+infer) ⇒ `Object` 485 | * [.commit(strict)](#Package+commit) ⇒ `Boolean` 486 | * [.save(target, raises, returns)](#Package+save) 487 | * _static_ 488 | * [.load(descriptor, basePath, strict)](#Package.load) ⇒ [`Package`](#Package) 489 | 490 | 491 | #### package.valid ⇒ `Boolean` 492 | Validation status 493 | 494 | It always `true` in strict mode. 495 | 496 | **Returns**: `Boolean` - returns validation status 497 | 498 | #### package.errors ⇒ `Array.<Error>` 499 | Validation errors 500 | 501 | It always empty in strict mode. 502 | 503 | **Returns**: `Array.<Error>` - returns validation errors 504 | 505 | #### package.profile ⇒ `Profile` 506 | Profile 507 | 508 | 509 | #### package.descriptor ⇒ `Object` 510 | Descriptor 511 | 512 | **Returns**: `Object` - schema descriptor 513 | 514 | #### package.resources ⇒ `Array.<Resoruce>` 515 | Resources 516 | 517 | 518 | #### package.resourceNames ⇒ `Array.<string>` 519 | Resource names 520 | 521 | 522 | #### package.getResource(name) ⇒ `Resource` \| `null` 523 | Return a resource 524 | 525 | **Returns**: `Resource` \| `null` - resource instance if exists 526 | 527 | | Param | Type | 528 | | --- | --- | 529 | | name | `string` | 530 | 531 | 532 | #### package.addResource(descriptor) ⇒ `Resource` 533 | Add a resource 534 | 535 | **Returns**: `Resource` - added resource instance 536 | 537 | | Param | Type | 538 | | --- | --- | 539 | | descriptor | `Object` | 540 | 541 | 542 | #### package.removeResource(name) ⇒ `Resource` \| `null` 543 | Remove a resource 544 | 545 | **Returns**: `Resource` \| `null` - removed resource instance if exists 546 | 547 | | Param | Type | 548 | | --- | --- | 549 | | name | `string` | 550 | 551 | 552 | #### package.infer(pattern) ⇒ `Object` 553 | Infer metadata 554 | 555 | 556 | | Param | Type | Default | 557 | | --- | --- | --- | 558 | | pattern | `string` | `false` | 559 | 560 | 561 | #### 
package.commit(strict) ⇒ `Boolean` 562 | Update package instance if there are in-place changes in the descriptor. 563 | 564 | **Returns**: `Boolean` - returns true on success and false if not modified 565 | **Throws**: 566 | 567 | - `DataPackageError` raises any error occurred in the process 568 | 569 | 570 | | Param | Type | Description | 571 | | --- | --- | --- | 572 | | strict | `boolean` | alter `strict` mode for further work | 573 | 574 | **Example** 575 | ```javascript 576 | const dataPackage = await Package.load({ 577 | name: 'package', 578 | resources: [{name: 'resource', data: ['data']}] 579 | }) 580 | 581 | dataPackage.name // package 582 | dataPackage.descriptor.name = 'renamed-package' 583 | dataPackage.name // package 584 | dataPackage.commit() 585 | dataPackage.name // renamed-package 586 | ``` 587 | 588 | #### package.save(target, raises, returns) 589 | Save data package to target destination. 590 | 591 | If target path has a zip file extension the package will be zipped and 592 | saved entirely. If it has a json file extension only the descriptor will be saved. 593 | 594 | 595 | | Param | Type | Description | 596 | | --- | --- | --- | 597 | | target | `string` | path where to save a data package | 598 | | raises | `DataPackageError` | error if something goes wrong | 599 | | returns | `boolean` | true on success | 600 | 601 | 602 | #### Package.load(descriptor, basePath, strict) ⇒ [`Package`](#Package) 603 | Factory method to instantiate `Package` class. 604 | 605 | This method is async and it should be used with await keyword or as a `Promise`. 606 | 607 | **Returns**: [`Package`](#Package) - returns data package class instance 608 | **Throws**: 609 | 610 | - `DataPackageError` raises error if something goes wrong 611 | 612 | 613 | | Param | Type | Description | 614 | | --- | --- | --- | 615 | | descriptor | `string` \| `Object` | package descriptor as local path, url or object. 
If ththe path has a `zip` file extension it will be unzipped to the temp directory first. | 616 | | basePath | `string` | base path for all relative paths | 617 | | strict | `boolean` | strict flag to alter validation behavior. Setting it to `true` leads to throwing errors on any operation with invalid descriptor | 618 | 619 | 620 | ### Resource 621 | Resource representation 622 | 623 | 624 | * [Resource](#Resource) 625 | * _instance_ 626 | * [.valid](#Resource+valid) ⇒ `Boolean` 627 | * [.errors](#Resource+errors) ⇒ `Array.<Error>` 628 | * [.profile](#Resource+profile) ⇒ `Profile` 629 | * [.descriptor](#Resource+descriptor) ⇒ `Object` 630 | * [.name](#Resource+name) ⇒ `string` 631 | * [.inline](#Resource+inline) ⇒ `boolean` 632 | * [.local](#Resource+local) ⇒ `boolean` 633 | * [.remote](#Resource+remote) ⇒ `boolean` 634 | * [.multipart](#Resource+multipart) ⇒ `boolean` 635 | * [.tabular](#Resource+tabular) ⇒ `boolean` 636 | * [.source](#Resource+source) ⇒ `Array` \| `string` 637 | * [.headers](#Resource+headers) ⇒ `Array.<string>` 638 | * [.schema](#Resource+schema) ⇒ `tableschema.Schema` 639 | * [.iter(keyed, extended, cast, forceCast, relations, stream)](#Resource+iter) ⇒ `AsyncIterator` \| `Stream` 640 | * [.read(limit)](#Resource+read) ⇒ `Array.<Array>` \| `Array.<Object>` 641 | * [.checkRelations()](#Resource+checkRelations) ⇒ `boolean` 642 | * [.rawIter(stream)](#Resource+rawIter) ⇒ `Iterator` \| `Stream` 643 | * [.rawRead()](#Resource+rawRead) ⇒ `Buffer` 644 | * [.infer()](#Resource+infer) ⇒ `Object` 645 | * [.commit(strict)](#Resource+commit) ⇒ `boolean` 646 | * [.save(target)](#Resource+save) ⇒ `boolean` 647 | * _static_ 648 | * [.load(descriptor, basePath, strict)](#Resource.load) ⇒ [`Resource`](#Resource) 649 | 650 | 651 | #### resource.valid ⇒ `Boolean` 652 | Validation status 653 | 654 | It always `true` in strict mode. 
655 | 656 | **Returns**: `Boolean` - returns validation status 657 | 658 | #### resource.errors ⇒ `Array.<Error>` 659 | Validation errors 660 | 661 | It always empty in strict mode. 662 | 663 | **Returns**: `Array.<Error>` - returns validation errors 664 | 665 | #### resource.profile ⇒ `Profile` 666 | Profile 667 | 668 | 669 | #### resource.descriptor ⇒ `Object` 670 | Descriptor 671 | 672 | **Returns**: `Object` - schema descriptor 673 | 674 | #### resource.name ⇒ `string` 675 | Name 676 | 677 | 678 | #### resource.inline ⇒ `boolean` 679 | Whether resource is inline 680 | 681 | 682 | #### resource.local ⇒ `boolean` 683 | Whether resource is local 684 | 685 | 686 | #### resource.remote ⇒ `boolean` 687 | Whether resource is remote 688 | 689 | 690 | #### resource.multipart ⇒ `boolean` 691 | Whether resource is multipart 692 | 693 | 694 | #### resource.tabular ⇒ `boolean` 695 | Whether resource is tabular 696 | 697 | 698 | #### resource.source ⇒ `Array` \| `string` 699 | Source 700 | 701 | Combination of `resource.source` and `resource.inline/local/remote/multipart` 702 | provides predictable interface to work with resource data. 703 | 704 | 705 | #### resource.headers ⇒ `Array.<string>` 706 | Headers 707 | 708 | > Only for tabular resources 709 | 710 | **Returns**: `Array.<string>` - data source headers 711 | 712 | #### resource.schema ⇒ `tableschema.Schema` 713 | Schema 714 | 715 | > Only for tabular resources 716 | 717 | 718 | #### resource.iter(keyed, extended, cast, forceCast, relations, stream) ⇒ `AsyncIterator` \| `Stream` 719 | Iterate through the table data 720 | 721 | > Only for tabular resources 722 | 723 | And emits rows cast based on table schema (async for loop). 724 | With a `stream` flag instead of async iterator a Node stream will be returned. 725 | Data casting can be disabled. 
726 | 727 | **Returns**: `AsyncIterator` \| `Stream` - async iterator/stream of rows: 728 | - `[value1, value2]` - base 729 | - `{header1: value1, header2: value2}` - keyed 730 | - `[rowNumber, [header1, header2], [value1, value2]]` - extended 731 | **Throws**: 732 | 733 | - `TableSchemaError` raises any error occurred in this process 734 | 735 | 736 | | Param | Type | Description | 737 | | --- | --- | --- | 738 | | keyed | `boolean` | iter keyed rows | 739 | | extended | `boolean` | iter extended rows | 740 | | cast | `boolean` | disable data casting if false | 741 | | forceCast | `boolean` | instead of raising on the first row with cast error return an error object to replace failed row. It will allow to iterate over the whole data file even if it's not compliant to the schema. Example of output stream: `[['val1', 'val2'], TableSchemaError, ['val3', 'val4'], ...]` | 742 | | relations | `boolean` | if true foreign key fields will be checked and resolved to its references | 743 | | stream | `boolean` | return Node Readable Stream of table rows | 744 | 745 | 746 | #### resource.read(limit) ⇒ `Array.<Array>` \| `Array.<Object>` 747 | Read the table data into memory 748 | 749 | > Only for tabular resources; the API is the same as `resource.iter` has except for: 750 | 751 | **Returns**: `Array.<Array>` \| `Array.<Object>` - list of rows: 752 | - `[value1, value2]` - base 753 | - `{header1: value1, header2: value2}` - keyed 754 | - `[rowNumber, [header1, header2], [value1, value2]]` - extended 755 | 756 | | Param | Type | Description | 757 | | --- | --- | --- | 758 | | limit | `integer` | limit of rows to read | 759 | 760 | 761 | #### resource.checkRelations() ⇒ `boolean` 762 | It checks foreign keys and raises an exception if there are integrity issues. 
763 | 764 | > Only for tabular resources 765 | 766 | **Returns**: `boolean` - returns True if no issues 767 | **Throws**: 768 | 769 | - `DataPackageError` raises if there are integrity issues 770 | 771 | 772 | #### resource.rawIter(stream) ⇒ `Iterator` \| `Stream` 773 | Iterate over data chunks as bytes. If `stream` is true Node Stream will be returned. 774 | 775 | **Returns**: `Iterator` \| `Stream` - returns Iterator/Stream 776 | 777 | | Param | Type | Description | 778 | | --- | --- | --- | 779 | | stream | `boolean` | Node Stream will be returned | 780 | 781 | 782 | #### resource.rawRead() ⇒ `Buffer` 783 | Returns resource data as bytes. 784 | 785 | **Returns**: `Buffer` - returns Buffer with resource data 786 | 787 | #### resource.infer() ⇒ `Object` 788 | Infer resource metadata like name, format, mediatype, encoding, schema and profile. 789 | 790 | It commits this changes into resource instance. 791 | 792 | **Returns**: `Object` - returns resource descriptor 793 | 794 | #### resource.commit(strict) ⇒ `boolean` 795 | Update resource instance if there are in-place changes in the descriptor. 796 | 797 | **Returns**: `boolean` - returns true on success and false if not modified 798 | **Throws**: 799 | 800 | - DataPackageError raises error if something goes wrong 801 | 802 | 803 | | Param | Type | Description | 804 | | --- | --- | --- | 805 | | strict | `boolean` | alter `strict` mode for further work | 806 | 807 | 808 | #### resource.save(target) ⇒ `boolean` 809 | Save resource to target destination. 810 | 811 | > For now only descriptor will be saved. 
812 | 813 | **Returns**: `boolean` - returns true on success 814 | **Throws**: 815 | 816 | - `DataPackageError` raises error if something goes wrong 817 | 818 | 819 | | Param | Type | Description | 820 | | --- | --- | --- | 821 | | target | `string` | path where to save a resource | 822 | 823 | 824 | #### Resource.load(descriptor, basePath, strict) ⇒ [`Resource`](#Resource) 825 | Factory method to instantiate `Resource` class. 826 | 827 | This method is async and it should be used with await keyword or as a `Promise`. 828 | 829 | **Returns**: [`Resource`](#Resource) - returns resource class instance 830 | **Throws**: 831 | 832 | - `DataPackageError` raises error if something goes wrong 833 | 834 | 835 | | Param | Type | Description | 836 | | --- | --- | --- | 837 | | descriptor | `string` \| `Object` | resource descriptor as local path, url or object | 838 | | basePath | `string` | base path for all relative paths | 839 | | strict | `boolean` | strict flag to alter validation behavior. Setting it to `true` leads to throwing errors on any operation with invalid descriptor | 840 | 841 | 842 | ### Profile 843 | Profile representation 844 | 845 | 846 | * [Profile](#Profile) 847 | * _instance_ 848 | * [.name](#Profile+name) ⇒ `string` 849 | * [.jsonschema](#Profile+jsonschema) ⇒ `Object` 850 | * [.validate(descriptor)](#Profile+validate) ⇒ `Object` 851 | * _static_ 852 | * [.load(profile)](#Profile.load) ⇒ [`Profile`](#Profile) 853 | 854 | 855 | #### profile.name ⇒ `string` 856 | Name 857 | 858 | 859 | #### profile.jsonschema ⇒ `Object` 860 | JsonSchema 861 | 862 | 863 | #### profile.validate(descriptor) ⇒ `Object` 864 | Validate a data package `descriptor` against the profile. 
865 | 866 | **Returns**: `Object` - returns a `{valid, errors}` object 867 | 868 | | Param | Type | Description | 869 | | --- | --- | --- | 870 | | descriptor | `Object` | retrieved and dereferenced data package descriptor | 871 | 872 | 873 | #### Profile.load(profile) ⇒ [`Profile`](#Profile) 874 | Factory method to instantiate `Profile` class. 875 | 876 | This method is async and it should be used with await keyword or as a `Promise`. 877 | 878 | **Returns**: [`Profile`](#Profile) - returns profile class instance 879 | **Throws**: 880 | 881 | - `DataPackageError` raises error if something goes wrong 882 | 883 | 884 | | Param | Type | Description | 885 | | --- | --- | --- | 886 | | profile | `string` | profile name in registry or URL to JSON Schema | 887 | 888 | 889 | ### validate(descriptor) ⇒ `Object` 890 | This function is async so it has to be used with `await` keyword or as a `Promise`. 891 | 892 | **Returns**: `Object` - returns a `{valid, errors}` object 893 | 894 | | Param | Type | Description | 895 | | --- | --- | --- | 896 | | descriptor | `string` \| `Object` | data package descriptor (local/remote path or object) | 897 | 898 | 899 | ### infer(pattern) ⇒ `Object` 900 | This function is async so it has to be used with `await` keyword or as a `Promise`. 901 | 902 | **Returns**: `Object` - returns data package descriptor 903 | 904 | | Param | Type | Description | 905 | | --- | --- | --- | 906 | | pattern | `string` | glob file pattern | 907 | 908 | 909 | ### DataPackageError 910 | Base class for the all DataPackage errors. 911 | 912 | 913 | ### TableSchemaError 914 | Base class for the all TableSchema errors. 915 | 916 | 917 | ## Contributing 918 | 919 | > The project follows the [Open Knowledge International coding standards](https://github.com/okfn/coding-standards). 
There are common commands to work with the project: 920 | 921 | ``` 922 | $ npm install 923 | $ npm run test 924 | $ npm run build 925 | ``` 926 | 927 | ## Changelog 928 | 929 | Here described only breaking and the most important changes. The full changelog and documentation for all released versions could be found in nicely formatted [commit history](https://github.com/frictionlessdata/datapackage-js/commits/master). 930 | 931 | #### v1.1 932 | 933 | Updated behaviour: 934 | 935 | - Resource's `escapeChar` and `quoteChar` are mutually exclusive now 936 | 937 | New API added: 938 | 939 | - Added support of `zip` files for data packages 940 | - Added support of `format/encoding/dialect` for resources 941 | 942 | #### v1.0 943 | 944 | This version includes various big changes. A migration guide is under development and will be published here. 945 | 946 | #### v0.8 947 | 948 | First stable version of the library. 949 | -------------------------------------------------------------------------------- /data/csv-dialect.json: -------------------------------------------------------------------------------- 1 | {"delimiter": ","} 2 | -------------------------------------------------------------------------------- /data/data-package-dereference.json: -------------------------------------------------------------------------------- 1 | { 2 | "resources": [ 3 | {"name": "name1", "data": ["data"], "schema": "table-schema.json"}, 4 | {"name": "name2", "data": ["data"], "dialect": "#/dialects/main"} 5 | ], 6 | "dialects": { 7 | "main": {"delimiter": ","} 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /data/data-package-multiple-resources.json: -------------------------------------------------------------------------------- 1 | { 2 | "resources": [ 3 | { 4 | "name": "name1", 5 | "data": ["data"] 6 | }, 7 | { 8 | "name": "name2", 9 | "data": ["data"] 10 | } 11 | ] 12 | } 13 | 
-------------------------------------------------------------------------------- /data/data-package.json: -------------------------------------------------------------------------------- 1 | { 2 | "resources": [ 3 | { 4 | "name": "name", 5 | "data": ["data"] 6 | } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/data-resource-dereference.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "name", 3 | "data": "data", 4 | "schema": "table-schema.json", 5 | "dialect": "#/dialects/main", 6 | "dialects": { 7 | "main": {"delimiter": ","} 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /data/data-resource.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "name", 3 | "data": "data" 4 | } 5 | -------------------------------------------------------------------------------- /data/data.csv: -------------------------------------------------------------------------------- 1 | name,size 2 | gb,105 3 | us,205 4 | cn,305 5 | -------------------------------------------------------------------------------- /data/data.csvformat: -------------------------------------------------------------------------------- 1 | city,population 2 | london,8787892 3 | paris,2244000 4 | rome,2877215 5 | -------------------------------------------------------------------------------- /data/data.dialect.csv: -------------------------------------------------------------------------------- 1 | name;size 2 | gb;105 3 | us;205 4 | cn;305 5 | -------------------------------------------------------------------------------- /data/dp1/data.csv: -------------------------------------------------------------------------------- 1 | name,size 2 | gb,100 3 | us,200 4 | cn,300 5 | -------------------------------------------------------------------------------- /data/dp1/datapackage.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "abc", 3 | "resources": [ 4 | { 5 | "name": "random", 6 | "format": "csv", 7 | "path": ["data.csv"], 8 | "schema": { 9 | "fields": [ 10 | { 11 | "name": "name", 12 | "type": "string" 13 | }, 14 | { 15 | "name": "size", 16 | "type": "number" 17 | } 18 | ] 19 | } 20 | } 21 | ], 22 | "views": [ 23 | { 24 | "type": "vegalite", 25 | "spec": { 26 | "data": { 27 | "resource": "random" 28 | }, 29 | "mark": "bar", 30 | "encoding": { 31 | "x": { 32 | "field": "name", 33 | "type": "ordinal" 34 | }, 35 | "y": { 36 | "field": "size", 37 | "type": "quantitative" 38 | } 39 | } 40 | } 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /data/dp2-tabular/data.csv: -------------------------------------------------------------------------------- 1 | author,title,year 2 | Robert M. Pirsig,Zen and the Art of Motorcycle Maintenance,1974 3 | Last and First Men: A Story of the Near and Far Future,Olaf Sapledon,1930 4 | Solaris,Stanisław Lem,1961 5 | -------------------------------------------------------------------------------- /data/dp2-tabular/data2.csv: -------------------------------------------------------------------------------- 1 | year,title,director 2 | 1964,Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb,Stanley Kubrick 3 | 2004,"Spring, Summer, Fall, Winter... 
and Spring",Kim Ki-duk 4 | 2006,Inland Empire,David Lynch 5 | -------------------------------------------------------------------------------- /data/dp2-tabular/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Tabular datapackage", 3 | "resources": [ 4 | { 5 | "name": "books", 6 | "format": "csv", 7 | "path": "data.csv", 8 | "schema": { 9 | "fields": [ 10 | { 11 | "name": "author", 12 | "type": "string" 13 | }, 14 | { 15 | "name": "title", 16 | "type": "string" 17 | }, 18 | { 19 | "name": "year", 20 | "type": "integer" 21 | } 22 | ] 23 | } 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /data/dp3-zip.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage-js/7467c9ebe68b713fde15a28f4c78fe2b59ad2640/data/dp3-zip.zip -------------------------------------------------------------------------------- /data/dp3-zip/data/countries.csv: -------------------------------------------------------------------------------- 1 | name,size 2 | gb,100 3 | us,200 4 | cn,300 5 | -------------------------------------------------------------------------------- /data/dp3-zip/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "abc", 3 | "resources": [ 4 | { 5 | "name": "countries", 6 | "format": "csv", 7 | "path": "data/countries.csv", 8 | "schema": { 9 | "fields": [ 10 | { 11 | "name": "name", 12 | "type": "string" 13 | }, 14 | { 15 | "name": "size", 16 | "type": "number" 17 | } 18 | ] 19 | } 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /data/latin1.csv: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/frictionlessdata/datapackage-js/7467c9ebe68b713fde15a28f4c78fe2b59ad2640/data/latin1.csv -------------------------------------------------------------------------------- /data/table-schema.json: -------------------------------------------------------------------------------- 1 | {"fields": [{"name": "name"}]} 2 | -------------------------------------------------------------------------------- /examples/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage-js/7467c9ebe68b713fde15a28f4c78fe2b59ad2640/examples/.keep -------------------------------------------------------------------------------- /examples/datapackage.js: -------------------------------------------------------------------------------- 1 | const Datapackage = require('../lib/index').Datapackage 2 | const DATAPACKAGE_URL = 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp2-tabular/datapackage.json' 3 | 4 | // Create new datapackage from remote descriptor, using 'base' profile 5 | // and 'raiseInvalid' set to false 6 | new Datapackage(DATAPACKAGE_URL, 'tabular', false).then(datapackage => { 7 | // see if datapackage is valid 8 | console.log('datapackage.valid: ', datapackage.valid) 9 | 10 | // Add new Resource 11 | const valid = datapackage.addResource({ name: 'New resource' }) 12 | 13 | // `addResource` returns the validation result of the changes 14 | if (!valid) { 15 | // see the errors why the package is invalid 16 | console.log('\nThe following errors are found: ', datapackage.errors) 17 | } 18 | 19 | // Now the datapackage is marked as invalid 20 | console.log('\ndatapackage.valid: ', datapackage.valid) 21 | 22 | // `Datapackage.resources` getter returns array of valid Resource objects 23 | datapackage.resources.forEach((element, index) => { 24 | console.log(`\nResource number ${index} is named "${element.name}". 
25 | The Resource object is:\n${JSON.stringify(element)}`) 26 | }) 27 | 28 | // But the contents of the valid resources can be read 29 | // Note: `Resource.table` returns `jsontableschema.Table` instance. 30 | // For usage details please see https://github.com/frictionlessdata/jsontableschema-js#table 31 | datapackage.resources[0].table.then(table => { 32 | return table.read() 33 | }).then(data => { 34 | console.log(`\nResource named "${datapackage.resources[0].name}" contains this data:`) 35 | console.log(data) 36 | }) 37 | 38 | // Updating the descriptor fields is done trough `Datapackage.update` method 39 | console.log('\nOld datapackage name: ', datapackage.descriptor.name) 40 | datapackage.update({ name: 'Changed name'}) 41 | console.log('New datapackage name: ', datapackage.descriptor.name) 42 | 43 | }).catch(err => { 44 | console.loge(err) 45 | }) 46 | -------------------------------------------------------------------------------- /examples/resource.js: -------------------------------------------------------------------------------- 1 | const Resource = require('../lib/index').Resource 2 | 3 | const resourceData = [ 4 | [180, 18, 'Tony'], 5 | [192, 15, 'Pavle'], 6 | [160, 32, 'Pero'], 7 | [202, 23, 'David'], 8 | ] 9 | 10 | const resourceSchema = { 11 | fields: [ 12 | { 13 | name: 'height', 14 | type: 'integer', 15 | }, 16 | { 17 | name: 'age', 18 | type: 'integer', 19 | }, 20 | { 21 | name: 'name', 22 | type: 'string', 23 | }, 24 | ], 25 | } 26 | 27 | // Create a resource that contains inline data with appropriate resourceSchema 28 | let resource = new Resource({ data: resourceData, schema: resourceSchema }) 29 | 30 | // Display the resource type. Could be inline, remote or local. 31 | // Here we have our data inline. 
32 | console.log('First resource type: ', resource.type) 33 | 34 | // When working with inline data it can be fetched with `Resource.source` 35 | console.log('First resource source: ', resource.source) 36 | 37 | // Create a new resource with remote data 38 | const remoteResourceDescriptor = { 39 | "name": "random", 40 | "format": "csv", 41 | "path": "https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp1/data.csv", 42 | "schema": { 43 | "fields": [ 44 | { 45 | "name": "name", 46 | "type": "string" 47 | }, 48 | { 49 | "name": "size", 50 | "type": "number" 51 | } 52 | ] 53 | } 54 | } 55 | resource = new Resource(remoteResourceDescriptor) 56 | 57 | // This resource is remote 58 | console.log('\nSecond resource type: ', resource.type) 59 | 60 | // Now `Resource.source` displays the URL where the data can be found 61 | console.log('Second resource source: ', resource.source) 62 | 63 | // To see the data, we must initialize `jsontableschema.Table` and read from there. 64 | // The Table class can be initialized with Resource.table. 
65 | // More: https://github.com/frictionlessdata/jsontableschema-js#table 66 | resource.table.then(table => { 67 | return table.read() 68 | }).then(data => { 69 | console.log('Second resource data:') 70 | console.log(data) 71 | }) 72 | -------------------------------------------------------------------------------- /examples/validate.js: -------------------------------------------------------------------------------- 1 | const validate = require('../lib/index').validate 2 | 3 | validate({ name: "Invalid Datapackage" }).then(validation => { 4 | if (validation instanceof Array) { 5 | // output the validation errors 6 | console.log(validation) 7 | } 8 | }) 9 | -------------------------------------------------------------------------------- /karma.conf.js: -------------------------------------------------------------------------------- 1 | const webpackConfig = require('./webpack.config.js') 2 | delete webpackConfig.entry 3 | 4 | // Base 5 | 6 | const karmaConfig = (config) => { 7 | config.set({ 8 | singleRun: true, 9 | browsers: ['jsdom'], 10 | frameworks: ['mocha', 'sinon-chai'], 11 | files: ['test/karma.opts'], 12 | reporters: ['spec'], 13 | preprocessors: { 14 | 'test/karma.opts': ['webpack'], 15 | }, 16 | webpack: webpackConfig, 17 | webpackMiddleware: { 18 | noInfo: true 19 | }, 20 | client: { 21 | mocha: { 22 | opts: 'test/mocha.opts' 23 | } 24 | } 25 | }) 26 | } 27 | 28 | // Module API 29 | 30 | module.exports = karmaConfig 31 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "datapackage", 3 | "version": "1.1.10", 4 | "description": "Utilities to work with Data Packages as defined on specs.frictionlessdata.io", 5 | "license": "MIT", 6 | "main": "lib/index.js", 7 | "engines": { 8 | "node": ">=4" 9 | }, 10 | "files": [ 11 | "lib", 12 | "dist", 13 | "src" 14 | ], 15 | "scripts": { 16 | "build": "npm run compile && npm 
run bundle", 17 | "bundle": "webpack --progress --hide-modules && NODE_ENV=production webpack --progress --hide-modules", 18 | "compile": "babel src --out-dir lib --copy-files", 19 | "coveralls": "cat ./coverage/lcov.info | coveralls", 20 | "format": "prettier --write '{src,test}/**/*.js' && eslint --fix '{src,test}/**/*.js'", 21 | "lint": "prettier --check '{src,test}/**/*.js' && eslint '{src,test}/**/*.js'", 22 | "pretest": "npm run lint", 23 | "profiles": "npm run profiles:registry && npm run profiles:data-package && npm run profiles:tabular-data-package && npm run profiles:fiscal-data-package && npm run profiles:data-resource && npm run profiles:tabular-data-resource", 24 | "profiles:registry": "wget -O src/profiles/registry.json https://specs.frictionlessdata.io/schemas/registry.json", 25 | "profiles:data-package": "wget -O src/profiles/data-package.json https://specs.frictionlessdata.io/schemas/data-package.json", 26 | "profiles:tabular-data-package": "wget -O src/profiles/tabular-data-package.json https://specs.frictionlessdata.io/schemas/tabular-data-package.json", 27 | "profiles:fiscal-data-package": "wget -O src/profiles/fiscal-data-package.json https://specs.frictionlessdata.io/schemas/fiscal-data-package.json", 28 | "profiles:data-resource": "wget -O src/profiles/data-resource.json https://specs.frictionlessdata.io/schemas/data-resource.json", 29 | "profiles:tabular-data-resource": "wget -O src/profiles/tabular-data-resource.json https://specs.frictionlessdata.io/schemas/tabular-data-resource.json", 30 | "test": "NODE_ENV=testing nyc mocha && karma start" 31 | }, 32 | "dependencies": { 33 | "axios": "^0.21.1", 34 | "glob": "^7.1.2", 35 | "jschardet": "^1.5.1", 36 | "json-pointer": "^0.6.0", 37 | "jszip": "^3.1.5", 38 | "lodash": "^4.13.1", 39 | "regenerator-runtime": "^0.11.0", 40 | "stream-to-async-iterator": "^0.2.0", 41 | "tableschema": "^1.11.5", 42 | "tmp": "0.0.33", 43 | "tv4": "^1.2.7", 44 | "url-join": "^2.0.1" 45 | }, 46 | "devDependencies": { 
47 | "axios-mock-adapter": "^1.8.1", 48 | "babel": "^6.5.2", 49 | "babel-cli": "^6.24.1", 50 | "babel-eslint": "^10.1.0", 51 | "babel-istanbul": "^0.12.2", 52 | "babel-loader": "^6.2.1", 53 | "babel-plugin-istanbul": "^3.1.2", 54 | "babel-plugin-transform-async-to-generator": "^6.16.0", 55 | "babel-plugin-transform-class-properties": "^6.22.0", 56 | "babel-plugin-transform-decorators-legacy": "^1.3.4", 57 | "babel-plugin-transform-es2015-modules-commonjs": "^6.22.0", 58 | "babel-plugin-transform-es2017-object-entries": "0.0.3", 59 | "babel-plugin-transform-export-extensions": "^6.22.0", 60 | "babel-plugin-transform-object-rest-spread": "^6.20.2", 61 | "babel-preset-es2015": "^6.6.0", 62 | "chai": "^3.5.0", 63 | "coveralls": "^3.0.6", 64 | "doctoc": "^1.4.0", 65 | "eslint": "^6.8.0", 66 | "eslint-config-standard": "^14.1.0", 67 | "eslint-plugin-import": "^2.20.0", 68 | "eslint-plugin-node": "^11.0.0", 69 | "eslint-plugin-promise": "^4.2.1", 70 | "eslint-plugin-react": "^7.18.3", 71 | "eslint-plugin-standard": "^4.0.1", 72 | "jsdom": "^15.1.1", 73 | "json-loader": "^0.5.4", 74 | "karma": "^4.3.0", 75 | "karma-jsdom-launcher": "^6.1.2", 76 | "karma-mocha": "^1.3.0", 77 | "karma-sinon-chai": "^1.2.4", 78 | "karma-spec-reporter": "0.0.26", 79 | "karma-webpack": "^2.0.2", 80 | "mocha": "^6.2.1", 81 | "mocha-lcov-reporter": "^1.2.0", 82 | "nyc": "^14.1.1", 83 | "prettier": "^2.0.2", 84 | "referencer": "^0.2.5", 85 | "sinon": "^2.1.0", 86 | "sinon-chai": "^2.9.0", 87 | "webpack": "^2.2.0", 88 | "webpack-bundle-size-analyzer": "^2.7.0", 89 | "webpack-dev-server": "^3.8.2", 90 | "webpack-merge": "^2.4.0" 91 | }, 92 | "author": { 93 | "name": "Open Knowledge", 94 | "email": "info@okfn.org", 95 | "url": "https://okfn.org/" 96 | }, 97 | "homepage": "https://github.com/frictionlessdata/datapackage-js", 98 | "bugs": "https://github.com/frictionlessdata/datapackage-js/issues", 99 | "repository": { 100 | "type": "git", 101 | "url": 
"https://github.com/frictionlessdata/datapackage-js.git" 102 | }, 103 | "keywords": [ 104 | "data package", 105 | "frictionless data", 106 | "open data", 107 | "open knowledge" 108 | ] 109 | } 110 | -------------------------------------------------------------------------------- /src/config.js: -------------------------------------------------------------------------------- 1 | // Module API 2 | 3 | const IS_BROWSER = typeof window !== 'undefined' 4 | const TABULAR_FORMATS = ['csv', 'tsv', 'xls', 'xlsx'] 5 | const DEFAULT_DATA_PACKAGE_PROFILE = 'data-package' 6 | const DEFAULT_RESOURCE_PROFILE = 'data-resource' 7 | const DEFAULT_RESOURCE_ENCODING = 'utf-8' 8 | const DEFAULT_FIELD_TYPE = 'string' 9 | const DEFAULT_FIELD_FORMAT = 'default' 10 | const DEFAULT_MISSING_VALUES = [''] 11 | const DEFAULT_DIALECT = { 12 | delimiter: ',', 13 | doubleQuote: true, 14 | lineTerminator: '\r\n', 15 | quoteChar: '"', 16 | skipInitialSpace: true, 17 | header: true, 18 | caseSensitiveHeader: false, 19 | } 20 | 21 | // System 22 | 23 | module.exports = { 24 | IS_BROWSER, 25 | TABULAR_FORMATS, 26 | DEFAULT_DATA_PACKAGE_PROFILE, 27 | DEFAULT_RESOURCE_PROFILE, 28 | DEFAULT_RESOURCE_ENCODING, 29 | DEFAULT_FIELD_TYPE, 30 | DEFAULT_FIELD_FORMAT, 31 | DEFAULT_MISSING_VALUES, 32 | DEFAULT_DIALECT, 33 | } 34 | -------------------------------------------------------------------------------- /src/errors.js: -------------------------------------------------------------------------------- 1 | const tableschema = require('tableschema') 2 | 3 | // Module API 4 | 5 | /** 6 | * Base class for the all DataPackage errors. 7 | */ 8 | const DataPackageError = tableschema.errors.DataPackageError 9 | 10 | /** 11 | * Base class for the all TableSchema errors. 
12 | */ 13 | const TableSchemaError = tableschema.errors.TableSchemaError 14 | 15 | // System 16 | 17 | module.exports = { 18 | DataPackageError, 19 | TableSchemaError, 20 | } 21 | -------------------------------------------------------------------------------- /src/helpers.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const axios = require('axios') 3 | const pathModule = require('path') 4 | const isString = require('lodash/isString') 5 | const cloneDeep = require('lodash/cloneDeep') 6 | const isPlainObject = require('lodash/isPlainObject') 7 | const jsonpointer = require('json-pointer') 8 | const { DataPackageError } = require('./errors') 9 | const config = require('./config') 10 | const omit = require('lodash/omit') 11 | 12 | // Locate descriptor 13 | 14 | function locateDescriptor(descriptor) { 15 | let basePath 16 | 17 | // Infer from path/url 18 | if (isString(descriptor)) { 19 | basePath = descriptor.split('/').slice(0, -1).join('/') || '.' 20 | 21 | // Current dir by default 22 | } else { 23 | basePath = '.' 
24 | } 25 | 26 | return basePath 27 | } 28 | 29 | // Retrieve descriptor 30 | 31 | async function retrieveDescriptor(descriptor) { 32 | if (isPlainObject(descriptor)) { 33 | return cloneDeep(descriptor) 34 | } 35 | if (isString(descriptor)) { 36 | // Remote 37 | if (isRemotePath(descriptor)) { 38 | try { 39 | const response = await axios.get(descriptor) 40 | return response.data 41 | } catch (error) { 42 | const message = `Can not retrieve remote descriptor "${descriptor}"` 43 | throw new DataPackageError(message) 44 | } 45 | 46 | // Local 47 | } else { 48 | if (config.IS_BROWSER) { 49 | const message = `Local descriptor "${descriptor}" in browser is not supported` 50 | throw new DataPackageError(message) 51 | } 52 | try { 53 | // TODO: rebase on promisified fs.readFile (async) 54 | const contents = fs.readFileSync(descriptor, 'utf-8') 55 | return JSON.parse(contents) 56 | } catch (error) { 57 | const message = `Can not retrieve local descriptor "${descriptor}"` 58 | throw new DataPackageError(message) 59 | } 60 | } 61 | } 62 | throw new DataPackageError('Descriptor must be String or Object') 63 | } 64 | 65 | // Dereference descriptor 66 | 67 | async function dereferencePackageDescriptor(descriptor, basePath) { 68 | descriptor = cloneDeep(descriptor) 69 | for (const [index, resource] of (descriptor.resources || []).entries()) { 70 | // TODO: May be we should use Promise.all here 71 | descriptor.resources[index] = await dereferenceResourceDescriptor( 72 | resource, 73 | basePath, 74 | descriptor 75 | ) 76 | } 77 | return descriptor 78 | } 79 | 80 | async function dereferenceResourceDescriptor(descriptor, basePath, baseDescriptor) { 81 | descriptor = cloneDeep(descriptor) 82 | baseDescriptor = baseDescriptor || descriptor 83 | const PROPERTIES = ['schema', 'dialect'] 84 | for (const property of PROPERTIES) { 85 | let value = descriptor[property] 86 | 87 | // URI -> No 88 | if (!isString(value)) { 89 | continue 90 | 91 | // URI -> Pointer 92 | } else if 
(value.startsWith('#')) { 93 | try { 94 | descriptor[property] = jsonpointer.get(baseDescriptor, value.slice(1)) 95 | } catch (error) { 96 | const message = `Not resolved Pointer URI "${value}" for resource.${property}` 97 | throw new DataPackageError(message) 98 | } 99 | 100 | // URI -> Remote 101 | } else { 102 | if (basePath && isRemotePath(basePath)) { 103 | // TODO: support other that Unix OS 104 | value = [basePath, value].join('/') 105 | } 106 | if (isRemotePath(value)) { 107 | try { 108 | const response = await axios.get(value) 109 | descriptor[property] = response.data 110 | } catch (error) { 111 | const message = `Not resolved Remote URI "${value}" for resource.${property}` 112 | throw new DataPackageError(message) 113 | } 114 | 115 | // URI -> Local 116 | } else { 117 | if (config.IS_BROWSER) { 118 | const message = 'Local URI dereferencing in browser is not supported' 119 | throw new DataPackageError(message) 120 | } 121 | if (!isSafePath(value)) { 122 | const message = `Not safe path in Local URI "${value}" for resource.${property}` 123 | throw new DataPackageError(message) 124 | } 125 | if (!basePath) { 126 | const message = `Local URI "${value}" requires base path for resource.${property}` 127 | throw new DataPackageError(message) 128 | } 129 | try { 130 | // TODO: support other that Unix OS 131 | const fullPath = [basePath, value].join('/') 132 | // TODO: rebase on promisified fs.readFile (async) 133 | const contents = fs.readFileSync(fullPath, 'utf-8') 134 | descriptor[property] = JSON.parse(contents) 135 | } catch (error) { 136 | const message = `Not resolved Local URI "${value}" for resource.${property}` 137 | throw new DataPackageError(message) 138 | } 139 | } 140 | } 141 | } 142 | 143 | return descriptor 144 | } 145 | 146 | // Expand descriptor 147 | 148 | function expandPackageDescriptor(descriptor) { 149 | descriptor = cloneDeep(descriptor) 150 | descriptor.profile = descriptor.profile || config.DEFAULT_DATA_PACKAGE_PROFILE 151 | for (const 
[index, resource] of (descriptor.resources || []).entries()) { 152 | descriptor.resources[index] = expandResourceDescriptor(resource) 153 | } 154 | return descriptor 155 | } 156 | 157 | function expandResourceDescriptor(descriptor) { 158 | descriptor = cloneDeep(descriptor) 159 | descriptor.profile = descriptor.profile || config.DEFAULT_RESOURCE_PROFILE 160 | descriptor.encoding = descriptor.encoding || config.DEFAULT_RESOURCE_ENCODING 161 | if (descriptor.profile === 'tabular-data-resource') { 162 | // Schema 163 | const schema = descriptor.schema 164 | if (schema !== undefined) { 165 | for (const field of schema.fields || []) { 166 | field.type = field.type || config.DEFAULT_FIELD_TYPE 167 | field.format = field.format || config.DEFAULT_FIELD_FORMAT 168 | } 169 | schema.missingValues = schema.missingValues || config.DEFAULT_MISSING_VALUES 170 | } 171 | 172 | // Dialect 173 | const dialect = descriptor.dialect 174 | if (dialect !== undefined) { 175 | for (const [key, value] of Object.entries(filterDefaultDialect(validateDialect(dialect)))) { 176 | if (!dialect.hasOwnProperty(key)) { 177 | dialect[key] = value 178 | } 179 | } 180 | } 181 | } 182 | return descriptor 183 | } 184 | 185 | // Miscellaneous 186 | 187 | // quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification 188 | function filterDefaultDialect(dialect = {}) { 189 | const defaultDialects = dialect.hasOwnProperty('escapeChar') 190 | ? omit(config.DEFAULT_DIALECT, 'quoteChar') 191 | : config.DEFAULT_DIALECT 192 | return defaultDialects 193 | } 194 | 195 | // quoteChar and escapeChar are mutually exclusive: https://frictionlessdata.io/specs/csv-dialect/#specification 196 | function validateDialect(dialect = {}) { 197 | if (dialect.hasOwnProperty('escapeChar') && dialect.hasOwnProperty('quoteChar')) { 198 | throw new DataPackageError( 199 | 'Resource.table dialect options quoteChar and escapeChar are mutually exclusive.' 
200 | ) 201 | } 202 | return dialect 203 | } 204 | 205 | function isRemotePath(path) { 206 | // TODO: improve implementation 207 | return path.startsWith('http') 208 | } 209 | 210 | function isSafePath(path) { 211 | const containsWindowsVar = (path) => path.match(/%.+%/) 212 | const containsPosixVar = (path) => path.match(/\$.+/) 213 | 214 | // Safety checks 215 | const unsafenessConditions = [ 216 | pathModule.isAbsolute(path), 217 | path.includes(`..${pathModule.sep}`), 218 | path.startsWith('~'), 219 | containsWindowsVar(path), 220 | containsPosixVar(path), 221 | ] 222 | 223 | return !unsafenessConditions.some(Boolean) 224 | } 225 | 226 | // System 227 | 228 | module.exports = { 229 | locateDescriptor, 230 | retrieveDescriptor, 231 | dereferencePackageDescriptor, 232 | dereferenceResourceDescriptor, 233 | expandPackageDescriptor, 234 | expandResourceDescriptor, 235 | validateDialect, 236 | isRemotePath, 237 | isSafePath, 238 | } 239 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | require('regenerator-runtime/runtime') 2 | const { Package } = require('./package') 3 | const { Resource } = require('./resource') 4 | const { Profile } = require('./profile') 5 | const { validate } = require('./validate') 6 | const { infer } = require('./infer') 7 | const { DataPackageError } = require('./errors') 8 | const { TableSchemaError } = require('./errors') 9 | 10 | // Module API 11 | 12 | module.exports = { 13 | Package, 14 | Resource, 15 | Profile, 16 | validate, 17 | infer, 18 | DataPackageError, 19 | TableSchemaError, 20 | 21 | // Deprecated 22 | errors: { 23 | DataPackageError, 24 | TableSchemaError, 25 | }, 26 | } 27 | -------------------------------------------------------------------------------- /src/infer.js: -------------------------------------------------------------------------------- 1 | const { Package } = require('./package') 2 
| 3 | // Module API 4 | 5 | /** 6 | * This function is async so it has to be used with `await` keyword or as a `Promise`. 7 | * 8 | * @param {string} pattern - glob file pattern 9 | * @returns {Object} returns data package descriptor 10 | */ 11 | async function infer(pattern, { basePath } = {}) { 12 | const dataPackage = await Package.load({}, { basePath }) 13 | const descriptor = await dataPackage.infer(pattern) 14 | return descriptor 15 | } 16 | 17 | // System 18 | 19 | module.exports = { 20 | infer, 21 | } 22 | -------------------------------------------------------------------------------- /src/package.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const JSZip = require('jszip') 3 | const isEqual = require('lodash/isEqual') 4 | const isString = require('lodash/isString') 5 | const isBoolean = require('lodash/isBoolean') 6 | const cloneDeep = require('lodash/cloneDeep') 7 | const isUndefined = require('lodash/isUndefined') 8 | const { promisify } = require('util') 9 | const { Profile } = require('./profile') 10 | const { Resource } = require('./resource') 11 | const { DataPackageError } = require('./errors') 12 | const helpers = require('./helpers') 13 | const config = require('./config') 14 | 15 | // Module API 16 | 17 | /** 18 | * Package representation 19 | */ 20 | class Package { 21 | // Public 22 | 23 | /** 24 | * Factory method to instantiate `Package` class. 25 | * 26 | * This method is async and it should be used with await keyword or as a `Promise`. 27 | * 28 | * @param {string|Object} descriptor - package descriptor as local path, url or object. 29 | * If ththe path has a `zip` file extension it will be unzipped 30 | * to the temp directory first. 31 | * @param {string} basePath - base path for all relative paths 32 | * @param {boolean} strict - strict flag to alter validation behavior. 
33 | * Setting it to `true` leads to throwing errors on any operation 34 | * with invalid descriptor 35 | * @throws {DataPackageError} raises error if something goes wrong 36 | * @returns {Package} returns data package class instance 37 | */ 38 | static async load(descriptor = {}, { basePath, strict = false } = {}) { 39 | // Extract zip 40 | // TODO: 41 | // it's first iteration of the zip loading implementation 42 | // for now browser support and tempdir cleanup (not needed?) is not covered 43 | if (isString(descriptor) && descriptor.endsWith('.zip')) { 44 | descriptor = await extractZip(descriptor) 45 | } 46 | 47 | // Get base path 48 | if (isUndefined(basePath)) { 49 | basePath = helpers.locateDescriptor(descriptor) 50 | } 51 | 52 | // Process descriptor 53 | descriptor = await helpers.retrieveDescriptor(descriptor) 54 | descriptor = await helpers.dereferencePackageDescriptor(descriptor, basePath) 55 | 56 | // Get profile 57 | const profile = await Profile.load(descriptor.profile || config.DEFAULT_DATA_PACKAGE_PROFILE) 58 | 59 | return new Package(descriptor, { basePath, strict, profile }) 60 | } 61 | 62 | /** 63 | * Validation status 64 | * 65 | * It always `true` in strict mode. 66 | * 67 | * @returns {Boolean} returns validation status 68 | */ 69 | get valid() { 70 | return this._errors.length === 0 && this.resources.every((resource) => resource.valid) 71 | } 72 | 73 | /** 74 | * Validation errors 75 | * 76 | * It always empty in strict mode. 
77 | * 78 | * @returns {Error[]} returns validation errors 79 | */ 80 | get errors() { 81 | const errors = cloneDeep(this._errors) 82 | for (const [index, resource] of this.resources.entries()) { 83 | if (!resource.valid) { 84 | errors.push(new Error(`Resource "${resource.name || index}" validation error(s)`)) 85 | } 86 | } 87 | return errors 88 | } 89 | 90 | /** 91 | * Profile 92 | * 93 | * @returns {Profile} 94 | */ 95 | get profile() { 96 | return this._profile 97 | } 98 | 99 | /** 100 | * Descriptor 101 | * 102 | * @returns {Object} schema descriptor 103 | */ 104 | get descriptor() { 105 | // Never use this.descriptor inside this class (!!!) 106 | return this._nextDescriptor 107 | } 108 | 109 | /** 110 | * Resources 111 | * 112 | * @returns {Resoruce[]} 113 | */ 114 | get resources() { 115 | return this._resources 116 | } 117 | 118 | /** 119 | * Resource names 120 | * 121 | * @returns {string[]} 122 | */ 123 | get resourceNames() { 124 | return this._resources.map((resource) => resource.name) 125 | } 126 | 127 | /** 128 | * Return a resource 129 | * 130 | * @param {string} name 131 | * @returns {Resource|null} resource instance if exists 132 | */ 133 | getResource(name) { 134 | return this._resources.find((resource) => resource.name === name) || null 135 | } 136 | 137 | /** 138 | * Add a resource 139 | * 140 | * @param {Object} descriptor 141 | * @returns {Resource} added resource instance 142 | */ 143 | addResource(descriptor) { 144 | if (!this._currentDescriptor.resources) this._currentDescriptor.resources = [] 145 | this._currentDescriptor.resources.push(descriptor) 146 | this._build() 147 | return this._resources[this._resources.length - 1] 148 | } 149 | 150 | /** 151 | * Remove a resource 152 | * 153 | * @param {string} name 154 | * @returns {(Resource|null)} removed resource instance if exists 155 | */ 156 | removeResource(name) { 157 | const resource = this.getResource(name) 158 | if (resource) { 159 | const predicat = (resource) => resource.name !== 
name 160 | this._currentDescriptor.resources = this._currentDescriptor.resources.filter(predicat) 161 | this._build() 162 | } 163 | return resource 164 | } 165 | 166 | /** 167 | * Infer metadata 168 | * 169 | * @param {string} pattern 170 | * @returns {Object} 171 | */ 172 | async infer(pattern = false) { 173 | // Files 174 | if (pattern) { 175 | // It's broswer 176 | if (config.IS_BROWSER) { 177 | throw new DataPackageError('Browser is not supported for pattern infer') 178 | } 179 | 180 | // No base path 181 | if (!this._basePath) { 182 | throw new DataPackageError('Base path is required for pattern infer') 183 | } 184 | 185 | // Add resources 186 | const files = await findFiles(pattern, this._basePath) 187 | for (const file of files) { 188 | this.addResource({ path: file }) 189 | } 190 | } 191 | 192 | // Resources 193 | for (const [index, resource] of this.resources.entries()) { 194 | const descriptor = await resource.infer() 195 | this._currentDescriptor.resources[index] = descriptor 196 | this._build() 197 | } 198 | 199 | // Profile 200 | if (this._nextDescriptor.profile === config.DEFAULT_DATA_PACKAGE_PROFILE) { 201 | if (this.resources.length && this.resources.every((resouce) => resouce.tabular)) { 202 | this._currentDescriptor.profile = 'tabular-data-package' 203 | this._build() 204 | } 205 | } 206 | 207 | return this._currentDescriptor 208 | } 209 | 210 | /** 211 | * Update package instance if there are in-place changes in the descriptor. 
212 | * 213 | * @example 214 | * 215 | * ```javascript 216 | * const dataPackage = await Package.load({ 217 | * name: 'package', 218 | * resources: [{name: 'resource', data: ['data']}] 219 | * }) 220 | * 221 | * dataPackage.name // package 222 | * dataPackage.descriptor.name = 'renamed-package' 223 | * dataPackage.name // package 224 | * dataPackage.commit() 225 | * dataPackage.name // renamed-package 226 | * ``` 227 | * 228 | * @param {boolean} strict - alter `strict` mode for further work 229 | * @throws {DataPackageError} raises any error occurred in the process 230 | * @returns {Boolean} returns true on success and false if not modified 231 | */ 232 | commit({ strict } = {}) { 233 | if (isBoolean(strict)) this._strict = strict 234 | else if (isEqual(this._currentDescriptor, this._nextDescriptor)) return false 235 | this._currentDescriptor = cloneDeep(this._nextDescriptor) 236 | this._build() 237 | return true 238 | } 239 | 240 | /** 241 | * Save data package to target destination. 242 | * 243 | * If target path has a zip file extension the package will be zipped and 244 | * saved entirely. If it has a json file extension only the descriptor will be saved. 245 | * 246 | * @param {string} target - path where to save a data package 247 | * @param {DataPackageError} raises error if something goes wrong 248 | * @param {boolean} returns true on success 249 | */ 250 | save(target) { 251 | return new Promise((resolve, reject) => { 252 | // Save descriptor to json 253 | if (target.endsWith('.json')) { 254 | const contents = JSON.stringify(this._currentDescriptor, null, 4) 255 | fs.writeFile(target, contents, (error) => (!error ? 
resolve() : reject(error))) 256 | 257 | // Save package to zip 258 | } else { 259 | // Not supported in browser 260 | if (config.IS_BROWSER) { 261 | throw new DataPackageError('Zip is not supported in browser') 262 | } 263 | 264 | // Prepare zip 265 | const zip = new JSZip() 266 | const descriptor = cloneDeep(this._currentDescriptor) 267 | for (const [index, resource] of this.resources.entries()) { 268 | if (!resource.name) continue 269 | if (!resource.local) continue 270 | let path = `data/${resource.name}` 271 | const format = resource.descriptor.format 272 | if (format) path = `${path}.${format.toLowerCase()}` 273 | descriptor.resources[index].path = path 274 | zip.file(path, resource.rawRead()) 275 | } 276 | zip.file('datapackage.json', JSON.stringify(descriptor, null, 4)) 277 | 278 | // Write zip 279 | zip 280 | .generateNodeStream({ type: 'nodebuffer', streamFiles: true }) 281 | .pipe(fs.createWriteStream(target).on('error', (error) => reject(error))) 282 | .on('error', (error) => reject(error)) 283 | .on('finish', () => resolve(true)) 284 | } 285 | }) 286 | } 287 | 288 | // Private 289 | 290 | constructor(descriptor, { basePath, strict, profile } = {}) { 291 | // Handle deprecated resource.path.url 292 | for (const resource of descriptor.resources || []) { 293 | if (resource.url) { 294 | console.warn( 295 | `Resource property "url: " is deprecated. 
296 | Please use "path: " instead.` 297 | ) 298 | resource.path = resource.url 299 | delete resource.url 300 | } 301 | } 302 | 303 | // Set attributes 304 | this._currentDescriptor = cloneDeep(descriptor) 305 | this._nextDescriptor = cloneDeep(descriptor) 306 | this._basePath = basePath 307 | this._strict = strict 308 | this._profile = profile 309 | this._resources = [] 310 | this._errors = [] 311 | 312 | // Build package 313 | this._build() 314 | } 315 | 316 | _build() { 317 | // Process descriptor 318 | this._currentDescriptor = helpers.expandPackageDescriptor(this._currentDescriptor) 319 | this._nextDescriptor = cloneDeep(this._currentDescriptor) 320 | 321 | // Validate descriptor 322 | this._errors = [] 323 | const { valid, errors } = this._profile.validate(this._currentDescriptor) 324 | if (!valid) { 325 | this._errors = errors 326 | if (this._strict) { 327 | const message = `There are ${errors.length} validation errors (see 'error.errors')` 328 | throw new DataPackageError(message, errors) 329 | } 330 | } 331 | 332 | // Update resources 333 | this._resources.length = (this._currentDescriptor.resources || []).length 334 | for (const [index, descriptor] of (this._currentDescriptor.resources || []).entries()) { 335 | const resource = this._resources[index] 336 | if ( 337 | !resource || 338 | !isEqual(resource.descriptor, descriptor) || 339 | (resource.schema && resource.schema.foreignKeys.length) 340 | ) { 341 | this._resources[index] = new Resource(descriptor, { 342 | strict: this._strict, 343 | basePath: this._basePath, 344 | dataPackage: this, 345 | }) 346 | } 347 | } 348 | } 349 | } 350 | 351 | // Internal 352 | 353 | async function extractZip(descriptor) { 354 | // Not supported in browser 355 | if (config.IS_BROWSER) { 356 | throw new DataPackageError('Zip is not supported in browser') 357 | } 358 | 359 | // Load zip 360 | const zip = JSZip() 361 | const tempdir = await promisify(require('tmp').dir)() 362 | await 
zip.loadAsync(promisify(fs.readFile)(descriptor)) 363 | 364 | // Validate zip 365 | if (!zip.files['datapackage.json']) { 366 | throw new DataPackageError('Invalid zip with data package') 367 | } 368 | 369 | // Save zip to tempdir 370 | for (const [name, item] of Object.entries(zip.files)) { 371 | // Get path/descriptor 372 | const path = `${tempdir}/${name}` 373 | if (path.endsWith('datapackage.json')) { 374 | descriptor = path 375 | } 376 | 377 | // Directory 378 | if (item.dir) { 379 | await promisify(fs.mkdir)(path) 380 | 381 | // File 382 | } else { 383 | const contents = await item.async('nodebuffer') 384 | await promisify(fs.writeFile)(path, contents) 385 | } 386 | } 387 | 388 | return descriptor 389 | } 390 | 391 | function findFiles(pattern, basePath) { 392 | const glob = require('glob') 393 | return new Promise((resolve, reject) => { 394 | const options = { cwd: basePath, ignore: 'node_modules/**' } 395 | glob(pattern, options, (error, files) => { 396 | if (error) reject(error) 397 | resolve(files) 398 | }) 399 | }) 400 | } 401 | 402 | // System 403 | 404 | module.exports = { 405 | Package, 406 | } 407 | -------------------------------------------------------------------------------- /src/profile.js: -------------------------------------------------------------------------------- 1 | const tv4 = require('tv4') 2 | const axios = require('axios') 3 | const isString = require('lodash/isString') 4 | const { DataPackageError } = require('./errors') 5 | const helpers = require('./helpers') 6 | 7 | // Module API 8 | 9 | /** 10 | * Profile representation 11 | */ 12 | class Profile { 13 | // Public 14 | 15 | /** 16 | * Factory method to instantiate `Profile` class. 17 | * 18 | * This method is async and it should be used with await keyword or as a `Promise`. 
19 | * 20 | * @param {string} profile - profile name in registry or URL to JSON Schema 21 | * @throws {DataPackageError} raises error if something goes wrong 22 | * @returns {Profile} returns profile class instance 23 | */ 24 | static async load(profile) { 25 | // Remote 26 | if (isString(profile) && helpers.isRemotePath(profile)) { 27 | let jsonschema = _cache[profile] 28 | if (!jsonschema) { 29 | try { 30 | const response = await axios.get(profile) 31 | jsonschema = response.data 32 | } catch (error) { 33 | throw new DataPackageError(`Can not retrieve remote profile "${profile}"`) 34 | } 35 | _cache[profile] = jsonschema 36 | } 37 | profile = jsonschema 38 | } 39 | 40 | return new Profile(profile) 41 | } 42 | 43 | /** 44 | * Name 45 | * 46 | * @returns {string} 47 | */ 48 | get name() { 49 | if (!this._jsonschema.title) return null 50 | return this._jsonschema.title.replace(' ', '-').toLowerCase() 51 | } 52 | 53 | /** 54 | * JsonSchema 55 | * 56 | * @returns {Object} 57 | */ 58 | get jsonschema() { 59 | return this._jsonschema 60 | } 61 | 62 | /** 63 | * Validate a data package `descriptor` against the profile. 
64 | * 65 | * @param {Object} descriptor - retrieved and dereferenced data package descriptor 66 | * @returns {Object} returns a `{valid, errors}` object 67 | */ 68 | validate(descriptor) { 69 | const errors = [] 70 | 71 | // Basic validation 72 | const validation = tv4.validateMultiple(descriptor, this._jsonschema) 73 | for (const validationError of validation.errors) { 74 | errors.push( 75 | new Error( 76 | `Descriptor validation error: 77 | ${validationError.message} 78 | at "${validationError.dataPath}" in descriptor and 79 | at "${validationError.schemaPath}" in profile` 80 | ) 81 | ) 82 | } 83 | 84 | return { 85 | valid: !errors.length, 86 | errors, 87 | } 88 | } 89 | 90 | // Private 91 | 92 | constructor(profile) { 93 | // Registry 94 | if (isString(profile)) { 95 | try { 96 | profile = require(`./profiles/${profile}.json`) 97 | } catch (error) { 98 | throw new DataPackageError(`Profiles registry hasn't profile "${profile}"`) 99 | } 100 | } 101 | 102 | this._jsonschema = profile 103 | } 104 | } 105 | 106 | // Internal 107 | 108 | const _cache = {} 109 | 110 | // System 111 | 112 | module.exports = { 113 | Profile, 114 | } 115 | -------------------------------------------------------------------------------- /src/profiles/data-package.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Data Package", 4 | "description": "Data Package is a simple specification for data access and delivery.", 5 | "type": "object", 6 | "required": [ 7 | "resources" 8 | ], 9 | "properties": { 10 | "profile": { 11 | "default": "data-package", 12 | "propertyOrder": 10, 13 | "title": "Profile", 14 | "description": "The profile of this descriptor.", 15 | "context": "Every Package and Resource descriptor has a profile. 
The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 16 | "type": "string", 17 | "examples": [ 18 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 19 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 20 | ] 21 | }, 22 | "name": { 23 | "propertyOrder": 20, 24 | "title": "Name", 25 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 26 | "type": "string", 27 | "pattern": "^([-a-z0-9._/])+$", 28 | "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 29 | "examples": [ 30 | "{\n \"name\": \"my-nice-name\"\n}\n" 31 | ] 32 | }, 33 | "id": { 34 | "propertyOrder": 30, 35 | "title": "ID", 36 | "description": "A property reserved for globally unique identifiers. Examples of identifiers that are unique include UUIDs and DOIs.", 37 | "context": "A common usage pattern for Data Packages is as a packaging format within the bounds of a system or platform. In these cases, a unique identifier for a package is desired for common data handling workflows, such as updating an existing package. While at the level of the specification, global uniqueness cannot be validated, consumers using the `id` property `MUST` ensure identifiers are globally unique.", 38 | "type": "string", 39 | "examples": [ 40 | "{\n \"id\": \"b03ec84-77fd-4270-813b-0c698943f7ce\"\n}\n", 41 | "{\n \"id\": \"http://dx.doi.org/10.1594/PANGAEA.726855\"\n}\n" 42 | ] 43 | }, 44 | "title": { 45 | "propertyOrder": 40, 46 | "title": "Title", 47 | "description": "A human-readable title.", 48 | "type": "string", 49 | "examples": [ 50 | "{\n \"title\": \"My Package Title\"\n}\n" 51 | ] 52 | }, 53 | "description": { 54 | "propertyOrder": 50, 55 | "format": "textarea", 56 | "title": "Description", 57 | "description": "A text description. 
Markdown is encouraged.", 58 | "type": "string", 59 | "examples": [ 60 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 61 | ] 62 | }, 63 | "homepage": { 64 | "propertyOrder": 60, 65 | "title": "Home Page", 66 | "description": "The home on the web that is related to this data package.", 67 | "type": "string", 68 | "format": "uri", 69 | "examples": [ 70 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 71 | ] 72 | }, 73 | "created": { 74 | "propertyOrder": 70, 75 | "title": "Created", 76 | "description": "The datetime on which this descriptor was created.", 77 | "context": "The datetime must conform to the string formats for datetime as described in [RFC3339](https://tools.ietf.org/html/rfc3339#section-5.6)", 78 | "type": "string", 79 | "format": "date-time", 80 | "examples": [ 81 | "{\n \"created\": \"1985-04-12T23:20:50.52Z\"\n}\n" 82 | ] 83 | }, 84 | "contributors": { 85 | "propertyOrder": 80, 86 | "title": "Contributors", 87 | "description": "The contributors to this descriptor.", 88 | "type": "array", 89 | "minItems": 1, 90 | "items": { 91 | "title": "Contributor", 92 | "description": "A contributor to this descriptor.", 93 | "properties": { 94 | "title": { 95 | "title": "Title", 96 | "description": "A human-readable title.", 97 | "type": "string", 98 | "examples": [ 99 | "{\n \"title\": \"My Package Title\"\n}\n" 100 | ] 101 | }, 102 | "path": { 103 | "title": "Path", 104 | "description": "A fully qualified URL, or a POSIX file path..", 105 | "type": "string", 106 | "examples": [ 107 | "{\n \"path\": \"file.csv\"\n}\n", 108 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 109 | ], 110 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
111 | }, 112 | "email": { 113 | "title": "Email", 114 | "description": "An email address.", 115 | "type": "string", 116 | "format": "email", 117 | "examples": [ 118 | "{\n \"email\": \"example@example.com\"\n}\n" 119 | ] 120 | }, 121 | "organisation": { 122 | "title": "Organization", 123 | "description": "An organizational affiliation for this contributor.", 124 | "type": "string" 125 | }, 126 | "role": { 127 | "type": "string", 128 | "enum": [ 129 | "publisher", 130 | "author", 131 | "maintainer", 132 | "wrangler", 133 | "contributor" 134 | ], 135 | "default": "contributor" 136 | } 137 | }, 138 | "required": [ 139 | "title" 140 | ], 141 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 142 | }, 143 | "examples": [ 144 | "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\"\n }\n ]\n}\n", 145 | "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\",\n \"email\": \"joe@example.com\",\n \"role\": \"author\"\n }\n ]\n}\n" 146 | ] 147 | }, 148 | "keywords": { 149 | "propertyOrder": 90, 150 | "title": "Keywords", 151 | "description": "A list of keywords that describe this package.", 152 | "type": "array", 153 | "minItems": 1, 154 | "items": { 155 | "type": "string" 156 | }, 157 | "examples": [ 158 | "{\n \"keywords\": [\n \"data\",\n \"fiscal\",\n \"transparency\"\n ]\n}\n" 159 | ] 160 | }, 161 | "image": { 162 | "propertyOrder": 100, 163 | "title": "Image", 164 | "description": "A image to represent this package.", 165 | "type": "string", 166 | "examples": [ 167 | "{\n \"image\": \"http://example.com/image.jpg\"\n}\n", 168 | "{\n \"image\": \"relative/to/image.jpg\"\n}\n" 169 | ] 170 | }, 171 | "licenses": { 172 | "propertyOrder": 110, 173 | "title": "Licenses", 174 | "description": "The license(s) under which this package is published.", 175 | "type": "array", 176 | "minItems": 1, 177 | "items": { 178 | "title": 
"License", 179 | "description": "A license for this descriptor.", 180 | "type": "object", 181 | "properties": { 182 | "name": { 183 | "title": "Open Definition license identifier", 184 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 185 | "type": "string", 186 | "pattern": "^([-a-zA-Z0-9._])+$" 187 | }, 188 | "path": { 189 | "title": "Path", 190 | "description": "A fully qualified URL, or a POSIX file path..", 191 | "type": "string", 192 | "examples": [ 193 | "{\n \"path\": \"file.csv\"\n}\n", 194 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 195 | ], 196 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 197 | }, 198 | "title": { 199 | "title": "Title", 200 | "description": "A human-readable title.", 201 | "type": "string", 202 | "examples": [ 203 | "{\n \"title\": \"My Package Title\"\n}\n" 204 | ] 205 | } 206 | }, 207 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
208 | }, 209 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 210 | "examples": [ 211 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 212 | ] 213 | }, 214 | "resources": { 215 | "propertyOrder": 120, 216 | "title": "Data Resources", 217 | "description": "An `array` of Data Resource objects, each compliant with the [Data Resource](/data-resource/) specification.", 218 | "type": "array", 219 | "minItems": 1, 220 | "items": { 221 | "title": "Data Resource", 222 | "description": "Data Resource.", 223 | "type": "object", 224 | "oneOf": [ 225 | { 226 | "required": [ 227 | "name", 228 | "data" 229 | ] 230 | }, 231 | { 232 | "required": [ 233 | "name", 234 | "path" 235 | ] 236 | } 237 | ], 238 | "properties": { 239 | "profile": { 240 | "propertyOrder": 10, 241 | "default": "data-resource", 242 | "title": "Profile", 243 | "description": "The profile of this descriptor.", 244 | "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 245 | "type": "string", 246 | "examples": [ 247 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 248 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 249 | ] 250 | }, 251 | "name": { 252 | "propertyOrder": 20, 253 | "title": "Name", 254 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 255 | "type": "string", 256 | "pattern": "^([-a-z0-9._/])+$", 257 | "context": "This is ideally a url-usable and human-readable name. 
Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 258 | "examples": [ 259 | "{\n \"name\": \"my-nice-name\"\n}\n" 260 | ] 261 | }, 262 | "path": { 263 | "propertyOrder": 30, 264 | "title": "Path", 265 | "description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings. of valid URIs.", 266 | "oneOf": [ 267 | { 268 | "title": "Path", 269 | "description": "A fully qualified URL, or a POSIX file path..", 270 | "type": "string", 271 | "examples": [ 272 | "{\n \"path\": \"file.csv\"\n}\n", 273 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 274 | ], 275 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 276 | }, 277 | { 278 | "type": "array", 279 | "minItems": 1, 280 | "items": { 281 | "title": "Path", 282 | "description": "A fully qualified URL, or a POSIX file path..", 283 | "type": "string", 284 | "examples": [ 285 | "{\n \"path\": \"file.csv\"\n}\n", 286 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 287 | ], 288 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 289 | }, 290 | "examples": [ 291 | "[ \"file.csv\" ]\n", 292 | "[ \"http://example.com/file.csv\" ]\n" 293 | ] 294 | } 295 | ], 296 | "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. 
For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.", 297 | "examples": [ 298 | "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n", 299 | "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n", 300 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 301 | ] 302 | }, 303 | "data": { 304 | "propertyOrder": 230, 305 | "title": "Data", 306 | "description": "Inline data for this resource." 307 | }, 308 | "schema": { 309 | "propertyOrder": 40, 310 | "title": "Schema", 311 | "description": "A schema for this resource.", 312 | "type": "object" 313 | }, 314 | "title": { 315 | "propertyOrder": 50, 316 | "title": "Title", 317 | "description": "A human-readable title.", 318 | "type": "string", 319 | "examples": [ 320 | "{\n \"title\": \"My Package Title\"\n}\n" 321 | ] 322 | }, 323 | "description": { 324 | "propertyOrder": 60, 325 | "format": "textarea", 326 | "title": "Description", 327 | "description": "A text description. 
Markdown is encouraged.", 328 | "type": "string", 329 | "examples": [ 330 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 331 | ] 332 | }, 333 | "homepage": { 334 | "propertyOrder": 70, 335 | "title": "Home Page", 336 | "description": "The home on the web that is related to this data package.", 337 | "type": "string", 338 | "format": "uri", 339 | "examples": [ 340 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 341 | ] 342 | }, 343 | "sources": { 344 | "propertyOrder": 140, 345 | "options": { 346 | "hidden": true 347 | }, 348 | "title": "Sources", 349 | "description": "The raw sources for this resource.", 350 | "type": "array", 351 | "minItems": 1, 352 | "items": { 353 | "title": "Source", 354 | "description": "A source file.", 355 | "type": "object", 356 | "required": [ 357 | "title" 358 | ], 359 | "properties": { 360 | "title": { 361 | "title": "Title", 362 | "description": "A human-readable title.", 363 | "type": "string", 364 | "examples": [ 365 | "{\n \"title\": \"My Package Title\"\n}\n" 366 | ] 367 | }, 368 | "path": { 369 | "title": "Path", 370 | "description": "A fully qualified URL, or a POSIX file path..", 371 | "type": "string", 372 | "examples": [ 373 | "{\n \"path\": \"file.csv\"\n}\n", 374 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 375 | ], 376 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
377 | }, 378 | "email": { 379 | "title": "Email", 380 | "description": "An email address.", 381 | "type": "string", 382 | "format": "email", 383 | "examples": [ 384 | "{\n \"email\": \"example@example.com\"\n}\n" 385 | ] 386 | } 387 | } 388 | }, 389 | "examples": [ 390 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 391 | ] 392 | }, 393 | "licenses": { 394 | "description": "The license(s) under which the resource is published.", 395 | "propertyOrder": 150, 396 | "options": { 397 | "hidden": true 398 | }, 399 | "title": "Licenses", 400 | "type": "array", 401 | "minItems": 1, 402 | "items": { 403 | "title": "License", 404 | "description": "A license for this descriptor.", 405 | "type": "object", 406 | "properties": { 407 | "name": { 408 | "title": "Open Definition license identifier", 409 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 410 | "type": "string", 411 | "pattern": "^([-a-zA-Z0-9._])+$" 412 | }, 413 | "path": { 414 | "title": "Path", 415 | "description": "A fully qualified URL, or a POSIX file path..", 416 | "type": "string", 417 | "examples": [ 418 | "{\n \"path\": \"file.csv\"\n}\n", 419 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 420 | ], 421 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 422 | }, 423 | "title": { 424 | "title": "Title", 425 | "description": "A human-readable title.", 426 | "type": "string", 427 | "examples": [ 428 | "{\n \"title\": \"My Package Title\"\n}\n" 429 | ] 430 | } 431 | }, 432 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
433 | }, 434 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 435 | "examples": [ 436 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 437 | ] 438 | }, 439 | "format": { 440 | "propertyOrder": 80, 441 | "title": "Format", 442 | "description": "The file format of this resource.", 443 | "context": "`csv`, `xls`, `json` are examples of common formats.", 444 | "type": "string", 445 | "examples": [ 446 | "{\n \"format\": \"xls\"\n}\n" 447 | ] 448 | }, 449 | "mediatype": { 450 | "propertyOrder": 90, 451 | "title": "Media Type", 452 | "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).", 453 | "type": "string", 454 | "pattern": "^(.+)/(.+)$", 455 | "examples": [ 456 | "{\n \"mediatype\": \"text/csv\"\n}\n" 457 | ] 458 | }, 459 | "encoding": { 460 | "propertyOrder": 100, 461 | "title": "Encoding", 462 | "description": "The file encoding of this resource.", 463 | "type": "string", 464 | "default": "utf-8", 465 | "examples": [ 466 | "{\n \"encoding\": \"utf-8\"\n}\n" 467 | ] 468 | }, 469 | "bytes": { 470 | "propertyOrder": 110, 471 | "options": { 472 | "hidden": true 473 | }, 474 | "title": "Bytes", 475 | "description": "The size of this resource in bytes.", 476 | "type": "integer", 477 | "examples": [ 478 | "{\n \"bytes\": 2082\n}\n" 479 | ] 480 | }, 481 | "hash": { 482 | "propertyOrder": 120, 483 | "options": { 484 | "hidden": true 485 | }, 486 | "title": "Hash", 487 | "type": "string", 488 | "description": "The MD5 hash of this resource. 
Indicate other hashing algorithms with the {algorithm}:{hash} format.", 489 | "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$", 490 | "examples": [ 491 | "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n", 492 | "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n" 493 | ] 494 | } 495 | } 496 | }, 497 | "examples": [ 498 | "{\n \"resources\": [\n {\n \"name\": \"my-data\",\n \"data\": [\n \"data.csv\"\n ],\n \"mediatype\": \"text/csv\"\n }\n ]\n}\n" 499 | ] 500 | }, 501 | "sources": { 502 | "propertyOrder": 200, 503 | "options": { 504 | "hidden": true 505 | }, 506 | "title": "Sources", 507 | "description": "The raw sources for this resource.", 508 | "type": "array", 509 | "minItems": 1, 510 | "items": { 511 | "title": "Source", 512 | "description": "A source file.", 513 | "type": "object", 514 | "required": [ 515 | "title" 516 | ], 517 | "properties": { 518 | "title": { 519 | "title": "Title", 520 | "description": "A human-readable title.", 521 | "type": "string", 522 | "examples": [ 523 | "{\n \"title\": \"My Package Title\"\n}\n" 524 | ] 525 | }, 526 | "path": { 527 | "title": "Path", 528 | "description": "A fully qualified URL, or a POSIX file path..", 529 | "type": "string", 530 | "examples": [ 531 | "{\n \"path\": \"file.csv\"\n}\n", 532 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 533 | ], 534 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
535 | }, 536 | "email": { 537 | "title": "Email", 538 | "description": "An email address.", 539 | "type": "string", 540 | "format": "email", 541 | "examples": [ 542 | "{\n \"email\": \"example@example.com\"\n}\n" 543 | ] 544 | } 545 | } 546 | }, 547 | "examples": [ 548 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 549 | ] 550 | } 551 | } 552 | } -------------------------------------------------------------------------------- /src/profiles/data-resource.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Data Resource", 4 | "description": "Data Resource.", 5 | "type": "object", 6 | "oneOf": [ 7 | { 8 | "required": [ 9 | "name", 10 | "data" 11 | ] 12 | }, 13 | { 14 | "required": [ 15 | "name", 16 | "path" 17 | ] 18 | } 19 | ], 20 | "properties": { 21 | "profile": { 22 | "propertyOrder": 10, 23 | "default": "data-resource", 24 | "title": "Profile", 25 | "description": "The profile of this descriptor.", 26 | "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 27 | "type": "string", 28 | "examples": [ 29 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 30 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 31 | ] 32 | }, 33 | "name": { 34 | "propertyOrder": 20, 35 | "title": "Name", 36 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 37 | "type": "string", 38 | "pattern": "^([-a-z0-9._/])+$", 39 | "context": "This is ideally a url-usable and human-readable name. 
Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 40 | "examples": [ 41 | "{\n \"name\": \"my-nice-name\"\n}\n" 42 | ] 43 | }, 44 | "path": { 45 | "propertyOrder": 30, 46 | "title": "Path", 47 | "description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings. of valid URIs.", 48 | "oneOf": [ 49 | { 50 | "title": "Path", 51 | "description": "A fully qualified URL, or a POSIX file path..", 52 | "type": "string", 53 | "examples": [ 54 | "{\n \"path\": \"file.csv\"\n}\n", 55 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 56 | ], 57 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 58 | }, 59 | { 60 | "type": "array", 61 | "minItems": 1, 62 | "items": { 63 | "title": "Path", 64 | "description": "A fully qualified URL, or a POSIX file path..", 65 | "type": "string", 66 | "examples": [ 67 | "{\n \"path\": \"file.csv\"\n}\n", 68 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 69 | ], 70 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 71 | }, 72 | "examples": [ 73 | "[ \"file.csv\" ]\n", 74 | "[ \"http://example.com/file.csv\" ]\n" 75 | ] 76 | } 77 | ], 78 | "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.", 79 | "examples": [ 80 | "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n", 81 | "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n", 82 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 83 | ] 84 | }, 85 | "data": { 86 | "propertyOrder": 230, 87 | "title": "Data", 88 | "description": "Inline data for this resource." 
89 | }, 90 | "schema": { 91 | "propertyOrder": 40, 92 | "title": "Schema", 93 | "description": "A schema for this resource.", 94 | "type": "object" 95 | }, 96 | "title": { 97 | "propertyOrder": 50, 98 | "title": "Title", 99 | "description": "A human-readable title.", 100 | "type": "string", 101 | "examples": [ 102 | "{\n \"title\": \"My Package Title\"\n}\n" 103 | ] 104 | }, 105 | "description": { 106 | "propertyOrder": 60, 107 | "format": "textarea", 108 | "title": "Description", 109 | "description": "A text description. Markdown is encouraged.", 110 | "type": "string", 111 | "examples": [ 112 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 113 | ] 114 | }, 115 | "homepage": { 116 | "propertyOrder": 70, 117 | "title": "Home Page", 118 | "description": "The home on the web that is related to this data package.", 119 | "type": "string", 120 | "format": "uri", 121 | "examples": [ 122 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 123 | ] 124 | }, 125 | "sources": { 126 | "propertyOrder": 140, 127 | "options": { 128 | "hidden": true 129 | }, 130 | "title": "Sources", 131 | "description": "The raw sources for this resource.", 132 | "type": "array", 133 | "minItems": 1, 134 | "items": { 135 | "title": "Source", 136 | "description": "A source file.", 137 | "type": "object", 138 | "required": [ 139 | "title" 140 | ], 141 | "properties": { 142 | "title": { 143 | "title": "Title", 144 | "description": "A human-readable title.", 145 | "type": "string", 146 | "examples": [ 147 | "{\n \"title\": \"My Package Title\"\n}\n" 148 | ] 149 | }, 150 | "path": { 151 | "title": "Path", 152 | "description": "A fully qualified URL, or a POSIX file path..", 153 | "type": "string", 154 | "examples": [ 155 | "{\n \"path\": \"file.csv\"\n}\n", 156 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 157 | ], 158 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
159 | }, 160 | "email": { 161 | "title": "Email", 162 | "description": "An email address.", 163 | "type": "string", 164 | "format": "email", 165 | "examples": [ 166 | "{\n \"email\": \"example@example.com\"\n}\n" 167 | ] 168 | } 169 | } 170 | }, 171 | "examples": [ 172 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 173 | ] 174 | }, 175 | "licenses": { 176 | "description": "The license(s) under which the resource is published.", 177 | "propertyOrder": 150, 178 | "options": { 179 | "hidden": true 180 | }, 181 | "title": "Licenses", 182 | "type": "array", 183 | "minItems": 1, 184 | "items": { 185 | "title": "License", 186 | "description": "A license for this descriptor.", 187 | "type": "object", 188 | "properties": { 189 | "name": { 190 | "title": "Open Definition license identifier", 191 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 192 | "type": "string", 193 | "pattern": "^([-a-zA-Z0-9._])+$" 194 | }, 195 | "path": { 196 | "title": "Path", 197 | "description": "A fully qualified URL, or a POSIX file path..", 198 | "type": "string", 199 | "examples": [ 200 | "{\n \"path\": \"file.csv\"\n}\n", 201 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 202 | ], 203 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 204 | }, 205 | "title": { 206 | "title": "Title", 207 | "description": "A human-readable title.", 208 | "type": "string", 209 | "examples": [ 210 | "{\n \"title\": \"My Package Title\"\n}\n" 211 | ] 212 | } 213 | }, 214 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
215 | }, 216 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 217 | "examples": [ 218 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 219 | ] 220 | }, 221 | "format": { 222 | "propertyOrder": 80, 223 | "title": "Format", 224 | "description": "The file format of this resource.", 225 | "context": "`csv`, `xls`, `json` are examples of common formats.", 226 | "type": "string", 227 | "examples": [ 228 | "{\n \"format\": \"xls\"\n}\n" 229 | ] 230 | }, 231 | "mediatype": { 232 | "propertyOrder": 90, 233 | "title": "Media Type", 234 | "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).", 235 | "type": "string", 236 | "pattern": "^(.+)/(.+)$", 237 | "examples": [ 238 | "{\n \"mediatype\": \"text/csv\"\n}\n" 239 | ] 240 | }, 241 | "encoding": { 242 | "propertyOrder": 100, 243 | "title": "Encoding", 244 | "description": "The file encoding of this resource.", 245 | "type": "string", 246 | "default": "utf-8", 247 | "examples": [ 248 | "{\n \"encoding\": \"utf-8\"\n}\n" 249 | ] 250 | }, 251 | "bytes": { 252 | "propertyOrder": 110, 253 | "options": { 254 | "hidden": true 255 | }, 256 | "title": "Bytes", 257 | "description": "The size of this resource in bytes.", 258 | "type": "integer", 259 | "examples": [ 260 | "{\n \"bytes\": 2082\n}\n" 261 | ] 262 | }, 263 | "hash": { 264 | "propertyOrder": 120, 265 | "options": { 266 | "hidden": true 267 | }, 268 | "title": "Hash", 269 | "type": "string", 270 | "description": "The MD5 hash of this resource. 
Indicate other hashing algorithms with the {algorithm}:{hash} format.", 271 | "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$", 272 | "examples": [ 273 | "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n", 274 | "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n" 275 | ] 276 | } 277 | } 278 | } -------------------------------------------------------------------------------- /src/profiles/registry.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "data-package", 4 | "title": "Data Package", 5 | "schema": "https://specs.frictionlessdata.io/schemas/data-package.json", 6 | "schema_path": "data-package.json", 7 | "specification": "https://specs.frictionlessdata.io/data-package/" 8 | }, 9 | { 10 | "id": "tabular-data-package", 11 | "title": "Tabular Data Package", 12 | "schema": "https://specs.frictionlessdata.io/schemas/tabular-data-package.json", 13 | "schema_path": "tabular-data-package.json", 14 | "specification": "http://specs.frictionlessdata.io/tabular-data-package/" 15 | }, 16 | { 17 | "id": "fiscal-data-package", 18 | "title": "Fiscal Data Package", 19 | "schema": "https://specs.frictionlessdata.io/schemas/fiscal-data-package.json", 20 | "schema_path": "fiscal-data-package.json", 21 | "specification": "https://specs.frictionlessdata.io/fiscal-data-package/" 22 | }, 23 | { 24 | "id": "data-resource", 25 | "title": "Data Resource", 26 | "schema": "https://specs.frictionlessdata.io/schemas/data-resource.json", 27 | "schema_path": "data-resource.json", 28 | "specification": "https://specs.frictionlessdata.io/data-resource" 29 | }, 30 | { 31 | "id": "tabular-data-resource", 32 | "title": "Tabular Data Resource", 33 | "schema": "https://specs.frictionlessdata.io/schemas/tabular-data-resource.json", 34 | "schema_path": "tabular-data-resource.json", 35 | "specification": "https://specs.frictionlessdata.io/tabular-data-resource" 36 | }, 37 | { 38 | "id": 
"table-schema", 39 | "title": "Table Schema", 40 | "schema": "https://specs.frictionlessdata.io/schemas/table-schema.json", 41 | "schema_path": "table-schema.json", 42 | "specification": "https://specs.frictionlessdata.io/table-schema/" 43 | } 44 | ] -------------------------------------------------------------------------------- /src/resource.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const axios = require('axios') 3 | const { Buffer } = require('buffer') 4 | const pathModule = require('path') 5 | const urljoin = require('url-join') 6 | const { Readable } = require('stream') 7 | const assign = require('lodash/assign') 8 | const isEqual = require('lodash/isEqual') 9 | const isArray = require('lodash/isArray') 10 | const isObject = require('lodash/isObject') 11 | const isBoolean = require('lodash/isBoolean') 12 | const cloneDeep = require('lodash/cloneDeep') 13 | const isUndefined = require('lodash/isUndefined') 14 | const S2A = require('stream-to-async-iterator').default 15 | const { Table, Schema } = require('tableschema') 16 | const { DataPackageError } = require('./errors') 17 | const { Profile } = require('./profile') 18 | const helpers = require('./helpers') 19 | const config = require('./config') 20 | 21 | // Module API 22 | 23 | /** 24 | * Resource representation 25 | */ 26 | class Resource { 27 | // Public 28 | 29 | /** 30 | * Factory method to instantiate `Resource` class. 31 | * 32 | * This method is async and it should be used with await keyword or as a `Promise`. 33 | * 34 | * @param {string|Object} descriptor - resource descriptor as local path, url or object 35 | * @param {string} basePath - base path for all relative paths 36 | * @param {boolean} strict - strict flag to alter validation behavior. 
   *   Setting it to `true` leads to throwing errors on
   *   any operation with an invalid descriptor
   * @throws {DataPackageError} raises error if something goes wrong
   * @returns {Resource} returns resource class instance
   */
  static async load(descriptor = {}, { basePath, strict = false } = {}) {
    // Get base path: if none is given, derive it from the descriptor's
    // own location (see `helpers.locateDescriptor`)
    if (isUndefined(basePath)) {
      basePath = helpers.locateDescriptor(descriptor)
    }

    // Process descriptor: retrieve it (it may be a local path, URL or
    // plain object) and dereference it against `basePath`
    descriptor = await helpers.retrieveDescriptor(descriptor)
    descriptor = await helpers.dereferenceResourceDescriptor(descriptor, basePath)

    return new Resource(descriptor, { basePath, strict })
  }

  /**
   * Validation status
   *
   * It is always `true` in strict mode (an invalid descriptor throws
   * there instead of accumulating errors).
   *
   * @returns {Boolean} returns validation status
   */
  get valid() {
    return this._errors.length === 0
  }

  /**
   * Validation errors
   *
   * It is always empty in strict mode.
   *
   * @returns {Error[]} returns validation errors
   */
  get errors() {
    return this._errors
  }

  /**
   * Profile
   *
   * @returns {Profile}
   */
  get profile() {
    return this._profile
  }

  /**
   * Descriptor
   *
   * @returns {Object} schema descriptor
   */
  get descriptor() {
    // Never use this.descriptor inside this class (!!!) — it exposes the
    // mutable "next" descriptor, which only takes effect after `commit()`
    return this._nextDescriptor
  }

  /**
   * Name
   *
   * @returns {string}
   */
  get name() {
    return this._currentDescriptor.name
  }

  /**
   * Whether resource is inline
   *
   * @returns {boolean}
   */
  get inline() {
    return !!this._sourceInspection.inline
  }

  /**
   * Whether resource is local
   *
   * @returns {boolean}
   */
  get local() {
    return !!this._sourceInspection.local
  }

  /**
   * Whether resource is remote
   *
   * @returns {boolean}
   */
  get remote() {
    return !!this._sourceInspection.remote
  }

  /**
   * Whether resource is multipart
   *
   * @returns {boolean}
   */
  get multipart() {
    return !!this._sourceInspection.multipart
  }

  /**
   * Whether resource is tabular
   *
   * @returns {boolean}
   */
  get tabular() {
    // An explicit tabular profile always wins
    if (this._currentDescriptor.profile === 'tabular-data-resource') return true
    // In non-strict mode also accept known tabular formats and
    // tabular-looking inline data (see `inspectSource`)
    if (!this._strict) {
      if (config.TABULAR_FORMATS.includes(this._currentDescriptor.format)) return true
      if (this._sourceInspection.tabular) return true
    }
    return false
  }

  /**
   * Source
   *
   * Combination of `resource.source` and `resource.inline/local/remote/multipart`
   * provides a predictable interface to work with resource data.
   *
   * @returns {Array|string}
   */
  get source() {
    return this._sourceInspection.source
  }

  /**
   * Headers
   *
   * > Only for tabular resources (returns `null` otherwise)
   *
   * @returns {string[]} data source headers
   */
  get headers() {
    if (!this.tabular) return null
    return this._getTable().headers
  }

  /**
   * Schema
   *
   * > Only for tabular resources (returns `null` otherwise)
   *
   * @returns {tableschema.Schema}
   */
  get schema() {
    if (!this.tabular) return null
    return this._getTable().schema
  }

  /**
   * Iterate through the table data
   *
   * > Only for tabular resources
   *
   * And emits rows cast based on table schema (async for loop).
   * With a `stream` flag instead of async iterator a Node stream will be returned.
   * Data casting can be disabled.
   *
   * @param {boolean} keyed - iter keyed rows
   * @param {boolean} extended - iter extended rows
   * @param {boolean} cast - disable data casting if false
   * @param {boolean} forceCast - instead of raising on the first row with a cast error,
   *   return an error object to replace the failed row. It will allow
   *   to iterate over the whole data file even if it's not compliant to the schema.
   *   Example of output stream:
   *   `[['val1', 'val2'], TableSchemaError, ['val3', 'val4'], ...]`
   * @param {boolean} relations - if true foreign key fields will be
   *   checked and resolved to its references
   * @param {boolean} stream - return Node Readable Stream of table rows
   * @throws {TableSchemaError} raises any error occurred in this process
   * @returns {(AsyncIterator|Stream)} async iterator/stream of rows:
   *   - `[value1, value2]` - base
   *   - `{header1: value1, header2: value2}` - keyed
   *   - `[rowNumber, [header1, header2], [value1, value2]]` - extended
   */
  async iter({ relations = false, ...options } = {}) {
    // Error for non tabular
    if (!this.tabular) {
      throw new DataPackageError('Methods iter/read are not supported for non tabular data')
    }

    // Get relations: replace the boolean flag with the resolved
    // {resourceName: rows} map built by `_getRelations`
    if (relations) {
      relations = await this._getRelations()
    }

    return await this._getTable().iter({ relations, ...options })
  }

  /**
   * Read the table data into memory
   *
   * > Only for tabular resources; the API is the same as `resource.iter` has except for:
   *
   * @param {integer} limit - limit of rows to read
   * @returns {(Array[]|Object[])} list of rows:
   *   - `[value1, value2]` - base
   *   - `{header1: value1, header2: value2}` - keyed
   *   - `[rowNumber, [header1, header2], [value1, value2]]` - extended
   */
  async read({ relations = false, ...options } = {}) {
    // Error for non tabular
    if (!this.tabular) {
      throw new DataPackageError('Methods iter/read are not supported for non tabular data')
    }

    // Get relations (same resolution as in `iter`)
    if (relations) {
      relations = await this._getRelations()
    }

    return await this._getTable().read({ relations, ...options })
  }

  /**
   * It checks foreign keys and raises an exception if there are integrity issues.
258 | * 259 | * > Only for tabular resources 260 | * 261 | * @throws {DataPackageError} raises if there are integrity issues 262 | * @returns {boolean} returns True if no issues 263 | */ 264 | async checkRelations() { 265 | await this.read({ relations: true }) 266 | return true 267 | } 268 | 269 | /** 270 | * Iterate over data chunks as bytes. If `stream` is true Node Stream will be returned. 271 | * 272 | * @param {boolean} stream - Node Stream will be returned 273 | * @returns {Iterator|Stream} returns Iterator/Stream 274 | */ 275 | async rawIter({ stream = false } = {}) { 276 | // Error for inline 277 | if (this.inline) { 278 | throw new DataPackageError('Methods iter/read are not supported for inline data') 279 | } 280 | 281 | const byteStream = await createByteStream(this.source, this.remote) 282 | return stream ? byteStream : new S2A(byteStream) 283 | } 284 | 285 | /** 286 | * Returns resource data as bytes. 287 | * 288 | * @returns {Buffer} returns Buffer with resource data 289 | */ 290 | rawRead() { 291 | return new Promise((resolve) => { 292 | let bytes 293 | this.rawIter({ stream: true }).then((stream) => { 294 | stream.on('data', (data) => { 295 | bytes = bytes ? Buffer.concat([bytes, data]) : data 296 | }) 297 | stream.on('end', () => resolve(bytes)) 298 | }) 299 | }) 300 | } 301 | 302 | /** 303 | * Infer resource metadata like name, format, mediatype, encoding, schema and profile. 304 | * 305 | * It commits this changes into resource instance. 
   *
   * @returns {Object} returns resource descriptor
   */
  async infer() {
    // Work on a copy; commit it only at the very end via `_build()`
    const descriptor = cloneDeep(this._currentDescriptor)

    // Blank -> Stop: nothing can be inferred without data or path
    if (this._sourceInspection.blank) {
      return descriptor
    }

    // Name
    if (!descriptor.name) {
      descriptor.name = this._sourceInspection.name
    }

    // Only for non inline
    if (!this.inline) {
      // Format
      if (!descriptor.format) {
        descriptor.format = this._sourceInspection.format
      }

      // Mediatype
      if (!descriptor.mediatype) {
        descriptor.mediatype = `text/${descriptor.format}`
      }

      // Encoding: sniff the first chunk of bytes when the descriptor
      // still carries the default encoding (jschardet is Node-only,
      // hence the IS_BROWSER guard and the lazy require)
      if (descriptor.encoding === config.DEFAULT_RESOURCE_ENCODING) {
        if (!config.IS_BROWSER) {
          const jschardet = require('jschardet')
          const iterator = await this.rawIter()
          const bytes = (await iterator.next()).value
          // NOTE(review): jschardet can return a null `encoding` for
          // undetectable input, which would throw here — confirm upstream
          const encoding = jschardet.detect(bytes).encoding.toLowerCase()
          descriptor.encoding = encoding === 'ascii' ? 'utf-8' : encoding
        }
      }
    }

    // Schema
    if (!descriptor.schema) {
      if (this.tabular) {
        descriptor.schema = await this._getTable().infer()
      }
    }

    // Profile: upgrade the default profile for tabular data
    if (descriptor.profile === config.DEFAULT_RESOURCE_PROFILE) {
      if (this.tabular) {
        descriptor.profile = 'tabular-data-resource'
      }
    }

    // Save descriptor
    this._currentDescriptor = descriptor
    this._build()

    return descriptor
  }

  /**
   * Update resource instance if there are in-place changes in the descriptor.
   *
   * @param {boolean} strict - alter `strict` mode for further work
   * @throws DataPackageError raises error if something goes wrong
   * @returns {boolean} returns true on success and false if not modified
   */
  commit({ strict } = {}) {
    // When `strict` is passed, the rebuild is forced even if the
    // descriptor itself has not changed
    if (isBoolean(strict)) this._strict = strict
    else if (isEqual(this._currentDescriptor, this._nextDescriptor)) return false
    this._currentDescriptor = cloneDeep(this._nextDescriptor)
    this._table = null // drop the cached table so it is rebuilt lazily
    this._build()
    return true
  }

  /**
   * Save resource to target destination.
   *
   * > For now only descriptor will be saved.
   *
   * @param {string} target - path where to save a resource
   * @throws {DataPackageError} raises error if something goes wrong
   * @returns {boolean} returns true on success
   */
  save(target) {
    return new Promise((resolve, reject) => {
      const contents = JSON.stringify(this._currentDescriptor, null, 4)
      fs.writeFile(target, contents, (error) => (!error ? resolve() : reject(error)))
    })
  }

  // Private

  constructor(descriptor = {}, { basePath, strict = false, dataPackage } = {}) {
    // Handle deprecated resource.path.url: migrate `url` to `path`
    if (descriptor.url) {
      console.warn(
        `Resource property "url: " is deprecated.
        Please use "path: " instead.`
      )
      descriptor.path = descriptor.url
      delete descriptor.url
    }

    // Set attributes
    this._currentDescriptor = cloneDeep(descriptor)
    this._nextDescriptor = cloneDeep(descriptor)
    this._dataPackage = dataPackage
    this._basePath = basePath
    this._relations = null
    this._strict = strict
    this._errors = []

    // Build resource
    this._build()
  }

  // Expand/inspect/validate the current descriptor; called from the
  // constructor, `infer` and `commit`
  _build() {
    // Process descriptor: fill in spec defaults
    this._currentDescriptor = helpers.expandResourceDescriptor(this._currentDescriptor)
    this._nextDescriptor = cloneDeep(this._currentDescriptor)

    // Inspect source
    this._sourceInspection = inspectSource(
      this._currentDescriptor.data,
      this._currentDescriptor.path,
      this._basePath
    )

    // Instantiate profile
    this._profile = new Profile(this._currentDescriptor.profile)

    // Validate descriptor: collect errors, throwing only in strict mode
    this._errors = []
    const { valid, errors } = this._profile.validate(this._currentDescriptor)
    if (!valid) {
      this._errors = errors
      if (this._strict) {
        const message = `There are ${errors.length} validation errors (see 'error.errors')`
        throw new DataPackageError(message, errors)
      }
    }
  }

  // Lazily build (and cache) the tableschema Table for tabular resources;
  // returns null for non-tabular, throws for multipart
  _getTable() {
    if (!this._table) {
      // Resource -> Regular
      if (!this.tabular) {
        return null
      }

      // Resource -> Multipart
      if (this.multipart) {
        throw new DataPackageError('Resource.table does not support multipart resources')
      }

      // Resource -> Tabular
      const options = {}
      const descriptor = this._currentDescriptor
      options.format = descriptor.format || 'csv'
      options.encoding = descriptor.encoding
      const dialect = descriptor.dialect
      if (dialect) {
        // Headerless data: take headers from the schema fields, if any
        // NOTE(review): `config.DEFAULT_DIALECT.header === false` is a
        // constant condition — confirm DEFAULT_DIALECT in config.js
        if (dialect.header === false || config.DEFAULT_DIALECT.header === false) {
          const fields = (descriptor.schema || {}).fields || []
          options.headers = fields.length ? fields.map((field) => field.name) : null
        }
        helpers.validateDialect(dialect)
        // Map supported dialect keys onto (lowercased) Table options
        for (const key of DIALECT_KEYS) {
          if (dialect[key]) options[key.toLowerCase()] = dialect[key]
        }
      }
      const schemaDescriptor = this._currentDescriptor.schema
      const schema = schemaDescriptor ? new Schema(schemaDescriptor) : null
      this._table = new Table(this.source, { schema, ...options })
    }
    return this._table
  }

  // Lazily build (and cache) the {resourceName: keyedRows} map used to
  // resolve foreign keys; an empty resource name refers to this resource
  async _getRelations() {
    if (!this._relations) {
      // Prepare resources: group referenced field names by resource name
      const resources = {}
      if (this._getTable() && this._getTable().schema) {
        for (const fk of this._getTable().schema.foreignKeys) {
          resources[fk.reference.resource] = resources[fk.reference.resource] || []
          for (const field of fk.reference.fields) {
            resources[fk.reference.resource].push(field)
          }
        }
      }

      // Fill relations: external references need a parent data package
      this._relations = {}
      for (const [resource] of Object.entries(resources)) {
        if (resource && !this._dataPackage) continue
        this._relations[resource] = this._relations[resource] || []
        const data = resource ? this._dataPackage.getResource(resource) : this
        if (data.tabular) {
          this._relations[resource] = await data.read({ keyed: true })
        }
      }
    }
    return this._relations
  }

  // Deprecated

  get table() {
    return this._getTable()
  }
}

// Internal

// Dialect keys forwarded to the underlying tableschema Table options
const DIALECT_KEYS = [
  'delimiter',
  'doubleQuote',
  'lineTerminator',
  'quoteChar',
  'escapeChar',
  'skipInitialSpace',
]

// Classify the resource source as blank/inline/local/remote/multipart and
// derive name/format/tabular hints used by the Resource getters
function inspectSource(data, path, basePath) {
  const inspection = {}

  // Normalize path: always work with an array of paths
  if (path && !isArray(path)) {
    path = [path]
  }

  // Blank
  if (!data && !path) {
    inspection.source = null
    inspection.blank = true

    // Inline
  } else if (data) {
    inspection.source = data
    inspection.inline = true
    // An array of objects (keyed rows) is treated as tabular inline data
    inspection.tabular = isArray(data) && data.every(isObject)

    // Local/Remote
  } else if (path.length === 1) {
    // Remote
    if (helpers.isRemotePath(path[0])) {
      inspection.source = path[0]
      inspection.remote = true
    } else if (basePath && helpers.isRemotePath(basePath)) {
      // Relative path under a remote base path is also remote
      inspection.source = urljoin(basePath, path[0])
      inspection.remote = true

      // Local
    } else {
      // Path is not safe (e.g. escapes the base directory)
      if (!helpers.isSafePath(path[0])) {
        throw new DataPackageError(`Local path "${path[0]}" is not safe`)
      }

      // Not base path
      if (!basePath) {
        throw new DataPackageError(`Local path "${path[0]}" requires base path`)
      }

      inspection.source = [basePath, path[0]].join('/')
      inspection.local = true
    }

    // Inspect: derive name/format from the file name
    inspection.format = pathModule.extname(path[0]).slice(1)
    inspection.name = pathModule.basename(path[0], `.${inspection.format}`)
    inspection.tabular = config.TABULAR_FORMATS.includes(inspection.format)

    // Multipart Local/Remote
583 | } else if (path.length > 1) { 584 | const inspections = path.map((item) => inspectSource(null, item, basePath)) 585 | assign(inspection, inspections[0]) 586 | inspection.source = inspections.map((item) => item.source) 587 | inspection.multipart = true 588 | } 589 | 590 | return inspection 591 | } 592 | 593 | async function createByteStream(source, remote) { 594 | let stream 595 | 596 | // Remote source 597 | if (remote) { 598 | if (config.IS_BROWSER) { 599 | const response = await axios.get(source) 600 | stream = new Readable() 601 | stream.push(response.data) 602 | stream.push(null) 603 | } else { 604 | const response = await axios.get(source, { responseType: 'stream' }) 605 | stream = response.data 606 | } 607 | 608 | // Local source 609 | } else { 610 | if (config.IS_BROWSER) { 611 | throw new DataPackageError('Local paths are not supported in the browser') 612 | } else { 613 | stream = fs.createReadStream(source) 614 | } 615 | } 616 | 617 | return stream 618 | } 619 | 620 | // System 621 | 622 | module.exports = { 623 | Resource, 624 | } 625 | -------------------------------------------------------------------------------- /src/validate.js: -------------------------------------------------------------------------------- 1 | const { Package } = require('./package') 2 | 3 | // Module API 4 | 5 | /** 6 | * This function is async so it has to be used with `await` keyword or as a `Promise`. 
 *
 * @param {string|Object} descriptor - data package descriptor (local/remote path or object)
 * @return {Object} returns a `{valid, errors}` object
 */
async function validate(descriptor) {
  // Loading is non-strict here, so validation errors are collected on the
  // package rather than thrown
  const { valid, errors } = await Package.load(descriptor)
  return { valid, errors }
}

// System

module.exports = {
  validate,
}
--------------------------------------------------------------------------------
/test/errors.js:
--------------------------------------------------------------------------------
const { assert } = require('chai')
const tableschema = require('tableschema')
const { DataPackageError } = require('../src/errors')

// Tests

// DataPackageError: message/multiple/errors fields and instanceof behavior
describe('DataPackageError', () => {
  it('should work with one error', () => {
    const error = new DataPackageError('message')
    assert.deepEqual(error.message, 'message')
    assert.deepEqual(error.multiple, false)
    assert.deepEqual(error.errors, [])
  })

  it('should work with multiple errors', () => {
    const errors = [new Error('error1'), new Error('error2')]
    const error = new DataPackageError('message', errors)
    assert.deepEqual(error.message, 'message')
    assert.deepEqual(error.multiple, true)
    assert.deepEqual(error.errors.length, 2)
    assert.deepEqual(error.errors[0].message, 'error1')
    assert.deepEqual(error.errors[1].message, 'error2')
  })

  it('should be catchable as a normal error', () => {
    try {
      throw new DataPackageError('message')
    } catch (error) {
      assert.deepEqual(error.message, 'message')
      assert.deepEqual(error instanceof Error, true)
      assert.deepEqual(error instanceof DataPackageError, true)
    }
  })

  // NOTE(review): this asserts TableSchemaError instances also satisfy
  // `instanceof DataPackageError`, i.e. the error classes share a prototype
  // chain — defined in src/errors.js (outside this chunk)
  it('should work with table schema error', () => {
    try {
      throw new tableschema.errors.TableSchemaError('message')
    } catch (error) {
      assert.deepEqual(error.message, 'message')
      assert.deepEqual(error instanceof Error, true)
      assert.deepEqual(error instanceof DataPackageError, true)
      assert.deepEqual(error instanceof tableschema.errors.TableSchemaError, true)
    }
  })
})
--------------------------------------------------------------------------------
/test/helpers.js:
--------------------------------------------------------------------------------
const { assert } = require('chai')
const helpers = require('../src/helpers')

// Tests

// Table-driven checks of helpers.isSafePath over safe and unsafe path shapes
describe('helpers', () => {
  ;[
    // path, isSafe
    ['data.csv', true],
    ['data/data.csv', true],
    ['data/country/data.csv', true],
    ['data\\data.csv', true],
    ['data\\country\\data.csv', true],
    ['../data.csv', false],
    ['~/data.csv', false],
    ['~invalid_user/data.csv', false],
    ['%userprofile%', false],
    ['%unknown_windows_var%', false],
    ['$HOME', false],
    ['$UNKNOWN_VAR', false],
  ].forEach((test) => {
    const [path, isSafe] = test
    it(`#isSafePath: ${path} -> ${isSafe}`, async () => {
      assert.deepEqual(helpers.isSafePath(path), isSafe)
    })
  })
})

// Helpers

/**
 * Await `func(...args)` and return the error it throws/rejects with,
 * or undefined if it completes without throwing.
 *
 * @param {Function} func - function (sync or async) expected to fail
 * @param {...*} args - arguments forwarded to `func`
 * @return {Error|undefined} the captured error, if any
 */
async function catchError(func, ...args) {
  let error
  try {
    await func(...args)
  } catch (exception) {
    error = exception
  }
  return error
}

// System

module.exports = {
  catchError,
}
--------------------------------------------------------------------------------
/test/infer.js:
--------------------------------------------------------------------------------
const { assert } = require('chai')
const { infer } = require('../src')

// Tests

// infer() against the local data/dp1 fixture; skipped in the browser because
// it reads the local filesystem
describe('infer', () => {
  it('it infers local data package', async function () {
    if (process.env.USER_ENV === 'browser') this.skip()
    const descriptor = await infer('**/*.csv', { basePath: 'data/dp1' })
    assert.deepEqual(descriptor.profile, 'tabular-data-package')
    assert.deepEqual(descriptor.resources.length, 1)
    assert.deepEqual(descriptor.resources[0].path, 'data.csv')
    assert.deepEqual(descriptor.resources[0].format, 'csv')
    assert.deepEqual(descriptor.resources[0].encoding, 'utf-8')
    assert.deepEqual(descriptor.resources[0].profile, 'tabular-data-resource')
    assert.deepEqual(descriptor.resources[0].schema.fields[0].name, 'name')
    assert.deepEqual(descriptor.resources[0].schema.fields[1].name, 'size')
  })
})
--------------------------------------------------------------------------------
/test/karma.opts:
--------------------------------------------------------------------------------
require('regenerator-runtime/runtime')
const testsContext = require.context('.', true, /\.js$/)
testsContext.keys().forEach(testsContext)
--------------------------------------------------------------------------------
/test/mocha.opts:
--------------------------------------------------------------------------------
--recursive
--timeout 10000
--------------------------------------------------------------------------------
/test/package.js:
--------------------------------------------------------------------------------
const fs = require('fs')
const JSZip = require('jszip')
const axios = require('axios')
const sinon = require('sinon')
const { assert } = require('chai')
const { promisify } = require('util')
const { catchError } = require('./helpers')
const cloneDeep = require('lodash/cloneDeep')
const AxiosMock = require('axios-mock-adapter')
const { Package } = require('../src')
const helpers = require('../src/helpers')
const expand = helpers.expandPackageDescriptor
const expandResource = helpers.expandResourceDescriptor

// Tests

describe('Package', () => {
  describe('#load', () => {
    it('initializes with Object descriptor', async () => {
      const descriptor =
require('../data/dp1/datapackage.json') 21 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 22 | assert.deepEqual(dataPackage.descriptor, expand(descriptor)) 23 | }) 24 | 25 | it('initializes with URL descriptor', async () => { 26 | const descriptor = require('../data/dp1/datapackage.json') 27 | const dataPackage = await Package.load( 28 | 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp1/datapackage.json' 29 | ) 30 | assert.deepEqual(dataPackage.descriptor, expand(descriptor)) 31 | }) 32 | 33 | it('throws errors for invalid datapackage in strict mode', async () => { 34 | const error = await catchError(Package.load, {}, { strict: true }) 35 | assert.instanceOf(error, Error) 36 | assert.instanceOf(error.errors[0], Error) 37 | assert.include(error.errors[0].message, 'required property') 38 | }) 39 | 40 | it('stores errors for invalid datapackage', async () => { 41 | const dataPackage = await Package.load() 42 | assert.instanceOf(dataPackage.errors, Array) 43 | assert.instanceOf(dataPackage.errors[0], Error) 44 | assert.include(dataPackage.errors[0].message, 'required property') 45 | assert.isFalse(dataPackage.valid) 46 | }) 47 | 48 | it('loads relative resource', async function () { 49 | // TODO: For now tableschema doesn't support in-browser table.read 50 | if (process.env.USER_ENV === 'browser') { 51 | this.skip() 52 | } 53 | const descriptor = 54 | 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp1/datapackage.json' 55 | const dataPackage = await Package.load(descriptor) 56 | dataPackage.resources[0].descriptor.profile = 'tabular-data-resource' 57 | const data = await dataPackage.resources[0].table.read() 58 | assert.deepEqual(data, [ 59 | ['gb', 100], 60 | ['us', 200], 61 | ['cn', 300], 62 | ]) 63 | }) 64 | 65 | it('loads resource from absolute URL', async function () { 66 | // TODO: For now tableschema doesn't support in-browser table.read 67 | if (process.env.USER_ENV 
=== 'browser') { 68 | this.skip() 69 | } 70 | const descriptor = 71 | 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp1/datapackage.json' 72 | const dataPackage = await Package.load(descriptor) 73 | dataPackage.resources[0].descriptor.profile = 'tabular-data-resource' 74 | const table = await dataPackage.resources[0].table 75 | const data = await table.read() 76 | assert.deepEqual(data, [ 77 | ['gb', 100], 78 | ['us', 200], 79 | ['cn', 300], 80 | ]) 81 | }) 82 | 83 | it.skip('loads resource from absolute URL disregarding basePath', async function () { 84 | // TODO: For now tableschema doesn't support in-browser table.read 85 | if (process.env.USER_ENV === 'browser') { 86 | this.skip() 87 | } 88 | const descriptor = 89 | 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp1/datapackage.json' 90 | const dataPackage = await Package.load(descriptor, { basePath: 'local/basePath' }) 91 | dataPackage.resources[0].descriptor.profile = 'tabular-data-resource' 92 | const table = await dataPackage.resources[0].table 93 | const data = await table.read() 94 | assert.deepEqual(data, [ 95 | ['gb', 100], 96 | ['us', 200], 97 | ['cn', 300], 98 | ]) 99 | }) 100 | 101 | it.skip('loads remote resource with basePath', async function () { 102 | // TODO: For now tableschema doesn't support in-browser table.read 103 | if (process.env.USER_ENV === 'browser') { 104 | this.skip() 105 | } 106 | const descriptor = 107 | 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/dp1/datapackage.json' 108 | const dataPackage = await Package.load(descriptor, { basePath: 'data' }) 109 | dataPackage.resources[1].descriptor.profile = 'tabular-data-resource' 110 | const table = await dataPackage.resources[1].table 111 | const data = await table.read() 112 | assert.deepEqual(data, [ 113 | ['gb', 105], 114 | ['us', 205], 115 | ['cn', 305], 116 | ]) 117 | }) 118 | 119 | it('remote_dereference', async () => { 120 | const 
descriptor = 121 | 'https://raw.githubusercontent.com/frictionlessdata/datapackage-js/master/data/data-package-dereference.json' 122 | const dataPackage = await Package.load(descriptor) 123 | assert.deepEqual( 124 | dataPackage.descriptor.resources, 125 | [ 126 | { name: 'name1', data: ['data'], schema: { fields: [{ name: 'name' }] } }, 127 | { name: 'name2', data: ['data'], dialect: { delimiter: ',' } }, 128 | ].map(expandResource) 129 | ) 130 | }) 131 | }) 132 | 133 | describe('#descriptor (retrieve)', () => { 134 | let http 135 | 136 | beforeEach(() => { 137 | http = new AxiosMock(axios) 138 | }) 139 | afterEach(() => { 140 | http.restore() 141 | }) 142 | 143 | it('object', async () => { 144 | const descriptor = { 145 | resources: [{ name: 'name', data: ['data'] }], 146 | } 147 | const dataPackage = await Package.load(descriptor) 148 | assert.deepEqual(dataPackage.descriptor, expand(descriptor)) 149 | }) 150 | 151 | it('string remote path', async () => { 152 | const contents = require('../data/data-package.json') 153 | const descriptor = 'http://example.com/data-package.json' 154 | http.onGet(descriptor).reply(200, contents) 155 | const dataPackage = await Package.load(descriptor) 156 | assert.deepEqual(dataPackage.descriptor, expand(contents)) 157 | }) 158 | 159 | it('string remote path bad', async () => { 160 | const descriptor = 'http://example.com/bad-path.json' 161 | http.onGet(descriptor).reply(500) 162 | const error = await catchError(Package.load, descriptor) 163 | assert.instanceOf(error, Error) 164 | assert.include(error.message, 'Can not retrieve remote') 165 | }) 166 | 167 | it('string local path', async () => { 168 | const contents = require('../data/data-package.json') 169 | const descriptor = 'data/data-package.json' 170 | if (process.env.USER_ENV !== 'browser') { 171 | const dataPackage = await Package.load(descriptor) 172 | assert.deepEqual(dataPackage.descriptor, expand(contents)) 173 | } else { 174 | const error = await 
catchError(Package.load, descriptor) 175 | assert.instanceOf(error, Error) 176 | assert.include(error.message, 'in browser is not supported') 177 | } 178 | }) 179 | 180 | it('string local path bad', async () => { 181 | const descriptor = 'data/bad-path.json' 182 | const error = await catchError(Package.load, descriptor) 183 | assert.instanceOf(error, Error) 184 | if (process.env.USER_ENV !== 'browser') { 185 | assert.include(error.message, 'Can not retrieve local') 186 | } else { 187 | assert.include(error.message, 'in browser is not supported') 188 | } 189 | }) 190 | }) 191 | 192 | describe('#descriptor (dereference)', () => { 193 | let http 194 | 195 | beforeEach(() => { 196 | http = new AxiosMock(axios) 197 | }) 198 | afterEach(() => { 199 | http.restore() 200 | }) 201 | 202 | it('mixed', async () => { 203 | const descriptor = 'data/data-package-dereference.json' 204 | if (process.env.USER_ENV !== 'browser') { 205 | const dataPackage = await Package.load(descriptor) 206 | assert.deepEqual( 207 | dataPackage.descriptor.resources, 208 | [ 209 | { name: 'name1', data: ['data'], schema: { fields: [{ name: 'name' }] } }, 210 | { name: 'name2', data: ['data'], dialect: { delimiter: ',' } }, 211 | ].map(expandResource) 212 | ) 213 | } else { 214 | const error = await catchError(Package.load, descriptor) 215 | assert.instanceOf(error, Error) 216 | assert.include(error.message, 'in browser') 217 | } 218 | }) 219 | 220 | it('pointer', async () => { 221 | const descriptor = { 222 | resources: [ 223 | { name: 'name1', data: ['data'], schema: '#/schemas/main' }, 224 | { name: 'name2', data: ['data'], dialect: '#/dialects/0' }, 225 | ], 226 | schemas: { main: { fields: [{ name: 'name' }] } }, 227 | dialects: [{ delimiter: ',' }], 228 | } 229 | const dataPackage = await Package.load(descriptor) 230 | assert.deepEqual( 231 | dataPackage.descriptor.resources, 232 | [ 233 | { name: 'name1', data: ['data'], schema: { fields: [{ name: 'name' }] } }, 234 | { name: 'name2', data: 
['data'], dialect: { delimiter: ',' } }, 235 | ].map(expandResource) 236 | ) 237 | }) 238 | 239 | it('pointer bad', async () => { 240 | const descriptor = { 241 | resources: [{ name: 'name1', data: ['data'], schema: '#/schemas/main' }], 242 | } 243 | const error = await catchError(Package.load, descriptor) 244 | assert.instanceOf(error, Error) 245 | assert.include(error.message, 'Not resolved Pointer URI') 246 | }) 247 | 248 | it('remote', async () => { 249 | const descriptor = { 250 | resources: [ 251 | { name: 'name1', data: ['data'], schema: 'http://example.com/schema' }, 252 | { name: 'name2', data: ['data'], dialect: 'http://example.com/dialect' }, 253 | ], 254 | } 255 | http.onGet('http://example.com/schema').reply(200, { fields: [{ name: 'name' }] }) 256 | http.onGet('http://example.com/dialect').reply(200, { delimiter: ',' }) 257 | const dataPackage = await Package.load(descriptor) 258 | assert.deepEqual( 259 | dataPackage.descriptor.resources, 260 | [ 261 | { name: 'name1', data: ['data'], schema: { fields: [{ name: 'name' }] } }, 262 | { name: 'name2', data: ['data'], dialect: { delimiter: ',' } }, 263 | ].map(expandResource) 264 | ) 265 | }) 266 | 267 | it('remote bad', async () => { 268 | const descriptor = { 269 | resources: [{ name: 'name1', data: ['data'], schema: 'http://example.com/schema' }], 270 | } 271 | http.onGet('http://example.com/schema').reply(500) 272 | const error = await catchError(Package.load, descriptor) 273 | assert.instanceOf(error, Error) 274 | assert.include(error.message, 'Not resolved Remote URI') 275 | }) 276 | 277 | it('local', async () => { 278 | const descriptor = { 279 | resources: [ 280 | { name: 'name1', data: ['data'], schema: 'table-schema.json' }, 281 | { name: 'name2', data: ['data'], dialect: 'csv-dialect.json' }, 282 | ], 283 | } 284 | if (process.env.USER_ENV !== 'browser') { 285 | const dataPackage = await Package.load(descriptor, { basePath: 'data' }) 286 | assert.deepEqual( 287 | 
dataPackage.descriptor.resources, 288 | [ 289 | { name: 'name1', data: ['data'], schema: { fields: [{ name: 'name' }] } }, 290 | { name: 'name2', data: ['data'], dialect: { delimiter: ',' } }, 291 | ].map(expandResource) 292 | ) 293 | } else { 294 | const error = await catchError(Package.load, descriptor, { basePath: 'data' }) 295 | assert.instanceOf(error, Error) 296 | assert.include(error.message, 'in browser') 297 | } 298 | }) 299 | 300 | it('local bad', async () => { 301 | const descriptor = { 302 | resources: [{ name: 'name1', data: ['data'], schema: 'bad-path.json' }], 303 | } 304 | const error = await catchError(Package.load, descriptor, { basePath: 'data' }) 305 | assert.instanceOf(error, Error) 306 | if (process.env.USER_ENV !== 'browser') { 307 | assert.include(error.message, 'Not resolved Local URI') 308 | } else { 309 | assert.include(error.message, 'in browser') 310 | } 311 | }) 312 | 313 | it('local bad not safe', async () => { 314 | const descriptor = { 315 | resources: [{ name: 'name1', data: ['data'], schema: '../data/table-schema.json' }], 316 | } 317 | const error = await catchError(Package.load, descriptor, { basePath: 'data' }) 318 | assert.instanceOf(error, Error) 319 | if (process.env.USER_ENV !== 'browser') { 320 | assert.include(error.message, 'Not safe path') 321 | } else { 322 | assert.include(error.message, 'in browser') 323 | } 324 | }) 325 | }) 326 | 327 | describe('#descriptor (expand)', () => { 328 | it('resource', async () => { 329 | const descriptor = { 330 | resources: [ 331 | { 332 | name: 'name', 333 | data: ['data'], 334 | }, 335 | ], 336 | } 337 | const dataPackage = await Package.load(descriptor) 338 | assert.deepEqual(dataPackage.descriptor, { 339 | profile: 'data-package', 340 | resources: [ 341 | { 342 | name: 'name', 343 | data: ['data'], 344 | profile: 'data-resource', 345 | encoding: 'utf-8', 346 | }, 347 | ], 348 | }) 349 | }) 350 | 351 | it('tabular resource schema', async () => { 352 | const descriptor = { 353 | 
resources: [ 354 | { 355 | name: 'name', 356 | data: ['data'], 357 | profile: 'tabular-data-resource', 358 | schema: { fields: [{ name: 'name' }] }, 359 | }, 360 | ], 361 | } 362 | const dataPackage = await Package.load(descriptor) 363 | assert.deepEqual(dataPackage.descriptor, { 364 | profile: 'data-package', 365 | resources: [ 366 | { 367 | name: 'name', 368 | data: ['data'], 369 | profile: 'tabular-data-resource', 370 | encoding: 'utf-8', 371 | schema: { 372 | fields: [{ name: 'name', type: 'string', format: 'default' }], 373 | missingValues: [''], 374 | }, 375 | }, 376 | ], 377 | }) 378 | }) 379 | 380 | it('tabular resource dialect', async () => { 381 | const descriptor = { 382 | resources: [ 383 | { 384 | name: 'name', 385 | data: ['data'], 386 | profile: 'tabular-data-resource', 387 | dialect: { delimiter: 'custom' }, 388 | }, 389 | ], 390 | } 391 | const dataPackage = await Package.load(descriptor) 392 | assert.deepEqual(dataPackage.descriptor, { 393 | profile: 'data-package', 394 | resources: [ 395 | { 396 | name: 'name', 397 | data: ['data'], 398 | profile: 'tabular-data-resource', 399 | encoding: 'utf-8', 400 | dialect: { 401 | delimiter: 'custom', 402 | doubleQuote: true, 403 | lineTerminator: '\r\n', 404 | quoteChar: '"', 405 | skipInitialSpace: true, 406 | header: true, 407 | caseSensitiveHeader: false, 408 | }, 409 | }, 410 | ], 411 | }) 412 | }) 413 | 414 | it('tabular resource dialect updates quoteChar when given', async () => { 415 | const descriptor = { 416 | resources: [ 417 | { 418 | name: 'name', 419 | data: ['data'], 420 | profile: 'tabular-data-resource', 421 | dialect: { delimiter: 'custom', quoteChar: '+' }, 422 | }, 423 | ], 424 | } 425 | const dataPackage = await Package.load(descriptor) 426 | assert.deepEqual(dataPackage.descriptor, { 427 | profile: 'data-package', 428 | resources: [ 429 | { 430 | name: 'name', 431 | data: ['data'], 432 | profile: 'tabular-data-resource', 433 | encoding: 'utf-8', 434 | dialect: { 435 | delimiter: 
'custom', 436 | doubleQuote: true, 437 | lineTerminator: '\r\n', 438 | quoteChar: '+', 439 | skipInitialSpace: true, 440 | header: true, 441 | caseSensitiveHeader: false, 442 | }, 443 | }, 444 | ], 445 | }) 446 | }) 447 | 448 | it('tabular resource dialect does not include quoteChar, given escapeChar', async () => { 449 | const descriptor = { 450 | resources: [ 451 | { 452 | name: 'name', 453 | data: ['data'], 454 | profile: 'tabular-data-resource', 455 | dialect: { delimiter: 'custom', escapeChar: '\\+' }, 456 | }, 457 | ], 458 | } 459 | const dataPackage = await Package.load(descriptor) 460 | assert.deepEqual(dataPackage.descriptor, { 461 | profile: 'data-package', 462 | resources: [ 463 | { 464 | name: 'name', 465 | data: ['data'], 466 | profile: 'tabular-data-resource', 467 | encoding: 'utf-8', 468 | dialect: { 469 | delimiter: 'custom', 470 | doubleQuote: true, 471 | lineTerminator: '\r\n', 472 | escapeChar: '\\+', 473 | skipInitialSpace: true, 474 | header: true, 475 | caseSensitiveHeader: false, 476 | }, 477 | }, 478 | ], 479 | }) 480 | }) 481 | 482 | it('tabular resource dialect throws error given escapeChar and quoteChar', async () => { 483 | const descriptor = { 484 | resources: [ 485 | { 486 | name: 'name', 487 | data: ['data'], 488 | profile: 'tabular-data-resource', 489 | dialect: { 490 | delimiter: 'custom', 491 | escapeChar: '\\', 492 | quoteChar: '"', 493 | }, 494 | }, 495 | ], 496 | } 497 | const error = await catchError(Package.load, descriptor) 498 | assert.instanceOf(error, Error) 499 | assert.include(error.message, 'quoteChar and escapeChar are mutually exclusive') 500 | }) 501 | }) 502 | 503 | describe('#resources', () => { 504 | it('names', async () => { 505 | const descriptor = require('../data/data-package-multiple-resources.json') 506 | const dataPackage = await Package.load(descriptor, { basePath: 'data' }) 507 | assert.lengthOf(dataPackage.resources, 2) 508 | assert.deepEqual(dataPackage.resourceNames, ['name1', 'name2']) 509 | }) 510 | 
511 | it('add', async () => { 512 | const descriptor = require('../data/dp1/datapackage.json') 513 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 514 | const resource = dataPackage.addResource({ name: 'name', data: ['test'] }) 515 | assert.isOk(resource) 516 | assert.lengthOf(dataPackage.resources, 2) 517 | assert.deepEqual(dataPackage.resources[1].source, ['test']) 518 | }) 519 | 520 | it('add invalid - throws array of errors in strict mode', async () => { 521 | const descriptor = require('../data/dp1/datapackage.json') 522 | const dataPackage = await Package.load(descriptor, { 523 | basePath: 'data/dp1', 524 | strict: true, 525 | }) 526 | const error = await catchError(dataPackage.addResource.bind(dataPackage), {}) 527 | assert.instanceOf(error, Error) 528 | assert.instanceOf(error.errors[0], Error) 529 | assert.include(error.errors[0].message, 'Data does not match any schemas') 530 | }) 531 | 532 | it('add invalid - save errors in not a strict mode', async () => { 533 | const descriptor = require('../data/dp1/datapackage.json') 534 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 535 | dataPackage.addResource({}) 536 | assert.instanceOf(dataPackage.errors[0], Error) 537 | assert.include(dataPackage.errors[0].message, 'Data does not match any schemas') 538 | assert.isFalse(dataPackage.valid) 539 | }) 540 | 541 | it('add tabular - can read data', async () => { 542 | const descriptor = require('../data/dp1/datapackage.json') 543 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 544 | dataPackage.addResource({ 545 | name: 'name', 546 | data: [ 547 | ['id', 'name'], 548 | ['1', 'alex'], 549 | ['2', 'john'], 550 | ], 551 | schema: { 552 | fields: [ 553 | { name: 'id', type: 'integer' }, 554 | { name: 'name', type: 'string' }, 555 | ], 556 | }, 557 | }) 558 | const rows = await dataPackage.resources[1].table.read() 559 | assert.deepEqual(rows, [ 560 | [1, 'alex'], 561 | [2, 
'john'], 562 | ]) 563 | }) 564 | 565 | it('add with not a safe path - throw an error', async () => { 566 | const descriptor = require('../data/dp1/datapackage.json') 567 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 568 | try { 569 | dataPackage.addResource({ 570 | name: 'name', 571 | path: ['../dp1/data.csv'], 572 | }) 573 | assert.isNotOk(true) 574 | } catch (error) { 575 | assert.instanceOf(error, Error) 576 | assert.include(error.message, 'not safe') 577 | } 578 | }) 579 | 580 | it('get existent', async () => { 581 | const descriptor = require('../data/dp1/datapackage.json') 582 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 583 | const resource = dataPackage.getResource('random') 584 | assert.deepEqual(resource.name, 'random') 585 | }) 586 | 587 | it('get non existent', async () => { 588 | const descriptor = require('../data/dp1/datapackage.json') 589 | const dataPackage = await Package.load(descriptor, { basePath: 'data/dp1' }) 590 | const resource = dataPackage.getResource('non-existent') 591 | assert.isNull(resource) 592 | }) 593 | 594 | it('remove existent', async () => { 595 | const descriptor = require('../data/data-package-multiple-resources.json') 596 | const dataPackage = await Package.load(descriptor, { basePath: 'data' }) 597 | assert.lengthOf(dataPackage.resources, 2) 598 | assert.lengthOf(dataPackage.descriptor.resources, 2) 599 | assert.deepEqual(dataPackage.resources[0].name, 'name1') 600 | assert.deepEqual(dataPackage.resources[1].name, 'name2') 601 | const resource = dataPackage.removeResource('name2') 602 | assert.lengthOf(dataPackage.resources, 1) 603 | assert.lengthOf(dataPackage.descriptor.resources, 1) 604 | assert.deepEqual(dataPackage.resources[0].name, 'name1') 605 | assert.deepEqual(resource.name, 'name2') 606 | }) 607 | 608 | it('remove non existent', async () => { 609 | const descriptor = require('../data/dp1/datapackage.json') 610 | const dataPackage = await 
Package.load(descriptor, { basePath: 'data/dp1' }) 611 | const resource = dataPackage.removeResource('non-existent') 612 | assert.isNull(resource) 613 | assert.lengthOf(dataPackage.resources, 1) 614 | assert.lengthOf(dataPackage.descriptor.resources, 1) 615 | }) 616 | }) 617 | 618 | describe('#save', () => { 619 | // TODO: recover stub with async writeFile 620 | it.skip('general', async function () { 621 | // TODO: check it trows correct error in browser 622 | if (process.env.USER_ENV === 'browser') { 623 | this.skip() 624 | } 625 | const descriptor = { resources: [{ name: 'name', data: ['data'] }] } 626 | const dataPackage = await Package.load(descriptor) 627 | const writeFile = sinon.stub(fs, 'writeFile') 628 | await dataPackage.save('target') 629 | writeFile.restore() 630 | sinon.assert.calledWith(writeFile, 'target', JSON.stringify(expand(descriptor))) 631 | }) 632 | }) 633 | 634 | describe('#commit', () => { 635 | it('modified', async () => { 636 | const descriptor = { resources: [{ name: 'name', data: ['data'] }] } 637 | const dataPackage = await Package.load(descriptor) 638 | dataPackage.descriptor.resources[0].name = 'modified' 639 | assert.deepEqual(dataPackage.resources[0].name, 'name') 640 | const result = dataPackage.commit() 641 | assert.deepEqual(dataPackage.resources[0].name, 'modified') 642 | assert.isTrue(result) 643 | }) 644 | 645 | it('modified invalid in strict mode', async () => { 646 | const descriptor = { resources: [{ name: 'name', path: 'data.csv' }] } 647 | const dataPackage = await Package.load(descriptor, { 648 | basePath: 'data', 649 | strict: true, 650 | }) 651 | dataPackage.descriptor.resources = [] 652 | const error = await catchError(dataPackage.commit.bind(dataPackage), {}) 653 | assert.instanceOf(error, Error) 654 | assert.instanceOf(error.errors[0], Error) 655 | assert.include(error.errors[0].message, 'Array is too short') 656 | }) 657 | 658 | it('not modified', async () => { 659 | const descriptor = { resources: [{ name: 'name', 
data: ['data'] }] } 660 | const dataPackage = await Package.load(descriptor) 661 | const result = dataPackage.commit() 662 | assert.deepEqual(dataPackage.descriptor, expand(descriptor)) 663 | assert.isFalse(result) 664 | }) 665 | }) 666 | 667 | describe('#foreignKeys', () => { 668 | const DESCRIPTOR = { 669 | resources: [ 670 | { 671 | name: 'main', 672 | data: [ 673 | ['id', 'name', 'surname', 'parent_id'], 674 | ['1', 'Alex', 'Martin', ''], 675 | ['2', 'John', 'Dockins', '1'], 676 | ['3', 'Walter', 'White', '2'], 677 | ], 678 | schema: { 679 | fields: [{ name: 'id' }, { name: 'name' }, { name: 'surname' }, { name: 'parent_id' }], 680 | foreignKeys: [ 681 | { 682 | fields: 'name', 683 | reference: { resource: 'people', fields: 'firstname' }, 684 | }, 685 | ], 686 | }, 687 | }, 688 | { 689 | name: 'people', 690 | data: [ 691 | ['firstname', 'surname'], 692 | ['Alex', 'Martin'], 693 | ['John', 'Dockins'], 694 | ['Walter', 'White'], 695 | ], 696 | }, 697 | ], 698 | } 699 | 700 | it('should read rows if single field foreign keys is valid', async () => { 701 | const resource = (await Package.load(DESCRIPTOR)).getResource('main') 702 | const rows = await resource.read({ relations: true }) 703 | assert.deepEqual(rows, [ 704 | ['1', { firstname: 'Alex', surname: 'Martin' }, 'Martin', null], 705 | ['2', { firstname: 'John', surname: 'Dockins' }, 'Dockins', '1'], 706 | ['3', { firstname: 'Walter', surname: 'White' }, 'White', '2'], 707 | ]) 708 | }) 709 | 710 | it('should throw on read if single field foreign keys is invalid', async () => { 711 | const descriptor = cloneDeep(DESCRIPTOR) 712 | descriptor.resources[1].data[2][0] = 'Max' 713 | const resource = (await Package.load(descriptor)).getResource('main') 714 | const error1 = await catchError(resource.read.bind(resource), { relations: true }) 715 | const error2 = await catchError(resource.checkRelations.bind(resource)) 716 | assert.include(error1.message, 'Foreign key') 717 | assert.include(error2.message, 'Foreign 
key') 718 | }) 719 | 720 | it('should read rows if single self field foreign keys is valid', async () => { 721 | const descriptor = cloneDeep(DESCRIPTOR) 722 | descriptor.resources[0].schema.foreignKeys[0].fields = 'parent_id' 723 | descriptor.resources[0].schema.foreignKeys[0].reference.resource = '' 724 | descriptor.resources[0].schema.foreignKeys[0].reference.fields = 'id' 725 | const resource = (await Package.load(descriptor)).getResource('main') 726 | const keyedRows = await resource.read({ keyed: true, relations: true }) 727 | assert.deepEqual(keyedRows, [ 728 | { 729 | id: '1', 730 | name: 'Alex', 731 | surname: 'Martin', 732 | parent_id: null, 733 | }, 734 | { 735 | id: '2', 736 | name: 'John', 737 | surname: 'Dockins', 738 | parent_id: { id: '1', name: 'Alex', surname: 'Martin', parent_id: null }, 739 | }, 740 | { 741 | id: '3', 742 | name: 'Walter', 743 | surname: 'White', 744 | parent_id: { id: '2', name: 'John', surname: 'Dockins', parent_id: '1' }, 745 | }, 746 | ]) 747 | }) 748 | 749 | it('should throw on read if single self field foreign keys is invalid', async () => { 750 | const descriptor = cloneDeep(DESCRIPTOR) 751 | descriptor.resources[0].schema.foreignKeys[0].fields = 'parent_id' 752 | descriptor.resources[0].schema.foreignKeys[0].reference.resource = '' 753 | descriptor.resources[0].schema.foreignKeys[0].reference.fields = 'id' 754 | descriptor.resources[0].data[2][0] = '0' 755 | const resource = (await Package.load(descriptor)).getResource('main') 756 | const error1 = await catchError(resource.read.bind(resource), { relations: true }) 757 | const error2 = await catchError(resource.checkRelations.bind(resource)) 758 | assert.include(error1.message, 'Foreign key') 759 | assert.include(error2.message, 'Foreign key') 760 | }) 761 | 762 | it('should read rows if multi field foreign keys is valid', async () => { 763 | const descriptor = cloneDeep(DESCRIPTOR) 764 | descriptor.resources[0].schema.foreignKeys[0].fields = ['name', 'surname'] 765 | 
descriptor.resources[0].schema.foreignKeys[0].reference.fields = ['firstname', 'surname'] 766 | const resource = (await Package.load(descriptor)).getResource('main') 767 | const keyedRows = await resource.read({ keyed: true, relations: true }) 768 | assert.deepEqual(keyedRows, [ 769 | { 770 | id: '1', 771 | name: { firstname: 'Alex', surname: 'Martin' }, 772 | surname: { firstname: 'Alex', surname: 'Martin' }, 773 | parent_id: null, 774 | }, 775 | { 776 | id: '2', 777 | name: { firstname: 'John', surname: 'Dockins' }, 778 | surname: { firstname: 'John', surname: 'Dockins' }, 779 | parent_id: '1', 780 | }, 781 | { 782 | id: '3', 783 | name: { firstname: 'Walter', surname: 'White' }, 784 | surname: { firstname: 'Walter', surname: 'White' }, 785 | parent_id: '2', 786 | }, 787 | ]) 788 | }) 789 | 790 | it('should throw on read if multi field foreign keys is invalid', async () => { 791 | const descriptor = cloneDeep(DESCRIPTOR) 792 | descriptor.resources[0].schema.foreignKeys[0].fields = ['name', 'surname'] 793 | descriptor.resources[0].schema.foreignKeys[0].reference.fields = ['firstname', 'surname'] 794 | descriptor.resources[1].data[2][0] = 'Max' 795 | const resource = (await Package.load(descriptor)).getResource('main') 796 | const error1 = await catchError(resource.read.bind(resource), { relations: true }) 797 | const error2 = await catchError(resource.checkRelations.bind(resource)) 798 | assert.include(error1.message, 'Foreign key') 799 | assert.include(error2.message, 'Foreign key') 800 | }) 801 | }) 802 | 803 | describe('#zip', () => { 804 | it('should load package from a zip', async function () { 805 | if (process.env.USER_ENV === 'browser') this.skip() 806 | const dp = await Package.load('data/dp3-zip.zip') 807 | const countries = await dp.getResource('countries').read({ keyed: true }) 808 | assert.deepEqual(dp.descriptor.name, 'abc') 809 | assert.deepEqual(countries, [ 810 | { name: 'gb', size: 100 }, 811 | { name: 'us', size: 200 }, 812 | { name: 'cn', size: 
300 }, 813 | ]) 814 | }) 815 | 816 | it('should save package as a zip', async function () { 817 | if (process.env.USER_ENV === 'browser') this.skip() 818 | 819 | // Save as a zip 820 | const dp = await Package.load('data/dp3-zip/datapackage.json') 821 | const target = await promisify(require('tmp').file)({ postfix: '.zip' }) 822 | const result = await dp.save(target) 823 | assert.ok(result) 824 | 825 | // Assert file names 826 | const zip = JSZip() 827 | await zip.loadAsync(promisify(fs.readFile)(target)) 828 | assert.deepEqual(zip.file('datapackage.json').name, 'datapackage.json') 829 | assert.deepEqual(zip.file('data/countries.csv').name, 'data/countries.csv') 830 | 831 | // Assert contents 832 | const descContents = await zip.file('datapackage.json').async('string') 833 | const dataContents = await zip.file('data/countries.csv').async('string') 834 | assert.deepEqual(JSON.parse(descContents), dp.descriptor) 835 | assert.deepEqual(dataContents, 'name,size\ngb,100\nus,200\ncn,300\n') 836 | }) 837 | 838 | it('should raise saving package as a zip to the bad path', async function () { 839 | if (process.env.USER_ENV === 'browser') this.skip() 840 | const dp = await Package.load('data/dp3-zip/datapackage.json') 841 | const error = await catchError(dp.save.bind(dp), 'non-existent/datapackage.zip') 842 | assert.include(error.message, 'no such file or directory') 843 | assert.include(error.message, 'non-existent/datapackage.zip') 844 | }) 845 | }) 846 | }) 847 | -------------------------------------------------------------------------------- /test/profile.js: -------------------------------------------------------------------------------- 1 | const axios = require('axios') 2 | const { assert } = require('chai') 3 | const { catchError } = require('./helpers') 4 | const AxiosMock = require('axios-mock-adapter') 5 | const { Profile } = require('../src/profile') 6 | 7 | // Constants 8 | 9 | const PROFILES = [ 10 | 'data-package', 11 | 'tabular-data-package', 12 | 
'fiscal-data-package', 13 | 'data-resource', 14 | 'tabular-data-resource', 15 | ] 16 | 17 | // Tests 18 | 19 | describe('Profile', () => { 20 | describe('#load', () => { 21 | let http 22 | 23 | beforeEach(() => { 24 | http = new AxiosMock(axios) 25 | }) 26 | afterEach(() => { 27 | http.restore() 28 | }) 29 | 30 | PROFILES.forEach((name) => { 31 | it(`load registry "${name}" profile`, async () => { 32 | const jsonschema = require(`../src/profiles/${name}.json`) 33 | const profile = await Profile.load(name) 34 | assert.deepEqual(profile.jsonschema, jsonschema) 35 | }) 36 | }) 37 | 38 | it('load remote profile', async () => { 39 | const url = 'http://example.com/data-package.json' 40 | const jsonschema = require('../src/profiles/data-package.json') 41 | http.onGet(url).reply(200, jsonschema) 42 | const profile = await Profile.load(url) 43 | assert.deepEqual(profile.name, 'data-package') 44 | assert.deepEqual(profile.jsonschema, jsonschema) 45 | }) 46 | 47 | it('load remote profile from cache', async () => { 48 | const url = 'http://example.com/data-package.json' 49 | const jsonschema = require('../src/profiles/data-package.json') 50 | http.onGet(url).reply(200, jsonschema) 51 | await Profile.load(url) 52 | 53 | http.onGet(url).reply(400) 54 | const profile = await Profile.load(url) 55 | assert.deepEqual(profile.name, 'data-package') 56 | assert.deepEqual(profile.jsonschema, jsonschema) 57 | }) 58 | 59 | it('throw loading bad registry profile', async () => { 60 | const name = 'bad-data-package' 61 | const error = await catchError(Profile.load, name) 62 | assert.instanceOf(error, Error) 63 | assert.include(error.message, 'profile "bad-data-package"') 64 | }) 65 | 66 | it('throw loading bad remote profile', async () => { 67 | const name = 'http://example.com/profile.json' 68 | http.onGet(name).reply(400) 69 | const error = await catchError(Profile.load, name) 70 | assert.instanceOf(error, Error) 71 | assert.include(error.message, 'Can not retrieve remote') 72 | }) 73 | 
}) 74 | 75 | describe('#validate', () => { 76 | it('returns true for valid descriptor', async () => { 77 | const descriptor = { resources: [{ name: 'name', data: ['data'] }] } 78 | const profile = await Profile.load('data-package') 79 | assert.isOk(profile.validate(descriptor)) 80 | }) 81 | 82 | it('errors for invalid descriptor', async () => { 83 | const descriptor = {} 84 | const profile = await Profile.load('data-package') 85 | const { valid, errors } = profile.validate(descriptor) 86 | assert.deepEqual(valid, false) 87 | assert.instanceOf(errors[0], Error) 88 | assert.include(errors[0].message, 'Missing required property') 89 | }) 90 | }) 91 | 92 | // TODO: recover https://github.com/frictionlessdata/specs/issues/616 93 | describe.skip('#up-to-date', () => { 94 | PROFILES.forEach((name) => { 95 | it(`profile ${name} should be up-to-date`, async function () { 96 | if (process.env.USER_ENV === 'browser') this.skip() 97 | if (process.env.TRAVIS_BRANCH !== 'master') this.skip() 98 | const profile = await Profile.load(name) 99 | const response = await axios.get(`https://specs.frictionlessdata.io/schemas/${name}.json`) 100 | assert.deepEqual(profile.jsonschema, response.data) 101 | }) 102 | }) 103 | }) 104 | }) 105 | -------------------------------------------------------------------------------- /test/resource.js: -------------------------------------------------------------------------------- 1 | const axios = require('axios') 2 | const { assert } = require('chai') 3 | const { Table } = require('tableschema') 4 | const { catchError } = require('./helpers') 5 | const AxiosMock = require('axios-mock-adapter') 6 | const { Resource } = require('../src/resource') 7 | const helpers = require('../src/helpers') 8 | const expand = helpers.expandResourceDescriptor 9 | 10 | // Tests 11 | 12 | describe('Resource', () => { 13 | let http 14 | 15 | beforeEach(() => { 16 | http = new AxiosMock(axios) 17 | }) 18 | afterEach(() => { 19 | http.restore() 20 | }) 21 | 22 | 
describe('#load', () => { 23 | it('works with base descriptor', async () => { 24 | const descriptor = { 25 | name: 'name', 26 | data: ['data'], 27 | } 28 | const resource = await Resource.load(descriptor) 29 | assert.deepEqual(resource.name, 'name') 30 | assert.deepEqual(resource.tabular, false) 31 | assert.deepEqual(resource.descriptor, expand(descriptor)) 32 | assert.deepEqual(resource.inline, true) 33 | assert.deepEqual(resource.source, ['data']) 34 | assert.deepEqual(resource.table, null) 35 | }) 36 | 37 | it('works with tabular descriptor', async () => { 38 | const descriptor = { 39 | name: 'name', 40 | data: ['data'], 41 | profile: 'tabular-data-resource', 42 | } 43 | const resource = await Resource.load(descriptor) 44 | assert.deepEqual(resource.name, 'name') 45 | assert.deepEqual(resource.tabular, true) 46 | assert.deepEqual(resource.descriptor, expand(descriptor)) 47 | assert.deepEqual(resource.inline, true) 48 | assert.deepEqual(resource.source, ['data']) 49 | assert.isOk(resource.table) 50 | }) 51 | }) 52 | 53 | describe('#descriptor (retrieve)', () => { 54 | it('object', async () => { 55 | const descriptor = { 56 | name: 'name', 57 | data: 'data', 58 | } 59 | const resource = await Resource.load(descriptor) 60 | assert.deepEqual(resource.descriptor, expand(descriptor)) 61 | }) 62 | 63 | it('string remote path', async () => { 64 | const contents = require('../data/data-resource.json') 65 | const descriptor = 'http://example.com/data-resource.json' 66 | http.onGet(descriptor).reply(200, contents) 67 | const resource = await Resource.load(descriptor) 68 | assert.deepEqual(resource.descriptor, expand(contents)) 69 | }) 70 | 71 | it('string remote path bad', async () => { 72 | const descriptor = 'http://example.com/bad-path.json' 73 | http.onGet(descriptor).reply(500) 74 | const error = await catchError(Resource.load, descriptor) 75 | assert.instanceOf(error, Error) 76 | assert.include(error.message, 'Can not retrieve remote') 77 | }) 78 | 79 | it('string 
local path', async () => { 80 | const contents = require('../data/data-resource.json') 81 | const descriptor = 'data/data-resource.json' 82 | if (process.env.USER_ENV !== 'browser') { 83 | const resource = await Resource.load(descriptor) 84 | assert.deepEqual(resource.descriptor, expand(contents)) 85 | } else { 86 | const error = await catchError(Resource.load, descriptor) 87 | assert.instanceOf(error, Error) 88 | assert.include(error.message, 'in browser is not supported') 89 | } 90 | }) 91 | 92 | it('string local path bad', async () => { 93 | const descriptor = 'data/bad-path.json' 94 | const error = await catchError(Resource.load, descriptor) 95 | assert.instanceOf(error, Error) 96 | if (process.env.USER_ENV !== 'browser') { 97 | assert.include(error.message, 'Can not retrieve local') 98 | } else { 99 | assert.include(error.message, 'in browser is not supported') 100 | } 101 | }) 102 | }) 103 | 104 | describe('#descriptor (dereference)', () => { 105 | it('general', async () => { 106 | const descriptor = 'data/data-resource-dereference.json' 107 | if (process.env.USER_ENV !== 'browser') { 108 | const resource = await Resource.load(descriptor) 109 | assert.deepEqual( 110 | resource.descriptor, 111 | expand({ 112 | name: 'name', 113 | data: 'data', 114 | schema: { fields: [{ name: 'name' }] }, 115 | dialect: { delimiter: ',' }, 116 | dialects: { main: { delimiter: ',' } }, 117 | }) 118 | ) 119 | } else { 120 | const error = await catchError(Resource.load, descriptor) 121 | assert.instanceOf(error, Error) 122 | assert.include(error.message, 'in browser') 123 | } 124 | }) 125 | 126 | it('pointer', async () => { 127 | const descriptor = { 128 | name: 'name', 129 | data: 'data', 130 | schema: '#/schemas/main', 131 | schemas: { main: { fields: [{ name: 'name' }] } }, 132 | } 133 | const resource = await Resource.load(descriptor) 134 | assert.deepEqual( 135 | resource.descriptor, 136 | expand({ 137 | name: 'name', 138 | data: 'data', 139 | schema: { fields: [{ name: 
'name' }] }, 140 | schemas: { main: { fields: [{ name: 'name' }] } }, 141 | }) 142 | ) 143 | }) 144 | 145 | it('pointer bad', async () => { 146 | const descriptor = { 147 | name: 'name', 148 | data: 'data', 149 | schema: '#/schemas/main', 150 | } 151 | const error = await catchError(Resource.load, descriptor) 152 | assert.instanceOf(error, Error) 153 | assert.include(error.message, 'Not resolved Pointer URI') 154 | }) 155 | 156 | it('remote', async () => { 157 | const descriptor = { 158 | name: 'name', 159 | data: 'data', 160 | schema: 'http://example.com/schema', 161 | } 162 | http.onGet(descriptor.schema).reply(200, { fields: [{ name: 'name' }] }) 163 | const resource = await Resource.load(descriptor) 164 | assert.deepEqual( 165 | resource.descriptor, 166 | expand({ 167 | name: 'name', 168 | data: 'data', 169 | schema: { fields: [{ name: 'name' }] }, 170 | }) 171 | ) 172 | }) 173 | 174 | it('remote bad', async () => { 175 | const descriptor = { 176 | name: 'name', 177 | data: 'data', 178 | schema: 'http://example.com/schema', 179 | } 180 | http.onGet(descriptor.schema).reply(500) 181 | const error = await catchError(Resource.load, descriptor) 182 | assert.instanceOf(error, Error) 183 | assert.include(error.message, 'Not resolved Remote URI') 184 | }) 185 | 186 | it('local', async () => { 187 | const descriptor = { 188 | name: 'name', 189 | data: 'data', 190 | schema: 'table-schema.json', 191 | } 192 | if (process.env.USER_ENV !== 'browser') { 193 | const resource = await Resource.load(descriptor, { basePath: 'data' }) 194 | assert.deepEqual( 195 | resource.descriptor, 196 | expand({ 197 | name: 'name', 198 | data: 'data', 199 | schema: { fields: [{ name: 'name' }] }, 200 | }) 201 | ) 202 | } else { 203 | const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 204 | assert.instanceOf(error, Error) 205 | assert.include(error.message, 'in browser is not supported') 206 | } 207 | }) 208 | 209 | it('local bad', async () => { 210 | const 
descriptor = { 211 | name: 'name', 212 | data: 'data', 213 | schema: 'bad-path.json', 214 | } 215 | const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 216 | assert.instanceOf(error, Error) 217 | if (process.env.USER_ENV !== 'browser') { 218 | assert.include(error.message, 'Not resolved Local URI') 219 | } else { 220 | assert.include(error.message, 'in browser is not supported') 221 | } 222 | }) 223 | 224 | it('local bad not safe', async () => { 225 | const descriptor = { 226 | name: 'name', 227 | data: 'data', 228 | schema: '../data/table_schema.json', 229 | } 230 | const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 231 | assert.instanceOf(error, Error) 232 | if (process.env.USER_ENV !== 'browser') { 233 | assert.include(error.message, 'Not safe path') 234 | } else { 235 | assert.include(error.message, 'in browser is not supported') 236 | } 237 | }) 238 | }) 239 | 240 | describe('#descriptor (expand)', () => { 241 | it('general resource', async () => { 242 | const descriptor = { 243 | name: 'name', 244 | data: 'data', 245 | } 246 | const resource = await Resource.load(descriptor) 247 | assert.deepEqual(resource.descriptor, { 248 | name: 'name', 249 | data: 'data', 250 | profile: 'data-resource', 251 | encoding: 'utf-8', 252 | }) 253 | }) 254 | 255 | it('tabular resource schema', async () => { 256 | const descriptor = { 257 | name: 'name', 258 | data: 'data', 259 | profile: 'tabular-data-resource', 260 | schema: { 261 | fields: [{ name: 'name' }], 262 | }, 263 | } 264 | const resource = await Resource.load(descriptor) 265 | assert.deepEqual(resource.descriptor, { 266 | name: 'name', 267 | data: 'data', 268 | profile: 'tabular-data-resource', 269 | encoding: 'utf-8', 270 | schema: { 271 | fields: [{ name: 'name', type: 'string', format: 'default' }], 272 | missingValues: [''], 273 | }, 274 | }) 275 | }) 276 | 277 | it('tabular resource dialect', async () => { 278 | const descriptor = { 279 | name: 'name', 280 
| data: 'data', 281 | profile: 'tabular-data-resource', 282 | dialect: { 283 | delimiter: 'custom', 284 | }, 285 | } 286 | const resource = await Resource.load(descriptor) 287 | assert.deepEqual(resource.descriptor, { 288 | name: 'name', 289 | data: 'data', 290 | profile: 'tabular-data-resource', 291 | encoding: 'utf-8', 292 | dialect: { 293 | delimiter: 'custom', 294 | doubleQuote: true, 295 | lineTerminator: '\r\n', 296 | quoteChar: '"', 297 | skipInitialSpace: true, 298 | header: true, 299 | caseSensitiveHeader: false, 300 | }, 301 | }) 302 | }) 303 | 304 | it('tabular resource dialect updates quoteChar when given', async () => { 305 | const descriptor = { 306 | name: 'name', 307 | data: 'data', 308 | profile: 'tabular-data-resource', 309 | dialect: { 310 | delimiter: 'custom', 311 | quoteChar: '+', 312 | }, 313 | } 314 | const resource = await Resource.load(descriptor) 315 | assert.deepEqual(resource.descriptor, { 316 | name: 'name', 317 | data: 'data', 318 | profile: 'tabular-data-resource', 319 | encoding: 'utf-8', 320 | dialect: { 321 | delimiter: 'custom', 322 | doubleQuote: true, 323 | lineTerminator: '\r\n', 324 | quoteChar: '+', 325 | skipInitialSpace: true, 326 | header: true, 327 | caseSensitiveHeader: false, 328 | }, 329 | }) 330 | }) 331 | 332 | it('tabular resource dialect does not include quoteChar, given escapeChar', async () => { 333 | const descriptor = { 334 | name: 'name', 335 | data: 'data', 336 | profile: 'tabular-data-resource', 337 | dialect: { 338 | delimiter: 'custom', 339 | escapeChar: '\\+', 340 | }, 341 | } 342 | const resource = await Resource.load(descriptor) 343 | assert.deepEqual(resource.descriptor, { 344 | name: 'name', 345 | data: 'data', 346 | profile: 'tabular-data-resource', 347 | encoding: 'utf-8', 348 | dialect: { 349 | delimiter: 'custom', 350 | doubleQuote: true, 351 | lineTerminator: '\r\n', 352 | escapeChar: '\\+', 353 | skipInitialSpace: true, 354 | header: true, 355 | caseSensitiveHeader: false, 356 | }, 357 | }) 358 
| }) 359 | 360 | it('tabular resource dialect throws error given escapeChar and quoteChar', async () => { 361 | const descriptor = { 362 | name: 'name', 363 | data: 'data', 364 | profile: 'tabular-data-resource', 365 | dialect: { 366 | delimiter: 'custom', 367 | escapeChar: '\\', 368 | quoteChar: '"', 369 | }, 370 | } 371 | const error = await catchError(Resource.load, descriptor) 372 | assert.instanceOf(error, Error) 373 | assert.include(error.message, 'quoteChar and escapeChar are mutually exclusive') 374 | }) 375 | }) 376 | 377 | describe('#source/sourceType', () => { 378 | it('inline', async () => { 379 | const descriptor = { 380 | name: 'name', 381 | data: 'data', 382 | path: ['path'], 383 | } 384 | const resource = await Resource.load(descriptor) 385 | assert.deepEqual(resource.source, 'data') 386 | assert.deepEqual(resource.inline, true) 387 | }) 388 | 389 | it('local', async () => { 390 | const descriptor = { 391 | name: 'name', 392 | path: ['table.csv'], 393 | } 394 | const resource = await Resource.load(descriptor, { basePath: 'data' }) 395 | assert.deepEqual(resource.source, 'data/table.csv') 396 | assert.deepEqual(resource.local, true) 397 | }) 398 | 399 | it('local base no base path', async () => { 400 | const descriptor = { 401 | name: 'name', 402 | path: ['table.csv'], 403 | } 404 | const error = await catchError(Resource.load, descriptor, { basePath: null }) 405 | assert.instanceOf(error, Error) 406 | assert.include(error.message, 'requires base path') 407 | }) 408 | 409 | it('local bad not safe absolute', async () => { 410 | const descriptor = { 411 | name: 'name', 412 | path: ['/fixtures/table.csv'], 413 | } 414 | const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 415 | assert.instanceOf(error, Error) 416 | assert.include(error.message, 'not safe') 417 | }) 418 | 419 | it('local bad not safe traversing', async () => { 420 | const descriptor = { 421 | name: 'name', 422 | path: ['../fixtures/table.csv'], 423 | } 424 | 
const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 425 | assert.instanceOf(error, Error) 426 | assert.include(error.message, 'not safe') 427 | }) 428 | 429 | it('remote', async () => { 430 | const descriptor = { 431 | name: 'name', 432 | path: ['http://example.com/table.csv'], 433 | } 434 | const resource = await Resource.load(descriptor) 435 | assert.deepEqual(resource.source, 'http://example.com/table.csv') 436 | assert.deepEqual(resource.remote, true) 437 | }) 438 | 439 | it('remote path relative and base path remote', async () => { 440 | const descriptor = { 441 | name: 'name', 442 | path: ['table.csv'], 443 | } 444 | const resource = await Resource.load(descriptor, { basePath: 'http://example.com/' }) 445 | assert.deepEqual(resource.source, 'http://example.com/table.csv') 446 | assert.deepEqual(resource.remote, true) 447 | }) 448 | 449 | it('remote path remote and base path remote', async () => { 450 | const descriptor = { 451 | name: 'name', 452 | path: ['http://example1.com/table.csv'], 453 | } 454 | const resource = await Resource.load(descriptor, { basePath: 'http://example2.com/' }) 455 | assert.deepEqual(resource.source, 'http://example1.com/table.csv') 456 | assert.deepEqual(resource.remote, true) 457 | }) 458 | 459 | it('multipart local', async () => { 460 | const descriptor = { 461 | name: 'name', 462 | path: ['chunk1.csv', 'chunk2.csv'], 463 | } 464 | const resource = await Resource.load(descriptor, { basePath: 'data' }) 465 | assert.deepEqual(resource.source, ['data/chunk1.csv', 'data/chunk2.csv']) 466 | assert.deepEqual(resource.local, true) 467 | assert.deepEqual(resource.multipart, true) 468 | }) 469 | 470 | it('multipart local bad no base path', async () => { 471 | const descriptor = { 472 | name: 'name', 473 | path: ['chunk1.csv', 'chunk2.csv'], 474 | } 475 | const error = await catchError(Resource.load, descriptor, { basePath: null }) 476 | assert.instanceOf(error, Error) 477 | assert.include(error.message, 
'requires base path') 478 | }) 479 | 480 | it('multipart local bad not safe absolute', async () => { 481 | const descriptor = { 482 | name: 'name', 483 | path: ['/fixtures/chunk1.csv', 'chunk2.csv'], 484 | } 485 | const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 486 | assert.instanceOf(error, Error) 487 | assert.include(error.message, 'not safe') 488 | }) 489 | 490 | it('multipart local bad not safe traversing', async () => { 491 | const descriptor = { 492 | name: 'name', 493 | path: ['chunk1.csv', '../fixtures/chunk2.csv'], 494 | } 495 | const error = await catchError(Resource.load, descriptor, { basePath: 'data' }) 496 | // Assert 497 | assert.instanceOf(error, Error) 498 | assert.include(error.message, 'not safe') 499 | }) 500 | 501 | it('multipart remote', async () => { 502 | const descriptor = { 503 | name: 'name', 504 | path: ['http://example.com/chunk1.csv', 'http://example.com/chunk2.csv'], 505 | } 506 | const resource = await Resource.load(descriptor) 507 | assert.deepEqual(resource.source, [ 508 | 'http://example.com/chunk1.csv', 509 | 'http://example.com/chunk2.csv', 510 | ]) 511 | assert.deepEqual(resource.remote, true) 512 | assert.deepEqual(resource.multipart, true) 513 | }) 514 | 515 | it('multipart remote path relative and base path remote', async () => { 516 | const descriptor = { 517 | name: 'name', 518 | path: ['chunk1.csv', 'chunk2.csv'], 519 | } 520 | const resource = await Resource.load(descriptor, { basePath: 'http://example.com' }) 521 | assert.deepEqual(resource.source, [ 522 | 'http://example.com/chunk1.csv', 523 | 'http://example.com/chunk2.csv', 524 | ]) 525 | assert.deepEqual(resource.remote, true) 526 | assert.deepEqual(resource.multipart, true) 527 | }) 528 | 529 | it('multipart remote path remote and base path remote', async () => { 530 | const descriptor = { 531 | name: 'name', 532 | path: ['chunk1.csv', 'http://example2.com/chunk2.csv'], 533 | } 534 | const resource = await Resource.load(descriptor, { 
basePath: 'http://example1.com' }) 535 | assert.deepEqual(resource.source, [ 536 | 'http://example1.com/chunk1.csv', 537 | 'http://example2.com/chunk2.csv', 538 | ]) 539 | assert.deepEqual(resource.remote, true) 540 | assert.deepEqual(resource.multipart, true) 541 | }) 542 | }) 543 | 544 | describe('#rawRead', () => { 545 | it('it raw reads local file source', async function () { 546 | if (process.env.USER_ENV === 'browser') this.skip() 547 | const resource = await Resource.load({ path: 'data/data.csv' }, { basePath: '.' }) 548 | const bytes = await resource.rawRead() 549 | assert.include(bytes.toString(), 'name,size') 550 | }) 551 | }) 552 | 553 | describe('#table', () => { 554 | it('general resource', async () => { 555 | const descriptor = { 556 | name: 'name', 557 | data: 'data', 558 | } 559 | const resource = await Resource.load(descriptor) 560 | assert.deepEqual(resource.table, null) 561 | }) 562 | 563 | it('tabular resource inline', async () => { 564 | const descriptor = { 565 | name: 'example', 566 | profile: 'tabular-data-resource', 567 | data: [ 568 | ['height', 'age', 'name'], 569 | ['180', '18', 'Tony'], 570 | ['192', '32', 'Jacob'], 571 | ], 572 | schema: { 573 | fields: [ 574 | { name: 'height', type: 'integer' }, 575 | { name: 'age', type: 'integer' }, 576 | { name: 'name', type: 'string' }, 577 | ], 578 | }, 579 | } 580 | const resource = await Resource.load(descriptor) 581 | assert.instanceOf(resource.table, Table) 582 | assert.deepEqual(await resource.table.read(), [ 583 | [180, 18, 'Tony'], 584 | [192, 32, 'Jacob'], 585 | ]) 586 | }) 587 | 588 | it('tabular resource local', async function () { 589 | // Skip test for browser 590 | if (process.env.USER_ENV === 'browser') { 591 | this.skip() 592 | } 593 | // Prepare 594 | const descriptor = { 595 | name: 'example', 596 | profile: 'tabular-data-resource', 597 | path: ['dp1/data.csv'], 598 | schema: { 599 | fields: [ 600 | { name: 'name', type: 'string' }, 601 | { name: 'size', type: 'integer' }, 602 | 
], 603 | }, 604 | } 605 | const resource = await Resource.load(descriptor, { basePath: 'data' }) 606 | // Assert 607 | assert.instanceOf(resource.table, Table) 608 | assert.deepEqual(await resource.table.read(), [ 609 | ['gb', 100], 610 | ['us', 200], 611 | ['cn', 300], 612 | ]) 613 | }) 614 | }) 615 | 616 | describe('#infer', () => { 617 | it('preserve resource format from descriptor ', async function () { 618 | if (process.env.USER_ENV === 'browser') this.skip() 619 | const resource = await Resource.load({ path: 'data/data.csvformat', format: 'csv' }) 620 | assert.deepEqual(await resource.infer(), { 621 | encoding: 'utf-8', 622 | format: 'csv', 623 | mediatype: 'text/csv', 624 | name: 'data', 625 | path: 'data/data.csvformat', 626 | profile: 'tabular-data-resource', 627 | schema: { 628 | fields: [ 629 | { format: 'default', name: 'city', type: 'string' }, 630 | { format: 'default', name: 'population', type: 'integer' }, 631 | ], 632 | missingValues: [''], 633 | }, 634 | }) 635 | }) 636 | }) 637 | 638 | describe('#encoding', () => { 639 | it('it supports encoding property', async function () { 640 | if (process.env.USER_ENV === 'browser') this.skip() 641 | const descriptor = { 642 | path: 'data/latin1.csv', 643 | encoding: 'latin1', 644 | schema: { fields: [{ name: 'id' }, { name: 'name' }] }, 645 | } 646 | const resource = await Resource.load(descriptor) 647 | const rows = await resource.read({ keyed: true }) 648 | assert.deepEqual(rows, [ 649 | { id: '1', name: 'english' }, 650 | { id: '2', name: '©' }, 651 | ]) 652 | }) 653 | 654 | it('it reads incorreclty if proper encoding is not set', async function () { 655 | if (process.env.USER_ENV === 'browser') this.skip() 656 | const descriptor = { 657 | path: 'data/latin1.csv', 658 | schema: { fields: [{ name: 'id' }, { name: 'name' }] }, 659 | } 660 | const resource = await Resource.load(descriptor) 661 | const rows = await resource.read({ keyed: true }) 662 | assert.notDeepEqual(rows, [ 663 | { id: '1', name: 
'english' }, 664 | { id: '2', name: '©' }, 665 | ]) 666 | }) 667 | }) 668 | 669 | describe('#dialect', () => { 670 | it('it supports dialect.delimiter', async function () { 671 | if (process.env.USER_ENV === 'browser') this.skip() 672 | const descriptor = { 673 | path: 'data/data.dialect.csv', 674 | schema: { fields: [{ name: 'name' }, { name: 'size' }] }, 675 | dialect: { delimiter: ';' }, 676 | } 677 | const resource = await Resource.load(descriptor) 678 | const rows = await resource.read({ keyed: true }) 679 | assert.deepEqual(rows, [ 680 | { name: 'gb', size: '105' }, 681 | { name: 'us', size: '205' }, 682 | { name: 'cn', size: '305' }, 683 | ]) 684 | }) 685 | 686 | it('it supports dialect.header=false', async () => { 687 | const descriptor = { 688 | data: [['a'], ['b'], ['c']], 689 | schema: { fields: [{ name: 'letter' }] }, 690 | dialect: { header: false }, 691 | } 692 | const resource = await Resource.load(descriptor) 693 | const rows = await resource.read({ keyed: true }) 694 | assert.deepEqual(rows, [{ letter: 'a' }, { letter: 'b' }, { letter: 'c' }]) 695 | }) 696 | }) 697 | }) 698 | -------------------------------------------------------------------------------- /test/validate.js: -------------------------------------------------------------------------------- 1 | const { assert } = require('chai') 2 | const { validate } = require('../src') 3 | 4 | // Tests 5 | 6 | describe('validate', () => { 7 | it('returns true for valid descriptor', async () => { 8 | const descriptor = { resources: [{ name: 'name', data: ['data'] }] } 9 | const valid = await validate(descriptor) 10 | assert.ok(valid) 11 | }) 12 | 13 | it('returns array of errors for invalid descriptor', async () => { 14 | const descriptor = { resource: [{ name: 'name' }] } 15 | const { valid, errors } = await validate(descriptor) 16 | assert.deepEqual(valid, false) 17 | assert.deepEqual(errors.length, 1) 18 | }) 19 | }) 20 | 
// ============================== /webpack.config.js ==============================

const path = require('path')
const webpack = require('webpack')
const merge = require('webpack-merge')
const ENV = process.env.NODE_ENV || 'development'

// Base configuration shared by all builds.
// NOTE(review): `module.loaders` and the `node` shims are webpack 1/2 era
// syntax — keep them unless the project's webpack dependency is upgraded.

let webpackConfig = {
  entry: './src/index.js',
  devtool: 'source-map',
  module: {
    loaders: [
      { test: /\.json$/, loader: 'json-loader' },
      { test: /\.js$/, loaders: ['babel-loader'], exclude: /node_modules/ },
    ],
  },
  output: {
    library: 'datapackage',
    libraryTarget: 'umd',
  },
  plugins: [
    // Mark the bundle as running in a browser for runtime feature checks
    new webpack.DefinePlugin({
      'process.env.USER_ENV': JSON.stringify('browser'),
    }),
    // Keep moment locales and node-only helpers out of the browser bundle
    new webpack.IgnorePlugin(/^\.\/locale$/, /moment$/),
    new webpack.IgnorePlugin(/^jschardet$/),
    new webpack.IgnorePlugin(/^glob$/),
  ],
  node: {
    fs: 'empty',
    http: 'empty',
    https: 'empty',
    crypto: 'empty',
  },
}

// Development build: unminified bundle

if (ENV === 'development') {
  webpackConfig = merge(webpackConfig, {
    output: {
      filename: 'datapackage.js',
      path: path.resolve(__dirname, './dist'),
    },
    plugins: [
      new webpack.DefinePlugin({
        'process.env.NODE_ENV': JSON.stringify('development'),
      }),
    ],
  })
}

// Production build: minified bundle

if (ENV === 'production') {
  webpackConfig = merge(webpackConfig, {
    output: {
      filename: 'datapackage.min.js',
      path: path.resolve(__dirname, './dist'),
    },
    plugins: [
      new webpack.DefinePlugin({
        'process.env.NODE_ENV': JSON.stringify('production'),
      }),
      new webpack.optimize.UglifyJsPlugin({
        sourceMap: true,
        compressor: {
          screw_ie8: true,
          warnings: false,
        },
      }),
    ],
  })
}

module.exports = webpackConfig
--------------------------------------------------------------------------------