├── .husky └── pre-push ├── content ├── docs │ ├── overview │ │ ├── contributing.md │ │ ├── governance.md │ │ ├── introduction.md │ │ ├── software.mdx │ │ └── changelog.md │ ├── standard │ │ ├── assets │ │ │ └── security-matrix.png │ │ ├── extensions.mdx │ │ ├── glossary.mdx │ │ └── security.mdx │ ├── guides │ │ ├── extending-data-package.md │ │ ├── using-data-package.md │ │ └── mediawiki-tabular-data.md │ ├── extensions │ │ ├── camtrap-data-package.md │ │ └── fiscal-data-package.md │ ├── recipes │ │ ├── data-dependencies.md │ │ ├── external-foreign-keys.md │ │ ├── private-properties.md │ │ ├── compression-of-resources.md │ │ ├── files-inside-archives.md │ │ ├── data-package-version.md │ │ ├── language-support.md │ │ ├── caching-of-resources.md │ │ ├── translation-support.md │ │ ├── data-catalog.md │ │ ├── json-data-resources.md │ │ ├── metadata-in-table-schema.md │ │ └── relationship-between-fields.md │ ├── index.mdx │ └── blog │ │ ├── 2023-11-15-v2-announcement.md │ │ └── 2024-06-26-v2-release.md └── config.ts ├── tsconfig.json ├── public ├── ode.png ├── favicon.png ├── favicon.svg └── profiles │ ├── 1.0 │ └── tabledialect.json │ └── 2.0 │ └── tabledialect.json ├── assets ├── hero.png ├── adoption │ ├── cmoa.png │ ├── cmso.png │ ├── dm4t.png │ ├── fdwc.png │ ├── gbif.png │ ├── nes.png │ ├── odb.png │ ├── opsd.png │ ├── owid.png │ ├── pnnl.png │ ├── pudl.png │ ├── ukds.png │ ├── uop.png │ ├── bcodmo.png │ ├── chicago.png │ ├── dryad.png │ ├── elife.png │ ├── etalab.png │ ├── eucom.png │ ├── github.png │ ├── hubmap.png │ ├── openml.png │ ├── oxford.png │ ├── tesera.png │ ├── zegami.png │ ├── cambridge.png │ ├── dataship.png │ ├── gapminder.png │ ├── validata.png │ ├── causanatura.png │ ├── data-world.png │ ├── gapminder2.png │ ├── nimblelearn.png │ ├── data-retriever.png │ ├── john-snow-labs.png │ ├── open-referral.png │ ├── deploy-solutions.png │ └── libraries-hacked.png ├── funding │ └── okfn.png ├── software │ ├── ode.png │ ├── flatterer.png │ └── 
data-curator.png ├── styles.css ├── index.ts ├── logo-light.svg └── logo-dark.svg ├── env.d.ts ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── workflows │ └── general.yaml ├── stale.yaml └── ISSUE_TEMPLATE │ ├── 01-bug-report.yaml │ ├── 02-general-issue.yaml │ └── config.yml ├── .prettierignore ├── profiles ├── datapackage.json ├── dataresource.json ├── tabledialect.json ├── tableschema.json └── dictionary │ ├── package.yaml │ ├── dialect.yaml │ ├── resource.yaml │ └── common.yaml ├── .editorconfig ├── .prettierrc.json ├── components ├── MarkdownContent.astro ├── ClickableCard.astro ├── ImageLinkCard.astro ├── SocialIcons.astro ├── About.astro └── Adoption.astro ├── .gitignore ├── .eslintrc.json ├── README.md ├── LICENSE.md ├── package.json ├── scripts └── generate.ts ├── astro.config.js └── CONTRIBUTING.md /.husky/pre-push: -------------------------------------------------------------------------------- 1 | npm test 2 | -------------------------------------------------------------------------------- /content/docs/overview/contributing.md: -------------------------------------------------------------------------------- 1 | ../../../CONTRIBUTING.md -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "astro/tsconfigs/strict" 3 | } 4 | -------------------------------------------------------------------------------- /public/ode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/public/ode.png -------------------------------------------------------------------------------- /assets/hero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/hero.png 
-------------------------------------------------------------------------------- /env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | -------------------------------------------------------------------------------- /public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/public/favicon.png -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | - fixes # 2 | 3 | --- 4 | 5 | Provide a brief description of this change. 6 | -------------------------------------------------------------------------------- /assets/adoption/cmoa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/cmoa.png -------------------------------------------------------------------------------- /assets/adoption/cmso.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/cmso.png -------------------------------------------------------------------------------- /assets/adoption/dm4t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/dm4t.png -------------------------------------------------------------------------------- /assets/adoption/fdwc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/fdwc.png -------------------------------------------------------------------------------- /assets/adoption/gbif.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/gbif.png -------------------------------------------------------------------------------- /assets/adoption/nes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/nes.png -------------------------------------------------------------------------------- /assets/adoption/odb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/odb.png -------------------------------------------------------------------------------- /assets/adoption/opsd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/opsd.png -------------------------------------------------------------------------------- /assets/adoption/owid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/owid.png -------------------------------------------------------------------------------- /assets/adoption/pnnl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/pnnl.png -------------------------------------------------------------------------------- /assets/adoption/pudl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/pudl.png -------------------------------------------------------------------------------- /assets/adoption/ukds.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/ukds.png -------------------------------------------------------------------------------- /assets/adoption/uop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/uop.png -------------------------------------------------------------------------------- /assets/funding/okfn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/funding/okfn.png -------------------------------------------------------------------------------- /assets/software/ode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/software/ode.png -------------------------------------------------------------------------------- /assets/adoption/bcodmo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/bcodmo.png -------------------------------------------------------------------------------- /assets/adoption/chicago.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/chicago.png -------------------------------------------------------------------------------- /assets/adoption/dryad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/dryad.png -------------------------------------------------------------------------------- /assets/adoption/elife.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/elife.png -------------------------------------------------------------------------------- /assets/adoption/etalab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/etalab.png -------------------------------------------------------------------------------- /assets/adoption/eucom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/eucom.png -------------------------------------------------------------------------------- /assets/adoption/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/github.png -------------------------------------------------------------------------------- /assets/adoption/hubmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/hubmap.png -------------------------------------------------------------------------------- /assets/adoption/openml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/openml.png -------------------------------------------------------------------------------- /assets/adoption/oxford.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/oxford.png -------------------------------------------------------------------------------- /assets/adoption/tesera.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/tesera.png -------------------------------------------------------------------------------- /assets/adoption/zegami.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/zegami.png -------------------------------------------------------------------------------- /assets/adoption/cambridge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/cambridge.png -------------------------------------------------------------------------------- /assets/adoption/dataship.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/dataship.png -------------------------------------------------------------------------------- /assets/adoption/gapminder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/gapminder.png -------------------------------------------------------------------------------- /assets/adoption/validata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/validata.png -------------------------------------------------------------------------------- /assets/software/flatterer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/software/flatterer.png -------------------------------------------------------------------------------- 
/assets/adoption/causanatura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/causanatura.png -------------------------------------------------------------------------------- /assets/adoption/data-world.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/data-world.png -------------------------------------------------------------------------------- /assets/adoption/gapminder2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/gapminder2.png -------------------------------------------------------------------------------- /assets/adoption/nimblelearn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/nimblelearn.png -------------------------------------------------------------------------------- /assets/software/data-curator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/software/data-curator.png -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | content/docs/guides/csvw-data-package.md 2 | content/docs/guides/mediawiki-tabular-data.md 3 | public/profiles 4 | -------------------------------------------------------------------------------- /assets/adoption/data-retriever.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/data-retriever.png 
-------------------------------------------------------------------------------- /assets/adoption/john-snow-labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/john-snow-labs.png -------------------------------------------------------------------------------- /assets/adoption/open-referral.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/open-referral.png -------------------------------------------------------------------------------- /assets/adoption/deploy-solutions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/deploy-solutions.png -------------------------------------------------------------------------------- /assets/adoption/libraries-hacked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/assets/adoption/libraries-hacked.png -------------------------------------------------------------------------------- /profiles/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$ref": "dictionary.json#/definitions/dataPackage" 4 | } 5 | -------------------------------------------------------------------------------- /profiles/dataresource.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$ref": "dictionary.json#/definitions/dataResource" 4 | } 5 | -------------------------------------------------------------------------------- /profiles/tabledialect.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$ref": "dictionary.json#/definitions/tableDialect" 4 | } 5 | -------------------------------------------------------------------------------- /profiles/tableschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$ref": "dictionary.json#/definitions/tableSchema" 4 | } 5 | -------------------------------------------------------------------------------- /content/docs/standard/assets/security-matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage/HEAD/content/docs/standard/assets/security-matrix.png -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_style = space 7 | indent_size = 2 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | -------------------------------------------------------------------------------- /content/docs/guides/extending-data-package.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: How to extend Data Package 3 | sidebar: 4 | order: 2 5 | hidden: true 6 | --- 7 | 8 | :::caution 9 | This section is under development 10 | ::: 11 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json.schemastore.org/prettierrc", 3 | "semi": false, 4 | "printWidth": 90, 5 | "arrowParens": "avoid", 6 | "trailingComma": "es5", 7 | "plugins": ["prettier-plugin-astro", 
"@trivago/prettier-plugin-sort-imports"] 8 | } 9 | -------------------------------------------------------------------------------- /content/config.ts: -------------------------------------------------------------------------------- 1 | import { docsSchema } from "@astrojs/starlight/schema" 2 | import { defineCollection } from "astro:content" 3 | import { blogSchema } from "starlight-blog/schema" 4 | 5 | export const collections = { 6 | docs: defineCollection({ 7 | schema: docsSchema({ 8 | extend: context => blogSchema(context), 9 | }), 10 | }), 11 | } 12 | -------------------------------------------------------------------------------- /components/MarkdownContent.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import type { Props } from "@astrojs/starlight/props" 3 | import Default from "@astrojs/starlight/components/MarkdownContent.astro" 4 | --- 5 | 6 | 7 | 8 | 13 | -------------------------------------------------------------------------------- /components/ClickableCard.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import { Card } from "@astrojs/starlight/components" 3 | 4 | interface Props { 5 | title: string 6 | description: string 7 | icon: any 8 | href: string 9 | } 10 | 11 | const { href, ...rest } = Astro.props 12 | --- 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /content/docs/extensions/camtrap-data-package.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Camera Trap Data Package 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
Authors: Peter Desmet, Jakub W. Bubnicki, Camtrap DP Development Team
11 | 12 | Camera Trap Data Package (or Camtrap DP for short) is a community developed data exchange format for camera trap data. 13 | 14 | [Camera Trap Data Package](https://camtrap-dp.tdwg.org/) 15 | -------------------------------------------------------------------------------- /components/ImageLinkCard.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import type { HTMLAttributes } from "astro/types" 3 | import { LinkCard } from "@astrojs/starlight/components" 4 | 5 | interface Props extends Omit, "title"> { 6 | title: string 7 | imageSrc: string 8 | description?: string 9 | } 10 | 11 | const { title, description, imageSrc, ...rest } = Astro.props 12 | --- 13 | 14 | ${description}`} 17 | {...rest} 18 | /> 19 | -------------------------------------------------------------------------------- /content/docs/extensions/fiscal-data-package.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Fiscal Data Package 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
Authors: Paul Walsh, Rufus Pollock, Tryggvi Björgvinsson, Steve Bennett, Adam Kariv, Dan Fowler
11 | 12 | Fiscal Data Package is a lightweight and user-oriented format for publishing and consuming fiscal data. Fiscal data packages are made of simple and universal components. They can be produced from ordinary spreadsheet software and used in any environment. 13 | 14 | [Fiscal Data Package](https://fiscal.datapackage.org) 15 | -------------------------------------------------------------------------------- /.github/workflows/general.yaml: -------------------------------------------------------------------------------- 1 | name: general 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - v*.*.* 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | test: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout repository 18 | uses: actions/checkout@v4 19 | - name: Install Node 20 | uses: actions/setup-node@v4 21 | with: 22 | node-version: 20 23 | cache: npm 24 | - name: Install dependencies 25 | run: npm install 26 | - name: Test software 27 | run: npm test 28 | -------------------------------------------------------------------------------- /components/SocialIcons.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import type { Props } from "@astrojs/starlight/props" 3 | import Default from "@astrojs/starlight/components/SocialIcons.astro" 4 | --- 5 | 6 | v2 (current) 9 | v1 10 | 11 | 12 | 13 | 14 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac 2 | .DS_Store 3 | 4 | # Node 5 | node_modules/ 6 | jspm_packages/ 7 | .lock-wscript 8 | build/Release 9 | .node_repl_history 10 | *.tgz 11 | .npm 12 | *.so 13 | 14 | # Testing 15 | coverage 16 | htmlcov/ 17 | lib-cov 18 | .tox/ 19 | .coverage 20 | .coverage.* 21 | .cache 22 | .nyc_output 23 | coverage.xml 24 | *,cover 25 | .hypothesis/ 26 | .eslintcache 27 | 28 | # Logs 29 | logs 30 | *.log 31 | 
npm-debug.log* 32 | 33 | # Runtime data 34 | pids 35 | *.pid 36 | *.seed 37 | *.pid.lock 38 | 39 | # Translations 40 | *.mo 41 | *.pot 42 | 43 | # Extra 44 | .env 45 | .vim/ 46 | .yarn/ 47 | .user/ 48 | .astro/ 49 | .cache/ 50 | .clinic/ 51 | .vscode/ 52 | .wrangler/ 53 | dist/ 54 | build/ 55 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "eslint:recommended", 4 | "plugin:@typescript-eslint/recommended", 5 | "plugin:astro/recommended" 6 | ], 7 | "parser": "@typescript-eslint/parser", 8 | "plugins": ["@typescript-eslint"], 9 | "ignorePatterns": ["build/"], 10 | "root": true, 11 | "rules": { 12 | "@typescript-eslint/ban-ts-comment": "off", 13 | "@typescript-eslint/no-explicit-any": "off", 14 | "@typescript-eslint/triple-slash-reference": "off" 15 | }, 16 | "overrides": [ 17 | { 18 | "files": ["*.astro"], 19 | "parser": "astro-eslint-parser", 20 | "parserOptions": { 21 | "parser": "@typescript-eslint/parser", 22 | "extraFileExtensions": [".astro"] 23 | } 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /.github/stale.yaml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | 4 | # Number of days of inactivity before a stale issue is closed 5 | daysUntilClose: 30 6 | 7 | # Issues with these labels will never be considered stale 8 | exemptLabels: 9 | - feature 10 | - enhancement 11 | - bug 12 | 13 | # Label to use when marking an issue as stale 14 | staleLabel: wontfix 15 | 16 | # Comment to post when marking an issue as stale. Set to `false` to disable 17 | markComment: > 18 | This issue has been automatically marked as stale because it has not had 19 | recent activity. It will be closed if no further activity occurs. 
Thank you 20 | for your contributions. 21 | 22 | # Comment to post when closing a stale issue. Set to `false` to disable 23 | closeComment: false 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Package 2 | 3 | Data Package is a standard consisting of a set of simple yet extensible specifications to describe datasets, data files and tabular data. It is a data definition language (DDL) and data API that facilitates findability, accessibility, interoperability, and reusability (FAIR) of data. For more information, please visit the [documentation portal](https://datapackage.org). 4 | 5 | ## Funding 6 | 7 | This project is funded through [NGI0 Entrust](https://nlnet.nl/entrust), a fund established by [NLnet](https://nlnet.nl) with financial support from the European Commission's [Next Generation Internet](https://ngi.eu) program. Learn more at the [NLnet project page](https://nlnet.nl/project/FrictionlessStandards/). 8 | 9 | [NLnet foundation logo](https://nlnet.nl) 10 | [NGI Zero Logo](https://nlnet.nl/entrust) 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/01-bug-report.yaml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41E Bug Report" 2 | description: Report an issue or possible bug 3 | labels: [] 4 | assignees: [] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for taking the time to file a bug report! Please fill out this form as completely as possible. 10 | - type: input 11 | id: version 12 | attributes: 13 | label: What Data Package version are you using? 14 | placeholder: v2 15 | validations: 16 | required: true 17 | - type: textarea 18 | id: description 19 | attributes: 20 | label: Describe the Bug 21 | description: A clear and concise description of what the bug is. 
22 | validations: 23 | required: true 24 | - type: checkboxes 25 | id: contribution 26 | attributes: 27 | label: Participation 28 | options: 29 | - label: I am willing to submit a pull request for this issue. 30 | required: false 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02-general-issue.yaml: -------------------------------------------------------------------------------- 1 | name: "\U0001F333 General Issue" 2 | description: Project, website, or documentation related issue or improvement 3 | labels: [] 4 | assignees: [] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for taking the time to improve the Data Package project! Please fill out this form as completely as possible. 10 | - type: input 11 | id: version 12 | attributes: 13 | label: What Data Package version are you using? 14 | placeholder: v2 15 | validations: 16 | required: true 17 | - type: textarea 18 | id: description 19 | attributes: 20 | label: Describe the Issue 21 | description: A clear and concise description of what the issue or possible improvement is. 22 | validations: 23 | required: true 24 | - type: checkboxes 25 | id: contribution 26 | attributes: 27 | label: Participation 28 | options: 29 | - label: I am willing to submit a pull request for this issue. 30 | required: false 31 | -------------------------------------------------------------------------------- /components/About.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import { funding } from "../assets" 3 | import { Image } from "astro:assets" 4 | 5 | interface Props { 6 | title: string 7 | } 8 | 9 | const { title } = Astro.props 10 | --- 11 | 12 |
13 | 14 | {title} 15 | 16 |
Open Knowledge Foundation
17 | 18 |
19 | 20 | 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: 👀 Explore Discussions 4 | url: https://github.com/frictionlessdata/datapackage/discussions 5 | about: Before opening a discussion, please check if the topic has already been discussed. Please vote on existing discussions to show your support. 6 | - name: 💡 Feature Request 7 | url: https://github.com/frictionlessdata/datapackage/discussions/new?category=ideas 8 | about: Suggest an improvement you’d like to see added to the Data Package Standard 9 | - name: 🚀 Implementation 10 | url: https://github.com/frictionlessdata/datapackage/discussions/new?category=show-and-tell 11 | about: Share your Data Package implementation or related project with the community 12 | - name: 💁 Question 13 | url: https://github.com/frictionlessdata/datapackage/discussions/new?category=q-a 14 | about: Ask a question about the Data Package Standard 15 | - name: 👾 Chat 16 | url: https://join.slack.com/t/frictionlessdata/shared_invite/zt-17kpbffnm-tRfDW_wJgOw8tJVLvZTrBg 17 | about: Join the Data Package community on Slack! 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. 
We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /assets/styles.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --purple-hsl: 209, 60%, 60%; 3 | --overlay-blurple: hsla(var(--purple-hsl), 0.2); 4 | --scrollbar-color: #ddd; 5 | } 6 | 7 | :root[data-theme="light"] { 8 | --purple-hsl: 209, 85%, 65%; 9 | --sl-color-text-accent: #1971c2; 10 | --sl-color-banner-bg: #1971c2; 11 | --sl-color-bg-inline-code: #f6f8fa; 12 | } 13 | 14 | :root[data-theme="dark"] { 15 | --sl-color-text-accent: #4dabf7; 16 | --sl-color-banner-bg: #4dabf7; 17 | --sl-color-bg-inline-code: #23262f; 18 | } 19 | 20 | /* Firefox */ 21 | * { 22 | scrollbar-color: var(--scrollbar-color) transparent; 23 | } 24 | 25 | /* Webkit */ 26 | /* Make scrollbars transparent except for main page scrollbar. 
*/ 27 | ::-webkit-scrollbar, 28 | ::-webkit-scrollbar-track { 29 | width: 6px; 30 | height: 6px; 31 | background-color: transparent; 32 | } 33 | 34 | body::-webkit-scrollbar, 35 | body::-webkit-scrollbar-track { 36 | border-radius: 3px; 37 | } 38 | 39 | ::-webkit-scrollbar-thumb { 40 | border-radius: 3px; 41 | background: var(--scrollbar-color); 42 | } 43 | 44 | /* Style the Markdown heading links. */ 45 | .sl-markdown-content :is(h1, h2, h3, h4, h5, h6) > a { 46 | color: var(--sl-color-white); 47 | text-decoration: none; 48 | &:hover { 49 | text-decoration: underline; 50 | } 51 | } 52 | 53 | article.card { 54 | border-radius: 15px; 55 | } 56 | -------------------------------------------------------------------------------- /content/docs/recipes/data-dependencies.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Dependencies 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsRufus Pollock
11 | 12 | Consider a situation where data packages are part of a tool chain that, say, loads all of the data into an SQL db. You can then imagine a situation where one requires package A which requires package B + C. 13 | 14 | In this case you want to specify that A depends on B and C -- and that "installing" A should install B and C. This is the purpose of the `dataDependencies` property. 15 | 16 | ## Specification 17 | 18 | `dataDependencies` is an object. It follows the same format as the CommonJS Packages spec v1.1. Each dependency defines the lowest compatible MAJOR[.MINOR[.PATCH]] dependency versions (only one per MAJOR version) with which the package has been tested and is assured to work. The version may be a simple version string (see the version property for acceptable forms), or it may be an object group of dependencies which define a set of options, any one of which satisfies the dependency. The ordering of the group is significant and earlier entries have higher priority. Example: 19 | 20 | ```javascript 21 | "dataDependencies": { 22 | "country-codes": "", 23 | "unemployment": "2.1", 24 | "geo-boundaries": { 25 | "acmecorp-geo-boundaries": ["1.0", "2.0"], 26 | "othercorp-geo-boundaries": "0.9.8", 27 | }, 28 | } 29 | ``` 30 | 31 | ## Implementations 32 | 33 | None known. 34 | -------------------------------------------------------------------------------- /content/docs/recipes/external-foreign-keys.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: External Foreign Keys 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsStephen Gates
11 | 12 | A foreign key is a reference where values in a field (or fields) in a Tabular Data Resource link to values in a field (or fields) in a Tabular Data Resource in the same or in another Tabular Data Package. 13 | 14 | This pattern allows users to link values in a field (or fields) in a Tabular Data Resource to values in a field (or fields) in a Tabular Data Resource in a different Tabular Data Package. 15 | 16 | ## Specification 17 | 18 | The [`foreignKeys`](/standard/table-schema/#foreignkeys) array MAY have a property `package`. This property MUST be, either: 19 | 20 | - a string that is a fully qualified HTTP address to a Data Package `datapackage.json` file 21 | - a data package [`name`](/standard/data-package/#name) that can be resolved by a canonical data package registry 22 | 23 | If the referenced data package has an [`id`](/standard/data-package/#id) that is a fully qualified HTTP address, it SHOULD be used as the `package` value. 24 | 25 | For example: 26 | 27 | ``` 28 | "foreignKeys": [{ 29 | "fields": ["code"], 30 | "reference": { 31 | "package": "https://raw.githubusercontent.com/frictionlessdata/example-data-packages/master/donation-codes/datapackage.json", 32 | "resource": "donation-codes", 33 | "fields": ["donation code"] 34 | } 35 | }] 36 | ``` 37 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "datapackage.org", 3 | "type": "module", 4 | "version": "2.0", 5 | "engines": { 6 | "node": "^20.0.0", 7 | "npm": "^10.0.0" 8 | }, 9 | "scripts": { 10 | "build": "astro build", 11 | "clean": "find . -name 'node_modules' -type d -prune -print -exec rm -rf '{}' +", 12 | "format": "eslint --fix . && prettier --write .", 13 | "generate": "vite-node scripts/generate.ts", 14 | "lint": "eslint . 
&& prettier --check .", 15 | "prepare": "husky", 16 | "preview": "npm run build && astro preview --open --port 8080", 17 | "start": "astro dev", 18 | "update": "ncu -u", 19 | "test": "npm run lint", 20 | "type": "tsc" 21 | }, 22 | "dependencies": { 23 | "@apidevtools/json-schema-ref-parser": "11.6.4", 24 | "@astrojs/markdown-remark": "5.1.1", 25 | "@astrojs/starlight": "0.28.3", 26 | "@trivago/prettier-plugin-sort-imports": "4.3.0", 27 | "@types/fs-extra": "11.0.4", 28 | "@types/js-yaml": "4.0.9", 29 | "@typescript-eslint/eslint-plugin": "7.14.1", 30 | "@typescript-eslint/parser": "7.14.1", 31 | "astro": "4.16.19", 32 | "eslint": "8.57.0", 33 | "eslint-plugin-astro": "1.2.2", 34 | "fs-extra": "11.2.0", 35 | "glob": "10.4.2", 36 | "husky": "9.1.6", 37 | "js-yaml": "4.1.1", 38 | "npm-check-updates": "16.14.20", 39 | "prettier": "3.3.2", 40 | "prettier-plugin-astro": "0.14.0", 41 | "rehype-autolink-headings": "7.1.0", 42 | "remark-custom-heading-id": "2.0.0", 43 | "replace-in-file": "8.2.0", 44 | "starlight-blog": "0.14.1", 45 | "starlight-links-validator": "0.9.0", 46 | "typescript": "5.5.4", 47 | "vanilla-back-to-top": "7.2.1", 48 | "vite-node": "2.1.3" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /scripts/generate.ts: -------------------------------------------------------------------------------- 1 | import { version } from "../package.json" 2 | import JsonSchema from "@apidevtools/json-schema-ref-parser" 3 | import fs from "fs-extra" 4 | import { glob } from "glob" 5 | import yaml from "js-yaml" 6 | import nodePath from "path" 7 | import process from "process" 8 | import { replaceInFile } from "replace-in-file" 9 | 10 | const SOURCE_DIR = "profiles" 11 | const TARGET_DIR = `public/profiles` 12 | const VERSION_DIR = `${TARGET_DIR}/${version}` 13 | const EXCLUDE_FILES = ["dictionary.json"] 14 | 15 | // Ensure the versioned output directory exists 16 | fs.ensureDirSync(VERSION_DIR) 17 | 18 | // Init dictionary (a JSON Schema document collecting all shared definitions) 19 | const dictionary = {
20 | $schema: "http://json-schema.org/draft-07/schema#", 21 | definitions: {}, 22 | } 23 | 24 | // Fill dictionary by merging every YAML file under profiles/dictionary 25 | for (const path of glob.sync(`${SOURCE_DIR}/dictionary/*.yaml`)) { 26 | const contents = fs.readFileSync(path).toString() 27 | Object.assign(dictionary.definitions, yaml.load(contents)) 28 | } 29 | 30 | // Save dictionary (temporary; removed again at the end of the script) 31 | const contents = JSON.stringify(dictionary, null, 2) 32 | fs.writeFileSync(`${VERSION_DIR}/dictionary.json`, contents) 33 | 34 | // Save profiles (copy the source profiles into the versioned directory) 35 | for (const path of glob.sync(`${SOURCE_DIR}/*.json`)) { 36 | const name = nodePath.basename(path) 37 | fs.copySync(path, `${VERSION_DIR}/${name}`) 38 | } 39 | 40 | // Dereference profiles in place (cwd is switched to VERSION_DIR, presumably so relative $refs resolve there) 41 | for (const path of glob.sync(`${VERSION_DIR}/*.json`)) { 42 | const name = nodePath.basename(path) 43 | if (EXCLUDE_FILES.includes(name)) continue 44 | const rawSchema = JSON.parse(fs.readFileSync(path).toString()) 45 | const cwd = process.cwd() 46 | process.chdir(VERSION_DIR) 47 | const schema = await JsonSchema.dereference(rawSchema) 48 | process.chdir(cwd) 49 | const contents = JSON.stringify(schema, null, 2) 50 | fs.writeFileSync(path, contents) 51 | } 52 | 53 | // Ensure correct versions in the docs (dot escaped so only a literal N.N version segment matches) 54 | await replaceInFile({ 55 | files: ["content/docs/standard/*.mdx"], 56 | from: /profile: \/profiles\/\d\.\d\//g, 57 | to: `profile: /profiles/${version}/`, 58 | }) 59 | 60 | // Delete the temporary dictionary 61 | fs.removeSync(`${VERSION_DIR}/dictionary.json`) 62 | -------------------------------------------------------------------------------- /profiles/dictionary/package.yaml: -------------------------------------------------------------------------------- 1 | dataPackage: 2 | title: Data Package 3 | description: Data Package 4 | type: object 5 | required: 6 | - resources 7 | properties: 8 | $schema: 9 | "$ref": "#/definitions/$schema" 10 | default: https://datapackage.org/profiles/1.0/datapackage.json 11 | propertyOrder: 10 12 | name: 13 | "$ref": "#/definitions/name" 14 | propertyOrder: 20 15
| id: 16 | "$ref": "#/definitions/id" 17 | propertyOrder: 30 18 | title: 19 | "$ref": "#/definitions/title" 20 | propertyOrder: 40 21 | description: 22 | "$ref": "#/definitions/description" 23 | propertyOrder: 50 24 | format: textarea 25 | homepage: 26 | "$ref": "#/definitions/homepage" 27 | propertyOrder: 60 28 | version: 29 | "$ref": "#/definitions/version" 30 | propertyOrder: 65 31 | created: 32 | "$ref": "#/definitions/created" 33 | propertyOrder: 70 34 | contributors: 35 | "$ref": "#/definitions/contributors" 36 | propertyOrder: 80 37 | keywords: 38 | "$ref": "#/definitions/keywords" 39 | propertyOrder: 90 40 | image: 41 | "$ref": "#/definitions/image" 42 | propertyOrder: 100 43 | licenses: 44 | "$ref": "#/definitions/licenses" 45 | propertyOrder: 110 46 | resources: 47 | "$ref": "#/definitions/dataResources" 48 | propertyOrder: 120 49 | sources: 50 | "$ref": "#/definitions/sources" 51 | propertyOrder: 200 52 | options: 53 | hidden: true 54 | dataResources: 55 | title: Data Resources 56 | description: An `array` of Data Resource objects, each compliant with the [Data Resource](/data-resource/) specification. 57 | type: array 58 | minItems: 1 59 | items: 60 | "$ref": "#/definitions/dataResource" 61 | examples: 62 | - | 63 | { 64 | "resources": [ 65 | { 66 | "name": "my-data", 67 | "data": [ 68 | "data.csv" 69 | ], 70 | "mediatype": "text/csv" 71 | } 72 | ] 73 | } 74 | -------------------------------------------------------------------------------- /content/docs/recipes/private-properties.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Private Properties 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsRufus Pollock, Paul Walsh
11 | 12 | Some software that implements the Frictionless Data specifications may need to store additional information on the various Frictionless Data descriptors. 13 | 14 | For example, a data registry that provides metadata via `datapackage.json` may wish to set an internal version or identifier that is system-specific, and should not be considered as part of the user-generated metadata. 15 | 16 | Properties to store such information should be considered "private", and by convention, the names should be prefixed by an underscore `_`. 17 | 18 | ## Implementations 19 | 20 | There are no known implementations at present. 21 | 22 | ## Specification 23 | 24 | On any Frictionless Data descriptor, data that is not generated by the author/contributors, but is generated by software/a system handling the data, `SHOULD` be considered as "private", and be prefixed by an underscore `_`. 25 | 26 | To demonstrate, let's take the example of a data registry that implements `datapackage.json` for storing dataset metadata. 27 | 28 | A user might upload a `datapackage.json` as follows: 29 | 30 | ``` 31 | { 32 | "name": "my-package", 33 | "resources": [ 34 | { 35 | "name": "my-resource", 36 | "data": [ "my-resource.csv" ] 37 | } 38 | ] 39 | } 40 | ``` 41 | 42 | The registry itself may have a platform-specific version system, and increment versions on each update of the data. To store this information on the datapackage itself, the platform could save this information in a "private" `_platformVersion` property as follows: 43 | 44 | ``` 45 | { 46 | "name": "my-package", 47 | "_platformVersion": 7, 48 | "resources": [ 49 | { 50 | "name": "my-resource", 51 | "data": [ "my-resource.csv" ] 52 | } 53 | ] 54 | } 55 | ``` 56 | 57 | Usage of "private" properties ensures a clear distinction between data stored on the descriptor that is user (author/contributor) defined, and any additional data that may be stored by a 3rd party.
58 | -------------------------------------------------------------------------------- /content/docs/recipes/compression-of-resources.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Compression of Resources 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsMichael Amadi
11 | 12 | It can be argued that applying compression to data resources can make data package publishing more cost-effective and sustainable. Compressing data resources gives publishers the benefit of reduced storage and bandwidth costs and gives consumers the benefit of shorter download times. 13 | 14 | ## Implementations 15 | 16 | - [tabulator-py (Gzip and Zip support)](https://github.com/frictionlessdata/tabulator-py) 17 | - [datapackage-connector (Gzip support)](https://github.com/nimblelearn/datapackage-connector) 18 | - [datapackage-m (Gzip support)](https://github.com/nimblelearn/datapackage-m) 19 | 20 | ## Specification 21 | 22 | All compressed resources `MUST` have a `path` that allows the `compression` property to be inferred. If the compression can't be inferred from the `path` property (e.g. a custom file extension is used) then the `compression` property `MUST` be used to specify the compression. 23 | 24 | Supported compression types: 25 | 26 | - gz 27 | - zip 28 | 29 | Example of a compressed resource with implied compression: 30 | 31 | ``` 32 | { 33 | "name": "data-resource-compression-example", 34 | "path": "http://example.com/large-data-file.csv.gz", 35 | "title": "Large Data File", 36 | "description": "This large data file benefits from compression.", 37 | "format": "csv", 38 | "mediatype": "text/csv", 39 | "encoding": "utf-8", 40 | "bytes": 1073741824 41 | } 42 | ``` 43 | 44 | Example of a compressed resource with the `compression` property: 45 | 46 | ``` 47 | { 48 | "name": "data-resource-compression-example", 49 | "path": "http://example.com/large-data-file.csv.gz", 50 | "title": "Large Data File", 51 | "description": "This large data file benefits from compression.", 52 | "format": "csv", 53 | "compression" : "gz", 54 | "mediatype": "text/csv", 55 | "encoding": "utf-8", 56 | "bytes": 1073741824 57 | } 58 | ``` 59 | 60 | :::note 61 | Resource properties e.g. 
bytes, hash etc apply to the compressed object -- not to the original uncompressed object. 62 | ::: 63 | -------------------------------------------------------------------------------- /content/docs/recipes/files-inside-archives.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Files Inside Archives 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsCarles Pina Estany
11 | 12 | Some datasets need to contain a Zip file (or tar, other formats) containing a set of files. 13 | 14 | This might happen for practical reasons (datasets containing thousands of files) or for technical limitations (for example, currently Zenodo doesn't support subdirectories and datasets might need subdirectory structures to be useful). 15 | 16 | ## Implementations 17 | 18 | There are no known implementations at present. 19 | 20 | ## Specification 21 | 22 | The `resources` in a `data-package` can contain "recursive resources": identifying a new resource. 23 | 24 | ## Example 25 | 26 | ```json 27 | { 28 | "profile": "data-package", 29 | "resources": [ 30 | { 31 | "path": "https://zenodo.org/record/3247384/files/Sea-Bird_Processed_Data.zip", 32 | "format": "zip", 33 | "mediatype": "application/zip", 34 | "bytes": 294294242424, 35 | "hash": "a27063c614c183b502e5c03bd9c8931b", 36 | "resources": [ 37 | { 38 | "path": "file_name.csv", 39 | "format": "csv", 40 | "mediatype": "text/csv", 41 | "bytes": 242421, 42 | "hash": "0300048878bb9b5804a1f62869d296bc", 43 | "profile": "tabular-data-resource", 44 | "schema": "tableschema.json" 45 | }, 46 | { 47 | "path": "directory/file_name2.csv", 48 | "format": "csv", 49 | "mediatype": "text/csv", 50 | "bytes": 2424213, 51 | "hash": "ff9435e0ee350efbe8a4a8779a47caaa", 52 | "profile": "tabular-data-resource", 53 | "schema": "tableschema.json" 54 | } 55 | ] 56 | } 57 | ] 58 | } 59 | ``` 60 | 61 | For a `.tar.gz` it would be the same changing the `"format"` and the 62 | `"mediatype"`. 63 | 64 | ## Types of files 65 | 66 | Support for `Zip` and `tar.gz` might be enough: hopefully everything can be re-packaged using these formats. 67 | 68 | To keep the implementation and testing simple, only one recursive level is possible. A `resource` can list `resources` inside (like in the example). But the inner resources cannot contain resources again.
69 | -------------------------------------------------------------------------------- /content/docs/recipes/data-package-version.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Package Version 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsRufus Pollock
11 | 12 | ## Specification 13 | 14 | The Data Package version format follows the [Semantic Versioning](http://semver.org) specification format: MAJOR.MINOR.PATCH 15 | 16 | The version numbers, and the way they change, convey meaning about how the data package has been modified from one version to the next. 17 | 18 | Given a Data Package version number MAJOR.MINOR.PATCH, increment the: 19 | 20 | MAJOR version when you make incompatible changes, e.g. 21 | 22 | - Change the data package, resource or field `name` or `identifier` 23 | - Add, remove or re-order fields 24 | - Change a field `type` or `format` 25 | - Change a field `constraint` to be more restrictive 26 | - Combine, split, delete or change the meaning of data that is referenced by another data resource 27 | 28 | MINOR version when you add data or change metadata in a backwards-compatible manner, e.g. 29 | 30 | - Add a new data resource to a data package 31 | - Add new data to an existing data resource 32 | - Change a field `constraint` to be less restrictive 33 | - Update a reference to another data resource 34 | - Change data to reflect changes in referenced data 35 | 36 | PATCH version when you make backwards-compatible fixes, e.g. 37 | 38 | - Correct errors in existing data 39 | - Change descriptive metadata properties 40 | 41 | ## Scenarios 42 | 43 | - You are developing your data through public consultation. Start your initial data release at 0.1.0 44 | - You release your data for the first time. Use version 1.0.0 45 | - You append last month's data to an existing release. Increment the MINOR version number 46 | - You append a column to the data. Increment the MAJOR version number 47 | - You relocate the data to a new `URL` or `path`. No change in the version number 48 | - You change a `title`, `description`, or other descriptive metadata. Increment the PATCH version 49 | - You fix a data entry error by modifying a value.
Increment the PATCH version 50 | - You split a row of data in a foreign key reference table. Increment the MAJOR version number 51 | - You update the data and schema to refer to a new version of a foreign key reference table. Increment the MINOR version number 52 | -------------------------------------------------------------------------------- /assets/index.ts: -------------------------------------------------------------------------------- 1 | import bcodmo from "./adoption/bcodmo.png" 2 | import cambridge from "./adoption/cambridge.png" 3 | import causanatura from "./adoption/causanatura.png" 4 | import chicago from "./adoption/chicago.png" 5 | import cmoa from "./adoption/cmoa.png" 6 | import cmso from "./adoption/cmso.png" 7 | import dataRetriever from "./adoption/data-retriever.png" 8 | import dataworld from "./adoption/data-world.png" 9 | import dataship from "./adoption/dataship.png" 10 | import dm4t from "./adoption/dm4t.png" 11 | import dryad from "./adoption/dryad.png" 12 | import elife from "./adoption/elife.png" 13 | import etalab from "./adoption/etalab.png" 14 | import eucom from "./adoption/eucom.png" 15 | import gapminder2 from "./adoption/gapminder2.png" 16 | import gapminder from "./adoption/gapminder.png" 17 | import gbif from "./adoption/gbif.png" 18 | import github from "./adoption/github.png" 19 | import hubmap from "./adoption/hubmap.png" 20 | import johnSnowLabs from "./adoption/john-snow-labs.png" 21 | import librariesHacked from "./adoption/libraries-hacked.png" 22 | import nimblelearn from "./adoption/nimblelearn.png" 23 | import odb from "./adoption/odb.png" 24 | import openml from "./adoption/openml.png" 25 | import opsd from "./adoption/opsd.png" 26 | import owid from "./adoption/owid.png" 27 | import oxford from "./adoption/oxford.png" 28 | import pnnl from "./adoption/pnnl.png" 29 | import pudl from "./adoption/pudl.png" 30 | import tesera from "./adoption/tesera.png" 31 | import ukds from "./adoption/ukds.png" 32 | import uop from 
"./adoption/uop.png" 33 | import validata from "./adoption/validata.png" 34 | import zegami from "./adoption/zegami.png" 35 | import okfn from "./funding/okfn.png" 36 | import dataCurator from "./software/data-curator.png" 37 | import flatterer from "./software/flatterer.png" 38 | import ode from "./software/ode.png" 39 | 40 | export const adoption = { 41 | bcodmo, 42 | cambridge, 43 | causanatura, 44 | chicago, 45 | cmoa, 46 | cmso, 47 | dataRetriever, 48 | dataship, 49 | dataworld, 50 | dm4t, 51 | dryad, 52 | elife, 53 | etalab, 54 | eucom, 55 | gapminder, 56 | gapminder2, 57 | gbif, 58 | github, 59 | hubmap, 60 | johnSnowLabs, 61 | librariesHacked, 62 | nimblelearn, 63 | odb, 64 | openml, 65 | opsd, 66 | owid, 67 | oxford, 68 | pnnl, 69 | pudl, 70 | tesera, 71 | ukds, 72 | uop, 73 | validata, 74 | zegami, 75 | } 76 | 77 | export const funding = { 78 | okfn, 79 | } 80 | 81 | export const software = { 82 | ode, 83 | dataCurator, 84 | flatterer, 85 | } 86 | -------------------------------------------------------------------------------- /content/docs/recipes/language-support.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Language Support 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsPaul Walsh
11 | 12 | Language support is a different concern to translation support. Language support deals with declaring the default language of a descriptor and the data it contains in the resources array. Language support makes no claim about the presence of translations when one or more languages are supported in a descriptor or in data. Via the introduction of a `languages` array to any descriptor, we can declare the default language, and any other languages that `SHOULD` be found in the descriptor and the data. 13 | 14 | ## Implementations 15 | 16 | There are no known implementations of this pattern at present. 17 | 18 | ## Specification 19 | 20 | Any Frictionless Data descriptor can declare the language configuration of its metadata and data with the `languages` array. 21 | 22 | `languages` `MUST` be an array, and the first item in the array is the default (non-translated) language. 23 | 24 | If no `languages` array is present, the default language is English (`en`), and therefore is equivalent to: 25 | 26 | ``` 27 | { 28 | "name": "my-package", 29 | "languages": ["en"] 30 | } 31 | ``` 32 | 33 | The presence of a languages array does not ensure that the metadata or the data has translations for all supported languages. 34 | 35 | The descriptor and data sources `MUST` be in the default language. The descriptor and data sources `MAY` have translations for the other languages in the array, using the same language code. `IF` a translation is not present, implementing code `MUST` fallback to the default language string. 
36 | 37 | Example usage of `languages`, implemented in the metadata of a descriptor: 38 | 39 | ``` 40 | { 41 | "name": "sun-package", 42 | "languages": ["es", "en"], 43 | "title": "Sol" 44 | } 45 | 46 | # which is equivalent to 47 | { 48 | "name": "sun-package", 49 | "languages": ["es", "en"], 50 | "title": { 51 | "": "Sol", 52 | "en": "Sun" 53 | } 54 | } 55 | ``` 56 | 57 | Example usage of `languages` implemented in the data described by a resource: 58 | 59 | ``` 60 | # resource descriptor 61 | { 62 | "name": "solar-system", 63 | "data": [ "solar-system.csv" ], 64 | "fields": [ 65 | ... 66 | ], 67 | "languages": ["es", "en", "he", "fr", "ar"] 68 | } 69 | 70 | # data source 71 | # some languages have translations, some do not 72 | # assumes a certain translation pattern, see the related section 73 | id,name,name@fr,name@he,name@en 74 | 1,Sol,Soleil,שמש,Sun 75 | 2,Luna,Lune,ירח,Moon 76 | ``` 77 | -------------------------------------------------------------------------------- /content/docs/recipes/caching-of-resources.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Caching of Resources 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsRufus Pollock, Paul Walsh
11 | 12 | All Frictionless Data specifications allow for referencing resources via http or a local filesystem. 13 | 14 | In the case of remote resources via http, there is always the possibility that the remote server will be unavailable, or, that the resource itself will be temporarily or permanently removed. If implementing systems are doing any processing or analysis with the file, they may wish to keep and reference a local copy while still pointing to the remote URL as the canonical data source. 15 | 16 | Applications that are concerned with the persistent storage of data described in Frictionless Data specifications can use a `_cache` property that mirrors the functionality and usage of the `path` or `data` properties, and refers to a storage location for the data that the application can fall back to if the canonical resource is unavailable. 17 | 18 | ## Implementations 19 | 20 | There are no known implementations of this pattern at present. 21 | 22 | ## Specification 23 | 24 | Implementations `MAY` handle a `_cache` property on any descriptor that supports either a `path` or `data` property. In the case that the data referenced in `path` or `data` is unavailable, `_cache` should be used as a fallback to access the data. The handling of the data stored at `_cache` is beyond the scope of the specification. Implementations might store a copy of the resources in `path` or `data` at ingestion time, update at regular intervals, or any other method to keep an up-to-date, persistent copy. 25 | 26 | Some examples of the `_cache` property. 
27 | 28 | ``` 29 | { 30 | "name": "my-package", 31 | "resources": [ 32 | { 33 | "name": "my-resource", 34 | "path": "http://example.com/data/csv/my-resource.csv", 35 | "_cache": "my-resource.csv" 36 | }, 37 | { 38 | "name": "my-resource", 39 | "path": "http://example.com/data/csv/my-resource.csv", 40 | "_cache": "http://data.registry.com/user/files/my-resource.csv" 41 | }, 42 | { 43 | "name": "my-resource", 44 | "data": [ 45 | "http://example.com/data/csv/my-resource.csv", 46 | "http://somewhere-else.com/data/csv/resource2.csv" 47 | ], 48 | "_cache": [ 49 | "my-resource.csv", 50 | "resource2.csv" 51 | ] 52 | }, 53 | { 54 | "name": "my-resource", 55 | "data": [ "http://example.com/data/csv/my-resource.csv" ], 56 | "_cache": "my-resource.csv" 57 | } 58 | ] 59 | } 60 | ``` 61 | -------------------------------------------------------------------------------- /content/docs/recipes/translation-support.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Translation Support 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsPaul Walsh
11 | 12 | Following on from a general pattern for language support, and the explicit support of metadata translations in Frictionless Data descriptors, it would be desirable to support translations in source data. 13 | 14 | We currently have two patterns for this in discussion. Both patterns arise from real-world implementations that are not specifically tied to Frictionless Data. 15 | 16 | One pattern suggests inline translations with the source data, reserving the `@` symbol in the naming of fields to denote translations. 17 | 18 | The other describes a pattern for storing additional translation sources, co-located with the "source" file described in a descriptor `data`. 19 | 20 | ## Implementations 21 | 22 | There are no known implementations of this pattern in the Frictionless Data core libraries at present. 23 | 24 | ## Specification 25 | 26 | ### Inline 27 | 28 | **Uses a column naming convention for accessing translations**. 29 | 30 | Tabular resource descriptors support translations using `{field_name}@{lang_code}` syntax for translated field names. `lang_code` `MUST` be present in the `languages` array that applies to the resource. 31 | 32 | Any field with the `@` symbol `MUST` be a translation field for another field of data, and `MUST` be parsable according to the `{field_name}@{lang_code}` pattern. 33 | 34 | If a translation field is found in the data that does not have a corresponding `field` (e.g.: `title@es` but no `title`), then the translation field `SHOULD` be ignored. 35 | 36 | If a translation field is found in the data that uses a `lang_code` _not_ declared in the applied `languages` array, then the translation field `SHOULD` be ignored. 37 | 38 | Translation fields `MUST NOT` be described in a schema `fields` array. 
39 | 40 | Translation fields `MUST` match the `type`, `format` and `constraints` of the field they translate, with a single exception: Translation fields are never required, and therefore `constraints.required` is always `false` for a translation field. 41 | 42 | ### Co-located translation sources 43 | 44 | **Uses a file storage convention for accessing translations**. 45 | 46 | To be contributed by @jheeffer 47 | 48 | - Has to handle local and remote resources 49 | - Has to be explicit about the translation key/value pattern in the translation files 50 | 51 | ``` 52 | # local 53 | data/file1.csv 54 | data/lang/file1-en.csv 55 | data/lang/file1-es.csv 56 | 57 | # remote 58 | http://example.com/data/file2.csv 59 | http://example.com/data/lang/file2-en.csv 60 | http://example.com/data/lang/file2-es.csv 61 | ``` 62 | -------------------------------------------------------------------------------- /content/docs/guides/using-data-package.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: How to start using Data Package 3 | sidebar: 4 | order: 1 5 | --- 6 | 7 | There are many alternatives when it comes to Data Package Standard implementations. We will cover a few of the most popular options, which will be a good starting point. 8 | 9 | :::tip 10 | Please take a look at the full list of Data Package [Software](/overview/software/) to find other implementations. 11 | ::: 12 | 13 | ## Open Data Editor 14 | 15 | The simplest way to start using the Data Package Standard is by installing [Open Data Editor](https://opendataeditor.okfn.org/) (currently, in beta): 16 | 17 | [![Open Data Editor](../../../assets/software/ode.png)](https://opendataeditor.okfn.org) 18 | 19 | You can use the visual interface as you usually do in any modern IDE, adding and moving files, validating data, etc.
Under the hood, Open Data Editor will be creating Data Package descriptors for your datasets (can be explicitly done by creating a dataset), inferring metadata and data types. When the data curation work is done, a data package can be validated and published, for example, to CKAN. 20 | 21 | Please refer to the [Open Data Editor's documentation](https://opendataeditor.okfn.org) to read about all the features. 22 | 23 | ## frictionless-py 24 | 25 | If you prefer a command-line interface, or Python, there is [frictionless-py](https://framework.frictionlessdata.io/), a complete framework for managing data packages. Here are the main commands available in the CLI: 26 | 27 | ```bash 28 | frictionless describe # to describe your data 29 | frictionless explore # to explore your data 30 | frictionless extract # to extract your data 31 | frictionless index # to index your data 32 | frictionless list # to list your data 33 | frictionless publish # to publish your data 34 | frictionless query # to query your data 35 | frictionless script # to script your data 36 | frictionless validate # to validate your data 37 | frictionless --help # to get the list of commands 38 | frictionless --version # to get the version 39 | ``` 40 | 41 | Please refer to the [frictionless-py's documentation](https://framework.frictionlessdata.io/) to read about all the features. 42 | 43 | ## frictionless-r 44 | 45 | For the R community, there is the [frictionless-r](https://docs.ropensci.org/frictionless/) package that allows managing data packages in the R language. For example: 46 | 47 | ```r 48 | library(frictionless) 49 | 50 | # Read the datapackage.json file 51 | # This gives you access to all Data Resources of the Data Package without 52 | # reading them, which is convenient and fast.
53 | package <- read_package("https://zenodo.org/records/10053702/files/datapackage.json") 54 | 55 | package 56 | 57 | # List resources 58 | resources(package) 59 | 60 | # Read data from the resource "gps" 61 | # This will return a single data frame, even though the data are split over 62 | # multiple zipped CSV files. 63 | read_resource(package, "gps") 64 | ``` 65 | 66 | Please refer to the [frictionless-r's documentation](https://docs.ropensci.org/frictionless/) to read about all the features. 67 | -------------------------------------------------------------------------------- /content/docs/overview/governance.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Governance 3 | sidebar: 4 | order: 7 5 | --- 6 | 7 | The Data Package Standard is governed by a working group made of [Frictionless](https://frictionlessdata.io/) community members coming from different disciplines. 8 | 9 | The working group works asynchronously, using a review model for any changes in the specifications. We also hold [monthly update calls](https://forms.gle/UEqGnYKESqqw1LeW7) for the working group, which are of course not compulsory, and which are also open to the broader community. 10 | 11 | ## Working Group 12 | 13 | The composition of the working group: 14 | 15 | ### Vetoing Members 16 | 17 | - **Evgeny Karev** ([Datist](https://datist.io/)) 18 | - **Peter Desmet** (Research Institute for Nature and Forest (INBO)) 19 | 20 | ### Voting Members 21 | 22 | - **Phil Schumm** (CTDS - University of Chicago) 23 | - **Kyle Husmann** (Pennsylvania State University) 24 | - **Keith Hughitt** (National Institutes of Health) 25 | - **Jakob Voß** (Verbundzentrale des GBV (VZG)) 26 | - **Ethan Welty** (World Glacier Monitoring Service (WGMS)) 27 | - **Paul Walsh** (Link Digital) 28 | - **Pieter Huybrechts** (Research Institute for Nature and Forest (INBO)) 29 | 30 | ### Advisory Members 31 | 32 | - **Martin Durant** (Anaconda, inc.) 
33 | - **Adam Kariv** (The Public Knowledge Workshop) 34 | - **Johan Richer** ([multi.coop](https://www.multi.coop/)) 35 | - **Steve Diggs** ([California Digital Library](https://cdlib.org/)) 36 | 37 | ## Decision Making 38 | 39 | A proposed change to the specifications, which can be initiated by any community or working group member using a GitHub pull request, will be accepted if consensus with the working group is reached, meaning we have arrived at a decision, or at least a compromise, that everyone can live with. 40 | 41 | The working group will be invited to share their view in a dedicated GitHub pull request. If a broader conversation is needed, the proposal discussion can be elevated to the monthly call for deliberation. The working group will be given a reasonable amount of time to review the proposed action. 42 | 43 | Consensus is reached and the issue is closed if at least ⅔ of the working group members participate in the discussion and express their favourable opinion. In case of serious and explicitly stated concerns, working group members who are core library investors (at the moment: [Open Knowledge Foundation (OKFN)](https://okfn.org/), the [Research Institute for Nature and Forest (INBO)](https://www.vlaanderen.be/inbo/en-gb/homepage/), [Datopian](https://www.datopian.com/)) may veto a proposed action. 44 | 45 | The community manager at OKFN will reach out to working group members who did not participate in the discussion to make sure their opinion is also captured. Reminders to participate will be handled with care. Members of the working group can expect a gentle and considerate approach, such as receiving an email once every two weeks highlighting any issues where their vote is pending. The goal is to keep members informed without causing any unnecessary inconvenience. 46 | 47 | Decision-making on the technical maintenance of the specs will be centralised by OKFN.
48 | -------------------------------------------------------------------------------- /public/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 15 | 33 | 37 | 41 | 50 | 59 | 63 | 67 | 71 | 73 | 81 | 86 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /content/docs/recipes/data-catalog.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Catalog 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsMichael Joseph Rosenthal
11 | 12 | There are scenarios where one needs to describe a collection of data packages, such as when building an online registry, or when building a pipeline that ingests multiple datasets. 13 | 14 | In these scenarios, the collection can be described using a "Catalog", where each dataset is represented as a single resource which has: 15 | 16 | ```json 17 | { 18 | "profile": "data-package", 19 | "format": "json" 20 | } 21 | ``` 22 | 23 | ## Specification 24 | 25 | The Data Package Catalog builds directly on the Data Package specification. Thus a Data Package Catalog `MUST` be a Data Package and conform to the [Data Package specification][dp]. 26 | 27 | The Data Package Catalog has the following requirements over and above those imposed by Data Package: 28 | 29 | - There `MUST` be a `profile` property with the value `data-package-catalog`, or a `profile` that extends it 30 | - Each resource `MUST` also be a Data Package 31 | 32 | ### Examples 33 | 34 | A generic package catalog: 35 | 36 | ```json 37 | { 38 | "profile": "data-package-catalog", 39 | "name": "climate-change-packages", 40 | "resources": [ 41 | { 42 | "profile": "json-data-package", 43 | "format": "json", 44 | "name": "beacon-network-description", 45 | "path": "http://beacon.berkeley.edu/hypothetical_deployment_description.json" 46 | }, 47 | { 48 | "profile": "tabular-data-package", 49 | "format": "json", 50 | "path": "https://pkgstore.datahub.io/core/co2-ppm/10/datapackage.json" 51 | }, 52 | { 53 | "profile": "tabular-data-package", 54 | "name": "co2-fossil-global", 55 | "format": "json", 56 | "path": "https://pkgstore.datahub.io/core/co2-fossil-global/11/datapackage.json" 57 | } 58 | ] 59 | } 60 | ``` 61 | 62 | A minimal tabular data catalog: 63 | 64 | ```json 65 | { 66 | "profile": "tabular-data-package-catalog", 67 | "name": "datahub-climate-change-packages", 68 | "resources": [ 69 | { 70 | "path": "https://pkgstore.datahub.io/core/co2-ppm/10/datapackage.json" 71 | }, 72 | { 73 | "name":
"co2-fossil-global", 74 | "path": "https://pkgstore.datahub.io/core/co2-fossil-global/11/datapackage.json" 75 | } 76 | ] 77 | } 78 | ``` 79 | 80 | Data packages can also be declared inline in the data catalog: 81 | 82 | ```json 83 | { 84 | "profile": "tabular-data-package-catalog", 85 | "name": "my-data-catalog", 86 | "resources": [ 87 | { 88 | "profile": "tabular-data-package", 89 | "name": "my-dataset", 90 | // here we list the data files in this dataset 91 | "resources": [ 92 | { 93 | "profile": "tabular-data-resource", 94 | "name": "resource-name", 95 | "data": [ 96 | { 97 | "id": 1, 98 | "first_name": "Louise" 99 | }, 100 | { 101 | "id": 2, 102 | "first_name": "Julia" 103 | } 104 | ], 105 | "schema": { 106 | "fields": [ 107 | { 108 | "name": "id", 109 | "type": "integer" 110 | }, 111 | { 112 | "name": "first_name", 113 | "type": "string" 114 | } 115 | ], 116 | "primaryKey": "id" 117 | } 118 | } 119 | ] 120 | } 121 | ] 122 | } 123 | ``` 124 | 125 | [dr]: /standard/data-resource/ 126 | [dp]: /standard/data-package/ 127 | 128 | ## Implementations 129 | 130 | None known. 
131 | -------------------------------------------------------------------------------- /astro.config.js: -------------------------------------------------------------------------------- 1 | import { rehypeHeadingIds } from "@astrojs/markdown-remark" 2 | import starlight from "@astrojs/starlight" 3 | import { defineConfig } from "astro/config" 4 | import rehypeAutolinkHeadings from "rehype-autolink-headings" 5 | import { remarkHeadingId } from "remark-custom-heading-id" 6 | import starlightBlog from "starlight-blog" 7 | 8 | // import starlightLinksValidator from "starlight-links-validator" 9 | 10 | // https://astro.build/config 11 | export default defineConfig({ 12 | site: "https://datapackage.org", 13 | srcDir: ".", 14 | outDir: "build", 15 | integrations: [ 16 | starlight({ 17 | title: "Data Package Standard", 18 | description: 19 | "Data Package is a standard consisting of a set of simple yet extensible specifications to describe datasets, data files and tabular data. It is a data definition language (DDL) and data API that facilitates findability, accessibility, interoperability, and reusability (FAIR) of data.", 20 | logo: { 21 | light: "/assets/logo-light.svg", 22 | dark: "/assets/logo-dark.svg", 23 | alt: "Data Package Logo", 24 | replacesTitle: true, 25 | }, 26 | social: { 27 | github: "https://github.com/frictionlessdata/datapackage", 28 | }, 29 | favicon: "favicon.svg", 30 | editLink: { 31 | baseUrl: "https://github.com/frictionlessdata/datapackage/edit/main/", 32 | }, 33 | lastUpdated: true, 34 | customCss: ["/assets/styles.css"], 35 | tableOfContents: { minHeadingLevel: 2, maxHeadingLevel: 5 }, 36 | components: { 37 | MarkdownContent: "./components/MarkdownContent.astro", 38 | SocialIcons: "./components/SocialIcons.astro", 39 | }, 40 | plugins: [ 41 | starlightBlog({ 42 | authors: { 43 | sapetti9: { 44 | name: "sapetti9", 45 | title: "Sara Petti", 46 | picture: "https://avatars.githubusercontent.com/u/74717970?v=4", 47 | url: 
"https://github.com/sapetti9", 48 | }, 49 | }, 50 | }), 51 | // The link validator is useful for debugging but it creates a lot of false positives 52 | // starlightLinksValidator(), 53 | ], 54 | sidebar: [ 55 | { label: "Overview", autogenerate: { directory: "overview" } }, 56 | { label: "Standard", autogenerate: { directory: "standard" } }, 57 | { 58 | label: "Extensions", 59 | collapsed: true, 60 | autogenerate: { directory: "extensions" }, 61 | }, 62 | { 63 | label: "Recipes", 64 | collapsed: true, 65 | autogenerate: { directory: "recipes" }, 66 | }, 67 | { 68 | label: "Guides", 69 | collapsed: true, 70 | autogenerate: { directory: "guides" }, 71 | }, 72 | ], 73 | head: [ 74 | { 75 | tag: "link", 76 | attrs: { 77 | rel: "icon", 78 | href: "/favicon.png", 79 | sizes: "256x256", 80 | }, 81 | }, 82 | { 83 | tag: "script", 84 | attrs: { 85 | src: "https://plausible.io/js/script.js", 86 | "data-domain": "datapackage.org", 87 | defer: true, 88 | }, 89 | }, 90 | ], 91 | }), 92 | ], 93 | markdown: { 94 | remarkPlugins: [remarkHeadingId], 95 | rehypePlugins: [ 96 | rehypeHeadingIds, 97 | [ 98 | rehypeAutolinkHeadings, 99 | { 100 | behavior: "wrap", 101 | }, 102 | ], 103 | ], 104 | }, 105 | vite: { 106 | resolve: { 107 | preserveSymlinks: true, 108 | }, 109 | }, 110 | }) 111 | -------------------------------------------------------------------------------- /components/Adoption.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import { Image } from "astro:assets" 3 | import { adoption } from "../assets" 4 | --- 5 | 6 |
    7 |
  • 8 | European Commission 12 |
  • 13 |
  • 14 | Github 17 |
  • 18 |
  • 19 | Dryad 23 |
  • 24 |
  • 25 | Our World in Data 28 |
  • 29 |
30 |
    31 |
  • 32 | BCO-DMO 36 |
  • 37 |
  • 38 | GBIF 42 |
  • 43 |
  • 44 | Elife 47 |
  • 48 |
  • 49 | Oxford 53 |
  • 54 |
  • 55 | Gapminder 58 |
  • 59 |
60 |
    61 |
  • 62 | Open Data Blend 65 |
  • 66 |
  • 67 | CMOA 71 |
  • 72 |
  • 73 | Data World 76 |
  • 77 |
  • 78 | The University of Chicago 82 |
  • 83 |
  • 84 | Validata 88 |
  • 89 |
90 | 91 | 122 | -------------------------------------------------------------------------------- /public/profiles/1.0/tabledialect.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "Table Dialect", 4 | "description": "The table dialect descriptor.", 5 | "properties": { 6 | "csvddfVersion": { 7 | "title": "CSV Dialect schema version", 8 | "description": "A number to indicate the schema version of CSV Dialect. Version 1.0 was named CSV Dialect Description Format and used different field names.", 9 | "type": "number", 10 | "default": 1.2, 11 | "examples:": ["{\n \"csvddfVersion\": \"1.2\"\n}\n"] 12 | }, 13 | "delimiter": { 14 | "title": "Delimiter", 15 | "description": "A character sequence to use as the field separator.", 16 | "type": "string", 17 | "default": ",", 18 | "examples": ["{\n \"delimiter\": \",\"\n}\n", "{\n \"delimiter\": \";\"\n}\n"] 19 | }, 20 | "doubleQuote": { 21 | "title": "Double Quote", 22 | "description": "Specifies the handling of quotes inside fields.", 23 | "context": "If Double Quote is set to true, two consecutive quotes must be interpreted as one.", 24 | "type": "boolean", 25 | "default": true, 26 | "examples": ["{\n \"doubleQuote\": true\n}\n"] 27 | }, 28 | "lineTerminator": { 29 | "title": "Line Terminator", 30 | "description": "Specifies the character sequence that must be used to terminate rows.", 31 | "type": "string", 32 | "default": "\r\n", 33 | "examples": [ 34 | "{\n \"lineTerminator\": \"\\r\\n\"\n}\n", 35 | "{\n \"lineTerminator\": \"\\n\"\n}\n" 36 | ] 37 | }, 38 | "nullSequence": { 39 | "title": "Null Sequence", 40 | "description": "Specifies the null sequence, for example, `\\N`.", 41 | "type": "string", 42 | "examples": ["{\n \"nullSequence\": \"\\N\"\n}\n"] 43 | }, 44 | "quoteChar": { 45 | "title": "Quote Character", 46 | "description": "Specifies a one-character string to use as the quoting character.", 47 | 
"type": "string", 48 | "default": "\"", 49 | "examples": ["{\n \"quoteChar\": \"'\"\n}\n"] 50 | }, 51 | "escapeChar": { 52 | "title": "Escape Character", 53 | "description": "Specifies a one-character string to use as the escape character.", 54 | "type": "string", 55 | "examples": ["{\n \"escapeChar\": \"\\\\\"\n}\n"] 56 | }, 57 | "skipInitialSpace": { 58 | "title": "Skip Initial Space", 59 | "description": "Specifies the interpretation of whitespace immediately following a delimiter. If false, whitespace immediately after a delimiter should be treated as part of the subsequent field.", 60 | "type": "boolean", 61 | "default": false, 62 | "examples": ["{\n \"skipInitialSpace\": true\n}\n"] 63 | }, 64 | "header": { 65 | "title": "Header", 66 | "description": "Specifies if the file includes a header row, always as the first row in the file.", 67 | "type": "boolean", 68 | "default": true, 69 | "examples": ["{\n \"header\": true\n}\n"] 70 | }, 71 | "commentChar": { 72 | "title": "Comment Character", 73 | "description": "Specifies that any row beginning with this one-character string, without preceeding whitespace, causes the entire line to be ignored.", 74 | "type": "string", 75 | "examples": ["{\n \"commentChar\": \"#\"\n}\n"] 76 | }, 77 | "caseSensitiveHeader": { 78 | "title": "Case Sensitive Header", 79 | "description": "Specifies if the case of headers is meaningful.", 80 | "context": "Use of case in source CSV files is not always an intentional decision. 
For example, should \"CAT\" and \"Cat\" be considered to have the same meaning.", 81 | "type": "boolean", 82 | "default": false, 83 | "examples": ["{\n \"caseSensitiveHeader\": true\n}\n"] 84 | } 85 | }, 86 | "examples": [ 87 | "{\n \"dialect\": {\n \"delimiter\": \";\"\n }\n}\n", 88 | "{\n \"dialect\": {\n \"delimiter\": \"\\t\",\n \"quoteChar\": \"'\",\n \"commentChar\": \"#\"\n }\n}\n" 89 | ] 90 | } 91 | -------------------------------------------------------------------------------- /content/docs/recipes/json-data-resources.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: JSON Data Resources 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsMichael Joseph Rosenthal
11 | 12 | A simple format to describe a single structured JSON data resource. It includes support for both metadata, such as author and title, and a [schema](https://json-schema.org/) to describe the data. 13 | 14 | ## Introduction 15 | 16 | A **JSON Data Resource** is a type of [Data Resource][dr] specialized for describing structured JSON data. 17 | 18 | JSON Data Resource extends [Data Resource][dr] in the following key ways: 19 | 20 | - The `schema` property MUST follow the [JSON Schema](https://json-schema.org/) specification, either as a JSON object directly under the property, or a string referencing another JSON document containing the JSON Schema 21 | 22 | ## Examples 23 | 24 | A minimal JSON Data Resource, referencing external JSON documents, looks as follows. 25 | 26 | ```javascript 27 | // with data and a schema accessible via the local filesystem 28 | { 29 | "profile": "json-data-resource", 30 | "name": "resource-name", 31 | "path": [ "resource-path.json" ], 32 | "schema": "jsonschema.json" 33 | } 34 | 35 | // with data accessible via http 36 | { 37 | "profile": "json-data-resource", 38 | "name": "resource-name", 39 | "path": [ "http://example.com/resource-path.json" ], 40 | "schema": "http://example.com/jsonschema.json" 41 | } 42 | ``` 43 | 44 | A minimal JSON Data Resource example using the data property to inline data looks as follows. 45 | 46 | ```javascript 47 | { 48 | "profile": "json-data-resource", 49 | "name": "resource-name", 50 | "data": { 51 | "id": 1, 52 | "first_name": "Louise" 53 | }, 54 | "schema": { 55 | "type": "object", 56 | "required": [ 57 | "id" 58 | ], 59 | "properties": { 60 | "id": { 61 | "type": "integer" 62 | }, 63 | "first_name": { 64 | "type": "string" 65 | } 66 | } 67 | } 68 | } 69 | ``` 70 | 71 | A comprehensive JSON Data Resource example with all required, recommended and optional properties looks as follows.
72 | 73 | ```javascript 74 | { 75 | "profile": "json-data-resource", 76 | "name": "solar-system", 77 | "path": "http://example.com/solar-system.json", 78 | "title": "The Solar System", 79 | "description": "My favourite data about the solar system.", 80 | "format": "json", 81 | "mediatype": "application/json", 82 | "encoding": "utf-8", 83 | "bytes": 1, 84 | "hash": "", 85 | "schema": { 86 | "$schema": "http://json-schema.org/draft-07/schema#", 87 | "type": "object", 88 | "required": [ 89 | "id" 90 | ], 91 | "properties": { 92 | "id": { 93 | "type": "integer" 94 | }, 95 | "name": { 96 | "type": "string" 97 | }, 98 | "description": { 99 | "type": "string" 100 | } 101 | } 102 | }, 103 | "sources": [{ 104 | "title": "The Solar System - 2001", 105 | "path": "http://example.com/solar-system-2001.json", 106 | "email": "" 107 | }], 108 | "licenses": [{ 109 | "name": "CC-BY-4.0", 110 | "title": "Creative Commons Attribution 4.0", 111 | "path": "https://creativecommons.org/licenses/by/4.0/" 112 | }] 113 | } 114 | ``` 115 | 116 | ## Specification 117 | 118 | A JSON Data Resource MUST be a [Data Resource][dr], that is it MUST conform to the [Data Resource specification][dr]. 119 | 120 | In addition: 121 | 122 | - The Data Resource `schema` property MUST follow the [JSON Schema](https://json-schema.org/) specification, either as a JSON object directly under the property, or a string referencing another JSON document containing the JSON Schema 123 | - There `MUST` be a `profile` property with the value `json-data-resource` 124 | - The data the Data Resource describes MUST, if non-inline, be a JSON file 125 | 126 | ## JSON file requirements 127 | 128 | When `"format": "json"`, files must strictly follow the [JSON specification](https://www.json.org/). Some implementations `MAY` support `"format": "jsonc"`, allowing for non-standard single line and block comments (`//` and `/* */` respectively). 129 | 130 | ## Implementations 131 | 132 | None known. 
133 | -------------------------------------------------------------------------------- /content/docs/index.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Package 3 | template: splash 4 | editUrl: false 5 | hero: 6 | tagline: | 7 | Data Package is a standard consisting of a set of 8 | simple yet extensible specifications 9 | to describe datasets, data files and tabular data. 10 | It is a data definition language (DDL) and data API that facilitates 11 | findability, accessibility, interoperability, and reusability (FAIR) of data. 12 | image: 13 | file: ../../assets/hero.png 14 | alt: Data Packages Illustration 15 | actions: 16 | - text: Get Started 17 | link: /overview/introduction/ 18 | icon: right-arrow 19 | variant: primary 20 | - text: Use Cases 21 | link: /overview/adoption/ 22 | icon: rocket 23 | variant: secondary 24 | - text: View on GitHub 25 | link: https://github.com/frictionlessdata/datapackage 26 | icon: external 27 | variant: minimal 28 | banner: 29 | content: | 30 |

31 | The Data Package (v2) standard was released on June 26, 2024. 32 | See the announcement and changelog for details. 33 |

34 | --- 35 | 36 | import About from "../../components/About.astro" 37 | import Adoption from "../../components/Adoption.astro" 38 | import ClickableCard from "../../components/ClickableCard.astro" 39 | import { CardGrid, LinkCard, Card } from "@astrojs/starlight/components" 40 | 41 |

Standard

42 | 43 | The Data Package standard is a comprehensive set of **specifications** that collectively define a framework for organizing, documenting, and sharing data in a structured and interoperable manner -- [EXPLORE THE STANDARD](/standard/data-package) 44 | 45 | 46 | 47 | A simple container format to describe a coherent collection of data (a dataset), 48 | including its contributors, licenses, etc. 49 | 50 | 51 | A simple format to describe a data resource such as an individual table or file, 52 | including its name, format, path, etc. 53 | 54 | 55 | A simple format to describe the dialect of a tabular data file, including its 56 | delimiter, header rows, escape characters, etc. 57 | 58 | 59 | A simple format to describe tabular data, including field names, types, constraints, 60 | missing values, foreign keys, etc. 61 | 62 | 63 | 64 |
65 | 66 |

Software

67 | 68 | Data Package is backed by a suite of software tools supporting the standard. From the no-code visual tool **Open Data Editor** to low-level drivers for 10 programming languages -- [EXPLORE THE SOFTWARE](/overview/software) 69 | 70 |
71 | 72 | 73 | 74 |
75 | 76 |

Adoption

77 | 78 | Data Package is used for a wide range of scenarios where this standardized data packaging format proves invaluable for efficient data management, sharing, and analysis -- [EXPLORE THE ADOPTION](/overview/adoption) 79 | 80 | 81 | 82 |
83 | 84 |

Documentation

85 | 86 | Read the Data Package documentation to learn more about the project: 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | We are building a world open by design where all knowledge is accessible to everyone 97 | [Learn about Open Knowledge](https://okfn.org) 98 | 99 | -------------------------------------------------------------------------------- /content/docs/overview/introduction.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | sidebar: 4 | order: 0 5 | --- 6 | 7 | :::tip[FAIR Data Exchange] 8 | The Data Package standard facilitates **findability, accessibility, interoperability, and reusability** of data, making it perfect for [FAIR Data Exchange](https://en.wikipedia.org/wiki/FAIR_data). 9 | ::: 10 | 11 | In our increasingly data-driven world, the ability to manage, share, and analyze data effectively has become paramount. Organizations across various domains, from research institutions and governmental agencies to businesses and non-profit organizations, are generating and utilizing vast amounts of data to inform decisions, solve complex problems, and drive innovation. However, this data abundance brings with it a unique set of challenges, particularly when it comes to ensuring data quality, interoperability, and accessibility. 12 | 13 | ## The Project 14 | 15 | The Data Package Standard emerges as a solution to these challenges, offering a structured and versatile framework for organizing, documenting, and distributing data. Whether you are a data scientist, researcher, data engineer, or data steward, the Data Package Standard is designed to streamline your data management processes and facilitate collaboration, making data more discoverable and usable for everyone involved. In general, the Data Package project consists of these parts: 16 | 17 | 1.
**Standard**: Comprehensive set of specifications that collectively define a framework for organizing, documenting, and sharing data in a structured and interoperable manner 18 | 19 | 2. **Extensions**: Data practitioners can extend the standard by incorporating custom metadata, validation rules, or specific constraints to suit their data's peculiarities. 20 | 21 | 3. **Recipes**: Various approaches for solving common problems, in ways that are not specified as a formal Data Package specification. 22 | 23 | 4. **Guides**: The least formal part of the standard containing various guides on how to get started with Data Package or how to extend Data Package standard. 24 | 25 | ## Key Principles 26 | 27 | At its core, the Data Package Standard is built upon a set of key principles that underpin its design and functionality: 28 | 29 | 1. **Simplicity**: The Data Package Standard is intentionally designed to be simple and easy to understand. Its straightforward structure ensures that even users with limited technical expertise can work with it effectively. 30 | 31 | 2. **Flexibility**: Data comes in various forms and structures, and the Data Package Standard accommodates this diversity. It allows you to package data in a way that suits your specific needs, whether you are dealing with tabular data, geographic data, or complex multi-resource datasets. 32 | 33 | 3. **Reproducibility**: Data integrity and reproducibility are vital in scientific research and data analysis. Data Packages include detailed metadata and versioning information, making it possible to reproduce analyses and ensure data quality over time. 34 | 35 | 4. **Interoperability**: To facilitate data exchange and collaboration, the Data Package Standard emphasizes interoperability. Data Packages are designed to work seamlessly with other data tools and standards, such as CSV, JSON, and SQL databases. 
36 | 37 | ## Benefits of Adoption 38 | 39 | By adhering to the Data Package Standard, you can unlock several significant advantages in your data management processes: 40 | 41 | 1. **Improved Data Discovery**: Well-structured metadata and clear documentation make it easier for others to discover and understand your data, promoting data sharing and collaboration. 42 | 43 | 2. **Enhanced Data Quality**: Data validation and versioning support help maintain data quality and integrity, reducing errors and ensuring data consistency over time. 44 | 45 | 3. **Efficient Data Sharing**: Data Packages can be easily shared and distributed, making it straightforward to disseminate your data to collaborators, stakeholders, or the public. 46 | 47 | 4. **Community Engagement**: By adopting an open standard like the Data Package Standard, you can engage with a broader community of data practitioners, share best practices, and contribute to the evolution of data management standards. 48 | 49 | As you delve deeper into the Data Package Standard, you will discover its practical applications and how it can revolutionize the way you handle data. Whether you are a data enthusiast or a seasoned professional, embracing this standard can empower you to harness the full potential of your data and drive innovation in your field. 
50 | -------------------------------------------------------------------------------- /public/profiles/2.0/tabledialect.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "Table Dialect", 4 | "description": "The Table dialect descriptor.", 5 | "type": "object", 6 | "properties": { 7 | "$schema": { 8 | "default": "https://datapackage.org/profiles/1.0/tabledialect.json", 9 | "propertyOrder": 10, 10 | "title": "Profile", 11 | "description": "The profile of this descriptor.", 12 | "type": "string" 13 | }, 14 | "header": { 15 | "title": "Header", 16 | "description": "Specifies if the file includes a header row, always as the first row in the file.", 17 | "type": "boolean", 18 | "default": true, 19 | "examples": [ 20 | "{\n \"header\": true\n}\n" 21 | ] 22 | }, 23 | "headerRows": { 24 | "type": "array", 25 | "default": [ 26 | 1 27 | ], 28 | "items": { 29 | "type": "integer", 30 | "minimum": 1 31 | } 32 | }, 33 | "headerJoin": { 34 | "type": "string", 35 | "default": " " 36 | }, 37 | "commentRows": { 38 | "type": "array", 39 | "default": [ 40 | 1 41 | ], 42 | "items": { 43 | "type": "integer", 44 | "minimum": 1 45 | } 46 | }, 47 | "commentChar": { 48 | "title": "Comment Character", 49 | "description": "Specifies that any row beginning with this one-character string, without preceeding whitespace, causes the entire line to be ignored.", 50 | "type": "string", 51 | "examples": [ 52 | "{\n \"commentChar\": \"#\"\n}\n" 53 | ] 54 | }, 55 | "delimiter": { 56 | "title": "Delimiter", 57 | "description": "A character sequence to use as the field separator.", 58 | "type": "string", 59 | "default": ",", 60 | "examples": [ 61 | "{\n \"delimiter\": \",\"\n}\n", 62 | "{\n \"delimiter\": \";\"\n}\n" 63 | ] 64 | }, 65 | "lineTerminator": { 66 | "title": "Line Terminator", 67 | "description": "Specifies the character sequence that must be used to terminate rows.", 68 | "type": 
"string", 69 | "default": "\r\n", 70 | "examples": [ 71 | "{\n \"lineTerminator\": \"\\r\\n\"\n}\n", 72 | "{\n \"lineTerminator\": \"\\n\"\n}\n" 73 | ] 74 | }, 75 | "quoteChar": { 76 | "title": "Quote Character", 77 | "description": "Specifies a one-character string to use as the quoting character.", 78 | "type": "string", 79 | "default": "\"", 80 | "examples": [ 81 | "{\n \"quoteChar\": \"'\"\n}\n" 82 | ] 83 | }, 84 | "doubleQuote": { 85 | "title": "Double Quote", 86 | "description": "Specifies the handling of quotes inside fields.", 87 | "context": "If Double Quote is set to true, two consecutive quotes must be interpreted as one.", 88 | "type": "boolean", 89 | "default": true, 90 | "examples": [ 91 | "{\n \"doubleQuote\": true\n}\n" 92 | ] 93 | }, 94 | "escapeChar": { 95 | "title": "Escape Character", 96 | "description": "Specifies a one-character string to use as the escape character.", 97 | "type": "string", 98 | "examples": [ 99 | "{\n \"escapeChar\": \"\\\\\"\n}\n" 100 | ] 101 | }, 102 | "nullSequence": { 103 | "title": "Null Sequence", 104 | "description": "Specifies the null sequence, for example, `\\N`.", 105 | "type": "string", 106 | "examples": [ 107 | "{\n \"nullSequence\": \"\\N\"\n}\n" 108 | ] 109 | }, 110 | "skipInitialSpace": { 111 | "title": "Skip Initial Space", 112 | "description": "Specifies the interpretation of whitespace immediately following a delimiter. 
If false, whitespace immediately after a delimiter should be treated as part of the subsequent field.", 113 | "type": "boolean", 114 | "default": false, 115 | "examples": [ 116 | "{\n \"skipInitialSpace\": true\n}\n" 117 | ] 118 | }, 119 | "property": { 120 | "type": "string" 121 | }, 122 | "itemType": { 123 | "type": "string", 124 | "enum": [ 125 | "array", 126 | "object" 127 | ] 128 | }, 129 | "itemKeys": { 130 | "type": "array", 131 | "items": { 132 | "type": "string" 133 | } 134 | }, 135 | "sheetNumber": { 136 | "type": "integer", 137 | "minimum": 1 138 | }, 139 | "sheetName": { 140 | "type": "string" 141 | }, 142 | "table": { 143 | "type": "string" 144 | } 145 | } 146 | } -------------------------------------------------------------------------------- /profiles/dictionary/dialect.yaml: -------------------------------------------------------------------------------- 1 | tableDialect: 2 | title: Table Dialect 3 | description: The Table dialect descriptor. 4 | type: object 5 | properties: 6 | $schema: 7 | "$ref": "#/definitions/$schema" 8 | default: https://datapackage.org/profiles/1.0/tabledialect.json 9 | propertyOrder: 10 10 | header: 11 | "$ref": "#/definitions/header" 12 | headerRows: 13 | "$ref": "#/definitions/headerRows" 14 | headerJoin: 15 | "$ref": "#/definitions/headerJoin" 16 | commentRows: 17 | "$ref": "#/definitions/commentRows" 18 | commentChar: 19 | "$ref": "#/definitions/commentChar" 20 | delimiter: 21 | "$ref": "#/definitions/delimiter" 22 | lineTerminator: 23 | "$ref": "#/definitions/lineTerminator" 24 | quoteChar: 25 | "$ref": "#/definitions/quoteChar" 26 | doubleQuote: 27 | "$ref": "#/definitions/doubleQuote" 28 | escapeChar: 29 | "$ref": "#/definitions/escapeChar" 30 | nullSequence: 31 | "$ref": "#/definitions/nullSequence" 32 | skipInitialSpace: 33 | "$ref": "#/definitions/skipInitialSpace" 34 | property: 35 | "$ref": "#/definitions/property" 36 | itemType: 37 | "$ref": "#/definitions/itemType" 38 | itemKeys: 39 | "$ref": 
"#/definitions/itemKeys" 40 | sheetNumber: 41 | "$ref": "#/definitions/sheetNumber" 42 | sheetName: 43 | "$ref": "#/definitions/sheetName" 44 | table: 45 | "$ref": "#/definitions/table" 46 | header: 47 | title: Header 48 | description: Specifies if the file includes a header row, always as the first row in the file. 49 | type: boolean 50 | default: true 51 | examples: 52 | - | 53 | { 54 | "header": true 55 | } 56 | headerRows: 57 | type: array 58 | default: [1] 59 | items: 60 | type: integer 61 | minimum: 1 62 | headerJoin: 63 | type: string 64 | default: " " 65 | commentRows: 66 | type: array 67 | default: [1] 68 | items: 69 | type: integer 70 | minimum: 1 71 | commentChar: 72 | title: Comment Character 73 | description: Specifies that any row beginning with this one-character string, without preceeding whitespace, causes the entire line to be ignored. 74 | type: string 75 | examples: 76 | - | 77 | { 78 | "commentChar": "#" 79 | } 80 | delimiter: 81 | title: Delimiter 82 | description: A character sequence to use as the field separator. 83 | type: string 84 | default: "," 85 | examples: 86 | - | 87 | { 88 | "delimiter": "," 89 | } 90 | - | 91 | { 92 | "delimiter": ";" 93 | } 94 | lineTerminator: 95 | title: Line Terminator 96 | description: Specifies the character sequence that must be used to terminate rows. 97 | type: string 98 | default: "\r\n" 99 | examples: 100 | - | 101 | { 102 | "lineTerminator": "\r\n" 103 | } 104 | - | 105 | { 106 | "lineTerminator": "\n" 107 | } 108 | doubleQuote: 109 | title: Double Quote 110 | description: Specifies the handling of quotes inside fields. 111 | context: If Double Quote is set to true, two consecutive quotes must be interpreted 112 | as one. 113 | type: boolean 114 | default: true 115 | examples: 116 | - | 117 | { 118 | "doubleQuote": true 119 | } 120 | quoteChar: 121 | title: Quote Character 122 | description: Specifies a one-character string to use as the quoting character. 
123 | type: string 124 | default: '"' 125 | examples: 126 | - | 127 | { 128 | "quoteChar": "'" 129 | } 130 | escapeChar: 131 | title: Escape Character 132 | description: Specifies a one-character string to use as the escape character. 133 | type: string 134 | examples: 135 | - | 136 | { 137 | "escapeChar": "\\" 138 | } 139 | nullSequence: 140 | title: Null Sequence 141 | description: Specifies the null sequence, for example, `\N`. 142 | type: string 143 | examples: 144 | - | 145 | { 146 | "nullSequence": "\N" 147 | } 148 | skipInitialSpace: 149 | title: Skip Initial Space 150 | description: Specifies the interpretation of whitespace immediately following 151 | a delimiter. If false, whitespace immediately after a delimiter should be treated 152 | as part of the subsequent field. 153 | type: boolean 154 | default: false 155 | examples: 156 | - | 157 | { 158 | "skipInitialSpace": true 159 | } 160 | property: 161 | type: string 162 | itemType: 163 | type: string 164 | enum: 165 | - array 166 | - object 167 | itemKeys: 168 | type: array 169 | items: 170 | type: string 171 | sheetNumber: 172 | type: integer 173 | minimum: 1 174 | sheetName: 175 | type: string 176 | table: 177 | type: string 178 | -------------------------------------------------------------------------------- /profiles/dictionary/resource.yaml: -------------------------------------------------------------------------------- 1 | dataResource: 2 | title: Data Resource 3 | description: Data Resource. 
4 | type: object 5 | oneOf: 6 | - required: 7 | - name 8 | - data 9 | - required: 10 | - name 11 | - path 12 | properties: 13 | $schema: 14 | "$ref": "#/definitions/$schema" 15 | default: https://datapackage.org/profiles/1.0/dataresource.json 16 | propertyOrder: 10 17 | name: 18 | "$ref": "#/definitions/name" 19 | propertyOrder: 20 20 | path: 21 | "$ref": "#/definitions/resourcePath" 22 | propertyOrder: 30 23 | data: 24 | "$ref": "#/definitions/data" 25 | propertyOrder: 230 26 | type: 27 | "$ref": "#/definitions/resourceType" 28 | propertyOrder: 235 29 | title: 30 | "$ref": "#/definitions/title" 31 | propertyOrder: 50 32 | description: 33 | "$ref": "#/definitions/description" 34 | propertyOrder: 60 35 | format: textarea 36 | homepage: 37 | "$ref": "#/definitions/homepage" 38 | propertyOrder: 70 39 | sources: 40 | "$ref": "#/definitions/sources" 41 | propertyOrder: 140 42 | options: 43 | hidden: true 44 | licenses: 45 | "$ref": "#/definitions/licenses" 46 | description: The license(s) under which the resource is published. 
47 | propertyOrder: 150 48 | options: 49 | hidden: true 50 | format: 51 | "$ref": "#/definitions/format" 52 | propertyOrder: 80 53 | mediatype: 54 | "$ref": "#/definitions/mediatype" 55 | propertyOrder: 90 56 | encoding: 57 | "$ref": "#/definitions/encoding" 58 | propertyOrder: 100 59 | bytes: 60 | "$ref": "#/definitions/bytes" 61 | propertyOrder: 110 62 | options: 63 | hidden: true 64 | hash: 65 | "$ref": "#/definitions/hash" 66 | propertyOrder: 120 67 | options: 68 | hidden: true 69 | dialect: 70 | "$ref": "#/definitions/tableDialect" 71 | propertyOrder: 130 72 | schema: 73 | "$ref": "#/definitions/tableSchema" 74 | propertyOrder: 140 75 | pathArray: 76 | type: array 77 | minItems: 1 78 | items: 79 | "$ref": "#/definitions/path" 80 | examples: 81 | - | 82 | [ "file.csv" ] 83 | - | 84 | [ "http://example.com/file.csv" ] 85 | resourcePath: 86 | title: Path 87 | description: 88 | A reference to the data for this resource, as either a path as a string, or an array of paths as strings. 89 | of valid URIs. 90 | oneOf: [{ "$ref": "#/definitions/path" }, { "$ref": "#/definitions/pathArray" }] 91 | context: The dereferenced value of each referenced data source in `path` 92 | `MUST` be commensurate with a native, dereferenced representation of the data 93 | the resource describes. For example, in a *Tabular* Data Resource, this means 94 | that the dereferenced value of `path` `MUST` be an array. 95 | examples: 96 | - | 97 | { 98 | "path": [ 99 | "file.csv", 100 | "file2.csv" 101 | ] 102 | } 103 | - | 104 | { 105 | "path": [ 106 | "http://example.com/file.csv", 107 | "http://example.com/file2.csv" 108 | ] 109 | } 110 | - | 111 | { 112 | "path": "http://example.com/file.csv" 113 | } 114 | resourceType: 115 | type: string 116 | enum: 117 | - table 118 | format: 119 | title: Format 120 | description: The file format of this resource. 121 | context: "`csv`, `xls`, `json` are examples of common formats." 
122 | type: string 123 | examples: 124 | - | 125 | { 126 | "format": "xls" 127 | } 128 | mediatype: 129 | title: Media Type 130 | description: The media type of this resource. Can be any valid media type listed 131 | with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml). 132 | type: string 133 | pattern: "^(.+)/(.+)$" 134 | examples: 135 | - | 136 | { 137 | "mediatype": "text/csv" 138 | } 139 | encoding: 140 | title: Encoding 141 | description: The file encoding of this resource. 142 | type: string 143 | default: utf-8 144 | examples: 145 | - | 146 | { 147 | "encoding": "utf-8" 148 | } 149 | bytes: 150 | title: Bytes 151 | description: The size of this resource in bytes. 152 | type: integer 153 | examples: 154 | - | 155 | { 156 | "bytes": 2082 157 | } 158 | hash: 159 | title: Hash 160 | type: string 161 | description: The MD5 hash of this resource. Indicate other hashing algorithms 162 | with the {algorithm}:{hash} format. 163 | pattern: "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$" 164 | examples: 165 | - | 166 | { 167 | "hash": "d25c9c77f588f5dc32059d2da1136c02" 168 | } 169 | - | 170 | { 171 | "hash": "SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0" 172 | } 173 | -------------------------------------------------------------------------------- /content/docs/overview/software.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Software 3 | sidebar: 4 | order: 5 5 | --- 6 | 7 | import { software } from "../../../assets" 8 | import ImageLinkCard from "../../../components/ImageLinkCard.astro" 9 | import { LinkCard, CardGrid } from "@astrojs/starlight/components" 10 | 11 | :::tip[Contribution Note] 12 | Please let us know if there is any Data Package-aware software not listed below. Just open an issue or create a pull request to contribute. 13 | ::: 14 | 15 | Data Package is backed by a comprehensive list of software products supporting the standard. 
From no-code visual tool **Open Data Editor** to low-level drivers for 10 programming languages. Here is the list of available software: 16 | 17 | ## Visual 18 | 19 | 25 | 26 | 27 | 33 | 34 | 40 | 41 | 42 | ## Python 43 | 44 | 45 | 50 | 55 | 56 | 57 | ## JavaScript 58 | 59 | 60 | 65 | 70 | 71 | 72 | ## R 73 | 74 | 75 | 80 | 81 | 82 | ## Ruby 83 | 84 | 85 | 90 | 95 | 96 | 97 | ## PHP 98 | 99 | 100 | 105 | 110 | 111 | 112 | ## Java 113 | 114 | 115 | 120 | 125 | 126 | 127 | ## Swift 128 | 129 | 130 | 135 | 140 | 141 | 142 | ## Go 143 | 144 | 145 | 150 | 155 | 156 | 157 | ## Julia 158 | 159 | 160 | 165 | 170 | 171 | -------------------------------------------------------------------------------- /content/docs/standard/extensions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Extensions 3 | description: The Data Package Standard extensibility features for domain-specific needs. 4 | sidebar: 5 | order: 5 6 | authors: 7 | - Rufus Pollock 8 | - Paul Walsh 9 | - Adam Kariv 10 | - Evgeny Karev 11 | - Peter Desmet 12 | - Data Package Working Group 13 | --- 14 | 15 | 16 | 17 | 18 | 19 | 20 |
Authors{frontmatter.authors.join(", ")}
21 | 22 |

{frontmatter.description}

23 | 24 | ## Language 25 | 26 | The key words `MUST`, `MUST NOT`, `REQUIRED`, `SHALL`, `SHALL NOT`, `SHOULD`, `SHOULD NOT`, `RECOMMENDED`, `MAY`, and `OPTIONAL` in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 27 | 28 | ## Introduction 29 | 30 | The Data Package Standard provides a rich set of metadata and data features for general applications. At the same time, the Data Package Standard at its core is domain-agnostic and does not provide any builtin means to describe metadata in specific knowledge areas such as biology or medicine. 31 | 32 | A domain-specific extension is the way to enrich Data Package's metadata to meet specific needs of a knowledge domain. For example, there are some prominent Data Package extensions: 33 | 34 | - [Camera Trap Data Package](https://camtrap-dp.tdwg.org/) 35 | - [Fiscal Data Package](https://fiscal.datapackage.org) 36 | 37 | ## Extension 38 | 39 | The Data Package Standard has a simple yet powerful extension mechanism based on the [Profile](/standard/glossary/#profile) concept. An extension is, generally speaking, a project that provides one or more domain-specific profiles to the Data Package Standard specifications. 40 | 41 | From a user's perspective, a custom profile can be provided as a `$schema` property in a corresponding specification [Descriptor](/standard/glossary/#descriptor). Having a profile instructs an implementation to validate a descriptor using the JSON Schema rules of the profile. 42 | 43 | Usually, Data Package is the specification that is extended. As a container format, it is the most natural target for metadata enrichment. At the same time, technically any of the core specifications can be extended. For example, if you build a Table Schema catalog, it is possible to extend a Table Schema specification using the same approach as described below. 
44 | 45 | Note that the Data Package Standard's extension system completely relies on the JSON Schema Standard without extending its builtin features in any way. It makes the system robust and provides rich tooling support such as [text editor validation](https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json). 46 | 47 | Combining modern JSON Schema features with the ability to provide profiles to any of the core Data Package Standard specification descriptors makes it possible to achieve almost any metadata enrichment goal, including but not limited to: 48 | 49 | - Adding new domain-specific properties. 50 | - Requiring existing properties to comply with certain requirements. 51 | - Defining what resources are expected. 52 | - Requiring resources to meet certain dialect or schema requirements. 53 | - Combining existent profiles as a part of a high-level extension. 54 | - Creating domain-specific dialect and schema catalogues. 55 | 56 | ## Example 57 | 58 | For example, we will create a Spatial Data Package that requires a `geopoint` marker to be provided for each resource constituting a Data Package. 59 | 60 | ### Profile 61 | 62 | First of all, we need to create a Data Package profile. 
Note that it includes a default data package profile as per the [specification requirement](/standard/data-package/#schema): 63 | 64 | ```json 65 | { 66 | "$schema": "http://json-schema.org/draft-07/schema#", 67 | "title": "Spatial Data Package Profile", 68 | "type": "object", 69 | "allOf": [ 70 | { "$ref": "https://datapackage.org/profiles/2.0/datapackage.json" }, 71 | { "$ref": "#/definitions/spatialMixin" } 72 | ], 73 | "definitions": { 74 | "spatialMixin": { 75 | "type": "object", 76 | "properties": { 77 | "resources": { 78 | "type": "array", 79 | "items": { 80 | "type": "object", 81 | "required": ["geopoint"], 82 | "properties": { 83 | "geopoint": { 84 | "type": "object", 85 | "properties": { 86 | "lon": { "type": "number" }, 87 | "lat": { "type": "number" } 88 | }, 89 | "additionalProperties": false 90 | } 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | ``` 99 | 100 | ### Descriptor 101 | 102 | Consider that the profile above is published at `https://spatial.datapackage.org/profiles/1.0/datapackage.json`. In this case, a Data Package descriptor compatible with the exemplar Spatial Data Package (v1) will look as below: 103 | 104 | ```json 105 | { 106 | "$schema": "https://spatial.datapackage.org/profiles/1.0/datapackage.json", 107 | "title": "Spatial Data Package Descriptor", 108 | "resources": [ 109 | { 110 | "name": "expedition-1", 111 | "path": "expedition-1.csv", 112 | "geopoint": { 113 | "lon": 90, 114 | "lat": 90 115 | } 116 | } 117 | ] 118 | } 119 | ``` 120 | 121 | ### Software 122 | 123 | Even though they are not aware of the extension, any Data Package software implementation will validate a Spatial Data Package out of the box: both the domain-specific properties as well as the general Data Package properties. We do, however, encourage extension authors to build on top of existing software to support domain-specific properties on the programming models level as well. 
124 | -------------------------------------------------------------------------------- /content/docs/guides/mediawiki-tabular-data.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Comparison with MediaWiki Tabular Data 3 | sidebar: 4 | order: 3 5 | --- 6 | 7 | 8 | 9 | 10 | 11 | 12 |
AuthorsJakob Voß
13 | 14 | [MediaWiki](https://www.mediawiki.org/) is the software used to run Wikipedia and related projects of the Wikimedia Foundation, including the media file repository [Wikimedia Commons](https://commons.wikimedia.org/). Commons hosts mostly images but also some records with tabular data. The [MediaWiki Tabular Data Model](https://www.mediawiki.org/wiki/Help:Tabular_data) was inspired by Data Package version 1 but it slightly differs from current Data Package specification, as described below. 15 | 16 | ## Property Comparison 17 | 18 | A [MediaWiki tabular data page](https://www.mediawiki.org/wiki/Help:Tabular_data) describes and contains an individual table of data similar to a [Data Resource](/standard/data-resource/) with inline tabular data. Both are serialized as JSON objects, but the former comes as a page with unique name in a MediaWiki instance (such as Wikimedia Commons). 19 | 20 | ### Top-level Properties 21 | 22 | MediaWiki Tabular Data has three required and two optional top-level properties. 
Most of these properties map to corresponding properties of a Data Resource: 23 | 24 | | MediaWiki Tabular Data | Data Package Data Resource | 25 | | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------- | 26 | | - (implied by page name) | [name](/standard/data-resource/#name) (required) is a string | 27 | | [description](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (optional) is a localized string | [description](/standard/data-resource/#description) (optional) is a CommonMark string | 28 | | [data](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (required) | [data](/standard/data-resource/#data) (optional) | 29 | | [license](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (required) is the string `CC0-1.0` or another known identifier | [licenses](/standard/data-resource/#licenses) (optional) is an array | 30 | | [schema](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (required) as [described below](#schema-properties) | [schema](/standard/data-resource/#schema) (optional) can have multiple forms | 31 | | [sources](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (optional) is a string with Wiki markup | [sources](/standard/data-resource/#sources) (optional) is an array of objects | 32 | 33 | The differences are: 34 | 35 | - property `name` does not exist but can be implied from page name 36 | - property `description` and `sources` have another format 37 | - property `data` is always an array of arrays and [data types](#data-types) of individual values can differ 38 | - property `schema` is required but it differs in definition of [schema properties](#schema-properties) 39 | - there is no property `licenses` but `license` is fixed to plain string value `CC0-1.0` (other license indicators may be possible) 
40 | 41 | ### Data Types 42 | 43 | Tabular Data supports four data types that overlap with [Table Schema data types](/standard/table-schema/#field-types): 44 | 45 | - `number` subset of Table Schema [number](/standard/table-schema/#number) (no `NaN`, `INF`, or `-INF`) 46 | - `boolean` same as Table Schema [boolean](/standard/table-schema/#boolean) 47 | - `string` subset of Table Schema [string](/standard/table-schema/#string) (limited to 400 characters at most and must not include `\n` or `\t`) 48 | - `localized ` refers to an object that maps language codes to strings with same limitations as `string` type. 49 | This type is not supported in Table Schema. 50 | 51 | Individual values in a MediaWiki Tabular Data table can always be `null`, while in Table Schema you need to explicitly list values that should be considered missing in [schema.missingValues](/standard/table-schema/#missingValues). 52 | 53 | ### Schema Properties 54 | 55 | The `schema` property of MediaWiki tabular contains an object with property `fields` just like [Table Schema](/standard/table-schema/) but no other properties are allowed. 
Elements of this array are like Table Schema [field descriptors](/standard/table-schema/#field) limited to three properties and different value spaces: 56 | 57 | | MediaWiki Tabular Data | Data Package Table Schema | 58 | | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | 59 | | [name](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (required) must be a string matching `^[a-zA-Z_][a-zA-Z_0-9]*` | [name](/standard/table-schema/#name) (required) can be any string | 60 | | [type](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (required) is one of the [Data Types above](#data-types) | [type](/standard/table-schema/#type) (optional) with [different data types](#data-types) | 61 | | [title](https://www.mediawiki.org/wiki/Help:Tabular_data#Top-level_fields) (optional) is a localized string | [title](/standard/table-schema/#title) (optional) is a plain string | 62 | -------------------------------------------------------------------------------- /content/docs/recipes/metadata-in-table-schema.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Metadata in Table Schema 3 | sidebar: 4 | hidden: true 5 | --- 6 | 7 | 8 | 9 | 10 | 11 | 12 |
AuthorsChristophe Benz, Johan Richer
13 | 14 | ## Overview 15 | 16 | Table Schemas need their own metadata to be stand-alone and interpreted without relying on other contextual information (Data Package metadata for example). Adding metadata to describe schemas in a structured way would help users to understand them and would increase their sharing and reuse. 17 | 18 | Currently it is possible to add custom properties to a Table Schema, but the lack of consensus about those properties restricts common tooling and wider adoption. 19 | 20 | ## Use cases 21 | 22 | - Documentation: generating Markdown documentation from the schema itself is a useful use case, and contextual information (description, version, authors...) needs to be retrieved. 23 | - Cataloging: open data standardisation can be increased by improving Table Schemas shareability, for example by searching and categorising them (by keywords, countries, full-text...) in catalogs. 24 | - Machine readability: tools like Goodtables could use catalogs to access Table Schemas in order to help users validate tabular files against existing schemas. Metadata would be needed for tools to find and read those schemas. 25 | 26 | ## Specification 27 | 28 | This pattern introduces the following properties to the Table Schema spec (using [the Frictionless Data core dictionary](https://github.com/frictionlessdata/specs/blob/master/schemas/dictionary/common.yml) as much as possible): 29 | 30 | - `name`: An identifier string for this schema. 31 | - `title`: A human-readable title for this schema. 32 | - `description`: A text description for this schema. 33 | - `keywords`: The keyword(s) that describe this schema. 34 | _Tags are useful to categorise and catalog schemas._ 35 | - `countryCode`: The ISO 3166-1 alpha-2 code for the country where this schema is primarily used. 36 | _Since open data schemas are very country-specific, it's useful to have this information in a structured way._ 37 | - `homepage`: The home on the web that is related to this schema. 
38 | - `path`: A fully qualified URL for this schema. 39 | _The direct path to the schema itself can be useful to help access it (i.e. machine readability)._ 40 | - `image`: An image to represent this schema. 41 | _An optional illustration can be useful for example in catalogs to differentiate schemas in a list._ 42 | - `licenses`: The license(s) under which this schema is published. 43 | - `resources`: Example tabular data resource(s) validated or invalidated against this schema. 44 | _Oftentimes, schemas are shared with example resources to illustrate them, with valid or even invalid files (e.g. with constraint errors)._ 45 | - `sources`: The source(s) used to create this schema. 46 | _In some cases, schemas are created after a legal text or some draft specification in a human-readable document. In those cases, it's useful to share them with the schema._ 47 | - `created`: The datetime on which this schema was created. 48 | - `lastModified`: The datetime on which this schema was last modified. 49 | - `version`: A unique version number for this schema. 50 | - `contributors`: The contributors to this schema. 
51 | 52 | ## Example schema 53 | 54 | ``` 55 | { 56 | "$schema": "https://specs.frictionlessdata.io/schemas/table-schema.json", 57 | "name": "irve", 58 | "title": "Infrastructures de recharge de véhicules électriques", 59 | "description": "Spécification du fichier d'échange relatif aux données concernant la localisation géographique et les caractéristiques techniques des stations et des points de recharge pour véhicules électriques", 60 | "keywords": [ 61 | "electric vehicle", 62 | "ev", 63 | "charging station", 64 | "mobility" 65 | ], 66 | "countryCode": "FR", 67 | "homepage": "https://github.com/etalab/schema-irve", 68 | "path": "https://github.com/etalab/schema-irve/raw/v1.0.1/schema.json", 69 | "image": "https://github.com/etalab/schema-irve/raw/v1.0.1/irve.png", 70 | "licenses": [ 71 | { 72 | "title": "Creative Commons Zero v1.0 Universal", 73 | "name": "CC0-1.0", 74 | "path": "https://creativecommons.org/publicdomain/zero/1.0/" 75 | } 76 | ], 77 | "resources": [ 78 | { 79 | "title": "Valid resource", 80 | "name": "exemple-valide", 81 | "path": "https://github.com/etalab/schema-irve/raw/v1.0.1/exemple-valide.csv" 82 | }, 83 | { 84 | "title": "Invalid resource", 85 | "name": "exemple-invalide", 86 | "path": "https://github.com/etalab/schema-irve/raw/v1.0.1/exemple-invalide.csv" 87 | } 88 | ], 89 | "sources": [ 90 | { 91 | "title": "Arrêté du 12 janvier 2017 relatif aux données concernant la localisation géographique et les caractéristiques techniques des stations et des points de recharge pour véhicules électriques", 92 | "path": "https://www.legifrance.gouv.fr/eli/arrete/2017/1/12/ECFI1634257A/jo/texte" 93 | } 94 | ], 95 | "created": "2018-06-29", 96 | "lastModified": "2019-05-06", 97 | "version": "1.0.1", 98 | "contributors": [ 99 | { 100 | "title": "John Smith", 101 | "email": "john.smith@etalab.gouv.fr", 102 | "organization": "Etalab", 103 | "role": "author" 104 | }, 105 | { 106 | "title": "Jane Doe", 107 | "email": "jane.doe@aol.com", 108 | "organization": 
"Civil Society Organization X", 109 | "role": "contributor" 110 | } 111 | ], 112 | "fields": [ ] 113 | } 114 | ``` 115 | 116 | ## Implementations 117 | 118 | The following links are actual examples already using this pattern, but not 100 % aligned with our proposal. The point is to make the Table Schema users converge towards a common pattern, before considering changing the spec. 119 | 120 | - @OpenDataFrance has initiated the creation of [Table Schemas](http://git.opendatafrance.net/scdl/) to standardise common French open data datasets. [Their Markdown documentation](http://scdl.opendatafrance.net/) is generated automatically from the schemas ([using some scripts](https://git.opendatafrance.net/validata/validata-doc-generator/)), including contextual information. 121 | - A tool called [Validata](https://go.validata.fr/) was developed, based on Goodtables, to help French open data producers follow the schemas. It uses metadata from the schemas to present them. 122 | - @Etalab has launched [schema.data.gouv.fr](http://schema.data.gouv.fr/), an official open data schema catalog, which is specific to France. [It needs additional metadata in the schemas to validate them](https://schema.data.gouv.fr/documentation/validation-schemas#validations-sp%C3%A9cifiques-au-format-table-schema). 123 | - [Example Table Schema](https://github.com/etalab/schema-irve/blob/master/schema.json) from @Etalab using metadata properties. 124 | -------------------------------------------------------------------------------- /content/docs/blog/2023-11-15-v2-announcement.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Package (v2) work started 3 | date: 2023-11-15 4 | excerpt: We are very pleased to announce that thanks to the generous support of NLnet we have kickstarted the Data Package (v2) work. After a first discussion with the community in the last call, we are setting up a working group to help us with the v2 release. 
5 | tags: 6 | - news 7 | authors: 8 | - sapetti9 9 | --- 10 | 11 | :::note 12 | Originally published on: https://frictionlessdata.io/blog/2023/11/15/frictionless-specs-update/ 13 | ::: 14 | 15 | We are very pleased to announce that thanks to the generous support of [NLnet](https://nlnet.nl/) we have kickstarted the [Data Package (v2)](https://specs.frictionlessdata.io/) update. 16 | 17 | After a first discussion with the community in [the last call](https://frictionlessdata.io/blog/2023/11/06/community-call/), we are setting up a working group to help us with the v2 release. 18 | 19 | Taking into account the group's concerns about the shortness of the initial timeline we proposed, we were able to work out an extension of the initial time frame until mid-2024. We are now aiming at **releasing the v2 of the Frictionless specs by June 2024**. 20 | 21 | ## Goal 22 | 23 | **Our overarching goal is to make the Frictionless specs**, namely the Data Package, Data Resource, File Dialect, and Table Schema standards, **a finished product**, establishing a sustainable mechanism for future maintenance extension. 24 | 25 | The modular approach will of course still be the cornerstone of the Frictionless specs v2, and we won’t introduce any breaking changes. 26 | 27 | ## Additional deliverables 28 | 29 | - Together with the working group, we will start a reference collection of data types we want the v2 to support in a GitHub issue. 30 | 31 | - We will work with leading data publishing portals, namely CKAN and Zenodo, to provide native Data Package export integration. 32 | 33 | - The Frictionless core team at OKFN, will work on a new small Python library, a metadata mapper, with the objective of making it simpler to create standard extensions, and being able to use it in the data publishing systems integrations (Zenodo/CKAN/etc). 34 | 35 | - A new website and new documentation will be published, with improved technical language and better accessibility. 
36 | 37 | ## Roadmap 38 | 39 | The project roadmap will be mainly guided by the list of issues submitted by the community throughout the years, and collected [here](https://github.com/frictionlessdata/specs/milestone/6). 40 | 41 | ## Social Aspect 42 | 43 | We will hold **monthly update calls** for the working group, which are of course not compulsory, and which will be open to the broader community too. In parallel **we will work asynchronously with the working group, using a review model** for any changes in the specifications themselves. 44 | 45 | ## Decision-making 46 | 47 | For every GitHub issue on the specs submitted by the community throughout the years, the Frictionless core team or any working group member will propose a recommended action to the working group. The action proposed will be **accepted if consensus with the working group is reached**, meaning we have arrived at a decision, or at least a compromise, that everyone can live with. 48 | 49 | The working group will be invited to share their view in a devoted GitHub issue. If a broader conversation is needed, the proposal discussion can be elevated to the monthly call for deliberation. The working group will be given a reasonable amount of time to review the proposed action. 50 | 51 | **Consensus is reached and the issue is closed if at least ⅔ of the working group members participate in the discussion and express their favourable opinion**. In case of serious and explicitly stated concerns, working group members who are core library investors (at the moment: OKFN, INBO, Datopian) may veto a proposed action. 52 | 53 | The community manager at OKFN will reach out to working group members who did not participate in the discussion to make sure their opinion is also captured. We want to ensure that reminders of your participation are handled with care. You can expect a gentle and considerate approach, such as receiving an email once every two weeks highlighting any issues where your vote is pending. 
Our goal is to keep you informed without causing any unnecessary inconvenience, so you can feel confident and at ease with the process. 54 | 55 | Decision-making on the technical maintenance of the specs will be centralised by OKFN. 56 | 57 | ## Diversity 58 | 59 | Leveraging diversity is an opportunity that we must embrace for the benefits it brings. Extensive research consistently demonstrates that diverse participation leads to better outcomes and more resilient technologies — diverse communities are unequivocally stronger communities. 60 | 61 | We acknowledge the need for greater diversity in our community, and we understand that fostering diversity, equity, and inclusion requires substantial effort. We cannot underestimate the challenges before us. Confronting the deep-rooted and centuries-old racist components of our system is a huge challenge, and we understand that achieving racial equity is a continual journey with no predefined endpoint. 62 | 63 | Our mission is to build equity within our community, fostering inclusion and amplifying diversity across all aspects of the Frictionless project. For our specs update, we are proactively encouraging every member of the community involved in the working group to identify and invite candidates interested in contributing to the update work, with a particular emphasis on enhancing the diversity of our group. 64 | 65 | We welcome contributors from diverse backgrounds, recognising that individuals with varied experiences bring forth new and innovative ideas that help create an inclusive and welcoming ecosystem. Our goal is to create an inclusive and friendly environment based on mutual respect and exchange, ensuring that no one feels isolated, and everyone is motivated to actively contribute to the project's development.
66 | 67 | Acknowledging the ethical imperative, we understand that recruiting individuals into a community historically designed to exclude them is neither ethical nor effective without adequate support. We therefore prioritise transparency with new community members, providing a [contributor's guide](https://frictionlessdata.io/work-with-us/contribute/), a [published roadmap](https://frictionlessdata.io/development/roadmap/), and comprehensive documentation to ensure clarity and support throughout their engagement. Our [Code of Conduct](https://frictionlessdata.io/work-with-us/code-of-conduct/) applies to all activities linked to the Frictionless Data project, and it is enforced by our community manager. As part of this project, we will make it a priority to enhance the accessibility of our documentation and website, aiming to make them as inclusive as possible. 68 | 69 | We welcome any other suggestions that would help us enhance diversity, equity, and inclusion in our work. Please let us know if you have a good idea to share! 70 | 71 | ## Funding 72 | 73 | This project is funded through [NGI0 Entrust](https://nlnet.nl/entrust), a fund established by [NLnet](https://nlnet.nl) with financial support from the European Commission's [Next Generation Internet](https://ngi.eu) program. Learn more at the [NLnet project page](https://nlnet.nl/project/FrictionlessStandards/). 74 | 75 | [NLnet foundation logo](https://nlnet.nl) 76 | [NGI Zero Logo](https://nlnet.nl/entrust) 77 | -------------------------------------------------------------------------------- /content/docs/blog/2024-06-26-v2-release.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Package (v2) is released! 3 | date: 2024-06-26 4 | excerpt: We are very excited to announce the release of the version 2.0 of the Data Package standard (previously known as Frictionless Specs). 
Thanks to the generous support of NLnet from November last year we were able to focus on reviewing Data Package in order to include features that were often requested throughout the years and improve extensibility for domain-specific implementations. 5 | tags: 6 | - news 7 | authors: 8 | - sapetti9 9 | --- 10 | 11 | :::note 12 | Originally published on: https://frictionlessdata.io/blog/2024/06/26/datapackage-v2-release/ 13 | ::: 14 | 15 | We are very excited to announce the release of version 2.0 of the Data Package standard (previously known as Frictionless Specs). Thanks to the generous support of [NLnet](https://nlnet.nl/), starting [from November last year](https://frictionlessdata.io/blog/2023/11/15/frictionless-specs-update/#additional-deliverables), we were able to focus on reviewing Data Package in order to include features that were often requested throughout the years and improve extensibility for domain-specific implementations. 16 | 17 | Data Package is a standard for data containerisation, which consists of a set of simple yet extensible specifications to describe datasets, data files and tabular data. It is a data definition language (DDL) and data API that enhances data FAIRness (findability, accessibility, interoperability, and reusability). Since its initial release in 2007, the community has suggested many features that could improve or extend the standard for use cases that weren't initially envisioned. Those were sometimes adopted, but there wasn't a versioning or governance process in place to truly evolve the standard. 18 | 19 | We started with [the issues that had accumulated in the GitHub repository](https://github.com/frictionlessdata/datapackage/issues) to build our Roadmap for v2. Many of the requested features are now adopted, making Data Package the answer for even more use cases. 20 | 21 | In parallel we assembled an outstanding Data Package Working Group composed of experts from the community. 
We carefully selected a diverse group of people who brought different use-cases, formats, and data types that we would need the Standard to support. Together with them, we crafted [a governance model](https://datapackage.org/overview/governance/) that is explicit, in order to create an environment that adequately supports new contributions and ensures project sustainability. 22 | 23 | We would like to thank each one of them for their remarkable contribution and for the incredibly insightful conversations we had during these months. Thank you to my colleague Evgeny Karev, Peter Desmet from the [Research Institute for Nature and Forest (INBO)](https://www.vlaanderen.be/inbo/en-gb/homepage/), Phil Schumm from [CTDS - University of Chicago](https://ctds.uchicago.edu/), Kyle Husmann from the [PennState University](https://www.psu.edu/), Keith Hughitt from the [National Institutes of Health](https://www.nih.gov/), Jakob Voß from the [Verbundzentrale des GBV (VZG)](https://www.gbv.de/), Ethan Welty from the [World Glacier Monitoring Service](https://wgms.ch/), Paul Walsh from [Link Digital](https://linkdigital.com.au/), Pieter Huybrechts from the [Research Institute for Nature and Forest (INBO)](https://www.vlaanderen.be/inbo/en-gb/homepage/), Martin Durant from [Anaconda, inc.](https://www.anaconda.com/), Adam Kariv from [The Public Knowledge Workshop](https://www.hasadna.org.il/), Johan Richer from [Multi](https://www.multi.coop/?locale=fr), and Stephen Diggs from the [University of California Digital Library](https://cdlib.org/). 24 | 25 | If you are curious about the conversations we had during the Standard review, they are all captured (and recorded) in [the blog summaries of the community calls](https://frictionlessdata.io/blog/). Alternatively you can also check out the [closed issues on GitHub](https://github.com/frictionlessdata/datapackage/milestone/6?closed=1). 26 | 27 | ## So what is new in version 2? 
28 | 29 | During these months we have been working on the core specifications that compose the Standard, namely: [Data Package](https://datapackage.org/standard/data-package/) – a simple container format for describing a coherent collection of data in a single ‘package’, [Data Resource](https://datapackage.org/standard/data-resource/) to describe and package a single data resource, [Table Dialect](https://datapackage.org/standard/table-dialect/) to describe how tabular data is stored in a file, and [Table Schema](https://datapackage.org/standard/table-schema/) to declare a schema for tabular data. 30 | 31 | During the update process we tried to be as little disruptive as possible, avoiding breaking changes when possible. 32 | 33 | We put a lot of effort into removing ambiguity, cutting or clarifying under-defined features, and promoting some well-oiled recipes into the Standard itself. An example of a recipe (or pattern, as they were called in v1) that has been promoted to the Standard is the [Missing values per field](https://specs.frictionlessdata.io/patterns/#missing-values-per-field). We also added a versioning mechanism, support for categorical data, and changes that make it easier to extend the Standard. 34 | 35 | If you would like to know the details about what has changed, see the [Changelog](https://datapackage.org/overview/changelog/) we published. 36 | 37 | To increase and facilitate adoption, we published a [metadata mapper written in Python](https://github.com/frictionlessdata/dplib-py). We have also worked on Data Package integrations for the most notable open data portals out there. Many people from the community use Zenodo, so we definitely wanted to target that. They have recently migrated their infrastructure to [Invenio RDM](https://inveniordm.web.cern.ch/) and we proposed a Data Package serializer for better integration with the Standard (more info on this integration will be announced in an upcoming blog!). 
We also created a pull request that exposes `datapackage.json` as a metadata export target in the [Open Science Framework](https://www.cos.io/) system, and built an extension that adds a `datapackage.json` endpoint to every dataset in [CKAN](https://github.com/frictionlessdata/ckanext-datapackage). 38 | 39 | If you want to know more about how to coordinate a standard update, we shared our main takeaways at FOSDEM 2024. The presentation was recorded, and you can watch it [here](https://fosdem.org/2024/schedule/event/fosdem-2024-3109-updating-open-data-standards/). 40 | 41 | ## And what happens now? 42 | 43 | While the work on Data Package 2.0 is done (for now!), we will keep working on the [Data Package website and documentation](https://datapackage.org/) together with the Working Group, to make it as clear and straightforward as possible for newcomers. In parallel, we will also start integrating the version 2 changes in the [software implementations](https://datapackage.org/overview/software/). 44 | 45 | Would you like to contribute? We always welcome new people to the project! Go and have a look at our [Contribution page](https://frictionlessdata.io/work-with-us/contribute/) to understand the general guidelines. Please get in touch with us by joining [our community chat on Slack](https://frictionlessdata.io/development/roadmap/) (also accessible via [Matrix](https://matrix.to/#/#frictionlessdata:matrix.okfn.org)), or feel free to jump into any of [the discussions on GitHub](https://github.com/frictionlessdata/datapackage/issues). 46 | 47 | ## Funding 48 | 49 | This project was funded through [NGI0 Entrust](https://nlnet.nl/entrust), a fund established by [NLnet](https://nlnet.nl) with financial support from the European Commission's [Next Generation Internet](https://ngi.eu) program. Learn more at the [NLnet project page](https://nlnet.nl/project/FrictionlessStandards/).
50 | 51 | [NLnet foundation logo](https://nlnet.nl) 52 | [NGI Zero Logo](https://nlnet.nl/entrust) 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Contributing 3 | sidebar: 4 | order: 8 5 | --- 6 | 7 | There are many ways to contribute to the Data Package standard. Here are a few: 8 | 9 | - **Use the standard**: The best way to contribute is to use the standard in your projects and provide feedback on what works and what doesn't. 10 | 11 | - **Spread the word**: Help us spread the word about the Data Package standard. The more people who know about it, the more people who can contribute. 12 | 13 | - **Participate in discussions**: Share your ideas and feedback in the [Data Package Discussions](https://github.com/frictionlessdata/datapackage/discussions). We use a voting system to prioritize issues and feature requests. 14 | 15 | - **Report issues**: If you find a bug or have a feature request, please report it in the [GitHub issue tracker](https://github.com/frictionlessdata/datapackage/issues). There are a few predefined issue templates to help you get started. 16 | 17 | - **Contribute to the repository**: You can contribute to the Data Package standard repository. Please read this guide for more details on the process. 18 | 19 | ## Research and development 20 | 21 | The Data Package standard is developed using a research and development model. This means that the standard is developed in an iterative way, with new features and changes being proposed, discussed, and implemented in a collaborative way. 22 | 23 | There are two main feedback trackers for the Data Package standard: 24 | 25 | 1. **[Data Package Discussions](https://github.com/frictionlessdata/datapackage/discussions)**: This is where general discussions and new feature proposals to the Data Package standard take place.
You can use this forum to share your ideas, ask questions, and provide feedback. Using discussions and the voting mechanism, the community and the Data Package Working Group can help prioritize issues and feature requests. The discussions are a good place to design and share a feature draft or implementation details. Generally speaking, this is a research phase. 26 | 27 | 1. **[Data Package Issues](https://github.com/frictionlessdata/datapackage/issues)**: This is where bugs, general improvements, and specific feature requests are tracked. The issues here must be actionable and have a clear scope. The Data Package maintainers might move an issue to the Discussions if it needs to be discussed or voted first or close the issue if it is not planned to be implemented. Generally speaking, this is the development phase. 28 | 29 | ## Branching and releasing 30 | 31 | :::note 32 | The term `profile` is used to refer to the Data Package profiles -- JSONSchema files generated from the `profiles` folder in the repository. 33 | ::: 34 | 35 | The Data Package project uses two main branches: 36 | 37 | 1. **`main`**: This is the main public branch. The live https://datapackage.org website is built from this branch. The following types of pull requests are allowed: documentation updates, chore improvements, and other minor changes. 38 | 39 | 1. **`next`**: This is the development branch. New features and changes are implemented in this branch. It also includes all the bug fixes that need to update profiles, as Data Package follows the immutable public profiles model. 40 | 41 | ### Releasing model 42 | 43 | This process is used for a new Data Package version release: 44 | 45 | - A maintainer creates a new pull request named `Release vX.Y` from the `next` branch to the `main` branch that includes a version bump in `package.json` with `npm run generate` command run to update the profiles. 
46 | 47 | - All pull requests meant to be included in the release should be merged into the `next` branch following the standard [review/voting process](/overview/governance#decision-making). It is recommended to include a changelog entry as a part of a pull request. Maintainers merge these pull requests using the "Squash and merge" strategy. 48 | 49 | - When the `Release vX.Y` is ready to be released, the maintainer ensures the changelog correctness, resolves conflicts if needed, and merges the `next` branch into the `main` branch using the "Create a merge commit" strategy. After the website successfully builds, the maintainer creates a new tag and release on GitHub linking the changelog entry with release notes for the version released. 50 | 51 | ### Codebase contribution 52 | 53 | The Data Package project is based on the Astro Starlight static-site generator and uses TypeScript/Node.js for the build process. Here are the steps to contribute to the codebase. 54 | 55 | 1. **Fork the repository**: Click the "Fork" button in the upper right corner of the repository page. 56 | 2. **Clone the repository**: Clone the forked repository to your local machine. 57 | 3. **Install dependencies**: Run `npm install` to install the project dependencies. 58 | 4. **Start a dev server**: Run `npm start` to see the changes locally. 59 | 5. **Make changes**: Make your changes to the codebase. 60 | 6. **Generate profiles**: Run `npm run generate` to generate the profiles. 61 | 7. **Format style**: Run `npm run format` to format the style. 62 | 8. **Run tests**: Run `npm test` to run the tests. 63 | 9. **Commit changes**: Commit your changes to your forked repository. 64 | 10. **Create a pull request**: Create a pull request from your forked repository to the appropriate branch of the main repository (see the "Branching and releasing" section above). 65 | 66 | When a pull request is created, it will be reviewed by the Data Package maintainers.
GitHub Automation creates a live preview site for every new pull request. Once the pull request is approved, it will be merged into the main repository. 67 | 68 | Note that the project uses two different types of produced artifacts: 69 | 70 | 1. **npm run generate**: This command generates the profiles from the `profiles` folder and syncs the profile version in the specs. It is required to run this command after changing the YAML/JSON files in the `profiles` folder, and the output has to be committed to the repository. 71 | 72 | 1. **npm run build**: This command builds the project and generates the static site. This command is run by automation and the output is not committed to the repository. As a contributor, you don't need to run this command although you can use `npm run preview` to debug the site built in production-mode locally. 73 | 74 | ## Backward compatibility 75 | 76 | This section outlines the rules for backward compatibility in the Data Package specification. 77 | 78 | 1. An existing `datapackage.json` that is valid MUST NOT become invalid in the future. 79 | 2. A new `datapackage.json` MAY be invalid because a software implementation does not support the latest version of the specification (yet). 80 | 81 | ### Versioning 82 | 83 | 1. The Data Package specification is versioned. This is new over 1.0, where changes were added without increasing the version. 84 | 2. The Data Package specification is versioned as a whole: a number of changes are considered, discussed, added or refused and released as a new minor version. 85 | 86 | ### Property changes 87 | 88 | 1. A property MUST NOT change `type` 89 | 2. A property MAY allow additional `type` (array) 90 | 3. A property MUST NOT become `required` 91 | 4. A property MAY become optional. Example: https://github.com/frictionlessdata/datapackage/pull/7 92 | 5. A property MUST NOT add `enum` 93 | 6. A property MAY remove `enum`. Example: https://github.com/frictionlessdata/specs/pull/809 94 | 7.
A property MUST NOT remove `enum` values 95 | 8. A property MAY add `enum` values 96 | 97 | ### Table schema changes 98 | 99 | 1. A field type MUST NOT change default `format` 100 | 2. A field type MUST NOT remove `format` pattern options 101 | 3. A field type MAY add `format` pattern options 102 | 103 | ### New properties 104 | 105 | 1. A new property MAY make a `datapackage.json` invalid (because of general rule 2). Example: https://github.com/frictionlessdata/datapackage/pull/24 106 | 2. A new property CANNOT be `required` 107 | 108 | ### Removed properties 109 | 110 | 1. Removing a property CANNOT make a `datapackage.json` invalid (because of general rule 1) 111 | -------------------------------------------------------------------------------- /content/docs/standard/glossary.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Glossary 3 | description: A glossary of terms used in the Data Package standard. 4 | sidebar: 5 | order: 6 6 | authors: 7 | - Rufus Pollock 8 | - Paul Walsh 9 | - Adam Kariv 10 | - Evgeny Karev 11 | - Peter Desmet 12 | - Data Package Working Group 13 | --- 14 | 15 | 16 | 17 | 18 | 19 | 20 |
Authors{frontmatter.authors.join(", ")}
21 | 22 |

{frontmatter.description}

23 | 24 | ## Language 25 | 26 | The key words `MUST`, `MUST NOT`, `REQUIRED`, `SHALL`, `SHALL NOT`, `SHOULD`, `SHOULD NOT`, `RECOMMENDED`, `MAY`, and `OPTIONAL` in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 27 | 28 | ## Definitions 29 | 30 | ### Profile 31 | 32 | A profile is a URL that `MUST`: 33 | 34 | - resolve to a valid JSON Schema descriptor under the `draft-07` version 35 | - be versioned and immutable i.e. once published under some version it cannot be changed 36 | 37 | A profile is both used as a metadata version identifier and the location of a JSON Schema against which a descriptor having it as a root level `$schema` property `MUST` be valid and `MUST` be validated. 38 | 39 | Similarly to [JSON Schema](https://json-schema.org/understanding-json-schema/reference/schema#schema), the `$schema` property has effect only on the root level of a descriptor. For example, if a Table Dialect is published as a file it can include a `$schema` property that affects its validation. If the same dialect is an object inlined into a Data Package descriptor, the dialect's `$schema` property `MUST` be ignored and the descriptor as a whole `MUST` be validated against a root level `$schema` property provided by the package. 40 | 41 | The Data Package Standard employs profiles as a mechanism for creating extensions as per the [Extensions](/standard/extensions) specification. 42 | 43 | :::note[Implementation Note] 44 | It is recommended to cache profiles using their URL as a unique key. 45 | ::: 46 | 47 | ### Descriptor 48 | 49 | The Data Package Standard uses a concept of a `descriptor` to represent metadata defined according to the core specifications such as Data Package or Table Schema. 50 | 51 | On the logical level, a descriptor is represented by a data structure. The data structure `MUST` be a JSON `object` as defined in [RFC 4627](http://www.ietf.org/rfc/rfc4627.txt).
52 | 53 | On the physical level, a descriptor is represented by a file. The file `MUST` contain a valid JSON `object` as defined in [RFC 4627](http://www.ietf.org/rfc/rfc4627.txt). 54 | 55 | This specification does not define any discoverability mechanisms. Any URI can be used to directly reference a file containing a descriptor. 56 | 57 | :::note[File Formats] 58 | A descriptor `MAY` be serialized using alternative formats like YAML or TOML as an internal part of some project or system if supported by corresponding implementations. A descriptor `SHOULD NOT` be externally published in any format other than JSON. 59 | ::: 60 | 61 | ### Custom Properties 62 | 63 | The Data Package specifications define a set of standard properties to be used and allow custom properties to be added. It is `RECOMMENDED` to use `namespace:property` naming convention for custom properties. It is `RECOMMENDED` to use [lower camel case](https://en.wikipedia.org/wiki/Camel_case) convention for naming custom properties, for example, `namespace:propertyName`. 64 | 65 | Adherence to a specification does not imply that additional, non-specified properties cannot be used: a descriptor `MAY` include any number of properties in addition to those described as required and optional properties. For example, if you were storing time series data and wanted to list the temporal coverage of the data in the Data Package you could add a property `dc:temporal` (cf [Dublin Core](http://dublincore.org/documents/usageguide/qualifiers.shtml#temporal)): 66 | 67 | ```json 68 | { 69 | "dc:temporal": { 70 | "name": "19th Century", 71 | "start": "1800-01-01", 72 | "end": "1899-12-31" 73 | } 74 | } 75 | ``` 76 | 77 | This flexibility enables specific communities to extend Data Packages as appropriate for the data they manage. As an example, the [Fiscal Data Package](https://fiscal.datapackage.org) specification extends Data Package for publishing and consuming fiscal data.
78 | 79 | ### URL or Path 80 | 81 | A `URL or Path` is a `string` with the following additional constraints: 82 | 83 | - `MUST` either be a URL or a POSIX path 84 | - [URLs](https://en.wikipedia.org/wiki/Uniform_Resource_Locator) `MUST` be fully qualified. `MUST` be using either `http`, `https`, `ftp`, or `ftps` scheme. (Absence of a scheme indicates `MUST` be a POSIX path) 85 | - [POSIX paths](https://en.wikipedia.org/wiki/Path_%28computing%29#POSIX_pathname_definition) (unix-style with `/` as separator) are supported for referencing local files, with the security restraint that they `MUST` be relative siblings or children of the descriptor. Absolute paths `/`, relative parent paths `../`, hidden folders starting from a dot `.hidden` `MUST NOT` be used. 86 | 87 | Example of a fully qualified URL: 88 | 89 | ```json 90 | { 91 | "path": "http://ex.datapackages.org/big-csv/my-big.csv" 92 | } 93 | ``` 94 | 95 | Example of a relative path that will work both as a relative path on disk and online: 96 | 97 | ```json 98 | { 99 | "path": "my-data-directory/my-csv.csv" 100 | } 101 | ``` 102 | 103 | :::caution[Security] 104 | `/` (absolute path) and `../` (relative parent path) are forbidden to avoid security vulnerabilities when implementing data package software. These limitations on resource `path` ensure that resource paths only point to files within the data package directory and its subdirectories. This prevents data package software being exploited by a malicious user to gain unintended access to sensitive information. For example, suppose a data package hosting service stores packages on disk and allows access via an API. A malicious user uploads a data package with a resource path like `/etc/passwd`. The user then requests the data for that resource and the server naively opens `/etc/passwd` and returns that data to the caller. 105 | ::: 106 | 107 | ### Tabular Data 108 | 109 | Tabular data consists of a set of rows. Each row has a set of fields (columns).
We usually expect that each row has the same set of fields and thus we can talk about _the_ fields for the table as a whole. 110 | 111 | In case of tables in spreadsheets or CSV files we often interpret the first row as a header row, giving the names of the fields. By contrast, in other situations, e.g. tables in SQL databases, the field names are explicitly designated. 112 | 113 | To illustrate, here's a classic spreadsheet table: 114 | 115 | ```text 116 | field field 117 | | | 118 | | | 119 | V V 120 | 121 | A | B | C | D <--- Row (Header) 122 | ------------------------------------ 123 | valA | valB | valC | valD <--- Row 124 | ... 125 | ``` 126 | 127 | In JSON, a table would be: 128 | 129 | ```json 130 | [ 131 | { "A": value, "B": value, ... }, 132 | { "A": value, "B": value, ... }, 133 | ... 134 | ] 135 | ``` 136 | 137 | ### Data Representation 138 | 139 | In order to talk about the representation and processing of tabular data from text-based sources, it is useful to introduce the concepts of the _physical_ and the _logical_ representation of data. 140 | 141 | The _physical representation_ of data refers to the representation of data as text on disk, for example, in a CSV or JSON file. This representation can have some _type_ information (JSON, where the primitive types that JSON supports can be used) or not (CSV, where all data is represented in string form). 142 | 143 | The _logical representation_ of data refers to the "ideal" representation of the data in terms of primitive types, data structures, and relations, all as defined by the specification. We could say that the specification is about the logical representation of data, as well as about ways in which to handle conversion of a physical representation to a logical one. 144 | 145 | In this document, we'll explicitly refer to either the _physical_ or _logical_ representation in places where it prevents ambiguity for those engaging with the specification, especially implementors. 
146 | 147 | For example, `constraints` `SHOULD` be tested on the logical representation of data, whereas a property like `missingValues` applies to the physical representation of the data. 148 | -------------------------------------------------------------------------------- /assets/logo-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 15 | 33 | 37 | 41 | 45 | 49 | 53 | 57 | 61 | 65 | 69 | 73 | 77 | 81 | 85 | 91 | 97 | 101 | 105 | 109 | 111 | 118 | 123 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /profiles/dictionary/common.yaml: -------------------------------------------------------------------------------- 1 | $schema: 2 | title: Profile 3 | description: The profile of this descriptor. 4 | type: string 5 | name: 6 | title: Name 7 | description: An identifier string. 8 | type: string 9 | context: This is ideally a url-usable and human-readable name. Name `SHOULD` be 10 | invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated. 11 | examples: 12 | - | 13 | { 14 | "name": "my-nice-name" 15 | } 16 | id: 17 | title: ID 18 | description: A property reserved for globally unique identifiers. Examples of 19 | identifiers that are unique include UUIDs and DOIs. 20 | context: A common usage pattern for Data Packages is as a packaging format within 21 | the bounds of a system or platform. In these cases, a unique identifier for 22 | a package is desired for common data handling workflows, such as updating an 23 | existing package. While at the level of the specification, global uniqueness 24 | cannot be validated, consumers using the `id` property `MUST` ensure identifiers 25 | are globally unique. 
26 | type: string 27 | examples: 28 | - | 29 | { 30 | "id": "b03ec84-77fd-4270-813b-0c698943f7ce" 31 | } 32 | - | 33 | { 34 | "id": "http://dx.doi.org/10.1594/PANGAEA.726855" 35 | } 36 | title: 37 | title: Title 38 | description: A human-readable title. 39 | type: string 40 | examples: 41 | - | 42 | { 43 | "title": "My Package Title" 44 | } 45 | email: 46 | title: Email 47 | description: An email address. 48 | type: string 49 | format: email 50 | examples: 51 | - | 52 | { 53 | "email": "example@example.com" 54 | } 55 | description: 56 | title: Description 57 | description: A text description. Markdown is encouraged. 58 | type: string 59 | examples: 60 | - | 61 | { 62 | "description": "# My Package description\nAll about my package." 63 | } 64 | example: 65 | title: Example 66 | description: An example value for the field. 67 | type: string 68 | examples: 69 | - | 70 | { 71 | "example": "Put here an example value for your field" 72 | } 73 | homepage: 74 | title: Home Page 75 | description: The home on the web that is related to this data package. 76 | type: string 77 | format: uri 78 | examples: 79 | - | 80 | { 81 | "homepage": "http://example.com/" 82 | } 83 | version: 84 | title: Version 85 | description: A unique version number for this descriptor. 86 | type: string 87 | examples: 88 | - | 89 | { 90 | "version": "0.0.1" 91 | } 92 | - | 93 | { 94 | "version": "1.0.1-beta" 95 | } 96 | path: 97 | title: Path 98 | description: A fully qualified URL, or a POSIX file path. 99 | type: string 100 | # https://regex101.com/r/sLZLeK/3 101 | pattern: '^((?=[^./~])(?!file:)((?!\/\.\.\/)(?!\\)(?!:\/\/).)*|(http|ftp)s?:\/\/.*)$' 102 | # ^ — start of string 103 | # ( — two alternatives, the POSIX path or the (HTTP|FTP)(S) URL 104 | # (?=[^./~]) — first character of POSIX path is not . 
/ or ~ 105 | # (?!file:) — must not start with file: 106 | # ( 107 | # (?!/\.\./) — must not contain /../ 108 | # (?!\\) — must not contain backslashes 109 | # (?!:\/\/) — must not contain URL-like schemes, ftp:// etc. 110 | # . — a character 111 | # )* — repeat to the end 112 | # | 113 | # (http|ftp)s?:\/\/.* — or must start http://, https://, ftp://, or ftps:// 114 | # )$ — end of string 115 | examples: 116 | - | 117 | { 118 | "path": "file.csv" 119 | } 120 | - | 121 | { 122 | "path": "http://example.com/file.csv" 123 | } 124 | context: "Implementations need to negotiate the type of path provided, and dereference 125 | the data accordingly." 126 | data: 127 | title: Data 128 | description: Inline data for this resource. 129 | tabularData: 130 | title: Tabular Data 131 | description: Inline data for this tabular resource. 132 | type: array 133 | minItems: 1 134 | items: 135 | type: 136 | - array 137 | - object 138 | created: 139 | title: Created 140 | description: The datetime on which this descriptor was created. 141 | context: The datetime must conform to the string formats for datetime as described 142 | in [RFC3339](https://tools.ietf.org/html/rfc3339#section-5.6) 143 | type: string 144 | format: date-time 145 | examples: 146 | - | 147 | { 148 | "created": "1985-04-12T23:20:50.52Z" 149 | } 150 | keywords: 151 | title: Keywords 152 | description: A list of keywords that describe this package. 153 | type: array 154 | minItems: 1 155 | items: 156 | type: string 157 | examples: 158 | - | 159 | { 160 | "keywords": [ 161 | "data", 162 | "fiscal", 163 | "transparency" 164 | ] 165 | } 166 | image: 167 | title: Image 168 | description: A image to represent this package. 169 | type: string 170 | examples: 171 | - | 172 | { 173 | "image": "http://example.com/image.jpg" 174 | } 175 | - | 176 | { 177 | "image": "relative/to/image.jpg" 178 | } 179 | anySchema: 180 | title: Schema 181 | description: A schema for this resource. 
182 | type: 183 | - string 184 | - object 185 | countryCode: 186 | title: ISO 3166-1 Alpha-2 Country code 187 | description: A valid 2-digit ISO country code (ISO 3166-1 alpha-2), or, an array of valid ISO codes. 188 | oneOf: 189 | - type: string 190 | pattern: "^[A-Z]{2}$" 191 | - type: array 192 | minItems: 1 193 | items: 194 | type: string 195 | pattern: "^[A-Z]{2}$" 196 | contributor: 197 | title: Contributor 198 | description: A contributor to this descriptor. 199 | properties: 200 | title: 201 | "$ref": "#/definitions/title" 202 | path: 203 | "$ref": "#/definitions/path" 204 | email: 205 | "$ref": "#/definitions/email" 206 | givenName: 207 | type: string 208 | familyName: 209 | type: string 210 | organization: 211 | title: Organization 212 | description: An organizational affiliation for this contributor. 213 | type: string 214 | roles: 215 | type: array 216 | minItems: 1 217 | items: 218 | type: string 219 | minProperties: 1 220 | context: Use of this property does not imply that the person was the original 221 | creator of, or a contributor to, the data in the descriptor, but refers to the 222 | composition of the descriptor itself. 223 | contributors: 224 | title: Contributors 225 | description: The contributors to this descriptor. 226 | type: array 227 | minItems: 1 228 | items: 229 | "$ref": "#/definitions/contributor" 230 | examples: 231 | - | 232 | { 233 | "contributors": [ 234 | { 235 | "title": "Joe Bloggs" 236 | } 237 | ] 238 | } 239 | - | 240 | { 241 | "contributors": [ 242 | { 243 | "title": "Joe Bloggs", 244 | "email": "joe@example.com", 245 | "role": "author" 246 | } 247 | ] 248 | } 249 | license: 250 | title: License 251 | description: A license for this descriptor. 
252 | type: object 253 | anyOf: 254 | - required: 255 | - name 256 | - required: 257 | - path 258 | properties: 259 | name: 260 | title: Open Definition license identifier 261 | description: MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/ 262 | type: string 263 | pattern: "^([-a-zA-Z0-9._])+$" 264 | path: 265 | "$ref": "#/definitions/path" 266 | title: 267 | "$ref": "#/definitions/title" 268 | context: Use of this property does not imply that the person was the original 269 | creator of, or a contributor to, the data in the descriptor, but refers to the 270 | composition of the descriptor itself. 271 | licenses: 272 | title: Licenses 273 | description: The license(s) under which this package is published. 274 | type: array 275 | minItems: 1 276 | items: 277 | "$ref": "#/definitions/license" 278 | context: This property is not legally binding and does not guarantee that the 279 | package is licensed under the terms defined herein. 280 | examples: 281 | - | 282 | { 283 | "licenses": [ 284 | { 285 | "name": "odc-pddl-1.0", 286 | "path": "http://opendatacommons.org/licenses/pddl/", 287 | "title": "Open Data Commons Public Domain Dedication and License v1.0" 288 | } 289 | ] 290 | } 291 | source: 292 | title: Source 293 | description: A source file. 294 | type: object 295 | minProperties: 1 296 | properties: 297 | title: 298 | "$ref": "#/definitions/title" 299 | path: 300 | "$ref": "#/definitions/path" 301 | email: 302 | "$ref": "#/definitions/email" 303 | version: 304 | type: string 305 | sources: 306 | title: Sources 307 | description: The raw sources for this resource. 
308 | type: array 309 | minItems: 0 310 | items: 311 | "$ref": "#/definitions/source" 312 | examples: 313 | - | 314 | { 315 | "sources": [ 316 | { 317 | "title": "World Bank and OECD", 318 | "path": "http://data.worldbank.org/indicator/NY.GDP.MKTP.CD" 319 | } 320 | ] 321 | } 322 | -------------------------------------------------------------------------------- /assets/logo-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 15 | 33 | 38 | 43 | 48 | 53 | 58 | 63 | 68 | 73 | 78 | 83 | 88 | 92 | 96 | 102 | 108 | 112 | 116 | 120 | 122 | 129 | 134 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /content/docs/recipes/relationship-between-fields.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Relationship between Fields 3 | --- 4 | 5 | 6 | 7 | 8 | 9 | 10 |
AuthorsPhilippe Thomy
11 | 12 | The structure of tabular datasets is simple: a set of fields grouped in a table. 13 | 14 | However, the data present is often complex and reflects an interdependence between fields (see explanations in the Internet-Draft [NTV tabular format (NTV-TAB)](https://www.ietf.org/archive/id/draft-thomy-ntv-tab-00.html#section-2)). 15 | 16 | Let's take the example of the following dataset: 17 | 18 | | country | region | code | population | 19 | | ------- | -------------- | ---- | ---------- | 20 | | France | European Union | FR | 449 | 21 | | Spain | European Union | ES | 48 | 22 | | Estonia | European Union | ES | 449 | 23 | | Nigeria | Africa | NI | 1460 | 24 | 25 | The data schema for this dataset has the following `description`: 26 | 27 | - for the `code` field : "country code alpha-2" 28 | - for the `population` field: "region population in 2022 (millions)" 29 | 30 | If we now look at the data we see that this dataset is not consistent because it contains two structural errors: 31 | 32 | - The value of the `code` field must be unique for each country; we therefore cannot have "ES" for both "Spain" and "Estonia", 33 | - The value of the `population` field of "European Union" cannot have two different values (449 and 48) 34 | 35 | These structural errors make the data unusable and yet they are not detected in the validation of the dataset (in the current version of Table Schema, there are no Descriptors to express this dependency between two fields). 36 | 37 | The purpose of this specification is therefore on the one hand to express these structural constraints in the data schema and on the other hand to define the controls associated with the validation of a dataset. 38 | 39 | ## Context 40 | 41 | This subject was studied and treated for databases and led to the definition of a methodology for specifying relationships and to the implementation of consistent relational databases.
42 | 43 | The methodology is mainly based on the [Entity–relationship model](https://en.wikipedia.org/wiki/Entity%E2%80%93relationship_model): 44 | 45 | > _An entity–relationship model (or ER model) describes interrelated things of interest in a specific domain of knowledge. A basic ER model is composed of entity types (which classify the things of interest) and specifies relationships that can exist between entities (instances of those entity types)._ 46 | 47 | The Entity–relationship model is broken down according to the conceptual-logical-physical hierarchy. 48 | 49 | The Relationships are expressed literally by a name and in a structured way by a [cardinality](). 50 | 51 | The Entity–relationship model for the example presented in the [Overview](#overview) is detailed in [this NoteBook](https://nbviewer.org/github/loco-philippe/Environmental-Sensing/blob/main/property_relationship/example_schema.ipynb). 52 | 53 | ## Principles 54 | 55 | Two aspects need to be addressed: 56 | 57 | - **relationship expression**: 58 | 59 | This methodology applied for databases can also be applied for tabular data whose structure is similar to that of relational database tables but whose representation of relationships is different (see [patterns](https://www.ietf.org/archive/id/draft-thomy-ntv-tab-00.html#section-2) used in tabular representations). 60 | 61 | This variation is explained in the [linked notebook](https://github.com/loco-philippe/Environmental-Sensing/blob/main/property_relationship/methodology.ipynb) and presented in the [example](https://nbviewer.org/github/loco-philippe/Environmental-Sensing/blob/main/property_relationship/example_schema.ipynb). 62 | 63 | Using a data model is a simple way to express relationships but it is not required. We can express the relationships directly at the data schema level. 
64 | 65 | - **validity of a dataset**: 66 | 67 | Checking the validity of a relationship for a defined dataset is one of the functions of [tabular structure analysis](https://github.com/loco-philippe/tab-analysis/blob/main/docs/tabular_analysis.pdf). It only requires counting functions accessible for any type of language (see [example of implementation](https://github.com/loco-philippe/Environmental-Sensing/blob/main/property_relationship/example.ipynb)). 68 | 69 | ## Proposed extensions 70 | 71 | A relationship is defined by the following information: 72 | 73 | - the two fields involved (the order of the fields is important with the `derived` link), 74 | - the textual representation of the relationship, 75 | - the nature of the relationship 76 | 77 | Three proposals for extending Table Schema are being considered: 78 | 79 | - New field descriptor 80 | - New constraint property 81 | - New table descriptor 82 | 83 | After discussions only the third is retained (a relationship between fields associated to a field) and presented below: 84 | 85 | - **New table descriptor**: 86 | 87 | A `relationships` table descriptor is added. 
88 | The properties associated with this descriptor could be: 89 | 90 | - `fields`: array with the names of the two fields involved 91 | - `description`: description string (optional) 92 | - `link`: nature of the relationship 93 | 94 | Pros: 95 | 96 | - No mixing with fields descriptors 97 | 98 | Cons: 99 | 100 | - Need to add a new table descriptor 101 | - The order of the fields in the array is important with the `derived` link 102 | 103 | Example: 104 | 105 | ```json 106 | { 107 | "fields": [ ], 108 | "relationships": [ 109 | { 110 | "fields" : ["country", "code"], 111 | "description" : "is the country code alpha-2 of", 112 | "link" : "coupled" 113 | }, 114 | { 115 | "fields" : ["region", "population"], 116 | "description" : "is the population of", 117 | "link" : "derived" 118 | } 119 | ] 120 | } 121 | ``` 122 | 123 | ## Specification 124 | 125 | Assuming solution 3 (table descriptor), the specification could be as follows: 126 | 127 | The `relationships` descriptor MAY be used to define the dependency between fields. 128 | 129 | The `relationships` descriptor, if present, MUST be an array where each entry in the array is an object and MUST contain two required properties and one optional: 130 | 131 | - `fields`: Array with the property `name` of the two fields linked (required) 132 | - `link` : String with the nature of the relationship between them (required) 133 | - `description` : String with the description of the relationship between the two fields (optional) 134 | 135 | The `link` property value MUST be one of the following three: 136 | 137 | - `derived`: 138 | 139 | - The values of the child (second array element) field are dependent on the values of the parent (first array element) field (i.e. a value in the parent field is associated with a single value in the child field). 140 | - e.g. The `name` field ["john", "paul", "leah", "paul"] and the `nickname` field ["jock", "paulo", "lili", "paulo"] are derived, 141 | - i.e.
if a new entry "leah" is added, the corresponding `nickname` value must be "lili". 142 | 143 | - `coupled`: 144 | 145 | - The values of one field are associated to the values of the other field. 146 | - e.g. The `Country` field ["france", "spain", "estonia", "spain"] and the `code alpha-2` field ["FR", "ES", "EE", "ES"] are coupled, 147 | - i.e. if a new entry "estonia" is added, the corresponding `code alpha-2` value must be "EE" just as if a new entry "EE" is added, the corresponding `Country` value must be "estonia". 148 | 149 | - `crossed`: 150 | 151 | - This relationship means that all the different values of one field are associated with all the different values of the other field. 152 | - e.g. the `Year` field [2020, 2020, 2021, 2021] and the `Population` field [ "estonia", "spain", "estonia", "spain"] are crossed 153 | - i.e the year 2020 is associated to population of "spain" and "estonia", just as the population of "estonia" is associated with years 2020 and 2021 154 | 155 | ## Implementations 156 | 157 | The implementation of a new descriptor is not discussed here (no particular point to address). 158 | 159 | The control implementation is based on the following principles: 160 | 161 | - calculation of the number of different values for the two fields, 162 | - calculation of the number of different values for the virtual field composed of tuples of each of the values of the two fields 163 | - comparison of these three values to deduce the type of relationship 164 | - comparison of the calculated relationship type with that defined in the data schema 165 | 166 | The [implementation example](https://github.com/loco-philippe/Environmental-Sensing/blob/main/property_relationship/example.ipynb) presents calculation function. 167 | 168 | An [analysis tool](https://github.com/loco-philippe/tab-analysis/blob/main/README.md) is also available and accessible from pandas data. 
169 | 170 | An example of implementation as `custom_check` is available [here](https://nbviewer.org/github/loco-philippe/Environmental-Sensing/blob/main/property_relationship/relationship_descriptor.ipynb). 171 | 172 | ## Notes 173 | 174 | If the relationships are defined in a data model, the generation of the relationships in the data schema can be automatic. 175 | 176 | The example presented in the [Overview](#overview) and the rule for converting a Data model into a Table schema are detailed in [this NoteBook](https://nbviewer.org/github/loco-philippe/Environmental-Sensing/blob/main/property_relationship/example_schema.ipynb). 177 | 178 | A complete example (60 000 rows, 50 fields) is used to validate the methodology and the tools: [open-data IRVE](https://www.data.gouv.fr/fr/reuses/les-donnees-irve-sont-elles-coherentes/) 179 | -------------------------------------------------------------------------------- /content/docs/standard/security.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Security 3 | description: Security considerations around Data Packages and Data Resources. 4 | sidebar: 5 | order: 7 6 | authors: 7 | - Johannes Jander 8 | - Data Package Working Group 9 | --- 10 | 11 | 12 | 13 | 14 | 15 | 16 |
Authors{frontmatter.authors.join(", ")}
17 | 18 |

{frontmatter.description}

19 | 20 | ## Language 21 | 22 | The key words `MUST`, `MUST NOT`, `REQUIRED`, `SHALL`, `SHALL NOT`, `SHOULD`, `SHOULD NOT`, `RECOMMENDED`, `MAY`, and `OPTIONAL` in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 23 | 24 | ## Usage Perspective 25 | 26 | Data Package is a container format that allows the creator to specify payload data (Resources) either as JSON objects/arrays or via pointers. There are two pointer formats: 27 | 28 | - local file system references. Those follow POSIX naming conventions and have to be relative to the Package Descriptor file ("datapackage.json"). Absolute paths are disallowed as they would open the door to data exfiltration attacks. They would also be rarely useful, considering you typically cannot know the file system layout of the user's computer 29 | - URLs as pointers to remote Resources. They are intended to load datasets from sites like statistical offices as the basis of Data Packages. Only HTTP/HTTPS URLs are allowed; library maintainers have to filter out others like file-URLs 30 | 31 | Both formats can open security holes that can be used to attack the user's computer and/or network. It is therefore STRONGLY recommended to limit the kind of Resource pointers you allow on your machines if you accept Data Packages from third party sources. 32 | 33 | ONLY in a trusted environment (eg. your own computer during development of Data Packages) is it recommended to allow all kinds of Resource pointers.
In every other environment, you MUST keep the various attack scenarios in mind and filter out potentially dangerous Resource pointer types 34 | 35 | ### Dangerous Descriptor/Resource pointer combinations 36 | 37 | How to read the table: if your "datapackage.json"-file comes from one of the sources on the left, you should treat Resources in the format on the top as: 38 | 39 | - red: disallowed 40 | - yellow: potentially dangerous 41 | - green: safe to use 42 | 43 | ![Security Matrix](./assets/security-matrix.png) 44 | 45 | #### Descriptor source is a URL 46 | 47 | If your descriptor is loaded via URL, and the server to which the URL points is not fully trusted, you SHOULD NOT allow Data Packages with Resource pointers in 48 | 49 | - URLs. As described in [issue #650](https://github.com/frictionlessdata/specs/issues/650), URLs crafted by the author of a Data Package can be used in a "keyhole" attack to probe your network layout. 50 | - Absolute file system references. Absolute paths can be used to exfiltrate system files (eg. /etc/passwd on Unix-like systems). Relative paths will be converted to URLs relative to the descriptor URL, so they will not load data from the local file system and are therefore safe. 51 | 52 | URL-based Resource pointers can furthermore be used for denial of service attacks on either the user's system or a service hosting Resource data. A relatively small Data Package could still hold thousands of Resource URLs that each could point to very large CSV files hosted somewhere. The Data Package processing library would load all those CSV files which might overwhelm the user's computer. If an attacker were able to spread such a malicious Data Package, this could exhaust the resources of a hosting service. 
53 | 54 | #### Descriptor source is a local relative path 55 | 56 | If your descriptor is loaded via a local relative path, and the source of the Data Package is not fully trusted, you SHOULD NOT allow Data Packages with Resource pointers in 57 | 58 | - URLs. As described in [issue #650](https://github.com/frictionlessdata/specs/issues/650), URLs crafted by the author of a Data Package can be used in a "keyhole" attack to probe your network layout. 59 | - Absolute file system references. Absolute paths can be used to exfiltrate system files (eg. /etc/passwd on Unix-like systems). Relative paths will be converted to paths relative to the Descriptor file system reference, so they are considered harmless. 60 | 61 | As long as the producer of the Data Package is on the same local network as the computer/server parsing it, it is considered safe to reference Resources via URLs, as the creator could map the network from their own workstation just as well as crafting malicious Data Packages. In the above table, this case is therefore coded in yellow. 62 | 63 | If Data Package parsing is part of a service offered to computers across subnets on the same LAN or even open to the internet, it is NEVER safe to accept Data Packages containing URL-based Resource pointers. 64 | 65 | #### Descriptor source is a local absolute path 66 | 67 | While it is never safe to accept absolute file paths for Resources, it is perfectly safe to accept them for Descriptor files. If your descriptor is loaded via a local absolute path, and the source of the Data Package is not fully trusted, you SHOULD NOT allow Data Packages with Resource pointers in 68 | 69 | - URLs. As described in [issue #650](https://github.com/frictionlessdata/specs/issues/650), URLs crafted by the author of a Data Package can be used in a "keyhole" attack to probe your network layout. 70 | - Absolute file system references. Absolute paths can be used to exfiltrate system files (eg. /etc/passwd on Unix-like systems).
Relative paths will be converted to paths relative to the Descriptor file system reference, so they are considered harmless. 71 | 72 | As long as the producer of the Data Package is on the same local network as the computer/server parsing it, it is considered safe to reference Resources via URLs, as the creator could map the network from their own workstation just as well as crafting malicious Data Packages. In the above table, this case is therefore coded in yellow. 73 | 74 | If Data Package parsing is part of a service offered to computers across subnets on the same LAN or even open to the internet, it is NEVER safe to accept Data Packages containing URL-based Resource pointers. 75 | 76 | #### Descriptor source is a JSON object 77 | 78 | If the Descriptor is not loaded from file but created in-memory and the source of the Data Package is not fully trusted, you SHOULD NOT allow Data Packages with Resource pointers in 79 | 80 | - URLs. As described in [issue #650](https://github.com/frictionlessdata/specs/issues/650), URLs crafted by the author of a Data Package can be used in a "keyhole" attack to probe your network layout. 81 | - file system references, relative or absolute. Absolute paths can be used to exfiltrate system files (eg. /etc/passwd on Unix-like systems). Relative paths would be constructed relative to the parsing software's working directory and could be used to guess at configuration files to exfiltrate. On the other hand, when creating a Data Package, it is safe to use relative paths if they are confined to a subdirectory. 82 | 83 | As long as the producer of the Data Package is on the same local network as the computer/server parsing it, it is considered safe to reference Resources via URLs, as the creator could map the network from their own workstation just as well as crafting malicious Data Packages. In the above table, this case is therefore coded in yellow.
84 | 85 | If Data Package parsing is part of a service offered to computers across subnets on the same LAN or even open to the internet, it is NEVER safe to accept Data Packages containing URL-based Resource pointers. 86 | 87 | #### Descriptor source is a self-created JSON object 88 | 89 | If the Descriptor is not loaded from file or created via a third-party application but by your software, it is generally assumed you know what you are doing, and therefore loading Resources from URLs or files is considered safe. You still SHOULD NOT use absolute paths as a matter of precaution - and implementing libraries should filter them out. 90 | 91 | ## Implementation Perspective 92 | 93 | Two kinds of Resource pointers can never be guaranteed to be totally safe: 94 | 95 | - Absolute file system references. Absolute paths can be used to exfiltrate system files (eg. /etc/passwd on Unix-like systems). In your implementation, you SHOULD either raise an error if an absolute local path is encountered or relativize it to the Descriptor path. 96 | - URLs. As described in [issue #650](https://github.com/frictionlessdata/specs/issues/650), URLs crafted by the author of a Data Package can be used in a "keyhole" attack to probe your user's network layout. It is up to the library creator to create means that allow their users to mitigate this attack. 97 | 98 | As URLs are part of the DNA of Data Packages, it is not advisable to disallow their use completely. However, you should allow for a security setting that stops your implementation from loading URL-based Resources. This could be done 99 | 100 | - via a setting switch (`insecure`/`default`) that allows the user of your library implementation to allow or disallow absolute file paths and URL-based Resource pointers 101 | - via a pluggable security filter that is applied as an interceptor _before_ loading any pointer-based Resources.
If you decide to use such a scheme, you SHOULD provide default implementations for a filter disallowing URL-based Resource and an insecure filter that allows loading of all Resources. 102 | 103 | ### Security Filters 104 | 105 | If disallowing all URL-based Resources is too heavy-handed and allowing all is too insecure, finer-grained filters should be implemented. Those finer security filters can be implemented as either blacklist or whitelist filters. Blacklist filters in principle allow all URLs and restrict some, whereas whitelist filters deny all as a default and have a limited list of allowed URLs. 106 | 107 | Blacklist filters in their most basic implementation would have to disallow all non-routed IP-addresses like the 192.168.x.x range or the 10.100.x.x range. This would blunt mapping attacks against the internal network of your users but needs to be well thought out as even one omission could endanger network security 108 | 109 | Whitelist filters are much more secure as they allow the loading of Resources from a named list of domains only, but might be too restrictive for some uses. 110 | -------------------------------------------------------------------------------- /content/docs/overview/changelog.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Changelog 3 | sidebar: 4 | order: 10 5 | --- 6 | 7 | This document includes all meaningful changes made to the **Data Package standard**. It does not cover changes made to other documents like Recipes or Guides. 8 | 9 | ## v2.0 10 | 11 | This release includes a rich set of specification improvements to make Data Package a finished product (see [announcement](https://frictionlessdata.io/blog/2023/11/15/frictionless-specs-update/)). All changes were reviewed and accepted by the Data Package Working Group. 
12 | 13 | > June 26, 2024 14 | 15 | ##### Tabular Data Package (removed) 16 | 17 | The [Tabular Data Package](https://specs.frictionlessdata.io/tabular-data-package/) (`package.profile: "tabular-data-package"`) is removed. It does not add any benefits over defining `type: "table"` (previously `resource.profile: "tabular-data-resource"`) for its resources, which is more modular ([#52](https://github.com/frictionlessdata/datapackage-v2-draft/pull/52)). 18 | 19 | ##### `package.$schema` (new) 20 | 21 | [`$schema`](/standard/data-package/#dollar-schema) replaces the `profile` property and allows easier extension and versioning ([#42](https://github.com/frictionlessdata/datapackage-v2-draft/pull/42)). 22 | 23 | ##### `package.contributors` (updated) 24 | 25 | [`contributors`](/standard/data-package/#contributors) was updated: 26 | 27 | - `contributor.title` is no longer required ([#7](https://github.com/frictionlessdata/datapackage-v2-draft/pull/7)). 28 | - `contributor.givenName` and `contributor.familyName` are new properties to specify the given and family name of a contributor, if it is a person ([#20](https://github.com/frictionlessdata/datapackage-v2-draft/pull/20)). 29 | - `contributor.role` has been deprecated in favour of `contributor.roles`, see further ([#18](https://github.com/frictionlessdata/datapackage-v2-draft/pull/18)). 30 | - `contributor.roles` is a new property that allows to specify multiple roles per contributor, rather than having to duplicate the contributor. It recommends following an established vocabulary and has suggested values that are different from the deprecated `contributor.role` ([#18](https://github.com/frictionlessdata/datapackage-v2-draft/pull/18)). 31 | 32 | ##### `package.version` (updated) 33 | 34 | [`version`](/standard/data-package/#version) is now included in the specification, while in Data Package v1 it was erroneously only part of the documentation ([#3](https://github.com/frictionlessdata/datapackage-v2-draft/pull/3)).
35 | 36 | ##### `package.sources` (updated) 37 | 38 | [`sources`](/standard/data-package/#sources) was updated: 39 | 40 | - `source.title` is no longer required ([#7](https://github.com/frictionlessdata/datapackage-v2-draft/pull/7)). 41 | - `source.version` is a new property to specify which version of a source was used ([#10](https://github.com/frictionlessdata/datapackage-v2-draft/pull/10)). 42 | 43 | ##### `resource.name` (updated) 44 | 45 | [name](/standard/data-resource/#name) now allows any string. It previously required the name to only consist of lowercase alphanumeric characters plus `.`, `-` and `_`. The property is still required and must be unique among resources ([#27](https://github.com/frictionlessdata/datapackage-v2-draft/pull/27)). 46 | 47 | ##### `resource.path` (updated) 48 | 49 | [path](/standard/data-resource/#path-or-data-required) now explicitly forbids hidden folders (starting with dot `.`) ([#19](https://github.com/frictionlessdata/datapackage-v2-draft/pull/19)). 50 | 51 | ##### `resource.type` (new) 52 | 53 | [`type`](/standard/data-resource/#type) allows to specify the resource type ([#51](https://github.com/frictionlessdata/datapackage-v2-draft/pull/51)). `resource.type: "table"` replaces `resource.profile: "tabular-data-resource"`. 54 | 55 | ##### `resource.$schema` (new) 56 | 57 | [`$schema`](/standard/data-resource/#dollar-schema) replaces the `profile` property and allows easier extension and versioning ([#42](https://github.com/frictionlessdata/datapackage-v2-draft/pull/42)). See also [resource.type](#resource-type-new). 58 | 59 | ##### `resource.encoding` (updated) 60 | 61 | [encoding](/standard/data-resource/#encoding)'s definition has been updated to support binary formats like Parquet ([#15](https://github.com/frictionlessdata/datapackage-v2-draft/pull/15)).
62 | 63 | ##### `resource.sources` (updated) 64 | 65 | [`sources`](/standard/data-resource/#sources) now inherits from a containing data package ([#57](https://github.com/frictionlessdata/datapackage-v2-draft/pull/57)). 66 | 67 | ##### Table Dialect (new) 68 | 69 | [Table Dialect](/standard/table-dialect) is a new specification that supersedes and extends the CSV Dialect specification. It supports other formats like JSON or Excel ([#41](https://github.com/frictionlessdata/datapackage-v2-draft/pull/41)). 70 | 71 | ##### `dialect.$schema` (new) 72 | 73 | [`$schema`](/standard/table-dialect/#dollar-schema) allows extension and versioning ([#42](https://github.com/frictionlessdata/datapackage-v2-draft/pull/42)). 74 | 75 | ##### `dialect.table` (new) 76 | 77 | [`table`](/standard/table-dialect/#table) allows to specify a table in a database ([#64](https://github.com/frictionlessdata/datapackage-v2-draft/pull/64)). 78 | 79 | ##### `schema.$schema` (new) 80 | 81 | [`$schema`](/standard/table-schema/#dollar-schema) allows extension and versioning ([#42](https://github.com/frictionlessdata/datapackage-v2-draft/pull/42)). 82 | 83 | ##### `schema.fieldsMatch` (new) 84 | 85 | [fieldsMatch](/standard/table-schema/#fieldsMatch) allows to specify how fields in a Table Schema match the fields in the data source. The default (`exact`) matches the Data Package v1 behaviour, but other values (e.g. `subset`, `superset`) allow to define fewer or more fields and match on field names. This new property extends and makes explicit the `schema_sync` option in Frictionless Framework ([#39](https://github.com/frictionlessdata/datapackage-v2-draft/pull/39)). 86 | 87 | ##### `schema.missingValues` (updated) 88 | 89 | [`missingValues`](/standard/table-schema/#missingValues) now allows to specify labeled missingness ([#68](https://github.com/frictionlessdata/datapackage-v2-draft/pull/68)).
90 | 91 | ##### `schema.primaryKey` (updated) 92 | 93 | [`primaryKey`](/standard/table-schema/#primaryKey) should now always be an array of strings, not a string ([#28](https://github.com/frictionlessdata/datapackage-v2-draft/pull/28)). 94 | 95 | ##### `schema.uniqueKeys` (new) 96 | 97 | [`uniqueKeys`](/standard/table-schema/#uniqueKeys) allows to specify which fields are required to have unique logical values. It is an alternative to `field.constraints.unique` and is modelled after the corresponding SQL feature ([#30](https://github.com/frictionlessdata/datapackage-v2-draft/pull/30)). 98 | 99 | ##### `schema.foreignKeys` (updated) 100 | 101 | [`foreignKeys`](/standard/table-schema/#foreignKeys) was updated: 102 | 103 | - It should now always be an array of strings, not a string ([#28](https://github.com/frictionlessdata/datapackage-v2-draft/pull/28)). 104 | - `foreignKeys.reference.resource` can now be omitted for self-referencing foreign keys. Previously it required setting `resource` to an empty string ([#29](https://github.com/frictionlessdata/datapackage-v2-draft/pull/29)). 105 | 106 | ##### `field.categories` (new) 107 | 108 | [`categories`](/standard/table-schema/#categories) adds support for categorical data for the `string` and `integer` field types ([#68](https://github.com/frictionlessdata/datapackage-v2-draft/pull/68)). 109 | 110 | ##### `field.categoriesOrdered` (new) 111 | 112 | [`categoriesOrdered`](/standard/table-schema/#categoriesOrdered) adds support for ordered categorical data for the `string` and `integer` field types ([#68](https://github.com/frictionlessdata/datapackage-v2-draft/pull/68)). 113 | 114 | ##### `field.missingValues` (new) 115 | 116 | [`missingValues`](/standard/table-schema/#field-missingValues) allows to specify missing values per field, and overwrites `missingValues` specified at a resource level ([#24](https://github.com/frictionlessdata/datapackage-v2-draft/pull/24)).
117 | 118 | ##### `integer` field type (updated) 119 | 120 | [`integer`](/standard/table-schema/#integer) now has a `groupChar` property. It was already available for `number` ([#6](https://github.com/frictionlessdata/datapackage-v2-draft/pull/6)). 121 | 122 | ##### `list` field type (new) 123 | 124 | [`list`](/standard/table-schema/#list) allows specifying fields containing collections of primitive values separated by a delimiter (e.g. `value1,value2`) ([#38](https://github.com/frictionlessdata/datapackage-v2-draft/pull/38)). 125 | 126 | ##### `datetime` field type (updated) 127 | 128 | [`datetime`](/standard/table-schema/#datetime)'s default `format` is now extended to allow optional milliseconds and timezone parts ([#23](https://github.com/frictionlessdata/datapackage-v2-draft/pull/23)). 129 | 130 | ##### `geopoint` field type (updated) 131 | 132 | [`geopoint`](/standard/table-schema/#geopoint)'s definition now clarifies that floating point numbers can be used for coordinate definitions ([#14](https://github.com/frictionlessdata/datapackage-v2-draft/pull/14)). 133 | 134 | ##### `any` field type (updated) 135 | 136 | [`any`](/standard/table-schema/#any) is now the default field type and clarifies that the field type should not be inferred if not provided ([#13](https://github.com/frictionlessdata/datapackage-v2-draft/pull/13)). 137 | 138 | ##### `minimum` and `maximum` field constraints (updated) 139 | 140 | [`minimum`](/standard/table-schema/#minimum) and [`maximum`](/standard/table-schema/#maximum) are now extended to support the `duration` field type ([#8](https://github.com/frictionlessdata/datapackage-v2-draft/pull/8)).
141 | 142 | ##### `exclusiveMinimum` and `exclusiveMaximum` field constraints (new) 143 | 144 | [`exclusiveMinimum`](/standard/table-schema/#exclusiveMinimum) and [`exclusiveMaximum`](/standard/table-schema/#exclusiveMaximum) can be used to specify exclusive minimum and maximum values ([#11](https://github.com/frictionlessdata/datapackage-v2-draft/pull/11)). 145 | 146 | ##### `jsonSchema` field constraint (new) 147 | 148 | [`jsonSchema`](/standard/table-schema/#jsonSchema) can be used for the `object` and `array` field types ([#32](https://github.com/frictionlessdata/datapackage-v2-draft/pull/32)). 149 | 150 | ## v1.0 151 | 152 | > September 5, 2017 153 | 154 | Please refer to the [Data Package (v1) website](https://specs.frictionlessdata.io/). 155 | --------------------------------------------------------------------------------