├── LEAD.md ├── spec ├── fixtures │ ├── foo.txt │ ├── empty_schema.json │ ├── empty_registry.json │ ├── not_a_json │ ├── test-pkg │ │ ├── README.md │ │ ├── test.csv │ │ └── valid-datapackage.json │ ├── fake_profile.json │ ├── base_profile.json │ ├── test-pkg.zip │ ├── data │ │ ├── names.csv │ │ └── prices.csv │ ├── datapackage_with_foo.txt_resource.json │ ├── base_registry.json │ ├── definitions.json │ ├── nested-definitions.json │ ├── registry_nonjson_profile.json │ ├── unicode_registry.json │ ├── base_and_tabular_registry.json │ └── invalid-datapackage.json ├── test-pkg.zip ├── spec_helper.rb ├── registry_spec.rb ├── helpers_spec.rb ├── profile_spec.rb ├── interpreter_spec.rb ├── resource_spec.rb └── package_spec.rb ├── .coveralls.yml ├── Gemfile ├── .rspec ├── lib ├── datapackage │ ├── version.rb │ ├── exceptions.rb │ ├── defaults.rb │ ├── registry.rb │ ├── interpreter.rb │ ├── profile.rb │ ├── helpers.rb │ ├── resource.rb │ └── package.rb ├── datapackage.rb └── profiles │ ├── registry.json │ ├── data-resource.json │ ├── data-package.json │ └── table-schema.json ├── .gitignore ├── .github ├── pull_request_template.md ├── issue_template.md ├── stale.yml └── workflows │ └── general.yml ├── bin ├── console └── datapackage ├── Makefile ├── LICENSE.md ├── Rakefile ├── datapackage.gemspec ├── CHANGELOG.md └── README.md /LEAD.md: -------------------------------------------------------------------------------- 1 | roll 2 | -------------------------------------------------------------------------------- /spec/fixtures/foo.txt: -------------------------------------------------------------------------------- 1 | bar 2 | -------------------------------------------------------------------------------- /spec/fixtures/empty_schema.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /spec/fixtures/empty_registry.json: 
# Gem source for Bundler. Use HTTPS so gem metadata and packages are fetched
# over TLS; a plain-HTTP source can be tampered with in transit and makes
# Bundler print an insecure-source warning.
source "https://rubygems.org"

# Runtime and development dependencies are declared in datapackage.gemspec.
gemspec
# Version of the datapackage gem.
#
# NOTE: the Makefile extracts this value by splitting the file on the
# double-quote character, so the literal below has to stay double-quoted and
# no other double quotes may appear in this file.
module DataPackage
  # Read by datapackage.gemspec when the gem is built.
  VERSION = "1.1.1"
end
Package", 5 | "schema": "http://example.com/one.json", 6 | "specification": "http://example.com" 7 | } 8 | ] 9 | -------------------------------------------------------------------------------- /spec/fixtures/definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "define": { 4 | "name": { 5 | "title": "Name", 6 | "type": "string" 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /spec/fixtures/nested-definitions.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "define": { 4 | "name": { 5 | "title": "Nested name", 6 | "type": "string" 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your pull request. Make sure that tests pass before publishing it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @roll (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your idea or problem. 
If it's a bug share as much as possible to reproduce it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @roll (lead of this repository) 8 | -------------------------------------------------------------------------------- /spec/fixtures/registry_nonjson_profile.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "notajson", 4 | "title": "Not a JSON", 5 | "schema": "http://example.com/one.json", 6 | "specification": "http://example.com", 7 | "schema_path": "not_a_json" 8 | } 9 | ] 10 | -------------------------------------------------------------------------------- /spec/fixtures/unicode_registry.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "base", 4 | "title": "Iñtërnâtiônàlizætiøn", 5 | "schema": "3", 6 | "specification": "4" 7 | }, 8 | { 9 | "id": "a", 10 | "title": "b", 11 | "schema": "c", 12 | "specification": "d" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "bundler/setup" 4 | require "datapackage" 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 
module DataPackage
  # Root of the library's error hierarchy.
  #
  # It derives from StandardError (not ::Exception) so that a bare `rescue`
  # or `rescue => e` in calling code catches library failures. Ruby reserves
  # direct ::Exception descendants for conditions such as Interrupt or
  # NoMemoryError that ordinary code should not swallow. Existing
  # `rescue DataPackage::Exception` and `rescue Exception` clauses keep working.
  class Exception < ::StandardError; end

  # Raised when the profile registry cannot be read or parsed.
  class RegistryException < Exception; end

  # Raised for resource-level failures.
  class ResourceException < Exception; end

  # Raised when a profile cannot be located, fetched or parsed.
  class ProfileException < Exception; end

  # Raised for package-level failures.
  class PackageException < Exception; end

  # Raised when data does not validate against a profile.
  class ValidationError < Exception; end
end
module DataPackage
  # Default values applied to descriptors when a property is missing.
  #
  # The outer Hash is frozen; the nested Hashes are not.
  DEFAULTS = {
    resource: {
      profile: 'data-resource',
      tabular_profile: 'tabular-data-resource',
      encoding: 'utf-8',
    },
    package: {
      profile: 'data-package',
    },
    schema: {
      format: 'default',
      type: 'string',
      missing_values: [''],
    },
    dialect: {
      delimiter: ',',
      doubleQuote: true,
      # Must be double-quoted: in a single-quoted Ruby literal \r and \n are
      # not escapes, so the value would be four literal characters, not CRLF.
      lineTerminator: "\r\n",
      quoteChar: '"',
      escapeChar: '\\', # a single backslash
      skipInitialSpace: true,
      header: true,
      caseSensitiveHeader: false,
    },
  }.freeze
end
12:43:01 -0500,17:00 11 | oil,100,1999,04/30/2019,2019-11-17 12:43:01 -0500,17:00 12 | tyh,300,1999,5/30/2019,2019-11-17 12:43:01 -0500,17:00 13 | hnb,750,1999,5/30/2019,2019-11-17 12:43:01 -0500,17:00 -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | 4 | # Number of days of inactivity before a stale issue is closed 5 | daysUntilClose: 30 6 | 7 | # Issues with these labels will never be considered stale 8 | exemptLabels: 9 | - feature 10 | - enhancement 11 | - bug 12 | 13 | # Label to use when marking an issue as stale 14 | staleLabel: wontfix 15 | 16 | # Comment to post when marking an issue as stale. Set to `false` to disable 17 | markComment: > 18 | This issue has been automatically marked as stale because it has not had 19 | recent activity. It will be closed if no further activity occurs. Thank you 20 | for your contributions. 21 | 22 | # Comment to post when closing a stale issue. Set to `false` to disable 23 | closeComment: false 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all list release templates version 2 | 3 | 4 | VERSION := $(shell cat lib/datapackage/version.rb | awk -F\" '{ print $$2 }' | xargs) 5 | LEAD := $(shell head -n 1 LEAD.md) 6 | 7 | 8 | all: list 9 | 10 | list: 11 | @grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n' 12 | 13 | release: 14 | git checkout master && git pull origin && git fetch -p && git diff 15 | @echo "\nContinuing in 10 seconds. Press to abort\n" && sleep 10 16 | @git log --pretty=format:"%C(yellow)%h%Creset %s%Cgreen%d" --reverse -20 17 | @echo "\nReleasing v$(VERSION) in 10 seconds. 
Press to abort\n" && sleep 10 18 | git commit -a -m 'v$(VERSION)' && git tag -a v$(VERSION) -m 'v$(VERSION)' 19 | git push --follow-tags 20 | 21 | templates: 22 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/issue_template.md 23 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/pull_request_template.md 24 | 25 | version: 26 | @echo $(VERSION) 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ##Copyright (c) 2016 The Open Data Institute 2 | 3 | #MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /bin/datapackage: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $:.unshift File.join( File.dirname(__FILE__), "..", "lib") 3 | require 'colorize' 4 | require 'datapackage' 5 | 6 | 7 | if ARGV.length == 0 8 | puts "Usage: datapackage " 9 | puts " should be a datapackage.json file, directory, or url" 10 | exit 1 11 | end 12 | 13 | package = DataPackage::Package.new(ARGV[0]) 14 | 15 | messages = package.validate(:simpledataformat) 16 | 17 | print "Package #{package.name.colorize(:light_blue)} is " 18 | 19 | if messages[:errors].length == 0 20 | puts "VALID".green 21 | else 22 | puts "INVALID".red 23 | puts 24 | puts "Errors: ".red 25 | messages[:errors].each_with_index do |msg, idx| 26 | puts "#{idx+1}. #{msg[:type].capitalize.to_s.red} error at #{msg[:fragment]}" 27 | puts " #{msg[:message]}" 28 | end 29 | end 30 | 31 | puts 32 | if !messages[:warnings].empty? 33 | puts "Warnings: ".yellow 34 | messages[:warnings].each_with_index do |msg, idx| 35 | puts "#{idx+1}. 
$:.unshift File.join( File.dirname(__FILE__), "lib")

require "bundler/gem_tasks"
require "rspec/core/rake_task"
require "open-uri"
require "json"
require "pathname" # Pathname is used below; do not rely on another library loading it

RSpec::Core::RakeTask.new(:spec)

task :default => :spec

# Refresh the bundled profile cache: download the remote registry into
# lib/profiles/registry.json, then download every schema the registry lists
# into the same folder.
task :update_profiles do

  DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'
  DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), 'lib', 'profiles', 'registry.json')

  cache_folder = Pathname.new(DEFAULT_REGISTRY_PATH).split[0]

  # OpenURI.open_uri is used instead of Kernel#open: since Ruby 3.0 Kernel#open
  # no longer opens URLs, and URI.open is not callable before Ruby 2.5. The
  # block form also closes the response handle, which the bare call did not.
  remote_registry = OpenURI.open_uri(DEFAULT_REGISTRY_URL) { |response| response.read }
  remote_resources = JSON.parse(remote_registry, symbolize_names: true)

  File.open(DEFAULT_REGISTRY_PATH, 'w') do |local_registry|
    local_registry << remote_registry
  end

  remote_resources.each do |resource_meta|
    # The local file keeps the last path segment of the schema URL.
    file_name = Pathname.new(resource_meta[:schema]).split[1]
    local_file_path = cache_folder.join(file_name)
    OpenURI.open_uri(resource_meta[:schema]) do |remote_resource|
      File.open(local_file_path, 'w') do |local_resource|
        local_resource << remote_resource.read
      end
    end
  end

end
module DataPackage
  # Allow loading Data Package profiles from the official registry.
  #
  # The registry is read from the JSON file at DEFAULT_REGISTRY_PATH and
  # exposed through #profiles as a Hash keyed by each entry's `id`.
  class Registry
    include DataPackage::Helpers

    # path     - location the registry was loaded from
    # profiles - Hash of profile id => registry entry (Hash with String keys)
    attr_reader :path, :profiles

    DEFAULT_REGISTRY_URL = 'https://specs.frictionlessdata.io/schemas/registry.json'.freeze
    DEFAULT_REGISTRY_PATH = File.join(File.expand_path(File.dirname(__FILE__)), '..', 'profiles', 'registry.json').freeze

    # Load the registry from DEFAULT_REGISTRY_PATH.
    #
    # Raises RegistryException when the file is missing, the source returns an
    # HTTP/socket error, the content is not valid JSON, or an entry lacks `id`.
    def initialize
      @path = DEFAULT_REGISTRY_PATH
      @profiles = get_registry(DEFAULT_REGISTRY_PATH)
    rescue Errno::ENOENT
      raise RegistryException.new 'Registry path is not valid'
    rescue OpenURI::HTTPError, SocketError => e
      raise RegistryException.new "Registry URL returned #{e.message}"
    rescue JSON::ParserError
      raise RegistryException.new 'Registry descriptor is not valid JSON'
    rescue KeyError
      raise RegistryException.new 'Property `id` is mandatory for profiles'
    end

    private

    # Index the registry entries loaded from `descriptor` by their `id`.
    #
    # `fetch` is used deliberately: an entry without `id` raises KeyError,
    # which #initialize turns into a RegistryException. Plain `[]` would store
    # the entry under a nil key and make that rescue unreachable.
    def get_registry(descriptor)
      load_json(descriptor).each_with_object({}) do |resource, registry|
        registry[resource.fetch('id')] = resource
      end
    end

  end
end
"ID", 31 | "title": "Transaction unique identifier", 32 | "type": "string" 33 | }, 34 | { 35 | "name": "Price", 36 | "title": "Price", 37 | "type": "integer" 38 | } 39 | ] 40 | } 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /spec/fixtures/test-pkg/valid-datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "test-package", 3 | "title": "Test Package", 4 | "description": "Description", 5 | "homepage": "http://example.org", 6 | "last_modified": "2013-12-05", 7 | "licenses": [ 8 | { 9 | "id": "odc-pddl", 10 | "uri": "http://opendatacommons.org/licenses/pddl/1.0/" 11 | } 12 | ], 13 | "profile": "tabular-data-package", 14 | "datapackage_version": "1.0-beta.5", 15 | "keywords": [ "test", "testing" ], 16 | "resources": [ 17 | { 18 | "name": "test-data", 19 | "path": "test.csv", 20 | "format": "csv", 21 | "mediatype": "text/csv", 22 | "encoding": "UTF-8", 23 | "profile": "tabular-data-resource", 24 | "dialect": { 25 | "delimiter": ",", 26 | "lineterminator": "\n", 27 | "quotechar": "\"", 28 | "doublequote": true, 29 | "skipinitialspace": true 30 | }, 31 | "schema": { 32 | "fields": [ 33 | { 34 | "name": "ID", 35 | "title": "Transaction unique identifier", 36 | "type": "string" 37 | }, 38 | { 39 | "name": "Price", 40 | "title": "Price", 41 | "type": "integer" 42 | } 43 | ] 44 | } 45 | } 46 | ] 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/general.yml: -------------------------------------------------------------------------------- 1 | name: general 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - v*.*.* 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | 15 | # Test 16 | 17 | test: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | matrix: 21 | ruby-version: [2.4] 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v2 25 | - name: Install 
require 'spec_helper'

# Specs for DataPackage::Registry: the bundled registry is loaded on
# construction, and both of its readers are read-only.
describe DataPackage::Registry do

  before(:each) do
    fixtures_dir = File.join('spec', 'fixtures')
    @base_registry_path = File.join(fixtures_dir, 'base_registry.json')
    @empty_registry_path = File.join(fixtures_dir, 'empty_registry.json')
    @base_and_tabular_registry_path = File.join(fixtures_dir, 'base_and_tabular_registry.json')
    @unicode_registry_path = File.join(fixtures_dir, 'unicode_registry.json')
    @base_profile_path = File.join(fixtures_dir, 'base_profile.json')
  end

  it 'has a default registry url' do
    expect(described_class.new.path).to_not be_nil
    expect(described_class.new.path).to_not be_empty
  end

  context 'profiles' do

    it 'returns list of profiles' do
      profiles = described_class.new.profiles

      expect(profiles).to_not be_empty
      expect(profiles['tabular-data-package'].keys).to include('id', 'schema', 'schema_path')
    end

    it 'cannot be set' do
      assign_profiles = -> { described_class.new.profiles = {} }
      expect(&assign_profiles).to raise_error(NoMethodError)
    end

    it 'returns nil when profile is not found' do
      missing = described_class.new.profiles['no-such-profile']
      expect(missing).to be_nil
    end

  end

  context 'path' do

    it 'defaults to the local cache path' do
      expected_path = DataPackage::Registry::DEFAULT_REGISTRY_PATH
      expect(described_class.new.path).to eq(expected_path)
    end

    it 'cannot be set' do
      assign_path = -> { described_class.new.path = "some/path" }
      expect(&assign_path).to raise_error(NoMethodError)
    end

  end

end
module DataPackage
  # Guesses a `type`/`format` pair for a CSV column by inspecting its values.
  #
  # The wrapped `csv` object must respond to `length` and `values_at(header)`
  # (the standard library's CSV::Table does).
  class Interpreter
    # Upper bound on how many values must be seen before a guess is accepted.
    INFER_THRESHOLD = 10
    # Share of inspected values that must agree with the candidate type.
    INFER_CONFIDENCE = 0.75
    YEAR_PATTERN = /[12]\d{3}/
    DATE_PATTERN = /(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4})|(\d{4}[-\/]\d{1,2}[-\/]\d{1,2})/
    DATETIME_PATTERN = /(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4}|\d{4}[-\/]\d{1,2}[-\/]\d{1,2}).\d{1,2}:\d{2}/
    # \A and \z anchor the whole value. ^ and $ anchor each line in Ruby, so a
    # multi-line cell such as "12\nabc" used to be classified as an integer.
    TIME_PATTERN = /\A\d{1,2}((:\d{1,2})|(am|pm|AM|PM))\z/
    INTEGER_PATTERN = /\A\d+\z/
    DEFAULT_TYPE_FORMAT = {'type' => 'any', 'format' => 'default'}

    attr_reader :csv, :threshold

    # csv - table-like object; `threshold` becomes the smaller of its length
    #       and INFER_THRESHOLD.
    def initialize(csv)
      @csv = csv
      @threshold = [csv.length, INFER_THRESHOLD].min
    end

    # Infer the type/format of the column named `header`.
    #
    # Values are inspected in order. Once at least `threshold` values have been
    # seen, the type of the value just inspected is returned as soon as its
    # share of all inspected values reaches INFER_CONFIDENCE. If that never
    # happens, DEFAULT_TYPE_FORMAT is returned.
    #
    # Returns a Hash with 'type' and 'format' keys.
    def type_and_format_at(header)
      values = csv.values_at(header).flatten
      counter = Hash.new(0)

      values.each_with_index do |value, i|
        inspection_count = i + 1

        inspection = inspect_value(value)
        counter[inspection] += 1
        next if inspection_count < threshold

        # Float division is required: with two Integers the quotient truncates
        # to 0 for anything short of a unanimous column, which silently turned
        # the 0.75 confidence level into 1.0.
        if counter[inspection].to_f / inspection_count >= INFER_CONFIDENCE
          return inspection
        end
      end

      DEFAULT_TYPE_FORMAT
    end

    # Classify one cell value.
    #
    # Non-String values and unrecognised strings yield DEFAULT_TYPE_FORMAT.
    # The checks run from most to least specific: year, datetime, date, time,
    # integer.
    def inspect_value(value)
      return DEFAULT_TYPE_FORMAT unless value.is_a?(String)

      # The length check makes the unanchored YEAR_PATTERN cover the whole value.
      if value.length == 4 && value.match(YEAR_PATTERN)
        return { 'type' => 'year', 'format' => 'default' }
      end

      # Datetime is tested before date because every datetime contains a date.
      if value.match(DATETIME_PATTERN)
        return { 'type' => 'datetime', 'format' => 'default' }
      end

      if value.match(DATE_PATTERN)
        return { 'type' => 'date', 'format' => 'default' }
      end

      if value.match(TIME_PATTERN)
        return { 'type' => 'time', 'format' => 'default' }
      end

      if value.match(INTEGER_PATTERN)
        return { 'type' => 'integer', 'format' => 'default' }
      end

      DEFAULT_TYPE_FORMAT
    end
  end
end
Returns true or raises DataPackage::ValidationError 30 | def validate(data) 31 | JSON::Validator.validate!(self, data) 32 | rescue JSON::Schema::ValidationError => e 33 | raise DataPackage::ValidationError.new(e.message) 34 | end 35 | 36 | # Lazily yields each ValidationError raised for data 37 | def iter_errors(data) 38 | JSON::Validator.fully_validate(self, data).each{ |error| yield error } 39 | end 40 | 41 | # Returns true if there are no errors in data, false if there are 42 | def valid?(data) 43 | JSON::Validator.validate(self, data) 44 | end 45 | 46 | alias :valid :valid? 47 | 48 | # Private 49 | 50 | private 51 | 52 | def get_profile_from_registry(descriptor) 53 | @registry = DataPackage::Registry.new 54 | profile_metadata = @registry.profiles.fetch(descriptor) 55 | if profile_metadata.fetch('schema_path', nil) 56 | profile_path = join_paths(base_path(@registry.path), profile_metadata['schema_path']) 57 | else 58 | profile_path = profile_metadata['schema'] 59 | end 60 | load_json(profile_path) 61 | rescue KeyError 62 | raise ProfileException.new "Couldn't find profile with id `#{descriptor}` in registry" 63 | end 64 | 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /lib/datapackage/helpers.rb: -------------------------------------------------------------------------------- 1 | module DataPackage 2 | module Helpers 3 | 4 | # Dereference a resource that can be a URL or path to a JSON file or a hash 5 | # Returns a Hash with all values that are URLs or paths dereferenced 6 | def dereference_descriptor(resource, base_path: nil, reference_fields: nil) 7 | options = { 8 | base_path: base_path, 9 | reference_fields: reference_fields, 10 | } 11 | case resource 12 | when Hash 13 | resource.inject({}) do |new_resource, (key, val)| 14 | if reference_fields.nil? 
|| reference_fields.include?(key) 15 | new_resource[key] = dereference_descriptor(val, **options) 16 | else 17 | new_resource[key] = val 18 | end 19 | new_resource 20 | end 21 | when Enumerable 22 | resource.map{ |el| dereference_descriptor(el, **options)} 23 | when String 24 | begin 25 | resolve_json_reference(resource, deep_dereference: true, base_path: base_path) 26 | rescue Errno::ENOENT 27 | resource 28 | end 29 | else 30 | resource 31 | end 32 | end 33 | 34 | # Resolve a reference to a JSON file; Returns the JSON as hash 35 | # Raises JSON::ParserError, OpenURI::HTTPError, SocketError or TypeError for invalid references 36 | def resolve_json_reference(reference, deep_dereference: false, base_path: nil) 37 | # Try to extract JSON from file or webpage 38 | reference = join_paths(base_path, reference) 39 | resolved_reference = load_json(reference) 40 | if deep_dereference == true 41 | dereference_descriptor(resolved_reference, base_path: base_path) 42 | else 43 | resolved_reference 44 | end 45 | end 46 | 47 | # Load JSON from path or URL; 48 | # Raises: Errno::ENOENT, OpenURI::HTTPError, SocketError, JSON::ParserError 49 | def load_json(reference) 50 | JSON.parse open(reference).read 51 | end 52 | 53 | def base_path(path_or_url) 54 | path_or_url = path_or_url.to_s 55 | if path_or_url.empty? 56 | nil 57 | elsif path_or_url =~ /\A#{URI::regexp}\z/ 58 | uri = URI.parse path_or_url 59 | return "#{uri.scheme}://#{uri.host}#{File.dirname uri.path}".chomp('/') 60 | else 61 | if File.directory?(path_or_url) 62 | return path_or_url 63 | else 64 | return File.expand_path File.dirname path_or_url 65 | end 66 | end 67 | end 68 | 69 | def join_paths(base_path, resource) 70 | if base_path.nil? || base_path.empty? 
71 | resource 72 | elsif base_path =~ /\A#{URI::regexp}\z/ 73 | URI.join(base_path, resource).to_s 74 | elsif File.directory?(base_path) 75 | File.join(base_path, resource).to_s 76 | elsif File.file?(base_path) 77 | base_path 78 | else 79 | resource 80 | end 81 | end 82 | 83 | def is_fully_qualified_url?(string) 84 | uri = URI.parse(string) 85 | uri.is_a?(URI::HTTP) && !uri.host.nil? 86 | rescue URI::InvalidURIError 87 | false 88 | end 89 | 90 | def is_safe_path?(string) 91 | path = Pathname.new(string) 92 | return false if path.absolute? 93 | return false unless /^\.+$/.match(path.to_s.split('/').first).nil? 94 | true 95 | end 96 | 97 | end 98 | end 99 | -------------------------------------------------------------------------------- /spec/helpers_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe DataPackage::Helpers do 4 | 5 | include DataPackage::Helpers 6 | 7 | context 'dereference_descriptor' do 8 | it 'doesn\'t change non-referencial values' do 9 | hash = {'string'=> 'string', 'integer'=> 6} 10 | 11 | expect(dereference_descriptor(hash)).to eq(hash) 12 | end 13 | 14 | xit 'dereferences URLs' do 15 | url = 'http://example.org/thing.json' 16 | nested_url = 'http://example.org/nested_thing.json' 17 | nested_body = {'nested_attr'=> 3} 18 | body = {'ref_to_nested_url'=> nested_url} 19 | 20 | FakeWeb.register_uri(:get, nested_url, :body => JSON.dump(nested_body)) 21 | FakeWeb.register_uri(:get, url, :body => JSON.dump(body)) 22 | 23 | expect(dereference_descriptor(url)).to eq({ 24 | 'ref_to_nested_url'=> { 25 | 'nested_attr'=> 3 26 | } 27 | }) 28 | end 29 | 30 | it 'dereferences paths' do 31 | filepath = File.join( File.dirname(__FILE__), 'fixtures', 'base_profile.json' ) 32 | 33 | expect(dereference_descriptor(filepath)).to eq({ 34 | 'title'=> 'base_profile' 35 | }) 36 | end 37 | 38 | it 'dereferences paths with base_path' do 39 | filename = 'base_profile.json' 40 | base_path = 
File.join( File.dirname(__FILE__), 'fixtures') 41 | 42 | expect(dereference_descriptor(filename, base_path: base_path)).to eq({ 43 | 'title'=> 'base_profile' 44 | }) 45 | end 46 | 47 | it 'dereferences nested reference' do 48 | filename = 'base_profile.json' 49 | base_path = File.join( File.dirname(__FILE__), 'fixtures') 50 | descriptor = { 51 | 'resources'=> [ 52 | { 53 | 'resource_attrs'=> filename 54 | } 55 | ] 56 | } 57 | 58 | expect(dereference_descriptor(descriptor, base_path: base_path)).to eq({ 59 | 'resources' => [ 60 | { 61 | 'resource_attrs' => { 62 | 'title'=> 'base_profile' 63 | } 64 | } 65 | ] 66 | }) 67 | end 68 | 69 | xit 'dereferences only reference_fields if present' do 70 | schema_url = 'http://example.org/schema.json' 71 | random_url = 'http://example.org/random.json' 72 | schema_body = {'fields'=> [{'name'=>'Price', 'title'=>'Price', 'type'=>'integer'}]} 73 | random_body = {'field_name'=> 3} 74 | descriptor = { 75 | 'schema'=> schema_url, 76 | 'random'=> random_url, 77 | } 78 | 79 | FakeWeb.register_uri(:get, schema_url, :body => JSON.dump(schema_body)) 80 | FakeWeb.register_uri(:get, random_url, :body => JSON.dump(random_body)) 81 | 82 | expect(dereference_descriptor(descriptor, reference_fields: ['schema'])).to eq({ 83 | 'schema'=> { 84 | 'fields' => [{'name'=>'Price', 'title'=>'Price', 'type'=>'integer'}] 85 | }, 86 | 'random'=> random_url 87 | }) 88 | end 89 | 90 | end 91 | 92 | context 'is_fully_qualified_url' do 93 | 94 | it 'is true for a fully qualified HTTP/HTTPS URL' do 95 | expect(is_fully_qualified_url?('http://example.org/schema.json')).to be true 96 | end 97 | 98 | it 'is false for non-URI strings' do 99 | expect(is_fully_qualified_url?('def not an URI')).to be false 100 | end 101 | 102 | it 'is false for URI strings if they are not HTTP(S)' do 103 | expect(is_fully_qualified_url?('ftp://example.org/schema.json')).to be false 104 | end 105 | 106 | end 107 | 108 | context 'is_safe_path' do 109 | 110 | it 'is true for a relative 
path' do 111 | expect(is_safe_path?('test-pkg/test.csv')).to be true 112 | end 113 | 114 | it 'is false for an absolute path' do 115 | expect(is_safe_path?('/test.csv')).to be false 116 | end 117 | 118 | it 'is false for a parent relative path' do 119 | expect(is_safe_path?('../fixtures/test-pkg/test.csv')).to be false 120 | end 121 | end 122 | 123 | end 124 | -------------------------------------------------------------------------------- /spec/profile_spec.rb: -------------------------------------------------------------------------------- 1 | describe DataPackage::Profile do 2 | include DataPackage::Helpers 3 | 4 | context 'initialize' do 5 | 6 | xit 'loads a profile from a url' do 7 | url = 'http://example.org/thing.json' 8 | body = File.read File.join('spec', 'fixtures', 'fake_profile.json') 9 | FakeWeb.register_uri(:get, url, :body => body) 10 | profile = DataPackage::Profile.new(url) 11 | 12 | expect(profile).to eq ({ 13 | 'key' => 'value' 14 | }) 15 | end 16 | 17 | xit 'loads a profile from the registry' do 18 | profile = DataPackage::Profile.new('tabular-data-package') 19 | 20 | expect(profile).to_not be_empty 21 | end 22 | 23 | context 'raises an error' do 24 | 25 | xit 'when the url is not json' do 26 | url = 'http://example.org/thing.json' 27 | body = File.read File.join('spec', 'fixtures', 'not_a_json') 28 | FakeWeb.register_uri(:get, url, :body => body) 29 | 30 | expect { DataPackage::Profile.new(url) }.to raise_exception { |exception| 31 | expect(exception).to be_a DataPackage::ProfileException 32 | expect(exception.message).to eq ("Profile is not valid JSON") 33 | } 34 | end 35 | 36 | xit 'when the url does not exist' do 37 | url = 'http://bad.org/terrible.json' 38 | FakeWeb.register_uri(:get, url, :body => "", :status => ["404", "Not Found"]) 39 | 40 | expect { DataPackage::Profile.new(url) }.to raise_exception { |exception| 41 | expect(exception).to be_a DataPackage::ProfileException 42 | expect(exception.message).to eq ("Profile URL returned 404 
Not Found") 43 | } 44 | end 45 | 46 | it 'when the profile id can\'t be found in the registry' do 47 | profile_id = 'no-such-profile' 48 | expect { DataPackage::Profile.new(profile_id) }.to raise_exception { |exception| 49 | expect(exception).to be_a DataPackage::ProfileException 50 | expect(exception.message).to start_with("Couldn't find profile with id `#{profile_id}`") 51 | } 52 | end 53 | 54 | it 'when the profile descriptor is not a string' do 55 | expect { DataPackage::Profile.new(200) }.to raise_exception { |exception| 56 | expect(exception).to be_a DataPackage::ProfileException 57 | expect(exception.message).to eq ("Profile must be a URL or registry identifier") 58 | } 59 | end 60 | 61 | xit 'when the profile is not a JSON' do 62 | url = 'http://bad.org/not_a_json' 63 | body = File.read(File.join('spec', 'fixtures', 'not_a_json')) 64 | FakeWeb.register_uri(:get, url, :body => body) 65 | 66 | expect { DataPackage::Profile.new(url)}.to raise_exception{ |exception| 67 | expect(exception).to be_a DataPackage::ProfileException 68 | expect(exception.message).to eq('Profile is not valid JSON') 69 | } 70 | end 71 | 72 | end 73 | 74 | end 75 | 76 | context 'validation' do 77 | 78 | before(:each) do 79 | @profile = DataPackage::Profile.new('data-package') 80 | @valid_datapackage = JSON.parse(File.read File.join('spec', 'fixtures', 'test-pkg', 'valid-datapackage.json')) 81 | @invalid_datapackage = JSON.parse(File.read File.join('spec', 'fixtures', 'invalid-datapackage.json')) 82 | end 83 | 84 | context 'validate' do 85 | 86 | it 'if true for valid package' do 87 | expect(@profile.validate(@valid_datapackage)).to be true 88 | end 89 | 90 | it 'raises ValidationError for invalid package' do 91 | expect{ @profile.validate(@invalid_datapackage) }.to raise_error(DataPackage::ValidationError) 92 | end 93 | 94 | end 95 | 96 | context 'valid' do 97 | 98 | it 'is true for valid package' do 99 | expect(@profile.valid?(@valid_datapackage)).to be true 100 | end 101 | 102 | it 'is 
false for invalid package' do 103 | expect(@profile.valid?(@invalid_datapackage)).to be false 104 | end 105 | 106 | end 107 | 108 | context 'iter_errors' do 109 | 110 | it 'is empty when the package is valid' do 111 | errors = [] 112 | @profile.iter_errors(@valid_datapackage){ |err| errors << err } 113 | expect(errors).to be_empty 114 | end 115 | 116 | it 'raises ValidationError for invalid package' do 117 | errors = [] 118 | @profile.iter_errors(@invalid_datapackage){ |err| errors << err } 119 | expect(errors).to_not be_empty 120 | end 121 | 122 | end 123 | end 124 | 125 | end 126 | -------------------------------------------------------------------------------- /lib/datapackage/resource.rb: -------------------------------------------------------------------------------- 1 | module DataPackage 2 | class Resource < Hash 3 | include DataPackage::Helpers 4 | 5 | # Public 6 | 7 | attr_reader :errors, :profile, :name, :source 8 | 9 | def self.infer(filepath) 10 | name = File.basename(filepath) 11 | if name[-4..-1] != '.csv' 12 | raise ResourceException.new('Inferrable resource must have .csv extension') 13 | end 14 | 15 | descr = { 16 | 'format' => 'csv', 17 | 'mediatype' => 'text/csv', 18 | 'name' => name[0...-4], 19 | 'path' => filepath, 20 | 'schema' => { 21 | 'fields' => [] 22 | }, 23 | } 24 | 25 | csv = CSV.read(filepath, headers: true) 26 | interpreter = DataPackage::Interpreter.new(csv) 27 | csv.headers.each do |header| 28 | field = { 'name' => header, 'type' => 'string'} 29 | field.merge! interpreter.type_and_format_at(header) 30 | descr['schema']['fields'] << field 31 | end 32 | 33 | new(descr) 34 | end 35 | 36 | def initialize(resource, base_path = '') 37 | self.merge! dereference_descriptor(resource, base_path: base_path, 38 | reference_fields: ['schema', 'dialect']) 39 | apply_defaults! 40 | @profile = DataPackage::Profile.new(self['profile']) 41 | @name = self.fetch('name') 42 | get_source!(base_path) 43 | apply_table_defaults! if self.tabular? 
44 | end 45 | 46 | def valid? 47 | @profile.valid?(self) 48 | end 49 | 50 | alias :valid :valid? 51 | 52 | def validate 53 | @profile.validate(self) 54 | end 55 | 56 | def iter_errors 57 | @profile.iter_errors(self){ |err| yield err } 58 | end 59 | 60 | def descriptor 61 | self.to_h 62 | end 63 | 64 | def inline? 65 | @source_type == 'inline' 66 | end 67 | 68 | alias :inline :inline? 69 | 70 | def local? 71 | @source_type == 'local' 72 | end 73 | 74 | alias :local :local? 75 | 76 | def remote? 77 | @source_type == 'remote' 78 | end 79 | 80 | alias :remote :remote? 81 | 82 | def miltipart? 83 | false 84 | end 85 | 86 | alias :miltipart :miltipart? 87 | 88 | def tabular? 89 | tabular_profile = DataPackage::DEFAULTS[:resource][:tabular_profile] 90 | return true if @profile.name == tabular_profile 91 | return true if DataPackage::Profile.new(tabular_profile).valid?(self) 92 | false 93 | end 94 | 95 | alias :tabular :tabular? 96 | 97 | def headers 98 | if !tabular 99 | nil 100 | end 101 | get_table.headers 102 | end 103 | 104 | def schema 105 | if !tabular 106 | nil 107 | end 108 | get_table.schema 109 | end 110 | 111 | def iter(*args, &block) 112 | if !tabular 113 | message ='Methods iter/read are not supported for non tabular data' 114 | raise ResourceException.new message 115 | end 116 | get_table.iter(*args, &block) 117 | end 118 | 119 | def read(*args, &block) 120 | if !tabular 121 | message ='Methods iter/read are not supported for non tabular data' 122 | raise ResourceException.new message 123 | end 124 | get_table.read(*args, &block) 125 | end 126 | 127 | # Deprecated 128 | 129 | def table 130 | get_table 131 | end 132 | 133 | # Private 134 | 135 | private 136 | 137 | def get_source!(base_path) 138 | if self.fetch('data', nil) 139 | @source = self['data'] 140 | @source_type = 'inline' 141 | elsif self.fetch('path', nil) 142 | unless is_safe_path?(self['path']) 143 | raise ResourceException.new "Path `#{self['path']}` is not safe" 144 | end 145 | @source = 
join_paths(base_path, self['path']) 146 | @source_type = is_fully_qualified_url?(@source) ? 'remote' : 'local' 147 | else 148 | raise ResourceException.new 'A resource descriptor must have a `path` or `data` property.' 149 | end 150 | end 151 | 152 | def get_table 153 | @table ||= TableSchema::Table.new(self.source, schema: self['schema']) if tabular? 154 | end 155 | 156 | def apply_defaults! 157 | self['profile'] ||= DataPackage::DEFAULTS[:resource][:profile] 158 | self['encoding'] ||= DataPackage::DEFAULTS[:resource][:encoding] 159 | end 160 | 161 | def apply_table_defaults! 162 | self['profile'] = DataPackage::DEFAULTS[:resource][:tabular_profile] 163 | if self.fetch('schema', nil) 164 | self['schema']['missingValues'] = DataPackage::DEFAULTS[:schema][:missing_values] 165 | self['schema'].fetch('fields', []).each do |field_descriptor| 166 | field_descriptor['type'] ||= DataPackage::DEFAULTS[:schema][:type] 167 | field_descriptor['format'] ||= DataPackage::DEFAULTS[:schema][:format] 168 | end 169 | end 170 | if self.fetch('dialect', nil) 171 | DataPackage::DEFAULTS[:dialect].each do |key, val| 172 | self['dialect'][key.to_s] ||= val 173 | end 174 | end 175 | end 176 | 177 | end 178 | end 179 | -------------------------------------------------------------------------------- /spec/interpreter_spec.rb: -------------------------------------------------------------------------------- 1 | describe DataPackage::Interpreter do 2 | describe '#initialize' do 3 | context 'when CSV is smaller than threshold' do 4 | it 'sets threshold as CSV length' do 5 | csv = CSV.read('spec/fixtures/data/names.csv', headers: true) 6 | interpreter = DataPackage::Interpreter.new(csv) 7 | 8 | expect(interpreter.threshold).to eq(csv.length) 9 | expect(interpreter.threshold).not_to eq(DataPackage::Interpreter::INFER_THRESHOLD) 10 | end 11 | end 12 | 13 | context 'when CSV is larger than threshold' do 14 | it 'sets threshold as constant' do 15 | csv = CSV.read('spec/fixtures/data/prices.csv', 
headers: true) 16 | interpreter = DataPackage::Interpreter.new(csv) 17 | 18 | expect(interpreter.threshold).to eq(DataPackage::Interpreter::INFER_THRESHOLD) 19 | end 20 | end 21 | end 22 | 23 | 24 | describe '#inspect_value' do 25 | # Which CSV we use doesn't matter here 26 | let!(:csv) { CSV.read('spec/fixtures/data/names.csv', headers: true) } 27 | subject { DataPackage::Interpreter.new(csv) } 28 | 29 | context 'dates' do 30 | it 'interprets %m-%d-%y' do 31 | expect(subject.inspect_value('01-30-91')).to eq({ 'type' => 'date', 'format' => 'default' }) 32 | end 33 | 34 | it 'interprets %-m-%-d-%Y' do 35 | expect(subject.inspect_value('1-4-1991')).to eq({ 'type' => 'date', 'format' => 'default' }) 36 | end 37 | 38 | it 'interprets %m/%d/%Y' do 39 | expect(subject.inspect_value('12/30/1991')).to eq({ 'type' => 'date', 'format' => 'default' }) 40 | end 41 | end 42 | 43 | context 'datetimes' do 44 | it 'interprets %Y-%m-%d %H:%M' do 45 | expect(subject.inspect_value('2019-11-17 12:43:01 -0500')).to eq({ 'type' => 'datetime', 'format' => 'default' }) 46 | end 47 | 48 | it 'interprets iso8601' do 49 | expect(subject.inspect_value('2019-11-17T13:23:20-05:00')).to eq({ 'type' => 'datetime', 'format' => 'default' }) 50 | end 51 | end 52 | 53 | context 'times' do 54 | it 'interprets %H:%M' do 55 | expect(subject.inspect_value('19:00')).to eq({ 'type' => 'time', 'format' => 'default' }) 56 | end 57 | 58 | it 'interprets %l:%M' do 59 | expect(subject.inspect_value('1:00')).to eq({ 'type' => 'time', 'format' => 'default' }) 60 | end 61 | 62 | it 'interprets %l%P' do 63 | expect(subject.inspect_value('1pm')).to eq({ 'type' => 'time', 'format' => 'default' }) 64 | end 65 | 66 | it 'interprets %l%p' do 67 | expect(subject.inspect_value('12AM')).to eq({ 'type' => 'time', 'format' => 'default' }) 68 | end 69 | end 70 | 71 | context 'integers' do 72 | it 'interprets integer as integer' do 73 | expect(subject.inspect_value('19')).to eq({ 'type' => 'integer', 'format' => 'default' }) 
74 | end 75 | 76 | it 'does not interpret numbers and letters as integer' do 77 | expect(subject.inspect_value('19sdsds')).to eq({ 'type' => 'any', 'format' => 'default' }) 78 | end 79 | end 80 | end 81 | 82 | describe '#type_and_format_at' do 83 | context 'year' do 84 | it 'returns year as type' do 85 | csv = CSV.read('spec/fixtures/data/prices.csv', headers: true) 86 | interpreter = DataPackage::Interpreter.new(csv) 87 | expect(interpreter.type_and_format_at('year_to_market')).to eq({ 'type' => 'year', 'format' => 'default' }) 88 | end 89 | end 90 | 91 | context 'date' do 92 | it 'returns date as type' do 93 | csv = CSV.read('spec/fixtures/data/prices.csv', headers: true) 94 | interpreter = DataPackage::Interpreter.new(csv) 95 | expect(interpreter.type_and_format_at('added_on')).to eq({ 'type' => 'date', 'format' => 'default' }) 96 | end 97 | end 98 | 99 | context 'time' do 100 | it 'returns time as type' do 101 | csv = CSV.read('spec/fixtures/data/prices.csv', headers: true) 102 | interpreter = DataPackage::Interpreter.new(csv) 103 | expect(interpreter.type_and_format_at('cutoff_time')).to eq({ 'type' => 'time', 'format' => 'default' }) 104 | end 105 | end 106 | 107 | context 'datetime' do 108 | it 'returns datetime as type' do 109 | csv = CSV.read('spec/fixtures/data/prices.csv', headers: true) 110 | interpreter = DataPackage::Interpreter.new(csv) 111 | expect(interpreter.type_and_format_at('updated_at')).to eq({ 'type' => 'datetime', 'format' => 'default' }) 112 | end 113 | end 114 | 115 | context 'integer' do 116 | it 'returns integer' do 117 | csv = CSV.read('spec/fixtures/data/prices.csv', headers: true) 118 | interpreter = DataPackage::Interpreter.new(csv) 119 | expect(interpreter.type_and_format_at('price')).to eq({ 'type' => 'integer', 'format' => 'default' }) 120 | end 121 | end 122 | 123 | context 'string' do 124 | it 'returns default' do 125 | csv = CSV.read('spec/fixtures/data/prices.csv', headers: true) 126 | interpreter = 
DataPackage::Interpreter.new(csv) 127 | expect(interpreter.type_and_format_at('id')).to eq({ 'type' => 'any', 'format' => 'default' }) 128 | end 129 | end 130 | end 131 | end 132 | -------------------------------------------------------------------------------- /lib/datapackage/package.rb: -------------------------------------------------------------------------------- 1 | require 'open-uri' 2 | 3 | module DataPackage 4 | class Package < Hash 5 | include DataPackage::Helpers 6 | 7 | # Public 8 | 9 | attr_reader :errors, :profile 10 | 11 | # Parse or create a data package 12 | # Supports reading data from JSON file, directory, and a URL 13 | # descriptor:: Hash or String 14 | # opts:: Options used to customize reading and parsing 15 | def initialize(descriptor = nil, opts: {}) 16 | @opts = opts 17 | @dead_resources = [] 18 | self.merge! parse_package(descriptor) 19 | @profile = DataPackage::Profile.new(self.fetch('profile', DataPackage::DEFAULTS[:package][:profile])) 20 | self['profile'] = @profile.name 21 | define_properties! 22 | load_resources! 23 | rescue OpenURI::HTTPError, SocketError => e 24 | raise PackageException.new "Package URL returned #{e.message}" 25 | rescue JSON::ParserError 26 | raise PackageException.new 'Package descriptor is not valid JSON' 27 | end 28 | 29 | def valid? 30 | return false unless @profile.valid?(self) 31 | return false if self['resources'].map{ |resource| resource.valid? }.include?(false) 32 | true 33 | end 34 | 35 | alias :valid :valid? 36 | 37 | def validate 38 | @profile.validate(self) 39 | self['resources'].each { |resource| resource.validate } 40 | true 41 | end 42 | 43 | def iter_errors 44 | errors = @profile.iter_errors(self){ |err| err } 45 | self['resources'].each do |resource| 46 | resource.iter_errors{ |err| errors << err } 47 | end 48 | errors.each{ |error| yield error } 49 | end 50 | 51 | def descriptor 52 | self.to_h 53 | end 54 | 55 | def resources 56 | update_resources! 
57 | self['resources'] 58 | end 59 | 60 | def resource_names 61 | update_resources! 62 | self['resources'].map{|res| res.name} 63 | end 64 | 65 | def get_resource(resource_name) 66 | update_resources! 67 | self['resources'].find{ |resource| resource.name == resource_name } 68 | end 69 | 70 | def add_resource(resource) 71 | resource = load_resource(resource) 72 | self['resources'].push(resource) 73 | begin 74 | self.validate 75 | resource 76 | rescue DataPackage::ValidationError 77 | self['resources'].pop 78 | nil 79 | end 80 | end 81 | 82 | def remove_resource(resource_name) 83 | update_resources! 84 | resource = get_resource(resource_name) 85 | self['resources'].reject!{ |resource| resource.name == resource_name } 86 | resource 87 | end 88 | 89 | def save(target=@location) 90 | update_resources! 91 | File.open(target, "w") { |file| file << JSON.pretty_generate(self) } 92 | true 93 | end 94 | 95 | # Deprecated 96 | 97 | # Returns the directory for a local file package or base url for a remote 98 | # Returns nil for an in-memory object (because it has no base as yet) 99 | def base 100 | # user can override base 101 | return @opts[:base] if @opts[:base] 102 | return '' unless @location 103 | # work out base directory or uri 104 | if local? 105 | return File.dirname(@location) 106 | else 107 | return @location.split('/')[0..-2].join('/') 108 | end 109 | end 110 | 111 | # Is this a local package? Returns true if created from an in-memory object or a file/directory reference 112 | def local? 
113 | return @local if @local 114 | return false if @location =~ /\A#{URI::regexp}\z/ 115 | true 116 | end 117 | 118 | def property(property, default = nil) 119 | self[property] || default 120 | end 121 | 122 | def infer(base_path: nil, directory: nil) 123 | raise PackageException.new('Base path is required for infer') unless base_path 124 | raise PackageException.new('Directory is required for infer') unless directory 125 | 126 | dir_path = File.join(base_path, directory) 127 | Dir.glob("#{dir_path}/*.csv") do |filename| 128 | resource = Resource.infer(filename) 129 | add_resource(resource) 130 | end 131 | 132 | # If there were CSVs, this is a tabular data package 133 | if resources.count > 0 134 | self['profile'] = 'tabular-data-package' 135 | end 136 | 137 | descriptor 138 | end 139 | 140 | # Private 141 | 142 | private 143 | 144 | def define_properties! 145 | (@profile['properties'] || {}).each do |k, v| 146 | next if k == 'resources' || k == 'profile' 147 | define_singleton_method("#{k.to_sym}=", proc { |p| set_property(k, p) }) 148 | define_singleton_method(k.to_sym.to_s, proc { property k, default_value(v) }) 149 | end 150 | end 151 | 152 | def load_resources! 153 | self['resources'] ||= [] 154 | update_resources! 155 | end 156 | 157 | def update_resources! 158 | self['resources'].map! do |resource| 159 | begin 160 | load_resource(resource) 161 | rescue ResourceException 162 | @dead_resources << resource 163 | nil 164 | end 165 | end.compact! 166 | end 167 | 168 | def load_resource(resource) 169 | if resource.is_a?(Resource) 170 | resource 171 | else 172 | Resource.new(resource, base) 173 | end 174 | end 175 | 176 | def default_value(field_data) 177 | case field_data['type'] 178 | when 'array' 179 | [] 180 | when 'object' 181 | {} 182 | else 183 | nil 184 | end 185 | end 186 | 187 | def set_property(key, value) 188 | self[key] = value 189 | end 190 | 191 | def parse_package(descriptor) 192 | # TODO: base directory/url 193 | if descriptor.nil? 
194 | {} 195 | elsif descriptor.class == Hash 196 | descriptor 197 | else 198 | read_package(descriptor) 199 | end 200 | end 201 | 202 | def read_package(descriptor) 203 | if File.extname(descriptor) == '.zip' 204 | unzip_package(descriptor) 205 | else 206 | @location = descriptor.to_s 207 | load_json(descriptor) 208 | end 209 | end 210 | 211 | def unzip_package(descriptor) 212 | descriptor = write_to_tempfile(descriptor) if descriptor =~ /\A#{URI::regexp}\z/ 213 | dir = Dir.mktmpdir 214 | package = {} 215 | Zip::File.open(descriptor) do |zip_file| 216 | # Extract all the files 217 | zip_file.each { |entry| entry.extract("#{dir}/#{File.basename entry.name}") } 218 | # Get and parse the datapackage metadata 219 | entry = zip_file.glob("*/#{@opts[:default_filename] || 'datapackage.json'}").first 220 | package = JSON.parse(entry.get_input_stream.read) 221 | end 222 | # Set the base dir to the directory we unzipped to 223 | @opts[:base] = dir 224 | # This is now a local file, not a URL 225 | @local = true 226 | package 227 | end 228 | 229 | def write_to_tempfile(url) 230 | tempfile = Tempfile.new('datapackage') 231 | tempfile.write(open(url).read) 232 | tempfile.rewind 233 | tempfile 234 | end 235 | end 236 | end 237 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [0.1.1](https://github.com/theodi/datapackage.rb/tree/0.1.1) (2016-10-12) 4 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/0.1.0...0.1.1) 5 | 6 | **Closed issues:** 7 | 8 | - Support of tabular resources [\#25](https://github.com/theodi/datapackage.rb/issues/25) 9 | 10 | ## [0.1.0](https://github.com/theodi/datapackage.rb/tree/0.1.0) (2016-09-12) 11 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/CURRENT...0.1.0) 12 | 13 | **Merged pull requests:** 14 | 15 | - New validation API 
[\#24](https://github.com/theodi/datapackage.rb/pull/24) ([pezholio](https://github.com/pezholio)) 16 | - Manipulate Datapackage [\#23](https://github.com/theodi/datapackage.rb/pull/23) ([pezholio](https://github.com/pezholio)) 17 | - Load schema for validation [\#22](https://github.com/theodi/datapackage.rb/pull/22) ([pezholio](https://github.com/pezholio)) 18 | - Obtain a spec for a data package profile from the data package registry [\#21](https://github.com/theodi/datapackage.rb/pull/21) ([pezholio](https://github.com/pezholio)) 19 | - Modarn ruby [\#20](https://github.com/theodi/datapackage.rb/pull/20) ([pezholio](https://github.com/pezholio)) 20 | 21 | ## [CURRENT](https://github.com/theodi/datapackage.rb/tree/CURRENT) (2014-02-25) 22 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2014-02-25_10-30-33...CURRENT) 23 | 24 | ## [master-2014-02-25_10-30-33](https://github.com/theodi/datapackage.rb/tree/master-2014-02-25_10-30-33) (2014-02-25) 25 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2014-01-06_18-19-11...master-2014-02-25_10-30-33) 26 | 27 | **Implemented enhancements:** 28 | 29 | - Add warning if resource doesn't have a mediatype/format [\#16](https://github.com/theodi/datapackage.rb/issues/16) 30 | - Improve reporting when json table fields are invalid [\#13](https://github.com/theodi/datapackage.rb/issues/13) 31 | 32 | **Fixed bugs:** 33 | 34 | - csv? 
method should check more than format and mediatype [\#14](https://github.com/theodi/datapackage.rb/issues/14) 35 | 36 | **Merged pull requests:** 37 | 38 | - Fix ruby version, accidentally committed [\#19](https://github.com/theodi/datapackage.rb/pull/19) ([ldodds](https://github.com/ldodds)) 39 | 40 | ## [master-2014-01-06_18-19-11](https://github.com/theodi/datapackage.rb/tree/master-2014-01-06_18-19-11) (2014-01-06) 41 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_20-12-47...master-2014-01-06_18-19-11) 42 | 43 | **Implemented enhancements:** 44 | 45 | - Add warnings about mis-spelt key names [\#17](https://github.com/theodi/datapackage.rb/issues/17) 46 | - Add warnings based on breaking spec changes from earlier version of json table schema [\#15](https://github.com/theodi/datapackage.rb/issues/15) 47 | - Add ANSI codes to command-line tool [\#12](https://github.com/theodi/datapackage.rb/issues/12) 48 | 49 | **Merged pull requests:** 50 | 51 | - Improve warnings [\#18](https://github.com/theodi/datapackage.rb/pull/18) ([ldodds](https://github.com/ldodds)) 52 | 53 | ## [master-2013-12-05_20-12-47](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_20-12-47) (2013-12-05) 54 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_20-11-42...master-2013-12-05_20-12-47) 55 | 56 | ## [master-2013-12-05_20-11-42](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_20-11-42) (2013-12-05) 57 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_19-29-37...master-2013-12-05_20-11-42) 58 | 59 | **Closed issues:** 60 | 61 | - When a CSV file is opened to read headers ensure we use right dialect [\#9](https://github.com/theodi/datapackage.rb/issues/9) 62 | 63 | **Merged pull requests:** 64 | 65 | - Parse default options to CSV parser based on CSVDDF dialect [\#11](https://github.com/theodi/datapackage.rb/pull/11) ([ldodds](https://github.com/ldodds)) 
66 | 67 | ## [master-2013-12-05_19-29-37](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_19-29-37) (2013-12-05) 68 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_15-40-07...master-2013-12-05_19-29-37) 69 | 70 | **Closed issues:** 71 | 72 | - Create a better structure for reporting errors and warnings [\#5](https://github.com/theodi/datapackage.rb/issues/5) 73 | 74 | **Merged pull requests:** 75 | 76 | - Improve error reporting [\#10](https://github.com/theodi/datapackage.rb/pull/10) ([ldodds](https://github.com/ldodds)) 77 | 78 | ## [master-2013-12-05_15-40-07](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_15-40-07) (2013-12-05) 79 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_15-04-11...master-2013-12-05_15-40-07) 80 | 81 | **Merged pull requests:** 82 | 83 | - Add command line tool [\#8](https://github.com/theodi/datapackage.rb/pull/8) ([ldodds](https://github.com/ldodds)) 84 | 85 | ## [master-2013-12-05_15-04-11](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_15-04-11) (2013-12-05) 86 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_14-58-56...master-2013-12-05_15-04-11) 87 | 88 | ## [master-2013-12-05_14-58-56](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_14-58-56) (2013-12-05) 89 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-05_10-40-51...master-2013-12-05_14-58-56) 90 | 91 | **Implemented enhancements:** 92 | 93 | - Implement the additional validation rules for SDF [\#4](https://github.com/theodi/datapackage.rb/issues/4) 94 | 95 | **Merged pull requests:** 96 | 97 | - Implement SDF validation [\#7](https://github.com/theodi/datapackage.rb/pull/7) ([ldodds](https://github.com/ldodds)) 98 | 99 | ## [master-2013-12-05_10-40-51](https://github.com/theodi/datapackage.rb/tree/master-2013-12-05_10-40-51) (2013-12-05) 100 | [Full 
Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-04_17-43-29...master-2013-12-05_10-40-51) 101 | 102 | **Merged pull requests:** 103 | 104 | - Improve test coverage [\#6](https://github.com/theodi/datapackage.rb/pull/6) ([ldodds](https://github.com/ldodds)) 105 | 106 | ## [master-2013-12-04_17-43-29](https://github.com/theodi/datapackage.rb/tree/master-2013-12-04_17-43-29) (2013-12-04) 107 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-04_14-16-07...master-2013-12-04_17-43-29) 108 | 109 | **Merged pull requests:** 110 | 111 | - Improve validation options [\#3](https://github.com/theodi/datapackage.rb/pull/3) ([ldodds](https://github.com/ldodds)) 112 | 113 | ## [master-2013-12-04_14-16-07](https://github.com/theodi/datapackage.rb/tree/master-2013-12-04_14-16-07) (2013-12-04) 114 | [Full Changelog](https://github.com/theodi/datapackage.rb/compare/master-2013-12-04_14-15-53...master-2013-12-04_14-16-07) 115 | 116 | ## [master-2013-12-04_14-15-53](https://github.com/theodi/datapackage.rb/tree/master-2013-12-04_14-15-53) (2013-12-04) 117 | **Merged pull requests:** 118 | 119 | - Add preferred ruby version and gemset [\#2](https://github.com/theodi/datapackage.rb/pull/2) ([Floppy](https://github.com/Floppy)) 120 | - Basic datapackage object and validation using JSON schema [\#1](https://github.com/theodi/datapackage.rb/pull/1) ([ldodds](https://github.com/ldodds)) 121 | 122 | 123 | 124 | \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* -------------------------------------------------------------------------------- /spec/resource_spec.rb: -------------------------------------------------------------------------------- 1 | describe DataPackage::Resource do 2 | 3 | let(:tabular_resource) { 4 | { 5 | 'name'=> 'tabular_resource', 6 | 'data'=> [['str', 'int'], [1,2]], 7 | 'schema'=> { 8 | 'fields'=> [ 9 | { 10 | 'name'=> 'str', 
11 | }, 12 | { 13 | 'name'=> 'int', 14 | 'type'=> 'integer', 15 | } 16 | ] 17 | }, 18 | 'profile'=> DataPackage::DEFAULTS[:resource][:tabular_profile], 19 | } 20 | } 21 | 22 | context 'initialize' do 23 | 24 | it "raises if the resource doesn't have 'path' or 'data' " do 25 | resource_hash = { 26 | 'name'=> 'resource', 27 | 'foo'=> 'bar', 28 | } 29 | 30 | expect{ DataPackage::Resource.new(resource_hash) }.to raise_error(DataPackage::ResourceException) 31 | end 32 | 33 | it 'extends the resource with defaults' do 34 | resource_hash = { 35 | 'name'=> 'resource', 36 | 'data'=> 'whevs', 37 | } 38 | expected_resource = resource_hash.merge!({ 39 | 'profile'=> DataPackage::DEFAULTS[:resource][:profile], 40 | 'encoding'=> DataPackage::DEFAULTS[:resource][:encoding], 41 | }) 42 | resource = DataPackage::Resource.new(resource_hash) 43 | 44 | expect(resource).to eq(expected_resource) 45 | end 46 | 47 | it 'extends a tabular resource with table defaults' do 48 | resource = DataPackage::Resource.new(tabular_resource) 49 | 50 | expect(resource['schema']['missingValues']).to eq(DataPackage::DEFAULTS[:schema][:missing_values]) 51 | expect(resource['schema']['fields'][0]['type']).to eq(DataPackage::DEFAULTS[:schema][:type]) 52 | expect(resource['schema']['fields'][0]['format']).to eq(DataPackage::DEFAULTS[:schema][:format]) 53 | end 54 | 55 | context 'remote resource' do 56 | 57 | before(:each) do 58 | @url = 'http://example.com/test.csv' 59 | FakeWeb.register_uri(:get, @url, 60 | :body => File.read( test_package_filename('test.csv') ) ) 61 | end 62 | 63 | it 'correctly detects source_type' do 64 | resource_hash = { 65 | 'name' => 'remote resource', 66 | 'path' => @url 67 | } 68 | resource = DataPackage::Resource.new(resource_hash) 69 | 70 | expect(resource.remote).to eq(true) 71 | end 72 | 73 | it 'accepts full URL as source' do 74 | resource_hash = { 75 | 'name' => 'remote resource', 76 | 'path' => @url 77 | } 78 | resource = DataPackage::Resource.new(resource_hash) 79 | 80 | 
expect(resource.source).to eq(@url) 81 | end 82 | 83 | it 'constructs source from a base URL' do 84 | file = 'test.csv' 85 | resource_hash = { 86 | 'name' => 'remote resource', 87 | 'path' => file, 88 | } 89 | base_url = 'http://example.com/' 90 | resource = DataPackage::Resource.new(resource_hash, base_url) 91 | 92 | expect(resource.source).to eq(URI.join(base_url, file).to_s) 93 | end 94 | 95 | end 96 | 97 | context 'local resource' do 98 | 99 | before(:each) do 100 | @base_path = File.dirname(test_package_filename) 101 | end 102 | 103 | it 'correctly detects source_type' do 104 | resource_hash = { 105 | 'name' => 'local resource', 106 | 'path' => 'test.csv' 107 | } 108 | resource = DataPackage::Resource.new(resource_hash, @base_path) 109 | 110 | expect(resource.local).to eq(true) 111 | end 112 | 113 | it 'constructs source from a base path' do 114 | file = 'test.csv' 115 | resource_hash = { 116 | 'name' => 'local resource', 117 | 'path' => file, 118 | } 119 | resource = DataPackage::Resource.new(resource_hash, @base_path) 120 | 121 | expect(resource.source).to eq(File.join(@base_path, file).to_s) 122 | end 123 | 124 | it 'raises if absolute path is given' do 125 | resource_hash = { 126 | 'name' => 'local resource', 127 | 'path' => test_package_filename('test.csv') 128 | } 129 | 130 | expect{ DataPackage::Resource.new(resource_hash) }.to raise_error(DataPackage::ResourceException) 131 | end 132 | 133 | end 134 | 135 | context 'inline resource' do 136 | 137 | it 'correctly detects source_type' do 138 | resource_hash = { 139 | 'name' => 'inline resource', 140 | 'data' => 'whevs' 141 | } 142 | resource = DataPackage::Resource.new(resource_hash) 143 | 144 | expect(resource.inline).to eq(true) 145 | end 146 | 147 | it 'returns the data' do 148 | resource_hash = { 149 | 'name' => 'bar', 150 | 'data' => 'whevs' 151 | } 152 | resource = DataPackage::Resource.new(resource_hash) 153 | 154 | expect(resource.source).to eq('whevs') 155 | end 156 | 157 | end 158 | end 159 | 
160 | context 'validate' do 161 | 162 | it 'should validate basic resource structure' do 163 | resource = DataPackage::Resource.new({ 164 | 'name'=> 'resource', 165 | 'data'=> 'random', 166 | }) 167 | 168 | expect(resource.valid?).to be true 169 | expect(resource.validate).to be true 170 | expect(resource.iter_errors{ |err| err }).to be_empty 171 | end 172 | 173 | it 'should detect an invalid resource' do 174 | schemaless = tabular_resource.reject{|k,v| k.to_s == 'schema'} 175 | resource = DataPackage::Resource.new(schemaless) 176 | 177 | expect(resource.valid?).to be false 178 | expect{ resource.validate }.to raise_error(DataPackage::ValidationError) 179 | expect(resource.iter_errors{ |err| err }).to_not be_empty 180 | end 181 | 182 | end 183 | 184 | context 'tabular' do 185 | 186 | it 'is true for resources with tabular profile' do 187 | resource = DataPackage::Resource.new(tabular_resource) 188 | 189 | expect(resource.tabular?).to be true 190 | end 191 | 192 | it 'is true for resources that comply with the tabular profile' do 193 | resource = DataPackage::Resource.new(tabular_resource.merge({ 194 | 'profile'=> DataPackage::DEFAULTS[:resource][:profile], 195 | })) 196 | 197 | expect(resource.tabular?).to be true 198 | end 199 | 200 | it 'is false for resources that don\'t comply with tabular profile' do 201 | resource = DataPackage::Resource.new({ 202 | 'name'=> 'resource', 203 | 'data'=> 'random', 204 | }) 205 | 206 | expect(resource.tabular?).to be false 207 | end 208 | 209 | end 210 | 211 | context 'table' do 212 | 213 | it 'returns a table for tabular resources' do 214 | expect(DataPackage::Resource.new(tabular_resource).table.class).to eq(TableSchema::Table) 215 | end 216 | 217 | it 'returns nil for a non-tabular resources' do 218 | resource = DataPackage::Resource.new({ 219 | 'name'=> 'resource', 220 | 'data'=> 'random', 221 | }) 222 | 223 | expect(resource.table).to eq(nil) 224 | end 225 | 226 | end 227 | 228 | context 'read' do 229 | 230 | it 'reads 
tabular data' do 231 | resource = DataPackage::Resource.new(tabular_resource) 232 | expect(resource.headers).to eq(['str', 'int']) 233 | expect(resource.schema.field_names).to eq(['str', 'int']) 234 | expect(resource.read).to eq([['1', 2]]) 235 | expect(resource.read(keyed: true)).to eq([{'str'=> '1', 'int'=> 2}]) 236 | end 237 | 238 | end 239 | 240 | describe '.infer' do 241 | it 'returns error for non-CSV files' do 242 | file_path = 'spec/fixtures/foo.txt' 243 | 244 | expect { DataPackage::Resource.infer(file_path) }.to raise_error(DataPackage::ResourceException) 245 | end 246 | 247 | it 'infers schema for CSV to use to initialise Resource instance' do 248 | csv_path = 'spec/fixtures/data/prices.csv' 249 | resource = DataPackage::Resource.infer(csv_path) 250 | 251 | 252 | expect(resource).to eq({ 253 | 'format' => 'csv', 254 | 'mediatype' => 'text/csv', 255 | 'name' => 'prices', 256 | 'path' => 'spec/fixtures/data/prices.csv', 257 | 'schema' => { 258 | 'fields' => [ 259 | {'format' => 'default', 'name' => 'id', 'type' => 'any'}, 260 | {'format' => 'default', 'name' => 'price', 'type' => 'integer'}, 261 | {'format' => 'default', 'name' => 'year_to_market', 'type' => 'year'}, 262 | {'format' => 'default', 'name' => 'added_on', 'type' => 'date'}, 263 | {'format' => 'default', 'name' => 'updated_at', 'type' => 'datetime'}, 264 | {'format' => 'default', 'name' => 'cutoff_time', 'type' => 'time'} 265 | ], 266 | 'missingValues'=>[''], 267 | }, 268 | 'profile' => 'tabular-data-resource', 269 | 'encoding' => 'utf-8' 270 | }) 271 | end 272 | end 273 | 274 | end 275 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datapackage-rb 2 | 3 | [![Build](https://img.shields.io/github/workflow/status/frictionlessdata/datapackage-rb/general/main)](https://github.com/frictionlessdata/datapackage-rb/actions) 4 | 
[![Coverage](https://img.shields.io/codecov/c/github/frictionlessdata/datapackage-rb/main)](https://codecov.io/gh/frictionlessdata/datapackage-rb) 5 | [![Release](http://img.shields.io/gem/v/datapackage.svg)](https://rubygems.org/gems/datapackage) 6 | [![Codebase](https://img.shields.io/badge/codebase-github-brightgreen)](https://github.com/frictionlessdata/datapackage-rb) 7 | [![Support](https://img.shields.io/badge/support-discord-brightgreen)](https://discordapp.com/invite/Sewv6av) 8 | 9 | A Ruby library for working with [Data Packages](https://specs.frictionlessdata.io/data-package/). 10 | 11 | The library is intended to support: 12 | 13 | * Parsing and using data package metadata and data 14 | * Validating data packages to ensure they conform with the Data Package specification 15 | 16 | ## Installation 17 | 18 | Add the gem to your Gemfile: 19 | 20 | ``` 21 | gem 'datapackage' 22 | ``` 23 | 24 | Or: 25 | 26 | ``` 27 | gem install datapackage 28 | ``` 29 | 30 | ## Reading a Data Package 31 | 32 | Require the gem, if you need to: 33 | 34 | ```ruby 35 | require 'datapackage' 36 | ``` 37 | 38 | Parsing a data package descriptor from a remote location: 39 | 40 | ```ruby 41 | package = DataPackage::Package.new( "http://example.org/datasets/a/datapackage.json" ) 42 | ``` 43 | 44 | This assumes that `http://example.org/datasets/a/datapackage.json` exists. 45 | Similarly you can load a package descriptor from a local JSON file. 46 | 47 | ```ruby 48 | package = DataPackage::Package.new( "/my/data/package/datapackage.json" ) 49 | ``` 50 | 51 | The data package descriptor, 52 | i.e. the `datapackage.json` file, is expected to be at the _root_ directory 53 | of the data package and the `path` attribute of the package's `resources` will be resolved 54 | relative to it. 
55 | 56 | You can also load a data package descriptor directly from a Hash: 57 | 58 | ```ruby 59 | descriptor = { 60 | 'resources'=> [ 61 | { 62 | 'name'=> 'example', 63 | 'profile'=> 'tabular-data-resource', 64 | 'data'=> [ 65 | ['height', 'age', 'name'], 66 | ['180', '18', 'Tony'], 67 | ['192', '32', 'Jacob'], 68 | ], 69 | 'schema'=> { 70 | 'fields'=> [ 71 | {'name'=> 'height', 'type'=> 'integer'}, 72 | {'name'=> 'age', 'type'=> 'integer'}, 73 | {'name'=> 'name', 'type'=> 'string'}, 74 | ], 75 | } 76 | } 77 | ] 78 | } 79 | 80 | package = DataPackage::Package.new(descriptor) 81 | ``` 82 | 83 | There is a set of helper methods for accessing data from the package, e.g.: 84 | 85 | ```ruby 86 | package.name 87 | package.title 88 | package.description 89 | package.homepage 90 | package.license 91 | ``` 92 | 93 | ## Reading Data Resources 94 | 95 | A data package must contain an array of [Data Resources](https://specs.frictionlessdata.io/data-resource). 96 | You can access the resources in your Data Package either by their name or by their index in the `resources` array: 97 | 98 | ```ruby 99 | first_resource = package.resources[0] 100 | first_resource = package.get_resource('example') 101 | 102 | # Get info about the data source of this resource 103 | first_resource.inline? 104 | first_resource.local? 105 | first_resource.remote? 106 | first_resource.multipart? 107 | first_resource.tabular? 108 | first_resource.source 109 | ``` 110 | 111 | You can then read the source depending on its type. For example if resource is local and not multipart it could be opened as a file: `File.open(resource.source)`. 112 | 113 | If a resource complies with the [Tabular Data Resource spec](https://specs.frictionlessdata.io/tabular-data-resource/) or uses the 114 | `tabular-data-resource` [profile](#profiles) you can read resource rows: 115 | 116 | ```ruby 117 | resource = package.resources[0] 118 | resource.tabular? 
119 | resource.headers 120 | resource.schema 121 | 122 | # Read all the rows at once 123 | data = resource.read 124 | data = resource.read(keyed: true) 125 | 126 | # Or iterate through it 127 | data = resource.iter {|row| print row} 128 | ``` 129 | 130 | See [TableSchema](https://github.com/frictionlessdata/tableschema-rb) documentation for other things you can do with a tabular resource. 131 | 132 | ## Creating a Data Package 133 | 134 | ```ruby 135 | package = DataPackage::Package.new 136 | 137 | # Add package properties 138 | package.name = 'my_sleep_duration' 139 | 140 | # Add a resource 141 | package.add_resource( 142 | { 143 | 'name'=> 'sleep_durations_this_week', 144 | 'data'=> [7, 8, 5, 6, 9, 7, 8], 145 | } 146 | ) 147 | ``` 148 | 149 | If the resource is valid it will be added to the `resources` array of the Data Package; 150 | if it's invalid it will not be added and you should try creating and [validating](#validating-a-resource) your resource to see why it fails. 151 | 152 | ```ruby 153 | # Update a resource 154 | my_resource = package.get_resource('sleep_durations_this_week') 155 | my_resource['schema'] = { 156 | 'fields'=> [ 157 | {'name'=> 'number_hours', 'type'=> 'integer'}, 158 | ] 159 | } 160 | 161 | # Save the Data Package descriptor to the target file 162 | package.save('datapackage.json') 163 | 164 | # Remove a resource 165 | package.remove_resource('sleep_durations_this_week') 166 | ``` 167 | 168 | ## Profiles 169 | 170 | Data Package and Data Resource descriptors can be validated against [JSON schemas](https://tools.ietf.org/html/draft-zyp-json-schema-04) that we call `profiles`. 171 | 172 | By default, this gem uses the standard [Data Package profile](http://specs.frictionlessdata.io/schemas/data-package.json) and [Data Resource profile](http://specs.frictionlessdata.io/schemas/data-resource.json) but alternative profiles are available for both. 
173 | 174 | According to the [specs](https://specs.frictionlessdata.io/profiles/) the value of 175 | the `profile` property can be either a URL or an identifier from [the registry](https://specs.frictionlessdata.io/schemas/registry.json). 176 | 177 | ### Profiles in the local cache 178 | 179 | The profiles from the registry come bundled with the gem. You can reference them in your Data Package descriptor by their identifier in [the registry](https://specs.frictionlessdata.io/schemas/registry.json): 180 | 181 | - `data-package` the default profile for a [Data Package](https://specs.frictionlessdata.io/data-package/) 182 | - `data-resource` the default profile for a [Data Resource](https://specs.frictionlessdata.io/data-resource) 183 | - `tabular-data-package` for a [Tabular Data Package](http://specs.frictionlessdata.io/tabular-data-package/) 184 | - `tabular-data-resource` for a [Tabular Data Resource](https://specs.frictionlessdata.io/tabular-data-resource/) 185 | - `fiscal-data-package` for a [Fiscal Data Package](http://fiscal.dataprotocols.org/spec/) 186 | 187 | ```ruby 188 | { 189 | "profile": "tabular-data-package" 190 | } 191 | ``` 192 | 193 | ### Profiles from elsewhere 194 | 195 | If you have a custom profile schema you can reference it by its URL: 196 | 197 | ```ruby 198 | { 199 | "profile": "https://specs.frictionlessdata.io/schemas/tabular-data-package.json" 200 | } 201 | ``` 202 | 203 | ## Validation 204 | 205 | Data Resources and Data Packages are validated against their profiles to ensure they respect the expected structure. 206 | 207 | ### Validating a Resource 208 | 209 | ```ruby 210 | descriptor = { 211 | 'name'=> 'incorrect name', 212 | 'path'=> 'https://cdn.rawgit.com/frictionlessdata/datapackage-rb/master/spec/fixtures/test-pkg/test.csv', 213 | } 214 | resource = DataPackage::Resource.new(descriptor, base_path='') 215 | 216 | # Returns true if resource is valid, false otherwise 217 | resource.valid? 
218 | 219 | # Returns true or raises DataPackage::ValidationError 220 | resource.validate 221 | 222 | # Iterate through validation errors 223 | resource.iter_errors{ |err| p err} 224 | ``` 225 | 226 | ### Validating a Package 227 | 228 | The same methods used to check the validity of a Resource - `valid?`, `validate` and `iter_errors` - are also available for a Package. 229 | The difference is that after a Package descriptor is validated against its `profile`, each of its `resources` is also validated against its own `profile`. 230 | 231 | In order for a Package to be valid all its Resources have to be valid. 232 | 233 | ## Developer notes 234 | 235 | These notes are intended to help people that want to contribute to this package itself. If you just want to use it, you can safely ignore them. 236 | 237 | After checking out the repo, run `bundle` to install dependencies. Then, run `rake spec` to run the tests. 238 | 239 | To install this gem onto your local machine, run `bundle exec rake install`. 240 | To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, 241 | which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 242 | 243 | ### Updating the local schemas cache 244 | 245 | We cache the local schemas from https://specs.frictionlessdata.io/schemas/registry.json. 
246 | The local schemas should be kept up to date with the remote ones using: 247 | 248 | ``` 249 | rake update_profiles 250 | ``` 251 | -------------------------------------------------------------------------------- /lib/profiles/data-resource.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Data Resource", 4 | "description": "Data Resource.", 5 | "type": "object", 6 | "oneOf": [ 7 | { 8 | "required": [ 9 | "name", 10 | "data" 11 | ] 12 | }, 13 | { 14 | "required": [ 15 | "name", 16 | "path" 17 | ] 18 | } 19 | ], 20 | "properties": { 21 | "profile": { 22 | "propertyOrder": 10, 23 | "default": "data-resource", 24 | "title": "Profile", 25 | "description": "The profile of this descriptor.", 26 | "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 27 | "type": "string", 28 | "examples": [ 29 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 30 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 31 | ] 32 | }, 33 | "name": { 34 | "propertyOrder": 20, 35 | "title": "Name", 36 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 37 | "type": "string", 38 | "pattern": "^([-a-z0-9._/])+$", 39 | "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 40 | "examples": [ 41 | "{\n \"name\": \"my-nice-name\"\n}\n" 42 | ] 43 | }, 44 | "path": { 45 | "propertyOrder": 30, 46 | "title": "Path", 47 | "description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings. 
of valid URIs.", 48 | "oneOf": [ 49 | { 50 | "title": "Path", 51 | "description": "A fully qualified URL, or a POSIX file path..", 52 | "type": "string", 53 | "examples": [ 54 | "{\n \"path\": \"file.csv\"\n}\n", 55 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 56 | ], 57 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 58 | }, 59 | { 60 | "type": "array", 61 | "minItems": 1, 62 | "items": { 63 | "title": "Path", 64 | "description": "A fully qualified URL, or a POSIX file path..", 65 | "type": "string", 66 | "examples": [ 67 | "{\n \"path\": \"file.csv\"\n}\n", 68 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 69 | ], 70 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 71 | }, 72 | "examples": [ 73 | "[ \"file.csv\" ]\n", 74 | "[ \"http://example.com/file.csv\" ]\n" 75 | ] 76 | } 77 | ], 78 | "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.", 79 | "examples": [ 80 | "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n", 81 | "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n", 82 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 83 | ] 84 | }, 85 | "data": { 86 | "propertyOrder": 230, 87 | "title": "Data", 88 | "description": "Inline data for this resource." 
89 | }, 90 | "schema": { 91 | "propertyOrder": 40, 92 | "title": "Schema", 93 | "description": "A schema for this resource.", 94 | "type": "object" 95 | }, 96 | "title": { 97 | "propertyOrder": 50, 98 | "title": "Title", 99 | "description": "A human-readable title.", 100 | "type": "string", 101 | "examples": [ 102 | "{\n \"title\": \"My Package Title\"\n}\n" 103 | ] 104 | }, 105 | "description": { 106 | "propertyOrder": 60, 107 | "format": "textarea", 108 | "title": "Description", 109 | "description": "A text description. Markdown is encouraged.", 110 | "type": "string", 111 | "examples": [ 112 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 113 | ] 114 | }, 115 | "homepage": { 116 | "propertyOrder": 70, 117 | "title": "Home Page", 118 | "description": "The home on the web that is related to this data package.", 119 | "type": "string", 120 | "format": "uri", 121 | "examples": [ 122 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 123 | ] 124 | }, 125 | "sources": { 126 | "propertyOrder": 140, 127 | "options": { 128 | "hidden": true 129 | }, 130 | "title": "Sources", 131 | "description": "The raw sources for this resource.", 132 | "type": "array", 133 | "minItems": 1, 134 | "items": { 135 | "title": "Source", 136 | "description": "A source file.", 137 | "type": "object", 138 | "required": [ 139 | "title" 140 | ], 141 | "properties": { 142 | "title": { 143 | "title": "Title", 144 | "description": "A human-readable title.", 145 | "type": "string", 146 | "examples": [ 147 | "{\n \"title\": \"My Package Title\"\n}\n" 148 | ] 149 | }, 150 | "path": { 151 | "title": "Path", 152 | "description": "A fully qualified URL, or a POSIX file path..", 153 | "type": "string", 154 | "examples": [ 155 | "{\n \"path\": \"file.csv\"\n}\n", 156 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 157 | ], 158 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
159 | }, 160 | "email": { 161 | "title": "Email", 162 | "description": "An email address.", 163 | "type": "string", 164 | "format": "email", 165 | "examples": [ 166 | "{\n \"email\": \"example@example.com\"\n}\n" 167 | ] 168 | } 169 | } 170 | }, 171 | "examples": [ 172 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 173 | ] 174 | }, 175 | "licenses": { 176 | "description": "The license(s) under which the resource is published.", 177 | "propertyOrder": 150, 178 | "options": { 179 | "hidden": true 180 | }, 181 | "title": "Licenses", 182 | "type": "array", 183 | "minItems": 1, 184 | "items": { 185 | "title": "License", 186 | "description": "A license for this descriptor.", 187 | "type": "object", 188 | "properties": { 189 | "name": { 190 | "title": "Open Definition license identifier", 191 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 192 | "type": "string", 193 | "pattern": "^([-a-zA-Z0-9._])+$" 194 | }, 195 | "path": { 196 | "title": "Path", 197 | "description": "A fully qualified URL, or a POSIX file path..", 198 | "type": "string", 199 | "examples": [ 200 | "{\n \"path\": \"file.csv\"\n}\n", 201 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 202 | ], 203 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 204 | }, 205 | "title": { 206 | "title": "Title", 207 | "description": "A human-readable title.", 208 | "type": "string", 209 | "examples": [ 210 | "{\n \"title\": \"My Package Title\"\n}\n" 211 | ] 212 | } 213 | }, 214 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
215 | }, 216 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 217 | "examples": [ 218 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 219 | ] 220 | }, 221 | "format": { 222 | "propertyOrder": 80, 223 | "title": "Format", 224 | "description": "The file format of this resource.", 225 | "context": "`csv`, `xls`, `json` are examples of common formats.", 226 | "type": "string", 227 | "examples": [ 228 | "{\n \"format\": \"xls\"\n}\n" 229 | ] 230 | }, 231 | "mediatype": { 232 | "propertyOrder": 90, 233 | "title": "Media Type", 234 | "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).", 235 | "type": "string", 236 | "pattern": "^(.+)/(.+)$", 237 | "examples": [ 238 | "{\n \"mediatype\": \"text/csv\"\n}\n" 239 | ] 240 | }, 241 | "encoding": { 242 | "propertyOrder": 100, 243 | "title": "Encoding", 244 | "description": "The file encoding of this resource.", 245 | "type": "string", 246 | "default": "utf-8", 247 | "examples": [ 248 | "{\n \"encoding\": \"utf-8\"\n}\n" 249 | ] 250 | }, 251 | "bytes": { 252 | "propertyOrder": 110, 253 | "options": { 254 | "hidden": true 255 | }, 256 | "title": "Bytes", 257 | "description": "The size of this resource in bytes.", 258 | "type": "integer", 259 | "examples": [ 260 | "{\n \"bytes\": 2082\n}\n" 261 | ] 262 | }, 263 | "hash": { 264 | "propertyOrder": 120, 265 | "options": { 266 | "hidden": true 267 | }, 268 | "title": "Hash", 269 | "type": "string", 270 | "description": "The MD5 hash of this resource. 
Indicate other hashing algorithms with the {algorithm}:{hash} format.", 271 | "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$", 272 | "examples": [ 273 | "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n", 274 | "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n" 275 | ] 276 | } 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /spec/package_spec.rb: -------------------------------------------------------------------------------- 1 | describe DataPackage::Package do 2 | 3 | context "creating a package" do 4 | 5 | it "allows initialization without an object or string" do 6 | package = DataPackage::Package.new 7 | expect(package.name).to eq(nil) 8 | end 9 | 10 | it "allows properties to be set" do 11 | package = DataPackage::Package.new 12 | 13 | package.name = "My awesome datapackage" 14 | 15 | expect(package.name).to eq("My awesome datapackage") 16 | end 17 | 18 | context "profile" do 19 | 20 | it "uses the base profile by default" do 21 | package = DataPackage::Package.new 22 | 23 | expect(package.profile).to be_a(DataPackage::Profile) 24 | expect(package.profile.name).to eq('data-package') 25 | end 26 | 27 | xit "allows a custom profile to be specified" do 28 | profile_url = 'http://example.org/thing.json' 29 | profile_body = File.read File.join('spec', 'fixtures', 'fake_profile.json') 30 | FakeWeb.register_uri(:get, profile_url, :body => profile_body) 31 | package = DataPackage::Package.new({ 32 | 'profile' => profile_url 33 | }) 34 | 35 | expect(package.profile).to eq({ 36 | 'key' => 'value' 37 | }) 38 | end 39 | 40 | end 41 | 42 | context "allows a resource to be specified" do 43 | 44 | it "with a file" do 45 | file = 'test.csv' 46 | package = DataPackage::Package.new( test_package_filename ) 47 | package.resources << { 48 | 'name' => 'resource', 49 | 'path' => file 50 | } 51 | 52 | expect(package.resources[0]).to be_a_kind_of(DataPackage::Resource) 53 
| expect(package.resources[0].source).to eq(File.join(package.base, file)) 54 | end 55 | 56 | it "with a url" do 57 | url = 'http://example.com/test.csv' 58 | FakeWeb.register_uri(:get, url, body: '') 59 | package = DataPackage::Package.new 60 | package.resources << { 61 | 'name' => 'resource', 62 | 'path' => url 63 | } 64 | 65 | expect(package.resources[0]).to be_a_kind_of(DataPackage::Resource) 66 | expect(package.resources[0].source).to eq(url) 67 | end 68 | 69 | it "with inline data" do 70 | package = DataPackage::Package.new 71 | data = [ 72 | ['foo', 'bar', 'baz'] 73 | ] 74 | 75 | package.resources << { 76 | 'name' => 'resource', 77 | 'data' => data 78 | } 79 | 80 | expect(package.resources[0]).to be_a_kind_of(DataPackage::Resource) 81 | expect(package.resources[0].source).to eq(data) 82 | end 83 | 84 | end 85 | 86 | end 87 | 88 | context "when parsing packages" do 89 | 90 | it "should initialize from an object" do 91 | package = { 92 | "name" => "test-package", 93 | "description" => "description", 94 | "resources" => [ { "name" => "resource", "data" => "test" }] 95 | } 96 | package = DataPackage::Package.new(package) 97 | expect( package.name ).to eql("test-package") 98 | expect( package.resources.length ).to eql(1) 99 | end 100 | 101 | it "should support reading properties directly" do 102 | package = { 103 | "name" => "test-package", 104 | "description" => "description", 105 | "my-property" => "value" 106 | } 107 | package = DataPackage::Package.new(package) 108 | expect( package.property("my-property") ).to eql("value") 109 | expect( package.property("another-property") ).to eql(nil) 110 | expect( package.property("another-property", "default") ).to eql("default") 111 | end 112 | 113 | it "should allow properties to be changed" do 114 | package = { 115 | "name" => "test-package", 116 | "description" => "description", 117 | "my-property" => "value" 118 | } 119 | package = DataPackage::Package.new(package) 120 | package.name = 'new-package' 121 | 122 | 
expect(package.name).to eq('new-package') 123 | end 124 | 125 | it "should load from a local file" do 126 | package = DataPackage::Package.new( test_package_filename ) 127 | expect( package.name ).to eql("test-package") 128 | expect( package.title ).to eql("Test Package") 129 | expect( package.description ).to eql("Description") 130 | expect( package.created ).to eq(nil) 131 | expect( package.homepage ).to eql("http://example.org") 132 | [:sources, :contributors].each do |key| 133 | expect( package.send(key) ).to eql([]) 134 | end 135 | expect( package.sources ).to eql([]) 136 | expect( package.keywords ).to eql( [ "test", "testing" ] ) 137 | expect( package.resources.length ).to eql(1) 138 | end 139 | 140 | it "should load from a zip file" do 141 | path = File.join( File.dirname(__FILE__), "fixtures", "test-pkg.zip" ) 142 | 143 | package = DataPackage::Package.new(path) 144 | 145 | expect( package.name ).to eql("test-package") 146 | expect( package.title ).to eql("Test Package") 147 | expect( package.description ).to eql("Description") 148 | expect( package.created ).to eq(nil) 149 | expect( package.homepage ).to eql({ "path"=> "http://example.org" }) 150 | [:sources, :contributors].each do |key| 151 | expect( package.send(key) ).to eql([]) 152 | end 153 | expect( package.sources ).to eql([]) 154 | expect( package.keywords ).to eql( [ "test", "testing" ] ) 155 | expect( package.resources.length ).to eql(1) 156 | end 157 | 158 | xit "should load from an explicit URL" do 159 | FakeWeb.register_uri(:get, "http://example.com/datapackage.json", 160 | :body => File.read( test_package_filename ) ) 161 | FakeWeb.register_uri(:get, "http://example.com/test.csv", 162 | :body => File.read( test_package_filename('test.csv') ) ) 163 | package = DataPackage::Package.new( "http://example.com/datapackage.json" ) 164 | expect( package.name ).to eql("test-package") 165 | expect( package.resources.length ).to eql(1) 166 | end 167 | 168 | xit "should load from a zipfile at an 
explicit URL" do 169 | package_body = File.read( File.join( File.dirname(__FILE__), "fixtures", "test-pkg.zip" ) ) 170 | FakeWeb.register_uri(:get, "http://example.com/datapackage.zip", 171 | :body => package_body) 172 | package = DataPackage::Package.new( "http://example.com/datapackage.zip" ) 173 | expect( package.name ).to eql("test-package") 174 | expect( package.resources.length ).to eql(1) 175 | end 176 | 177 | xit "should distinguish between local and remote packages" do 178 | package = DataPackage::Package.new( { "name" => "test"} ) 179 | expect( package.local? ).to eql(true) 180 | expect( package.base ).to eql("") 181 | 182 | file = test_package_filename 183 | package = DataPackage::Package.new(file) 184 | expect( package.local? ).to eql(true) 185 | expect( package.base ).to eql( File.join( File.dirname(__FILE__), "fixtures", "test-pkg") ) 186 | 187 | FakeWeb.register_uri(:get, "http://example.com/datapackage.json", 188 | :body => File.read( test_package_filename ) ) 189 | package = DataPackage::Package.new( "http://example.com/datapackage.json" ) 190 | expect( package.local? 
).to eql(false) 191 | expect( package.base ).to eql( "http://example.com" ) 192 | end 193 | 194 | xit 'raises if URL does not exist' do 195 | url = 'http://example.org/datapackage.json' 196 | FakeWeb.register_uri(:get, url, 197 | :body => "", :status => ["404", "Not Found"] ) 198 | 199 | expect { DataPackage::Package.new(url) }.to raise_error(DataPackage::PackageException) 200 | end 201 | 202 | xit 'raises if the descriptor is not a JSON' do 203 | url = 'http://example.org/datapackage.json' 204 | body = File.read File.join('spec', 'fixtures', 'not_a_json') 205 | FakeWeb.register_uri(:get, url, :body => body) 206 | 207 | expect { DataPackage::Package.new(url) }.to raise_error(DataPackage::PackageException) 208 | end 209 | end 210 | 211 | context "tabular datapackages" do 212 | 213 | it "returns a table" do 214 | package = DataPackage::Package.new( test_package_filename ) 215 | expect(package.resources[0].table.class).to eq(TableSchema::Table) 216 | end 217 | 218 | it "table contains data in tabular form" do 219 | package = DataPackage::Package.new( test_package_filename ) 220 | data = package.resources[0].table.read(keyed: true) 221 | expect(data).to eq([ 222 | {"ID"=>"abc", "Price"=>100}, 223 | {"ID"=>"def", "Price"=>300}, 224 | {"ID"=>"ghi", "Price"=>750} 225 | ]) 226 | end 227 | 228 | it 'table returns nil for non-tabular packages' do 229 | filename = File.join( File.dirname(__FILE__), 'fixtures', 'datapackage_with_foo.txt_resource.json' ) 230 | package = DataPackage::Package.new( filename ) 231 | 232 | expect(package.resources[0].table).to eq(nil) 233 | end 234 | 235 | end 236 | 237 | context "validation" do 238 | 239 | it "should validate basic datapackage structure" do 240 | package = DataPackage::Package.new(test_package_filename) 241 | 242 | expect(package.valid?).to be true 243 | expect(package.validate).to be true 244 | expect(package.iter_errors{ |err| err }).to be_empty 245 | end 246 | 247 | it "should detect an invalid datapackage" do 248 | package = 
DataPackage::Package.new( { "name" => "this is invalid" } ) 249 | expect( package.valid? ).to be false 250 | expect{ package.validate }.to raise_error(DataPackage::ValidationError) 251 | expect(package.iter_errors{ |err| err }).to_not be_empty 252 | end 253 | 254 | xit "should validate on the fly" do 255 | profile_body = { 256 | 'properties' => { 257 | 'name' => {} 258 | }, 259 | 'required' => ['name'] 260 | } 261 | profile_url = 'http://example.org/my_profile.json' 262 | FakeWeb.register_uri(:get, profile_url, :body => JSON.dump(profile_body)) 263 | 264 | package = DataPackage::Package.new({ 265 | 'profile' => profile_url 266 | }) 267 | expect(package.valid?).to be false 268 | 269 | package.name = 'A name' 270 | expect(package.valid?).to be true 271 | end 272 | 273 | xit 'should fail if resources don\'t validate against their profiles' do 274 | profile_body = { 275 | 'properties' => { 276 | 'name' => {}, 277 | 'title'=> {}, 278 | }, 279 | 'required' => ['name', 'title'] 280 | } 281 | profile_url = 'http://example.org/my_profile.json' 282 | FakeWeb.register_uri(:get, profile_url, :body => JSON.dump(profile_body)) 283 | package = DataPackage::Package.new({ 284 | 'name'=> 'package', 285 | 'profile'=> profile_url, 286 | 'title'=> 'this resource should have a title', 287 | 'resources'=> [ 288 | { 289 | 'name'=> 'resource_without_title', 290 | 'profile'=> profile_url, 291 | 'data'=> 'cmon', 292 | } 293 | ] 294 | }) 295 | 296 | expect(package.valid?).to be false 297 | expect(package.resources.first.valid?).to be false 298 | expect{ package.validate }.to raise_error(DataPackage::ValidationError) 299 | expect(package.iter_errors{ |err| err }).to_not be_empty 300 | end 301 | 302 | end 303 | 304 | context 'add_resource' do 305 | 306 | before(:each) do 307 | profile_body = { 308 | 'properties' => { 309 | 'name' => {}, 310 | 'resources'=> { 311 | 'items'=> { 312 | 'required' => ['name', 'title'] 313 | } 314 | } 315 | } 316 | } 317 | profile_url = 
'http://example.org/my_profile.json' 318 | FakeWeb.register_uri(:get, profile_url, :body => JSON.dump(profile_body)) 319 | @package = DataPackage::Package.new({ 320 | 'name'=> 'package', 321 | 'profile'=> profile_url, 322 | 'resources'=> [] 323 | }) 324 | end 325 | 326 | xit 'doesn\'t add a resource that fails package validation' do 327 | resource = { 328 | 'name'=> 'resource_without_title', 329 | 'data'=> 'cmon', 330 | } 331 | 332 | expect(@package.add_resource(resource)).to be_nil 333 | expect(@package.resources).to be_empty 334 | end 335 | 336 | xit 'doesn\'t add a resource that fails resource validation' do 337 | resource = { 338 | 'name'=> 'incorrect_tabular', 339 | 'data'=> 'cmon', 340 | 'title'=> 'This has title but it\'s not a valid tabular', 341 | 'profile'=> 'tabular-data-resource', 342 | } 343 | 344 | expect(@package.add_resource(resource)).to be_nil 345 | expect(@package.resources).to be_empty 346 | end 347 | 348 | xit 'adds a valid resource' do 349 | resource = { 350 | 'name'=> 'resource_with_title', 351 | 'data'=> 'cmon', 352 | 'title'=> 'This will pass', 353 | 'encoding'=> DataPackage::DEFAULTS[:resource][:encoding], 354 | 'profile'=> DataPackage::DEFAULTS[:resource][:profile], 355 | } 356 | 357 | expect(@package.add_resource(resource)).to eq(resource) 358 | expect(@package.resources).to eq([resource]) 359 | end 360 | 361 | end 362 | 363 | context 'remove resource' do 364 | 365 | before(:each) do 366 | @resource = { 367 | 'name'=> 'resource', 368 | 'data'=> 'whevs', 369 | 'encoding'=> DataPackage::DEFAULTS[:resource][:encoding], 370 | 'profile'=> DataPackage::DEFAULTS[:resource][:profile], 371 | } 372 | @package = DataPackage::Package.new({ 373 | 'name'=> 'package', 374 | 'resources'=> [ @resource ] 375 | }) 376 | end 377 | 378 | it 'removes resource by name' do 379 | expect(@package.remove_resource('resource')).to eq(@resource) 380 | expect(@package.resources).to be_empty 381 | end 382 | 383 | it 'returns nil if resource not found' do 384 | 
expect(@package.remove_resource('inexistent')).to be_nil 385 | expect(@package.resources).to_not be_empty 386 | end 387 | 388 | end 389 | 390 | context 'save' do 391 | 392 | before(:each) do 393 | @content = { 394 | 'name'=> 'package', 395 | 'resources'=> [] 396 | } 397 | @buffer = StringIO.new(JSON.dump(@content)) 398 | @filename = test_package_filename 399 | allow(File).to receive(:open).with(@filename,'r').and_yield(@buffer) 400 | allow(File).to receive(:open).with(@filename,'w').and_yield(@buffer) 401 | end 402 | 403 | it 'updates the package location by default' do 404 | package = DataPackage::Package.new(@filename) 405 | new_name = 'will_it_save' 406 | package['name'] = new_name 407 | 408 | expect(package.save).to be true 409 | expect(JSON.load(@buffer.string)['name']).to eq(new_name) 410 | end 411 | 412 | it 'updates the file given as target' do 413 | package = DataPackage::Package.new(@content) 414 | new_name = 'will_it_save' 415 | package['name'] = new_name 416 | 417 | expect(package.save(@filename)).to be true 418 | expect(JSON.load(@buffer.string)['name']).to eq(new_name) 419 | end 420 | 421 | end 422 | 423 | # context 'inferring a datapackage from a data directory' do 424 | # it 'infers a datapackage.json schema referencing all CSVs in the directory' do 425 | # package = DataPackage::Package.new 426 | # descriptor = package.infer(directory: 'data', base_path: 'spec/fixtures') 427 | 428 | # expect(descriptor).to eq({ 429 | # 'profile' => 'tabular-data-package', 430 | # 'resources' => [ 431 | # { 432 | # 'format' => 'csv', 433 | # 'mediatype' => 'text/csv', 434 | # 'name' => 'prices', 435 | # 'path' => 'spec/fixtures/data/prices.csv', 436 | # 'schema' => { 437 | # 'fields' => [ 438 | # {'format' => 'default', 'name' => 'id', 'type' => 'any'}, 439 | # {'format' => 'default', 'name' => 'price', 'type' => 'integer'}, 440 | # {'format' => 'default', 'name' => 'year_to_market', 'type' => 'year'}, 441 | # {'format' => 'default', 'name' => 'added_on', 'type' => 
'date'}, 442 | # {'format' => 'default', 'name' => 'updated_at', 'type' => 'datetime'}, 443 | # {'format' => 'default', 'name' => 'cutoff_time', 'type' => 'time'}, 444 | # ], 445 | # 'missingValues'=>[''], 446 | # }, 447 | # 'profile' => 'tabular-data-resource', 448 | # 'encoding' => 'utf-8' 449 | # }, 450 | # { 451 | # 'encoding' => 'utf-8', 452 | # 'format' => 'csv', 453 | # 'mediatype' => 'text/csv', 454 | # 'name' => 'names', 455 | # 'path' => 'spec/fixtures/data/names.csv', 456 | # 'profile' => 'tabular-data-resource', 457 | # 'schema' => { 458 | # 'fields' => [ 459 | # {'format' => 'default', 'name' => 'id', 'type' => 'any'}, 460 | # {'format' => 'default', 'name' => 'name', 'type' => 'any'}], 461 | # 'missingValues'=>[''] 462 | # }, 463 | # 'profile' => 'tabular-data-resource', 464 | # 'encoding' => 'utf-8' 465 | # } 466 | # ] 467 | # }) 468 | # end 469 | # end 470 | end 471 | -------------------------------------------------------------------------------- /lib/profiles/data-package.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Data Package", 4 | "description": "Data Package is a simple specification for data access and delivery.", 5 | "type": "object", 6 | "required": [ 7 | "resources" 8 | ], 9 | "properties": { 10 | "profile": { 11 | "propertyOrder": 10, 12 | "default": "data-package", 13 | "title": "Profile", 14 | "description": "The profile of this descriptor.", 15 | "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 16 | "type": "string", 17 | "examples": [ 18 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 19 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 20 | ] 21 | }, 22 | "name": { 23 | "propertyOrder": 20, 24 | "title": "Name", 25 | "description": "An identifier string. 
Lower case characters with `.`, `_`, `-` and `/` are allowed.", 26 | "type": "string", 27 | "pattern": "^([-a-z0-9._/])+$", 28 | "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 29 | "examples": [ 30 | "{\n \"name\": \"my-nice-name\"\n}\n" 31 | ] 32 | }, 33 | "id": { 34 | "propertyOrder": 30, 35 | "title": "ID", 36 | "description": "A property reserved for globally unique identifiers. Examples of identifiers that are unique include UUIDs and DOIs.", 37 | "context": "A common usage pattern for Data Packages is as a packaging format within the bounds of a system or platform. In these cases, a unique identifier for a package is desired for common data handling workflows, such as updating an existing package. While at the level of the specification, global uniqueness cannot be validated, consumers using the `id` property `MUST` ensure identifiers are globally unique.", 38 | "type": "string", 39 | "examples": [ 40 | "{\n \"id\": \"b03ec84-77fd-4270-813b-0c698943f7ce\"\n}\n", 41 | "{\n \"id\": \"http://dx.doi.org/10.1594/PANGAEA.726855\"\n}\n" 42 | ] 43 | }, 44 | "title": { 45 | "propertyOrder": 40, 46 | "title": "Title", 47 | "description": "A human-readable title.", 48 | "type": "string", 49 | "examples": [ 50 | "{\n \"title\": \"My Package Title\"\n}\n" 51 | ] 52 | }, 53 | "description": { 54 | "propertyOrder": 50, 55 | "format": "textarea", 56 | "title": "Description", 57 | "description": "A text description. 
Markdown is encouraged.", 58 | "type": "string", 59 | "examples": [ 60 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 61 | ] 62 | }, 63 | "homepage": { 64 | "propertyOrder": 60, 65 | "title": "Home Page", 66 | "description": "The home on the web that is related to this data package.", 67 | "type": "string", 68 | "format": "uri", 69 | "examples": [ 70 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 71 | ] 72 | }, 73 | "created": { 74 | "propertyOrder": 70, 75 | "title": "Created", 76 | "description": "The datetime on which this descriptor was created.", 77 | "context": "The datetime must conform to the string formats for datetime as described in [RFC3339](https://tools.ietf.org/html/rfc3339#section-5.6)", 78 | "type": "string", 79 | "format": "date-time", 80 | "examples": [ 81 | "{\n \"created\": \"1985-04-12T23:20:50.52Z\"\n}\n" 82 | ] 83 | }, 84 | "contributors": { 85 | "propertyOrder": 80, 86 | "title": "Contributors", 87 | "description": "The contributors to this descriptor.", 88 | "type": "array", 89 | "minItems": 1, 90 | "items": { 91 | "title": "Contributor", 92 | "description": "A contributor to this descriptor.", 93 | "properties": { 94 | "title": { 95 | "title": "Title", 96 | "description": "A human-readable title.", 97 | "type": "string", 98 | "examples": [ 99 | "{\n \"title\": \"My Package Title\"\n}\n" 100 | ] 101 | }, 102 | "path": { 103 | "title": "Path", 104 | "description": "A fully qualified URL, or a POSIX file path.", 105 | "type": "string", 106 | "examples": [ 107 | "{\n \"path\": \"file.csv\"\n}\n", 108 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 109 | ], 110 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
111 | }, 112 | "email": { 113 | "title": "Email", 114 | "description": "An email address.", 115 | "type": "string", 116 | "format": "email", 117 | "examples": [ 118 | "{\n \"email\": \"example@example.com\"\n}\n" 119 | ] 120 | }, 121 | "organisation": { 122 | "title": "Organisation", 123 | "description": "An organizational affiliation for this contributor.", 124 | "type": "string" 125 | }, 126 | "role": { 127 | "type": "string", 128 | "enum": [ 129 | "publisher", 130 | "author", 131 | "maintainer", 132 | "wrangler", 133 | "contributor" 134 | ] 135 | } 136 | }, 137 | "required": [ 138 | "title" 139 | ], 140 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 141 | }, 142 | "examples": [ 143 | "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\"\n }\n ]\n}\n", 144 | "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\",\n \"email\": \"joe@example.com\",\n \"role\": \"author\"\n }\n ]\n}\n" 145 | ] 146 | }, 147 | "keywords": { 148 | "propertyOrder": 90, 149 | "title": "Keywords", 150 | "description": "A list of keywords that describe this package.", 151 | "type": "array", 152 | "minItems": 1, 153 | "items": { 154 | "type": "string" 155 | }, 156 | "examples": [ 157 | "{\n \"keywords\": [\n \"data\",\n \"fiscal\",\n \"transparency\"\n ]\n}\n" 158 | ] 159 | }, 160 | "licenses": { 161 | "propertyOrder": 100, 162 | "title": "Licenses", 163 | "description": "The license(s) under which this package is published.", 164 | "type": "array", 165 | "minItems": 1, 166 | "items": { 167 | "title": "License", 168 | "description": "A license for this descriptor.", 169 | "type": "object", 170 | "properties": { 171 | "name": { 172 | "title": "Open Definition license identifier", 173 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 174 | "type": "string", 175 | "pattern": 
"^([-a-zA-Z0-9._])+$" 176 | }, 177 | "path": { 178 | "title": "Path", 179 | "description": "A fully qualified URL, or a POSIX file path..", 180 | "type": "string", 181 | "examples": [ 182 | "{\n \"path\": \"file.csv\"\n}\n", 183 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 184 | ], 185 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 186 | }, 187 | "title": { 188 | "title": "Title", 189 | "description": "A human-readable title.", 190 | "type": "string", 191 | "examples": [ 192 | "{\n \"title\": \"My Package Title\"\n}\n" 193 | ] 194 | } 195 | }, 196 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 197 | }, 198 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 199 | "examples": [ 200 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 201 | ] 202 | }, 203 | "resources": { 204 | "propertyOrder": 110, 205 | "title": "Data Resources", 206 | "description": "An `array` of Data Resource objects, each compliant with the [Data Resource](/data-resource/) specification.", 207 | "type": "array", 208 | "minItems": 1, 209 | "items": { 210 | "title": "Data Resource", 211 | "description": "Data Resource.", 212 | "type": "object", 213 | "oneOf": [ 214 | { 215 | "required": [ 216 | "name", 217 | "data" 218 | ] 219 | }, 220 | { 221 | "required": [ 222 | "name", 223 | "path" 224 | ] 225 | } 226 | ], 227 | "properties": { 228 | "profile": { 229 | "propertyOrder": 10, 230 | "default": "data-resource", 231 | "title": "Profile", 232 | "description": "The profile of this descriptor.", 233 | "context": "Every Package and Resource descriptor has a profile. 
The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 234 | "type": "string", 235 | "examples": [ 236 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 237 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 238 | ] 239 | }, 240 | "name": { 241 | "propertyOrder": 20, 242 | "title": "Name", 243 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 244 | "type": "string", 245 | "pattern": "^([-a-z0-9._/])+$", 246 | "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 247 | "examples": [ 248 | "{\n \"name\": \"my-nice-name\"\n}\n" 249 | ] 250 | }, 251 | "path": { 252 | "propertyOrder": 30, 253 | "title": "Path", 254 | "description": "A reference to the data for this resource, as either a single path string or an array of path strings, each a fully qualified URL or a POSIX file path.", 255 | "oneOf": [ 256 | { 257 | "title": "Path", 258 | "description": "A fully qualified URL, or a POSIX file path.", 259 | "type": "string", 260 | "examples": [ 261 | "{\n \"path\": \"file.csv\"\n}\n", 262 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 263 | ], 264 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 265 | }, 266 | { 267 | "type": "array", 268 | "minItems": 1, 269 | "items": { 270 | "title": "Path", 271 | "description": "A fully qualified URL, or a POSIX file path.", 272 | "type": "string", 273 | "examples": [ 274 | "{\n \"path\": \"file.csv\"\n}\n", 275 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 276 | ], 277 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
278 | }, 279 | "examples": [ 280 | "[ \"file.csv\" ]\n", 281 | "[ \"http://example.com/file.csv\" ]\n" 282 | ] 283 | } 284 | ], 285 | "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.", 286 | "examples": [ 287 | "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n", 288 | "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n", 289 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 290 | ] 291 | }, 292 | "data": { 293 | "propertyOrder": 230, 294 | "title": "Data", 295 | "description": "Inline data for this resource." 296 | }, 297 | "schema": { 298 | "propertyOrder": 40, 299 | "title": "Schema", 300 | "description": "A schema for this resource.", 301 | "type": "object" 302 | }, 303 | "title": { 304 | "propertyOrder": 50, 305 | "title": "Title", 306 | "description": "A human-readable title.", 307 | "type": "string", 308 | "examples": [ 309 | "{\n \"title\": \"My Package Title\"\n}\n" 310 | ] 311 | }, 312 | "description": { 313 | "propertyOrder": 60, 314 | "format": "textarea", 315 | "title": "Description", 316 | "description": "A text description. 
Markdown is encouraged.", 317 | "type": "string", 318 | "examples": [ 319 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 320 | ] 321 | }, 322 | "homepage": { 323 | "propertyOrder": 70, 324 | "title": "Home Page", 325 | "description": "The home on the web that is related to this data package.", 326 | "type": "string", 327 | "format": "uri", 328 | "examples": [ 329 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 330 | ] 331 | }, 332 | "sources": { 333 | "propertyOrder": 140, 334 | "options": { 335 | "hidden": true 336 | }, 337 | "title": "Sources", 338 | "description": "The raw sources for this resource.", 339 | "type": "array", 340 | "minItems": 1, 341 | "items": { 342 | "title": "Source", 343 | "description": "A source file.", 344 | "type": "object", 345 | "required": [ 346 | "title" 347 | ], 348 | "properties": { 349 | "title": { 350 | "title": "Title", 351 | "description": "A human-readable title.", 352 | "type": "string", 353 | "examples": [ 354 | "{\n \"title\": \"My Package Title\"\n}\n" 355 | ] 356 | }, 357 | "path": { 358 | "title": "Path", 359 | "description": "A fully qualified URL, or a POSIX file path.", 360 | "type": "string", 361 | "examples": [ 362 | "{\n \"path\": \"file.csv\"\n}\n", 363 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 364 | ], 365 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
366 | }, 367 | "email": { 368 | "title": "Email", 369 | "description": "An email address.", 370 | "type": "string", 371 | "format": "email", 372 | "examples": [ 373 | "{\n \"email\": \"example@example.com\"\n}\n" 374 | ] 375 | } 376 | } 377 | }, 378 | "examples": [ 379 | "{\n \"sources\": [\n {\n \"title\": \"World Bank and OECD\",\n \"path\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 380 | ] 381 | }, 382 | "licenses": { 383 | "description": "The license(s) under which the resource is published.", 384 | "propertyOrder": 150, 385 | "options": { 386 | "hidden": true 387 | }, 388 | "title": "Licenses", 389 | "type": "array", 390 | "minItems": 1, 391 | "items": { 392 | "title": "License", 393 | "description": "A license for this descriptor.", 394 | "type": "object", 395 | "properties": { 396 | "name": { 397 | "title": "Open Definition license identifier", 398 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 399 | "type": "string", 400 | "pattern": "^([-a-zA-Z0-9._])+$" 401 | }, 402 | "path": { 403 | "title": "Path", 404 | "description": "A fully qualified URL, or a POSIX file path.", 405 | "type": "string", 406 | "examples": [ 407 | "{\n \"path\": \"file.csv\"\n}\n", 408 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 409 | ], 410 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 411 | }, 412 | "title": { 413 | "title": "Title", 414 | "description": "A human-readable title.", 415 | "type": "string", 416 | "examples": [ 417 | "{\n \"title\": \"My Package Title\"\n}\n" 418 | ] 419 | } 420 | }, 421 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself."
422 | }, 423 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 424 | "examples": [ 425 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 426 | ] 427 | }, 428 | "format": { 429 | "propertyOrder": 80, 430 | "title": "Format", 431 | "description": "The file format of this resource.", 432 | "context": "`csv`, `xls`, `json` are examples of common formats.", 433 | "type": "string", 434 | "examples": [ 435 | "{\n \"format\": \"xls\"\n}\n" 436 | ] 437 | }, 438 | "mediatype": { 439 | "propertyOrder": 90, 440 | "title": "Media Type", 441 | "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).", 442 | "type": "string", 443 | "pattern": "^(.+)/(.+)$", 444 | "examples": [ 445 | "{\n \"mediatype\": \"text/csv\"\n}\n" 446 | ] 447 | }, 448 | "encoding": { 449 | "propertyOrder": 100, 450 | "title": "Encoding", 451 | "description": "The file encoding of this resource.", 452 | "type": "string", 453 | "default": "utf-8", 454 | "examples": [ 455 | "{\n \"encoding\": \"utf-8\"\n}\n" 456 | ] 457 | }, 458 | "bytes": { 459 | "propertyOrder": 110, 460 | "options": { 461 | "hidden": true 462 | }, 463 | "title": "Bytes", 464 | "description": "The size of this resource in bytes.", 465 | "type": "integer", 466 | "examples": [ 467 | "{\n \"bytes\": 2082\n}\n" 468 | ] 469 | }, 470 | "hash": { 471 | "propertyOrder": 120, 472 | "options": { 473 | "hidden": true 474 | }, 475 | "title": "Hash", 476 | "type": "string", 477 | "description": "The MD5 hash of this resource. 
Indicate other hashing algorithms with the {algorithm}:{hash} format.", 478 | "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$", 479 | "examples": [ 480 | "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n", 481 | "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n" 482 | ] 483 | } 484 | } 485 | }, 486 | "examples": [ 487 | "{\n \"resources\": [\n {\n \"name\": \"my-data\",\n \"data\": [\n \"data.csv\"\n ],\n \"mediatype\": \"text/csv\"\n }\n ]\n}\n" 488 | ] 489 | }, 490 | "sources": { 491 | "propertyOrder": 200, 492 | "options": { 493 | "hidden": true 494 | }, 495 | "title": "Sources", 496 | "description": "The raw sources for this resource.", 497 | "type": "array", 498 | "minItems": 1, 499 | "items": { 500 | "title": "Source", 501 | "description": "A source file.", 502 | "type": "object", 503 | "required": [ 504 | "title" 505 | ], 506 | "properties": { 507 | "title": { 508 | "title": "Title", 509 | "description": "A human-readable title.", 510 | "type": "string", 511 | "examples": [ 512 | "{\n \"title\": \"My Package Title\"\n}\n" 513 | ] 514 | }, 515 | "path": { 516 | "title": "Path", 517 | "description": "A fully qualified URL, or a POSIX file path.", 518 | "type": "string", 519 | "examples": [ 520 | "{\n \"path\": \"file.csv\"\n}\n", 521 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 522 | ], 523 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
524 | }, 525 | "email": { 526 | "title": "Email", 527 | "description": "An email address.", 528 | "type": "string", 529 | "format": "email", 530 | "examples": [ 531 | "{\n \"email\": \"example@example.com\"\n}\n" 532 | ] 533 | } 534 | } 535 | }, 536 | "examples": [ 537 | "{\n \"sources\": [\n {\n \"title\": \"World Bank and OECD\",\n \"path\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 538 | ] 539 | } 540 | } 541 | } 542 | -------------------------------------------------------------------------------- /lib/profiles/table-schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Table Schema", 4 | "description": "A Table Schema for this resource, compliant with the [Table Schema](/tableschema/) specification.", 5 | "type": "object", 6 | "required": [ 7 | "fields" 8 | ], 9 | "properties": { 10 | "fields": { 11 | "type": "array", 12 | "minItems": 1, 13 | "items": { 14 | "title": "Table Schema Field", 15 | "type": "object", 16 | "anyOf": [ 17 | { 18 | "type": "object", 19 | "title": "String Field", 20 | "description": "The field contains strings, that is, sequences of characters.", 21 | "required": [ 22 | "name" 23 | ], 24 | "properties": { 25 | "name": { 26 | "title": "Name", 27 | "description": "A name for this field.", 28 | "type": "string" 29 | }, 30 | "title": { 31 | "title": "Title", 32 | "description": "A human-readable title.", 33 | "type": "string", 34 | "examples": [ 35 | "{\n \"title\": \"My Package Title\"\n}\n" 36 | ] 37 | }, 38 | "description": { 39 | "title": "Description", 40 | "description": "A text description.
Markdown is encouraged.", 41 | "type": "string", 42 | "examples": [ 43 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 44 | ] 45 | }, 46 | "type": { 47 | "description": "The type keyword, which `MUST` be a value of `string`.", 48 | "enum": [ 49 | "string" 50 | ] 51 | }, 52 | "format": { 53 | "description": "The format keyword options for `string` are `default`, `email`, `uri`, `binary`, and `uuid`.", 54 | "context": "The following `format` options are supported:\n * **default**: any valid string.\n * **email**: A valid email address.\n * **uri**: A valid URI.\n * **binary**: A base64 encoded string representing binary data.\n * **uuid**: A string that is a uuid.", 55 | "enum": [ 56 | "default", 57 | "email", 58 | "uri", 59 | "binary", 60 | "uuid" 61 | ], 62 | "default": "default" 63 | }, 64 | "constraints": { 65 | "title": "Constraints", 66 | "description": "The following constraints are supported for `string` fields.", 67 | "type": "object", 68 | "properties": { 69 | "required": { 70 | "type": "boolean", 71 | "description": "Indicates whether a property must have a value for each instance.", 72 | "context": "An empty string is considered to be a missing value." 73 | }, 74 | "unique": { 75 | "type": "boolean", 76 | "description": "When `true`, each value for the property `MUST` be unique." 77 | }, 78 | "pattern": { 79 | "type": "string", 80 | "description": "A regular expression pattern to test each value of the property against, where a truthy response indicates validity.", 81 | "context": "Regular expressions `SHOULD` conform to the [XML Schema regular expression syntax](http://www.w3.org/TR/xmlschema-2/#regexs)." 82 | }, 83 | "enum": { 84 | "type": "array", 85 | "minItems": 1, 86 | "uniqueItems": true, 87 | "items": { 88 | "type": "string" 89 | } 90 | }, 91 | "minLength": { 92 | "type": "integer", 93 | "description": "An integer that specifies the minimum length of a value." 
94 | }, 95 | "maxLength": { 96 | "type": "integer", 97 | "description": "An integer that specifies the maximum length of a value." 98 | } 99 | } 100 | }, 101 | "rdfType": { 102 | "type": "string", 103 | "description": "The RDF type for this field." 104 | } 105 | }, 106 | "examples": [ 107 | "{\n \"name\": \"name\",\n \"type\": \"string\"\n}\n", 108 | "{\n \"name\": \"name\",\n \"type\": \"string\",\n \"format\": \"email\"\n}\n", 109 | "{\n \"name\": \"name\",\n \"type\": \"string\",\n \"constraints\": {\n \"minLength\": 3,\n \"maxLength\": 35\n }\n}\n" 110 | ] 111 | }, 112 | { 113 | "type": "object", 114 | "title": "Number Field", 115 | "description": "The field contains numbers of any kind including decimals.", 116 | "context": "The lexical formatting follows that of decimal in [XMLSchema](https://www.w3.org/TR/xmlschema-2/#decimal): a non-empty finite-length sequence of decimal digits separated by a period as a decimal indicator. An optional leading sign is allowed. If the sign is omitted, '+' is assumed. Leading and trailing zeroes are optional. If the fractional part is zero, the period and following zero(es) can be omitted. For example: '-1.23', '12678967.543233', '+100000.00', '210'.\n\nThe following special string values are permitted (case does not need to be respected):\n - NaN: not a number\n - INF: positive infinity\n - -INF: negative infinity\n\nA number `MAY` also have a trailing:\n - exponent: this `MUST` consist of an E followed by an optional + or - sign followed by one or more decimal digits (0-9)\n - percentage: the percentage sign: `%`. In conversion percentages should be divided by 100.\n\nIf both exponent and percentages are present the percentage `MUST` follow the exponent e.g. 
'53E10%' (equals 5.3E9).", 117 | "required": [ 118 | "name" 119 | ], 120 | "properties": { 121 | "name": { 122 | "title": "Name", 123 | "description": "A name for this field.", 124 | "type": "string" 125 | }, 126 | "title": { 127 | "title": "Title", 128 | "description": "A human-readable title.", 129 | "type": "string", 130 | "examples": [ 131 | "{\n \"title\": \"My Package Title\"\n}\n" 132 | ] 133 | }, 134 | "description": { 135 | "title": "Description", 136 | "description": "A text description. Markdown is encouraged.", 137 | "type": "string", 138 | "examples": [ 139 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 140 | ] 141 | }, 142 | "type": { 143 | "description": "The type keyword, which `MUST` be a value of `number`.", 144 | "enum": [ 145 | "number" 146 | ] 147 | }, 148 | "format": { 149 | "description": "There are no format keyword options for `number`: only `default` is allowed.", 150 | "enum": [ 151 | "default" 152 | ], 153 | "default": "default" 154 | }, 155 | "bareNumber": { 156 | "type": "boolean", 157 | "title": "bareNumber", 158 | "description": "a boolean field with a default of `true`. If `true` the physical contents of this field must follow the formatting constraints already set out. If `false` the contents of this field may contain leading and/or trailing non-numeric characters (which implementors MUST therefore strip). The purpose of `bareNumber` is to allow publishers to publish numeric data that contains trailing characters such as percentages e.g. `95%` or leading characters such as currencies e.g. `€95` or `EUR 95`. Note that it is entirely up to implementors what, if anything, they do with stripped text.", 159 | "default": true 160 | }, 161 | "decimalChar": { 162 | "type": "string", 163 | "description": "A string whose value is used to represent a decimal point within the number. The default value is `.`." 
164 | }, 165 | "groupChar": { 166 | "type": "string", 167 | "description": "A string whose value is used to group digits within the number. The default value is `null`. A common value is `,` e.g. '100,000'." 168 | }, 169 | "constraints": { 170 | "title": "Constraints", 171 | "description": "The following constraints are supported for `number` fields.", 172 | "type": "object", 173 | "properties": { 174 | "required": { 175 | "type": "boolean", 176 | "description": "Indicates whether a property must have a value for each instance.", 177 | "context": "An empty string is considered to be a missing value." 178 | }, 179 | "unique": { 180 | "type": "boolean", 181 | "description": "When `true`, each value for the property `MUST` be unique." 182 | }, 183 | "pattern": { 184 | "type": "string", 185 | "description": "A regular expression pattern to test each value of the property against, where a truthy response indicates validity.", 186 | "context": "Regular expressions `SHOULD` conform to the [XML Schema regular expression syntax](http://www.w3.org/TR/xmlschema-2/#regexs)." 187 | }, 188 | "enum": { 189 | "oneOf": [ 190 | { 191 | "type": "array", 192 | "minItems": 1, 193 | "uniqueItems": true, 194 | "items": { 195 | "type": "string" 196 | } 197 | }, 198 | { 199 | "type": "array", 200 | "minItems": 1, 201 | "uniqueItems": true, 202 | "items": { 203 | "type": "number" 204 | } 205 | } 206 | ] 207 | }, 208 | "minimum": { 209 | "oneOf": [ 210 | { 211 | "type": "string" 212 | }, 213 | { 214 | "type": "number" 215 | } 216 | ] 217 | }, 218 | "maximum": { 219 | "oneOf": [ 220 | { 221 | "type": "string" 222 | }, 223 | { 224 | "type": "number" 225 | } 226 | ] 227 | } 228 | } 229 | }, 230 | "rdfType": { 231 | "type": "string", 232 | "description": "The RDF type for this field." 
233 | } 234 | }, 235 | "examples": [ 236 | "{\n \"name\": \"field-name\",\n \"type\": \"number\"\n}\n", 237 | "{\n \"name\": \"field-name\",\n \"type\": \"number\",\n \"constraints\": {\n \"enum\": [ \"1.00\", \"1.50\", \"2.00\" ]\n }\n}\n" 238 | ] 239 | }, 240 | { 241 | "type": "object", 242 | "title": "Integer Field", 243 | "description": "The field contains integers - that is whole numbers.", 244 | "context": "Integer values are indicated in the standard way for any valid integer.", 245 | "required": [ 246 | "name", 247 | "type" 248 | ], 249 | "properties": { 250 | "name": { 251 | "title": "Name", 252 | "description": "A name for this field.", 253 | "type": "string" 254 | }, 255 | "title": { 256 | "title": "Title", 257 | "description": "A human-readable title.", 258 | "type": "string", 259 | "examples": [ 260 | "{\n \"title\": \"My Package Title\"\n}\n" 261 | ] 262 | }, 263 | "description": { 264 | "title": "Description", 265 | "description": "A text description. Markdown is encouraged.", 266 | "type": "string", 267 | "examples": [ 268 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 269 | ] 270 | }, 271 | "type": { 272 | "description": "The type keyword, which `MUST` be a value of `integer`.", 273 | "enum": [ 274 | "integer" 275 | ] 276 | }, 277 | "format": { 278 | "description": "There are no format keyword options for `integer`: only `default` is allowed.", 279 | "enum": [ 280 | "default" 281 | ], 282 | "default": "default" 283 | }, 284 | "bareNumber": { 285 | "type": "boolean", 286 | "title": "bareNumber", 287 | "description": "a boolean field with a default of `true`. If `true` the physical contents of this field must follow the formatting constraints already set out. If `false` the contents of this field may contain leading and/or trailing non-numeric characters (which implementors MUST therefore strip). 
The purpose of `bareNumber` is to allow publishers to publish numeric data that contains trailing characters such as percentages e.g. `95%` or leading characters such as currencies e.g. `€95` or `EUR 95`. Note that it is entirely up to implementors what, if anything, they do with stripped text.", 288 | "default": true 289 | }, 290 | "constraints": { 291 | "title": "Constraints", 292 | "description": "The following constraints are supported for `integer` fields.", 293 | "type": "object", 294 | "properties": { 295 | "required": { 296 | "type": "boolean", 297 | "description": "Indicates whether a property must have a value for each instance.", 298 | "context": "An empty string is considered to be a missing value." 299 | }, 300 | "unique": { 301 | "type": "boolean", 302 | "description": "When `true`, each value for the property `MUST` be unique." 303 | }, 304 | "pattern": { 305 | "type": "string", 306 | "description": "A regular expression pattern to test each value of the property against, where a truthy response indicates validity.", 307 | "context": "Regular expressions `SHOULD` conform to the [XML Schema regular expression syntax](http://www.w3.org/TR/xmlschema-2/#regexs)." 308 | }, 309 | "enum": { 310 | "oneOf": [ 311 | { 312 | "type": "array", 313 | "minItems": 1, 314 | "uniqueItems": true, 315 | "items": { 316 | "type": "string" 317 | } 318 | }, 319 | { 320 | "type": "array", 321 | "minItems": 1, 322 | "uniqueItems": true, 323 | "items": { 324 | "type": "integer" 325 | } 326 | } 327 | ] 328 | }, 329 | "minimum": { 330 | "oneOf": [ 331 | { 332 | "type": "string" 333 | }, 334 | { 335 | "type": "integer" 336 | } 337 | ] 338 | }, 339 | "maximum": { 340 | "oneOf": [ 341 | { 342 | "type": "string" 343 | }, 344 | { 345 | "type": "integer" 346 | } 347 | ] 348 | } 349 | } 350 | }, 351 | "rdfType": { 352 | "type": "string", 353 | "description": "The RDF type for this field." 
354 | } 355 | }, 356 | "examples": [ 357 | "{\n \"name\": \"age\",\n \"type\": \"integer\",\n \"constraints\": {\n \"unique\": true,\n \"minimum\": 100,\n \"maximum\": 9999\n }\n}\n" 358 | ] 359 | }, 360 | { 361 | "type": "object", 362 | "title": "Date Field", 363 | "description": "The field contains temporal date values.", 364 | "required": [ 365 | "name", 366 | "type" 367 | ], 368 | "properties": { 369 | "name": { 370 | "title": "Name", 371 | "description": "A name for this field.", 372 | "type": "string" 373 | }, 374 | "title": { 375 | "title": "Title", 376 | "description": "A human-readable title.", 377 | "type": "string", 378 | "examples": [ 379 | "{\n \"title\": \"My Package Title\"\n}\n" 380 | ] 381 | }, 382 | "description": { 383 | "title": "Description", 384 | "description": "A text description. Markdown is encouraged.", 385 | "type": "string", 386 | "examples": [ 387 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 388 | ] 389 | }, 390 | "type": { 391 | "description": "The type keyword, which `MUST` be a value of `date`.", 392 | "enum": [ 393 | "date" 394 | ] 395 | }, 396 | "format": { 397 | "description": "The format keyword options for `date` are `default`, `any`, and `{PATTERN}`.", 398 | "context": "The following `format` options are supported:\n * **default**: An ISO8601 format string of YYYY-MM-DD.\n * **any**: Any parsable representation of a date. 
The implementing library can attempt to parse the datetime via a range of strategies.\n * **{PATTERN}**: The value can be parsed according to `{PATTERN}`, which `MUST` follow the date formatting syntax of C / Python [strftime](http://strftime.org/).", 399 | "default": "default" 400 | }, 401 | "constraints": { 402 | "title": "Constraints", 403 | "description": "The following constraints are supported for `date` fields.", 404 | "type": "object", 405 | "properties": { 406 | "required": { 407 | "type": "boolean", 408 | "description": "Indicates whether a property must have a value for each instance.", 409 | "context": "An empty string is considered to be a missing value." 410 | }, 411 | "unique": { 412 | "type": "boolean", 413 | "description": "When `true`, each value for the property `MUST` be unique." 414 | }, 415 | "enum": { 416 | "type": "array", 417 | "minItems": 1, 418 | "uniqueItems": true, 419 | "items": { 420 | "type": "string" 421 | } 422 | }, 423 | "minimum": { 424 | "type": "string" 425 | }, 426 | "maximum": { 427 | "type": "string" 428 | } 429 | } 430 | }, 431 | "rdfType": { 432 | "type": "string", 433 | "description": "The RDF type for this field." 
434 | } 435 | }, 436 | "examples": [ 437 | "{\n \"name\": \"date_of_birth\",\n \"type\": \"date\"\n}\n", 438 | "{\n \"name\": \"date_of_birth\",\n \"type\": \"date\",\n \"constraints\": {\n \"minimum\": \"01-01-1900\"\n }\n}\n", 439 | "{\n \"name\": \"date_of_birth\",\n \"type\": \"date\",\n \"format\": \"MM-DD-YYYY\"\n}\n" 440 | ] 441 | }, 442 | { 443 | "type": "object", 444 | "title": "Time Field", 445 | "description": "The field contains temporal time values.", 446 | "required": [ 447 | "name", 448 | "type" 449 | ], 450 | "properties": { 451 | "name": { 452 | "title": "Name", 453 | "description": "A name for this field.", 454 | "type": "string" 455 | }, 456 | "title": { 457 | "title": "Title", 458 | "description": "A human-readable title.", 459 | "type": "string", 460 | "examples": [ 461 | "{\n \"title\": \"My Package Title\"\n}\n" 462 | ] 463 | }, 464 | "description": { 465 | "title": "Description", 466 | "description": "A text description. Markdown is encouraged.", 467 | "type": "string", 468 | "examples": [ 469 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 470 | ] 471 | }, 472 | "type": { 473 | "description": "The type keyword, which `MUST` be a value of `time`.", 474 | "enum": [ 475 | "time" 476 | ] 477 | }, 478 | "format": { 479 | "description": "The format keyword options for `time` are `default`, `any`, and `{PATTERN}`.", 480 | "context": "The following `format` options are supported:\n * **default**: An ISO8601 format string for time.\n * **any**: Any parsable representation of a date. 
The implementing library can attempt to parse the datetime via a range of strategies.\n * **{PATTERN}**: The value can be parsed according to `{PATTERN}`, which `MUST` follow the date formatting syntax of C / Python [strftime](http://strftime.org/).", 481 | "default": "default" 482 | }, 483 | "constraints": { 484 | "title": "Constraints", 485 | "description": "The following constraints are supported for `time` fields.", 486 | "type": "object", 487 | "properties": { 488 | "required": { 489 | "type": "boolean", 490 | "description": "Indicates whether a property must have a value for each instance.", 491 | "context": "An empty string is considered to be a missing value." 492 | }, 493 | "unique": { 494 | "type": "boolean", 495 | "description": "When `true`, each value for the property `MUST` be unique." 496 | }, 497 | "enum": { 498 | "type": "array", 499 | "minItems": 1, 500 | "uniqueItems": true, 501 | "items": { 502 | "type": "string" 503 | } 504 | }, 505 | "minimum": { 506 | "type": "string" 507 | }, 508 | "maximum": { 509 | "type": "string" 510 | } 511 | } 512 | }, 513 | "rdfType": { 514 | "type": "string", 515 | "description": "The RDF type for this field." 
516 | } 517 | }, 518 | "examples": [ 519 | "{\n \"name\": \"appointment_start\",\n \"type\": \"time\"\n}\n", 520 | "{\n \"name\": \"appointment_start\",\n \"type\": \"time\",\n \"format\": \"any\"\n}\n" 521 | ] 522 | }, 523 | { 524 | "type": "object", 525 | "title": "Date Time Field", 526 | "description": "The field contains temporal datetime values.", 527 | "required": [ 528 | "name", 529 | "type" 530 | ], 531 | "properties": { 532 | "name": { 533 | "title": "Name", 534 | "description": "A name for this field.", 535 | "type": "string" 536 | }, 537 | "title": { 538 | "title": "Title", 539 | "description": "A human-readable title.", 540 | "type": "string", 541 | "examples": [ 542 | "{\n \"title\": \"My Package Title\"\n}\n" 543 | ] 544 | }, 545 | "description": { 546 | "title": "Description", 547 | "description": "A text description. Markdown is encouraged.", 548 | "type": "string", 549 | "examples": [ 550 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 551 | ] 552 | }, 553 | "type": { 554 | "description": "The type keyword, which `MUST` be a value of `datetime`.", 555 | "enum": [ 556 | "datetime" 557 | ] 558 | }, 559 | "format": { 560 | "description": "The format keyword options for `datetime` are `default`, `any`, and `{PATTERN}`.", 561 | "context": "The following `format` options are supported:\n * **default**: An ISO8601 format string for datetime.\n * **any**: Any parsable representation of a date. 
The implementing library can attempt to parse the datetime via a range of strategies.\n * **{PATTERN}**: The value can be parsed according to `{PATTERN}`, which `MUST` follow the date formatting syntax of C / Python [strftime](http://strftime.org/).", 562 | "default": "default" 563 | }, 564 | "constraints": { 565 | "title": "Constraints", 566 | "description": "The following constraints are supported for `datetime` fields.", 567 | "type": "object", 568 | "properties": { 569 | "required": { 570 | "type": "boolean", 571 | "description": "Indicates whether a property must have a value for each instance.", 572 | "context": "An empty string is considered to be a missing value." 573 | }, 574 | "unique": { 575 | "type": "boolean", 576 | "description": "When `true`, each value for the property `MUST` be unique." 577 | }, 578 | "enum": { 579 | "type": "array", 580 | "minItems": 1, 581 | "uniqueItems": true, 582 | "items": { 583 | "type": "string" 584 | } 585 | }, 586 | "minimum": { 587 | "type": "string" 588 | }, 589 | "maximum": { 590 | "type": "string" 591 | } 592 | } 593 | }, 594 | "rdfType": { 595 | "type": "string", 596 | "description": "The RDF type for this field." 597 | } 598 | }, 599 | "examples": [ 600 | "{\n \"name\": \"timestamp\",\n \"type\": \"datetime\"\n}\n", 601 | "{\n \"name\": \"timestamp\",\n \"type\": \"datetime\",\n \"format\": \"default\"\n}\n" 602 | ] 603 | }, 604 | { 605 | "type": "object", 606 | "title": "Year Field", 607 | "description": "A calendar year, being an integer with 4 digits. 
Equivalent to [gYear in XML Schema](https://www.w3.org/TR/xmlschema-2/#gYear)", 608 | "required": [ 609 | "name", 610 | "type" 611 | ], 612 | "properties": { 613 | "name": { 614 | "title": "Name", 615 | "description": "A name for this field.", 616 | "type": "string" 617 | }, 618 | "title": { 619 | "title": "Title", 620 | "description": "A human-readable title.", 621 | "type": "string", 622 | "examples": [ 623 | "{\n \"title\": \"My Package Title\"\n}\n" 624 | ] 625 | }, 626 | "description": { 627 | "title": "Description", 628 | "description": "A text description. Markdown is encouraged.", 629 | "type": "string", 630 | "examples": [ 631 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 632 | ] 633 | }, 634 | "type": { 635 | "description": "The type keyword, which `MUST` be a value of `year`.", 636 | "enum": [ 637 | "year" 638 | ] 639 | }, 640 | "format": { 641 | "description": "There are no format keyword options for `year`: only `default` is allowed.", 642 | "enum": [ 643 | "default" 644 | ], 645 | "default": "default" 646 | }, 647 | "constraints": { 648 | "title": "Constraints", 649 | "description": "The following constraints are supported for `year` fields.", 650 | "type": "object", 651 | "properties": { 652 | "required": { 653 | "type": "boolean", 654 | "description": "Indicates whether a property must have a value for each instance.", 655 | "context": "An empty string is considered to be a missing value." 656 | }, 657 | "unique": { 658 | "type": "boolean", 659 | "description": "When `true`, each value for the property `MUST` be unique." 
660 | }, 661 | "enum": { 662 | "oneOf": [ 663 | { 664 | "type": "array", 665 | "minItems": 1, 666 | "uniqueItems": true, 667 | "items": { 668 | "type": "string" 669 | } 670 | }, 671 | { 672 | "type": "array", 673 | "minItems": 1, 674 | "uniqueItems": true, 675 | "items": { 676 | "type": "integer" 677 | } 678 | } 679 | ] 680 | }, 681 | "minimum": { 682 | "oneOf": [ 683 | { 684 | "type": "string" 685 | }, 686 | { 687 | "type": "integer" 688 | } 689 | ] 690 | }, 691 | "maximum": { 692 | "oneOf": [ 693 | { 694 | "type": "string" 695 | }, 696 | { 697 | "type": "integer" 698 | } 699 | ] 700 | } 701 | } 702 | }, 703 | "rdfType": { 704 | "type": "string", 705 | "description": "The RDF type for this field." 706 | } 707 | }, 708 | "examples": [ 709 | "{\n \"name\": \"year\",\n \"type\": \"year\"\n}\n", 710 | "{\n \"name\": \"year\",\n \"type\": \"year\",\n \"constraints\": {\n \"minimum\": 1970,\n \"maximum\": 2003\n }\n}\n" 711 | ] 712 | }, 713 | { 714 | "type": "object", 715 | "title": "Year Month Field", 716 | "description": "A calendar year month, being an integer with 1 or 2 digits. Equivalent to [gYearMonth in XML Schema](https://www.w3.org/TR/xmlschema-2/#gYearMonth)", 717 | "required": [ 718 | "name", 719 | "type" 720 | ], 721 | "properties": { 722 | "name": { 723 | "title": "Name", 724 | "description": "A name for this field.", 725 | "type": "string" 726 | }, 727 | "title": { 728 | "title": "Title", 729 | "description": "A human-readable title.", 730 | "type": "string", 731 | "examples": [ 732 | "{\n \"title\": \"My Package Title\"\n}\n" 733 | ] 734 | }, 735 | "description": { 736 | "title": "Description", 737 | "description": "A text description. 
Markdown is encouraged.", 738 | "type": "string", 739 | "examples": [ 740 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 741 | ] 742 | }, 743 | "type": { 744 | "description": "The type keyword, which `MUST` be a value of `yearmonth`.", 745 | "enum": [ 746 | "yearmonth" 747 | ] 748 | }, 749 | "format": { 750 | "description": "There are no format keyword options for `yearmonth`: only `default` is allowed.", 751 | "enum": [ 752 | "default" 753 | ], 754 | "default": "default" 755 | }, 756 | "constraints": { 757 | "title": "Constraints", 758 | "description": "The following constraints are supported for `yearmonth` fields.", 759 | "type": "object", 760 | "properties": { 761 | "required": { 762 | "type": "boolean", 763 | "description": "Indicates whether a property must have a value for each instance.", 764 | "context": "An empty string is considered to be a missing value." 765 | }, 766 | "unique": { 767 | "type": "boolean", 768 | "description": "When `true`, each value for the property `MUST` be unique." 769 | }, 770 | "pattern": { 771 | "type": "string", 772 | "description": "A regular expression pattern to test each value of the property against, where a truthy response indicates validity.", 773 | "context": "Regular expressions `SHOULD` conform to the [XML Schema regular expression syntax](http://www.w3.org/TR/xmlschema-2/#regexs)." 774 | }, 775 | "enum": { 776 | "type": "array", 777 | "minItems": 1, 778 | "uniqueItems": true, 779 | "items": { 780 | "type": "string" 781 | } 782 | }, 783 | "minimum": { 784 | "type": "string" 785 | }, 786 | "maximum": { 787 | "type": "string" 788 | } 789 | } 790 | }, 791 | "rdfType": { 792 | "type": "string", 793 | "description": "The RDF type for this field." 
794 | } 795 | }, 796 | "examples": [ 797 | "{\n \"name\": \"month\",\n \"type\": \"yearmonth\"\n}\n", 798 | "{\n \"name\": \"month\",\n \"type\": \"yearmonth\",\n \"constraints\": {\n \"minimum\": 1,\n \"maximum\": 6\n }\n}\n" 799 | ] 800 | }, 801 | { 802 | "type": "object", 803 | "title": "Boolean Field", 804 | "description": "The field contains boolean (true/false) data.", 805 | "required": [ 806 | "name", 807 | "type" 808 | ], 809 | "properties": { 810 | "name": { 811 | "title": "Name", 812 | "description": "A name for this field.", 813 | "type": "string" 814 | }, 815 | "title": { 816 | "title": "Title", 817 | "description": "A human-readable title.", 818 | "type": "string", 819 | "examples": [ 820 | "{\n \"title\": \"My Package Title\"\n}\n" 821 | ] 822 | }, 823 | "description": { 824 | "title": "Description", 825 | "description": "A text description. Markdown is encouraged.", 826 | "type": "string", 827 | "examples": [ 828 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 829 | ] 830 | }, 831 | "type": { 832 | "description": "The type keyword, which `MUST` be a value of `boolean`.", 833 | "enum": [ 834 | "boolean" 835 | ] 836 | }, 837 | "trueValues": { 838 | "type": "array", 839 | "minItems": 1, 840 | "items": { 841 | "type": "string" 842 | }, 843 | "default": [ 844 | "true", 845 | "True", 846 | "TRUE", 847 | "1" 848 | ] 849 | }, 850 | "falseValues": { 851 | "type": "array", 852 | "minItems": 1, 853 | "items": { 854 | "type": "string" 855 | }, 856 | "default": [ 857 | "false", 858 | "False", 859 | "FALSE", 860 | "0" 861 | ] 862 | }, 863 | "constraints": { 864 | "title": "Constraints", 865 | "description": "The following constraints are supported for `boolean` fields.", 866 | "type": "object", 867 | "properties": { 868 | "required": { 869 | "type": "boolean", 870 | "description": "Indicates whether a property must have a value for each instance.", 871 | "context": "An empty string is considered to be a missing value." 
872 | }, 873 | "enum": { 874 | "type": "array", 875 | "minItems": 1, 876 | "uniqueItems": true, 877 | "items": { 878 | "type": "boolean" 879 | } 880 | } 881 | } 882 | }, 883 | "rdfType": { 884 | "type": "string", 885 | "description": "The RDF type for this field." 886 | } 887 | }, 888 | "examples": [ 889 | "{\n \"name\": \"registered\",\n \"type\": \"boolean\"\n}\n" 890 | ] 891 | }, 892 | { 893 | "type": "object", 894 | "title": "Object Field", 895 | "description": "The field contains data which can be parsed as a valid JSON object.", 896 | "required": [ 897 | "name", 898 | "type" 899 | ], 900 | "properties": { 901 | "name": { 902 | "title": "Name", 903 | "description": "A name for this field.", 904 | "type": "string" 905 | }, 906 | "title": { 907 | "title": "Title", 908 | "description": "A human-readable title.", 909 | "type": "string", 910 | "examples": [ 911 | "{\n \"title\": \"My Package Title\"\n}\n" 912 | ] 913 | }, 914 | "description": { 915 | "title": "Description", 916 | "description": "A text description. Markdown is encouraged.", 917 | "type": "string", 918 | "examples": [ 919 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 920 | ] 921 | }, 922 | "type": { 923 | "description": "The type keyword, which `MUST` be a value of `object`.", 924 | "enum": [ 925 | "object" 926 | ] 927 | }, 928 | "format": { 929 | "description": "There are no format keyword options for `object`: only `default` is allowed.", 930 | "enum": [ 931 | "default" 932 | ], 933 | "default": "default" 934 | }, 935 | "constraints": { 936 | "title": "Constraints", 937 | "description": "The following constraints apply for `object` fields.", 938 | "type": "object", 939 | "properties": { 940 | "required": { 941 | "type": "boolean", 942 | "description": "Indicates whether a property must have a value for each instance.", 943 | "context": "An empty string is considered to be a missing value." 
944 | }, 945 | "unique": { 946 | "type": "boolean", 947 | "description": "When `true`, each value for the property `MUST` be unique." 948 | }, 949 | "enum": { 950 | "oneOf": [ 951 | { 952 | "type": "array", 953 | "minItems": 1, 954 | "uniqueItems": true, 955 | "items": { 956 | "type": "string" 957 | } 958 | }, 959 | { 960 | "type": "array", 961 | "minItems": 1, 962 | "uniqueItems": true, 963 | "items": { 964 | "type": "object" 965 | } 966 | } 967 | ] 968 | }, 969 | "minLength": { 970 | "type": "integer", 971 | "description": "An integer that specifies the minimum length of a value." 972 | }, 973 | "maxLength": { 974 | "type": "integer", 975 | "description": "An integer that specifies the maximum length of a value." 976 | } 977 | } 978 | }, 979 | "rdfType": { 980 | "type": "string", 981 | "description": "The RDF type for this field." 982 | } 983 | }, 984 | "examples": [ 985 | "{\n \"name\": \"extra\",\n \"type\": \"object\"\n}\n" 986 | ] 987 | }, 988 | { 989 | "type": "object", 990 | "title": "GeoPoint Field", 991 | "description": "The field contains data describing a geographic point.", 992 | "required": [ 993 | "name", 994 | "type" 995 | ], 996 | "properties": { 997 | "name": { 998 | "title": "Name", 999 | "description": "A name for this field.", 1000 | "type": "string" 1001 | }, 1002 | "title": { 1003 | "title": "Title", 1004 | "description": "A human-readable title.", 1005 | "type": "string", 1006 | "examples": [ 1007 | "{\n \"title\": \"My Package Title\"\n}\n" 1008 | ] 1009 | }, 1010 | "description": { 1011 | "title": "Description", 1012 | "description": "A text description. 
Markdown is encouraged.", 1013 | "type": "string", 1014 | "examples": [ 1015 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 1016 | ] 1017 | }, 1018 | "type": { 1019 | "description": "The type keyword, which `MUST` be a value of `geopoint`.", 1020 | "enum": [ 1021 | "geopoint" 1022 | ] 1023 | }, 1024 | "format": { 1025 | "description": "The format keyword options for `geopoint` are `default`,`array`, and `object`.", 1026 | "context": "The following `format` options are supported:\n * **default**: A string of the pattern 'lon, lat', where `lon` is the longitude and `lat` is the latitude.\n * **array**: An array of exactly two items, where each item is either a number, or a string parsable as a number, and the first item is `lon` and the second item is `lat`.\n * **object**: A JSON object with exactly two keys, `lat` and `lon`", 1027 | "notes": [ 1028 | "Implementations `MUST` strip all white space in the default format of `lon, lat`." 1029 | ], 1030 | "enum": [ 1031 | "default", 1032 | "array", 1033 | "object" 1034 | ], 1035 | "default": "default" 1036 | }, 1037 | "constraints": { 1038 | "title": "Constraints", 1039 | "description": "The following constraints are supported for `geopoint` fields.", 1040 | "type": "object", 1041 | "properties": { 1042 | "required": { 1043 | "type": "boolean", 1044 | "description": "Indicates whether a property must have a value for each instance.", 1045 | "context": "An empty string is considered to be a missing value." 1046 | }, 1047 | "unique": { 1048 | "type": "boolean", 1049 | "description": "When `true`, each value for the property `MUST` be unique." 
1050 | }, 1051 | "enum": { 1052 | "oneOf": [ 1053 | { 1054 | "type": "array", 1055 | "minItems": 1, 1056 | "uniqueItems": true, 1057 | "items": { 1058 | "type": "string" 1059 | } 1060 | }, 1061 | { 1062 | "type": "array", 1063 | "minItems": 1, 1064 | "uniqueItems": true, 1065 | "items": { 1066 | "type": "array" 1067 | } 1068 | }, 1069 | { 1070 | "type": "array", 1071 | "minItems": 1, 1072 | "uniqueItems": true, 1073 | "items": { 1074 | "type": "object" 1075 | } 1076 | } 1077 | ] 1078 | } 1079 | } 1080 | }, 1081 | "rdfType": { 1082 | "type": "string", 1083 | "description": "The RDF type for this field." 1084 | } 1085 | }, 1086 | "examples": [ 1087 | "{\n \"name\": \"post_office\",\n \"type\": \"geopoint\"\n}\n", 1088 | "{\n \"name\": \"post_office\",\n \"type\": \"geopoint\",\n \"format\": \"array\"\n}\n" 1089 | ] 1090 | }, 1091 | { 1092 | "type": "object", 1093 | "title": "GeoJSON Field", 1094 | "description": "The field contains a JSON object according to GeoJSON or TopoJSON", 1095 | "required": [ 1096 | "name", 1097 | "type" 1098 | ], 1099 | "properties": { 1100 | "name": { 1101 | "title": "Name", 1102 | "description": "A name for this field.", 1103 | "type": "string" 1104 | }, 1105 | "title": { 1106 | "title": "Title", 1107 | "description": "A human-readable title.", 1108 | "type": "string", 1109 | "examples": [ 1110 | "{\n \"title\": \"My Package Title\"\n}\n" 1111 | ] 1112 | }, 1113 | "description": { 1114 | "title": "Description", 1115 | "description": "A text description. 
Markdown is encouraged.", 1116 | "type": "string", 1117 | "examples": [ 1118 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 1119 | ] 1120 | }, 1121 | "type": { 1122 | "description": "The type keyword, which `MUST` be a value of `geojson`.", 1123 | "enum": [ 1124 | "geojson" 1125 | ] 1126 | }, 1127 | "format": { 1128 | "description": "The format keyword options for `geojson` are `default` and `topojson`.", 1129 | "context": "The following `format` options are supported:\n * **default**: A geojson object as per the [GeoJSON spec](http://geojson.org/).\n * **topojson**: A topojson object as per the [TopoJSON spec](https://github.com/topojson/topojson-specification/blob/master/README.md)", 1130 | "enum": [ 1131 | "default", 1132 | "topojson" 1133 | ], 1134 | "default": "default" 1135 | }, 1136 | "constraints": { 1137 | "title": "Constraints", 1138 | "description": "The following constraints are supported for `geojson` fields.", 1139 | "type": "object", 1140 | "properties": { 1141 | "required": { 1142 | "type": "boolean", 1143 | "description": "Indicates whether a property must have a value for each instance.", 1144 | "context": "An empty string is considered to be a missing value." 1145 | }, 1146 | "unique": { 1147 | "type": "boolean", 1148 | "description": "When `true`, each value for the property `MUST` be unique." 1149 | }, 1150 | "enum": { 1151 | "oneOf": [ 1152 | { 1153 | "type": "array", 1154 | "minItems": 1, 1155 | "uniqueItems": true, 1156 | "items": { 1157 | "type": "string" 1158 | } 1159 | }, 1160 | { 1161 | "type": "array", 1162 | "minItems": 1, 1163 | "uniqueItems": true, 1164 | "items": { 1165 | "type": "object" 1166 | } 1167 | } 1168 | ] 1169 | }, 1170 | "minLength": { 1171 | "type": "integer", 1172 | "description": "An integer that specifies the minimum length of a value." 1173 | }, 1174 | "maxLength": { 1175 | "type": "integer", 1176 | "description": "An integer that specifies the maximum length of a value." 
1177 | } 1178 | } 1179 | }, 1180 | "rdfType": { 1181 | "type": "string", 1182 | "description": "The RDF type for this field." 1183 | } 1184 | }, 1185 | "examples": [ 1186 | "{\n \"name\": \"city_limits\",\n \"type\": \"geojson\"\n}\n", 1187 | "{\n \"name\": \"city_limits\",\n \"type\": \"geojson\",\n \"format\": \"topojson\"\n}\n" 1188 | ] 1189 | }, 1190 | { 1191 | "type": "object", 1192 | "title": "Array Field", 1193 | "description": "The field contains data which can be parsed as a valid JSON array.", 1194 | "required": [ 1195 | "name", 1196 | "type" 1197 | ], 1198 | "properties": { 1199 | "name": { 1200 | "title": "Name", 1201 | "description": "A name for this field.", 1202 | "type": "string" 1203 | }, 1204 | "title": { 1205 | "title": "Title", 1206 | "description": "A human-readable title.", 1207 | "type": "string", 1208 | "examples": [ 1209 | "{\n \"title\": \"My Package Title\"\n}\n" 1210 | ] 1211 | }, 1212 | "description": { 1213 | "title": "Description", 1214 | "description": "A text description. Markdown is encouraged.", 1215 | "type": "string", 1216 | "examples": [ 1217 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 1218 | ] 1219 | }, 1220 | "type": { 1221 | "description": "The type keyword, which `MUST` be a value of `array`.", 1222 | "enum": [ 1223 | "array" 1224 | ] 1225 | }, 1226 | "format": { 1227 | "description": "There are no format keyword options for `array`: only `default` is allowed.", 1228 | "enum": [ 1229 | "default" 1230 | ], 1231 | "default": "default" 1232 | }, 1233 | "constraints": { 1234 | "title": "Constraints", 1235 | "description": "The following constraints apply for `array` fields.", 1236 | "type": "object", 1237 | "properties": { 1238 | "required": { 1239 | "type": "boolean", 1240 | "description": "Indicates whether a property must have a value for each instance.", 1241 | "context": "An empty string is considered to be a missing value." 
1242 | }, 1243 | "unique": { 1244 | "type": "boolean", 1245 | "description": "When `true`, each value for the property `MUST` be unique." 1246 | }, 1247 | "enum": { 1248 | "oneOf": [ 1249 | { 1250 | "type": "array", 1251 | "minItems": 1, 1252 | "uniqueItems": true, 1253 | "items": { 1254 | "type": "string" 1255 | } 1256 | }, 1257 | { 1258 | "type": "array", 1259 | "minItems": 1, 1260 | "uniqueItems": true, 1261 | "items": { 1262 | "type": "array" 1263 | } 1264 | } 1265 | ] 1266 | }, 1267 | "minLength": { 1268 | "type": "integer", 1269 | "description": "An integer that specifies the minimum length of a value." 1270 | }, 1271 | "maxLength": { 1272 | "type": "integer", 1273 | "description": "An integer that specifies the maximum length of a value." 1274 | } 1275 | } 1276 | }, 1277 | "rdfType": { 1278 | "type": "string", 1279 | "description": "The RDF type for this field." 1280 | } 1281 | }, 1282 | "examples": [ 1283 | "{\n \"name\": \"options\",\n \"type\": \"array\"\n}\n" 1284 | ] 1285 | }, 1286 | { 1287 | "type": "object", 1288 | "title": "Duration Field", 1289 | "description": "The field contains a duration of time.", 1290 | "context": "The lexical representation for duration is the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) extended format `PnYnMnDTnHnMnS`, where `nY` represents the number of years, `nM` the number of months, `nD` the number of days, 'T' is the date/time separator, `nH` the number of hours, `nM` the number of minutes and `nS` the number of seconds. The number of seconds can include decimal digits to arbitrary precision. Date and time elements including their designator may be omitted if their value is zero, and lower order elements may also be omitted for reduced precision. 
Here we follow the definition of [XML Schema duration datatype](http://www.w3.org/TR/xmlschema-2/#duration) directly and that definition is implicitly inlined here.", 1291 | "required": [ 1292 | "name", 1293 | "type" 1294 | ], 1295 | "properties": { 1296 | "name": { 1297 | "title": "Name", 1298 | "description": "A name for this field.", 1299 | "type": "string" 1300 | }, 1301 | "title": { 1302 | "title": "Title", 1303 | "description": "A human-readable title.", 1304 | "type": "string", 1305 | "examples": [ 1306 | "{\n \"title\": \"My Package Title\"\n}\n" 1307 | ] 1308 | }, 1309 | "description": { 1310 | "title": "Description", 1311 | "description": "A text description. Markdown is encouraged.", 1312 | "type": "string", 1313 | "examples": [ 1314 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 1315 | ] 1316 | }, 1317 | "type": { 1318 | "description": "The type keyword, which `MUST` be a value of `duration`.", 1319 | "enum": [ 1320 | "duration" 1321 | ] 1322 | }, 1323 | "format": { 1324 | "description": "There are no format keyword options for `duration`: only `default` is allowed.", 1325 | "enum": [ 1326 | "default" 1327 | ], 1328 | "default": "default" 1329 | }, 1330 | "constraints": { 1331 | "title": "Constraints", 1332 | "description": "The following constraints are supported for `duration` fields.", 1333 | "type": "object", 1334 | "properties": { 1335 | "required": { 1336 | "type": "boolean", 1337 | "description": "Indicates whether a property must have a value for each instance.", 1338 | "context": "An empty string is considered to be a missing value." 1339 | }, 1340 | "unique": { 1341 | "type": "boolean", 1342 | "description": "When `true`, each value for the property `MUST` be unique." 
1343 | }, 1344 | "enum": { 1345 | "type": "array", 1346 | "minItems": 1, 1347 | "uniqueItems": true, 1348 | "items": { 1349 | "type": "string" 1350 | } 1351 | }, 1352 | "minimum": { 1353 | "type": "string" 1354 | }, 1355 | "maximum": { 1356 | "type": "string" 1357 | } 1358 | } 1359 | }, 1360 | "rdfType": { 1361 | "type": "string", 1362 | "description": "The RDF type for this field." 1363 | } 1364 | }, 1365 | "examples": [ 1366 | "{\n \"name\": \"period\",\n \"type\": \"duration\"\n}\n" 1367 | ] 1368 | }, 1369 | { 1370 | "type": "object", 1371 | "title": "Any Field", 1372 | "description": "Any value is accepted, including values that are not captured by the type/format/constraint requirements of the specification.", 1373 | "required": [ 1374 | "name", 1375 | "type" 1376 | ], 1377 | "properties": { 1378 | "name": { 1379 | "title": "Name", 1380 | "description": "A name for this field.", 1381 | "type": "string" 1382 | }, 1383 | "title": { 1384 | "title": "Title", 1385 | "description": "A human-readable title.", 1386 | "type": "string", 1387 | "examples": [ 1388 | "{\n \"title\": \"My Package Title\"\n}\n" 1389 | ] 1390 | }, 1391 | "description": { 1392 | "title": "Description", 1393 | "description": "A text description. Markdown is encouraged.", 1394 | "type": "string", 1395 | "examples": [ 1396 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 1397 | ] 1398 | }, 1399 | "type": { 1400 | "description": "The type keyword, which `MUST` be a value of `any`.", 1401 | "enum": [ 1402 | "any" 1403 | ] 1404 | }, 1405 | "constraints": { 1406 | "title": "Constraints", 1407 | "description": "The following constraints apply to `any` fields.", 1408 | "type": "object", 1409 | "properties": { 1410 | "required": { 1411 | "type": "boolean", 1412 | "description": "Indicates whether a property must have a value for each instance.", 1413 | "context": "An empty string is considered to be a missing value." 
1414 | }, 1415 | "unique": { 1416 | "type": "boolean", 1417 | "description": "When `true`, each value for the property `MUST` be unique." 1418 | }, 1419 | "enum": { 1420 | "type": "array", 1421 | "minItems": 1, 1422 | "uniqueItems": true 1423 | } 1424 | } 1425 | }, 1426 | "rdfType": { 1427 | "type": "string", 1428 | "description": "The RDF type for this field." 1429 | } 1430 | }, 1431 | "examples": [ 1432 | "{\n \"name\": \"notes\",\n \"type\": \"any\"\n}\n" 1433 | ] 1434 | } 1435 | ] 1436 | }, 1437 | "description": "An `array` of Table Schema Field objects.", 1438 | "examples": [ 1439 | "{\n \"fields\": [\n {\n \"name\": \"my-field-name\"\n }\n ]\n}\n", 1440 | "{\n \"fields\": [\n {\n \"name\": \"my-field-name\",\n \"type\": \"number\"\n },\n {\n \"name\": \"my-field-name-2\",\n \"type\": \"string\",\n \"format\": \"email\"\n }\n ]\n}\n" 1441 | ] 1442 | }, 1443 | "primaryKey": { 1444 | "oneOf": [ 1445 | { 1446 | "type": "array", 1447 | "minItems": 1, 1448 | "uniqueItems": true, 1449 | "items": { 1450 | "type": "string" 1451 | } 1452 | }, 1453 | { 1454 | "type": "string" 1455 | } 1456 | ], 1457 | "description": "A primary key is a field name or an array of field names, whose values `MUST` uniquely identify each row in the table.", 1458 | "context": "Field name in the `primaryKey` `MUST` be unique, and `MUST` match a field name in the associated table. 
It is acceptable to have an array with a single value, indicating that the value of a single field is the primary key.", 1459 | "examples": [ 1460 | "{\n \"primaryKey\": [\n \"name\"\n ]\n}\n", 1461 | "{\n \"primaryKey\": [\n \"first_name\",\n \"last_name\"\n ]\n}\n" 1462 | ] 1463 | }, 1464 | "foreignKeys": { 1465 | "type": "array", 1466 | "minItems": 1, 1467 | "items": { 1468 | "title": "Table Schema Foreign Key", 1469 | "description": "Table Schema Foreign Key", 1470 | "type": "object", 1471 | "required": [ 1472 | "fields", 1473 | "reference" 1474 | ], 1475 | "oneOf": [ 1476 | { 1477 | "properties": { 1478 | "fields": { 1479 | "type": "array", 1480 | "items": { 1481 | "type": "string", 1482 | "minItems": 1, 1483 | "uniqueItems": true, 1484 | "description": "Fields that make up the foreign key." 1485 | } 1486 | }, 1487 | "reference": { 1488 | "type": "object", 1489 | "required": [ 1490 | "resource", 1491 | "fields" 1492 | ], 1493 | "properties": { 1494 | "resource": { 1495 | "type": "string", 1496 | "default": "" 1497 | }, 1498 | "fields": { 1499 | "type": "array", 1500 | "items": { 1501 | "type": "string" 1502 | }, 1503 | "minItems": 1, 1504 | "uniqueItems": true 1505 | } 1506 | } 1507 | } 1508 | } 1509 | }, 1510 | { 1511 | "properties": { 1512 | "fields": { 1513 | "type": "string", 1514 | "description": "Fields that make up the foreign key." 
1515 | }, 1516 | "reference": { 1517 | "type": "object", 1518 | "required": [ 1519 | "resource", 1520 | "fields" 1521 | ], 1522 | "properties": { 1523 | "resource": { 1524 | "type": "string", 1525 | "default": "" 1526 | }, 1527 | "fields": { 1528 | "type": "string" 1529 | } 1530 | } 1531 | } 1532 | } 1533 | } 1534 | ] 1535 | }, 1536 | "examples": [ 1537 | "{\n \"foreignKeys\": [\n {\n \"fields\": \"state\",\n \"reference\": {\n \"resource\": \"the-resource\",\n \"fields\": \"state_id\"\n }\n }\n ]\n}\n", 1538 | "{\n \"foreignKeys\": [\n {\n \"fields\": \"state\",\n \"reference\": {\n \"resource\": \"\",\n \"fields\": \"id\"\n }\n }\n ]\n}\n" 1539 | ] 1540 | }, 1541 | "missingValues": { 1542 | "type": "array", 1543 | "minItems": 1, 1544 | "items": { 1545 | "type": "string" 1546 | }, 1547 | "default": [ 1548 | "" 1549 | ], 1550 | "description": "Values that, when encountered in the source, should be considered as `null`, 'not present', or 'blank' values.", 1551 | "context": "Many datasets arrive with missing data values, either because a value was not collected or it never existed.\nMissing values may be indicated simply by the value being empty; in other cases a special value may have been used, e.g. `-`, `NaN`, `0`, `-9999` etc.\nThe `missingValues` property provides a way to indicate that these values should be interpreted as equivalent to null.\n\n`missingValues` are strings rather than being the data type of the particular field. This allows for comparison prior to casting and for fields to have missing values which are not of their type, for example a `number` field to have missing values indicated by `-`.\n\nThe default value of `missingValues` for a non-string type field is the empty string `''`. 
For string type fields there is no default for `missingValues` (for string fields the empty string `''` is a valid value and need not indicate null).", 1552 | "examples": [ 1553 | "{\n \"missingValues\": [\n \"-\",\n \"NaN\",\n \"\"\n ]\n}\n" 1554 | ] 1555 | } 1556 | }, 1557 | "examples": [ 1558 | "{\n \"schema\": {\n \"fields\": [\n {\n \"name\": \"first_name\",\n \"type\": \"string\",\n \"constraints\": {\n \"required\": true\n }\n },\n {\n \"name\": \"age\",\n \"type\": \"integer\"\n }\n ],\n \"primaryKey\": [\n \"first_name\"\n ]\n }\n}\n" 1559 | ] 1560 | } 1561 | --------------------------------------------------------------------------------