├── .github ├── issue_template.md ├── pull_request_template.md ├── stale.yml └── workflows │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── LEAD.md ├── LICENSE ├── Makefile ├── README.md ├── clone ├── clone.go └── clone_test.go ├── datapackage ├── package.go ├── package_test.go ├── path.go ├── resource.go ├── resource_test.go ├── schema.go └── test_package.zip ├── doc.go ├── examples ├── build_package │ ├── cities.csv │ └── main.go ├── inline │ ├── datapackage.json │ └── main.go ├── load │ ├── data.csv │ ├── datapackage.json │ ├── main.go │ └── schema.json ├── load_zip │ ├── main.go │ └── package.zip ├── multipart │ ├── data.csv │ ├── data1.csv │ ├── datapackage.json │ └── main.go ├── remote │ └── main.go └── zip │ ├── .gitignore │ ├── data │ └── data.csv │ ├── datapackage.json │ └── main.go ├── go.mod ├── go.sum └── validator ├── jsonschema.go ├── profile_cache ├── data-package.json ├── data-resource.json ├── fiscal-data-package.json ├── profile_cache.go ├── registry.json ├── table-schema.json ├── tabular-data-package.json └── tabular-data-resource.json ├── registry.go ├── registry_test.go ├── validator.go └── validator_test.go /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your idea or problem. If it's a bug share as much as possible to reproduce it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @danielfireman (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your pull request. 
Make sure that tests pass before publishing it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @danielfireman (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | 4 | # Number of days of inactivity before a stale issue is closed 5 | daysUntilClose: 30 6 | 7 | # Issues with these labels will never be considered stale 8 | exemptLabels: 9 | - feature 10 | - enhancement 11 | - bug 12 | 13 | # Label to use when marking an issue as stale 14 | staleLabel: wontfix 15 | 16 | # Comment to post when marking an issue as stale. Set to `false` to disable 17 | markComment: > 18 | This issue has been automatically marked as stale because it has not had 19 | recent activity. It will be closed if no further activity occurs. Thank you 20 | for your contributions. 21 | 22 | # Comment to post when closing a stale issue. Set to `false` to disable 23 | closeComment: false 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | CI: 12 | name: "Run CI" 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: ["windows-latest","ubuntu-latest", "macOS-latest"] 17 | go: ["1.15.x", "1.16.x", "1.17.x", "1.18.x"] 18 | runs-on: ${{ matrix.os }} 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - name: Set up Go 24 | uses: actions/setup-go@v3 25 | with: 26 | go-version: 1.17 27 | 28 | - name: Build 29 | run: go build -v ./... 30 | 31 | - name: Vet 32 | run: "go vet ./..." 
33 | 34 | - uses: dominikh/staticcheck-action@v1.0.0 35 | with: 36 | version: "2021.1.1" 37 | install-go: false 38 | cache-key: ${{ matrix.go }} 39 | 40 | - name: Test 41 | run: go test -v -race -covermode atomic -coverprofile=covprofile ./... 42 | 43 | - name: Send coverage 44 | uses: shogo82148/actions-goveralls@v1 45 | with: 46 | path-to-profile: covprofile 47 | ignore: validator/profile_cache/*.go,examples/remote/*.go,examples/inline/*.go,examples/load_zip/*.go,examples/load/*.go,examples/zip/*.go,examples/build_package/*.go,examples/multipart/*.go -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | covprofile 17 | 18 | .DS_Store -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to datapackage-go 2 | 3 | Found a problem and would like to fix it? Have that great idea and would love to see it done? Let's do it! 4 | 5 | > Please open an issue before start working 6 | 7 | That could save a lot of time from everyone and we are super happy to answer questions and help you alonge the way. 8 | 9 | This project shares Go's code of conduct [values](https://golang.org/conduct#values) and [unwelcomed behavior](https://golang.org/conduct#unwelcome_behavior). Not sure what those mean or why we need those? Please give yourself a few minutes to get acquainted to those topics. 
10 | 11 | * Before you start coding: 12 | * Fork and pull the latest version of the master branch 13 | * Make sure you have go 1.8+ installed and you're using it 14 | * Install [dep](https://github.com/golang/dep) and ensure the dependencies are updated 15 | 16 | ```sh 17 | $ go get -u github.com/golang/dep/cmd/dep 18 | $ dep ensure 19 | ``` 20 | 21 | * Requirements 22 | * Compliance with [these guidelines](https://code.google.com/p/go-wiki/wiki/CodeReviewComments) 23 | * Good unit test coverage 24 | * [Good commit messages](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html) 25 | 26 | * Before sending the PR 27 | 28 | ```sh 29 | $ cd $GOPATH/src/github.com/frictionlessdata/datapackage-go 30 | $ ./fmt.sh 31 | $ go test ./... 32 | ``` 33 | 34 | If all tests pass, you're ready to send the PR! :D 35 | 36 | ## Updating local data package schema registry 37 | 38 | To speed up development and usage, datapackage-go comes with a local copy of the [Data Package Schema Registry](http://frictionlessdata.io/schemas/registry.json). As Go does not support resources out of the box, we are using [esc](https://github.com/mjibson/esc). 39 | Esc generates nice, gzipped strings, one per file, and generates a set of go functions that 40 | allow us to access the schema files from go code. 41 | 42 | To add or update JSONSchemas from the local registry one first needs to install esc. 43 | 44 | ```sh 45 | $ go get github.com/mjibson/esc 46 | ``` 47 | 48 | After all editing/adding, simply invoke `esc` 49 | 50 | ```sh 51 | $ cd $GOPATH/src/github.com/frictionlessdata/datapackage-go 52 | $ cd validator/profile_cache 53 | $ esc -o profile_cache.go -pkg profile_cache -ignore profile_cache.go . 54 | $ cd ../.. 55 | $ go test ./... 56 | ``` 57 | 58 | If all tests pass, you're ready to send the PR!
:D 59 | -------------------------------------------------------------------------------- /LEAD.md: -------------------------------------------------------------------------------- 1 | loleg 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Frictionless Data 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all list templates 2 | 3 | 4 | LEAD := $(shell head -n 1 LEAD.md) 5 | 6 | 7 | all: list 8 | 9 | list: 10 | @grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n' 11 | 12 | templates: 13 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/issue_template.md 14 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/pull_request_template.md 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datapackage-go 2 | 3 | [![Build Status](https://github.com/frictionlessdata/datapackage-go/actions/workflows/ci.yml/badge.svg)](https://github.com/frictionlessdata/datapackage-go/actions/workflows/ci.yml) 4 | [![Coverage Status](https://coveralls.io/repos/github/frictionlessdata/datapackage-go/badge.svg?branch=main)](https://coveralls.io/github/frictionlessdata/datapackage-go?branch=main) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/frictionlessdata/datapackage-go)](https://goreportcard.com/report/github.com/frictionlessdata/datapackage-go) 6 | [![GoDoc](https://godoc.org/github.com/frictionlessdata/datapackage-go?status.svg)](https://godoc.org/github.com/frictionlessdata/datapackage-go) 7 | [![Sourcegraph](https://sourcegraph.com/github.com/frictionlessdata/datapackage-go/-/badge.svg)](https://sourcegraph.com/github.com/frictionlessdata/datapackage-go?badge) 8 | [![Codebase](https://img.shields.io/badge/codebase-github-brightgreen)](https://github.com/frictionlessdata/datapackage-go) 9 | [![Support](https://img.shields.io/badge/support-discord-brightgreen)](https://discordapp.com/invite/Sewv6av) 10 | 11 | A Go library for working with [Data Packages](http://specs.frictionlessdata.io/data-package/). 
12 | 13 | - [datapackage-go](#datapackage-go) 14 | - [Install](#install) 15 | - [Main Features](#main-features) 16 | - [Loading and validating tabular data package descriptors](#loading-and-validating-tabular-data-package-descriptors) 17 | - [Accessing data package resources](#accessing-data-package-resources) 18 | - [Loading zip bundles](#loading-zip-bundles) 19 | - [Creating a zip bundle with the data package.](#creating-a-zip-bundle-with-the-data-package) 20 | - [CSV dialect support](#csv-dialect-support) 21 | - [Loading multipart resources](#loading-multipart-resources) 22 | - [Loading non-tabular resources](#loading-non-tabular-resources) 23 | - [Manipulating data packages programatically](#manipulating-data-packages-programatically) 24 | 25 | ## Install 26 | 27 | ```sh 28 | $ go get -u github.com/frictionlessdata/datapackage-go/... 29 | ``` 30 | 31 | ## Main Features 32 | 33 | ### Loading and validating tabular data package descriptors 34 | 35 | A [data package](http://frictionlessdata.io/specs/data-package/) is a collection of [resources](http://frictionlessdata.io/specs/data-resource/). The [datapackage.Package](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Package) provides various capabilities like loading local or remote data package, saving a data package descriptor and many more. 
36 | 37 | Suppose we have a local CSV file and a JSON descriptor in a `data` directory: 38 | 39 | > data/population.csv 40 | ```csv 41 | city,year,population 42 | london,2017,8780000 43 | paris,2017,2240000 44 | rome,2017,2860000 45 | ``` 46 | 47 | > data/datapackage.json 48 | ```json 49 | { 50 | "name": "world", 51 | "resources": [ 52 | { 53 | "name": "population", 54 | "path": "population.csv", 55 | "profile":"tabular-data-resource", 56 | "schema": { 57 | "fields": [ 58 | {"name": "city", "type": "string"}, 59 | {"name": "year", "type": "integer"}, 60 | {"name": "population", "type": "integer"} 61 | ] 62 | } 63 | } 64 | ] 65 | } 66 | ``` 67 | 68 | Let's create a data package based on this data using the [datapackage.Package](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Package) type: 69 | 70 | ```go 71 | pkg, err := datapackage.Load("data/datapackage.json") 72 | // Check error. 73 | ``` 74 | 75 | ### Accessing data package resources 76 | 77 | Once the data package is loaded, we could use the [datapackage.Resource](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Resource) type to read the data resource's contents: 78 | 79 | ```go 80 | resource := pkg.GetResource("population") 81 | contents, _ := resource.ReadAll() 82 | fmt.Println(contents) 83 | // [[london 2017 8780000] [paris 2017 2240000] [rome 2017 2860000]] 84 | ``` 85 | 86 | Or you could cast to Go types, making it easier to perform further processing: 87 | 88 | ```go 89 | type Population struct { 90 | City string `tableheader:"city"` 91 | Year string `tableheader:"year"` 92 | Population int `tableheader:"population"` 93 | } 94 | 95 | var cities []Population 96 | resource.Cast(&cities, csv.LoadHeaders()) 97 | fmt.Printf("%+v", cities) 98 | // [{City:london Year:2017 Population:8780000} {City:paris Year:2017 Population:2240000} {City:rome Year:2017 Population:2860000}] 99 | ``` 100 | 101 | If the data is too big to be loaded at once or if you would like to
perform line-by-line processing, you could iterate through the resource contents: 102 | 103 | ```go 104 | iter, _ := resource.Iter(csv.LoadHeaders()) 105 | sch, _ := resource.GetSchema() 106 | for iter.Next() { 107 | var p Population 108 | sch.CastRow(iter.Row(), &p) 109 | fmt.Printf("%+v\n", p) 110 | } 111 | // {City:london Year:2017 Population:8780000} 112 | // {City:paris Year:2017 Population:2240000} 113 | // {City:rome Year:2017 Population:2860000} 114 | ``` 115 | 116 | Or you might want to process specific columns, for instance to perform a statistical analysis: 117 | 118 | ```go 119 | var population []float64 120 | resource.CastColumn("population", &population, csv.LoadHeaders()) 121 | fmt.Println(population) 122 | // Output: [8780000 2240000 2860000] 123 | ``` 124 | 125 | ### Loading zip bundles 126 | 127 | It is very common to store the data in zip bundles containing the descriptor and data files. Those are natively supported by the [datapackage.Load](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Load) function. For example, let's say we have the following `package.zip` bundle: 128 | 129 | |- package.zip 130 | |- datapackage.json 131 | |- data.csv 132 | 133 | We could load this package simply by calling: 134 | 135 | ```go 136 | pkg, err := datapackage.Load("package.zip") 137 | // Check error. 138 | ``` 139 | 140 | And the library will unzip the package contents to a temporary directory and wire everything up for us. 141 | 142 | A complete example can be found [here](https://github.com/frictionlessdata/datapackage-go/tree/master/examples/load_zip). 143 | 144 | ### Creating a zip bundle with the data package. 145 | 146 | You could also easily create a zip file containing the descriptor and all the data resources.
Let's say you have a [datapackage.Package](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Package) instance. To create a zip file containing all resources, simply call: 147 | 148 | ```go 149 | err := pkg.Zip("package.zip") 150 | // Check error. 151 | ``` 152 | 153 | This call also downloads remote resources. A complete example can be found [here](https://github.com/frictionlessdata/datapackage-go/tree/master/examples/zip). 154 | 155 | ### CSV dialect support 156 | 157 | Basic support for configuring [CSV dialect](http://frictionlessdata.io/specs/csv-dialect/) has been added. In particular `delimiter`, `skipInitialSpace` and `header` fields are supported. For instance, let's assume the population file has a different field delimiter: 158 | 159 | > data/population.csv 160 | ```csv 161 | city;year;population 162 | london;2017;8780000 163 | paris;2017;2240000 164 | rome;2017;2860000 165 | ``` 166 | 167 | One could easily parse it by adding the following `dialect` property to the `population` resource: 168 | 169 | ```json 170 | "dialect":{ 171 | "delimiter":";" 172 | } 173 | ``` 174 | 175 | A complete example can be found [here](https://github.com/frictionlessdata/datapackage-go/tree/master/examples/load). 176 | 177 | ### Loading multipart resources 178 | 179 | Sometimes you have data scattered across many local or remote files. Datapackage-go offers an easy way for you to deal with all those files as one big 180 | file. We call these multipart resources. To use this feature, simply list your files in the `path` property of the resource. For example, let's 181 | say our population data is now split between north and south hemispheres.
To deal with this, we only need to change the package descriptor: 182 | 183 | > data/datapackage.json 184 | ```json 185 | { 186 | "name": "world", 187 | "resources": [ 188 | { 189 | "name": "population", 190 | "path": ["north.csv","south.csv"], 191 | "profile":"tabular-data-resource", 192 | "schema": { 193 | "fields": [ 194 | {"name": "city", "type": "string"}, 195 | {"name": "year", "type": "integer"}, 196 | {"name": "population", "type": "integer"} 197 | ] 198 | } 199 | } 200 | ] 201 | } 202 | ``` 203 | 204 | The rest of the code keeps working unchanged. 205 | 206 | A complete example can be found [here](https://github.com/frictionlessdata/datapackage-go/tree/master/examples/multipart). 207 | 208 | 209 | ### Loading non-tabular resources 210 | 211 | A [Data package](https://frictionlessdata.io/data-packages/) is a container format used to describe and package a collection of data. Even though there is additional support for dealing with tabular resources, it can be used to package any kind of data. 212 | 213 | For instance, let's say a user needs to load JSON-LD information along with some tabular data (for more on this use case, please take a look at [this](https://github.com/frictionlessdata/datapackage-go/issues/13) issue). That can be packed together in a data package descriptor: 214 | 215 | ```json 216 | { 217 | "name": "carp-lake", 218 | "title": "Carp Lake Title", 219 | "description": "Tephra and Lithology from Carp Lake", 220 | "resources": [ 221 | { 222 | "name":"data", 223 | "path": "data/carpLakeCoreStratigraphy.csv", 224 | "format": "csv", 225 | "schema": { 226 | "fields": [ 227 | {"name": "depth", "type": "number"}, 228 | {"name": "notes", "type": "text"}, 229 | {"name": "core_segments", "type": "text"} 230 | ] 231 | } 232 | }, 233 | { 234 | "name": "schemaorg", 235 | "path": "data/schemaorg-ld.json", 236 | "format": "application/ld+json" 237 | } 238 | ] 239 | } 240 | ``` 241 | 242 | The package loading proceeds as usual.
243 | 244 | 245 | ```go 246 | pkg, err := datapackage.Load("data/datapackage.json") 247 | // Check error. 248 | ``` 249 | 250 | Once the data package is loaded, we could use the [Resource.RawRead](https://godoc.org/github.com/frictionlessdata/datapackage-go/datapackage#Resource.GetSchema) method to access `schemaorg` resource contents as a byte slice. 251 | 252 | ```go 253 | so := pkg.GetResource("schemaorg") 254 | rc, _ := so.RawRead() 255 | defer rc.Close() 256 | contents, _ := ioutil.ReadAll(rc) 257 | // Use contents. For instance, one could validate the JSON-LD schema and unmarshal it into a data structure. 258 | 259 | data := pkg.GetResource("data") 260 | dataContents, err := data.ReadAll() 261 | // As data is a tabular resource, its content can be loaded as [][]string. 262 | ``` 263 | 264 | ### Manipulating data packages programatically 265 | 266 | The datapackage-go library also makes it easy to save packages. Let's say you're creating a program that produces data packages and would like to add or remove resource: 267 | 268 | ```go 269 | descriptor := map[string]interface{}{ 270 | "resources": []interface{}{ 271 | map[string]interface{}{ 272 | "name": "books", 273 | "path": "books.csv", 274 | "format": "csv", 275 | "profile": "tabular-data-resource", 276 | "schema": map[string]interface{}{ 277 | "fields": []interface{}{ 278 | map[string]interface{}{"name": "author", "type": "string"}, 279 | map[string]interface{}{"name": "title", "type": "string"}, 280 | map[string]interface{}{"name": "year", "type": "integer"}, 281 | }, 282 | }, 283 | }, 284 | }, 285 | } 286 | pkg, err := datapackage.New(descriptor, ".", validator.InMemoryLoader()) 287 | if err != nil { 288 | panic(err) 289 | } 290 | // Removing resource. 291 | pkg.RemoveResource("books") 292 | 293 | // Adding new resource. 
294 | pkg.AddResource(map[string]interface{}{ 295 | "name": "cities", 296 | "path": "cities.csv", 297 | "format": "csv", 298 | "profile": "tabular-data-resource", 299 | "schema": map[string]interface{}{ 300 | "fields": []interface{}{ 301 | map[string]interface{}{"name": "city", "type": "string"}, 302 | map[string]interface{}{"name": "year", "type": "integer"}, 303 | map[string]interface{}{"name": "population", "type": "integer"} 304 | }, 305 | }, 306 | }) 307 | 308 | // Printing resource contents. 309 | cities, _ := pkg.GetResource("cities").ReadAll() 310 | fmt.Println(cities) 311 | // [[london 2017 8780000] [paris 2017 2240000] [rome 20172860000]] 312 | ``` 313 | -------------------------------------------------------------------------------- /clone/clone.go: -------------------------------------------------------------------------------- 1 | package clone 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | ) 7 | 8 | func init() { 9 | gob.Register(map[string]interface{}{}) // descriptor. 10 | gob.Register([]interface{}{}) // data-package resources. 11 | } 12 | 13 | // Descriptor deep-copies the passed-in descriptor and returns its copy. 
// Descriptor returns a deep copy of d. The copy is produced by gob-encoding d
// into an in-memory buffer and decoding that buffer into a fresh map, so the
// result shares no nested maps or slices with the input. Any encoding or
// decoding error (for instance a value whose dynamic type gob does not know)
// is returned together with a nil map.
func Descriptor(d map[string]interface{}) (map[string]interface{}, error) {
	stream := new(bytes.Buffer)
	if encErr := gob.NewEncoder(stream).Encode(d); encErr != nil {
		return nil, encErr
	}
	var dup map[string]interface{}
	decErr := gob.NewDecoder(stream).Decode(&dup)
	if decErr != nil {
		return nil, decErr
	}
	return dup, nil
}
23 | _, err = Descriptor(map[string]interface{}{"boo": map[int]interface{}{}}) 24 | if err == nil { 25 | t.Fatal("want:err got:nil") 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /datapackage/package.go: -------------------------------------------------------------------------------- 1 | package datapackage 2 | 3 | import ( 4 | "archive/zip" 5 | "bufio" 6 | "bytes" 7 | "encoding/gob" 8 | "encoding/json" 9 | "fmt" 10 | "io" 11 | "io/ioutil" 12 | "net/http" 13 | "os" 14 | "path/filepath" 15 | "reflect" 16 | "strings" 17 | 18 | "github.com/frictionlessdata/datapackage-go/clone" 19 | "github.com/frictionlessdata/datapackage-go/validator" 20 | ) 21 | 22 | const ( 23 | resourcePropName = "resources" 24 | profilePropName = "profile" 25 | encodingPropName = "encoding" 26 | defaultDataPackageProfile = "data-package" 27 | defaultResourceEncoding = "utf-8" 28 | defaultResourceProfile = "data-resource" 29 | tabularDataPackageProfileName = "tabular-data-package" 30 | descriptorFileNameWithinZip = "datapackage.json" 31 | ) 32 | 33 | // Needed to registry gob types. 34 | // See FromReader for more details. 35 | func init() { 36 | var i json.Number 37 | gob.Register(i) 38 | } 39 | 40 | // Package represents a https://specs.frictionlessdata.io/data-package/ 41 | type Package struct { 42 | resources []*Resource 43 | 44 | basePath string 45 | descriptor map[string]interface{} 46 | valRegistry validator.Registry 47 | } 48 | 49 | // GetResource return the resource which the passed-in name or nil if the resource is not part of the package. 50 | func (p *Package) GetResource(name string) *Resource { 51 | for _, r := range p.resources { 52 | if r.name == name { 53 | return r 54 | } 55 | } 56 | return nil 57 | } 58 | 59 | // ResourceNames return a slice containing the name of the resources. 
60 | func (p *Package) ResourceNames() []string { 61 | s := make([]string, len(p.resources)) 62 | for i, r := range p.resources { 63 | s[i] = r.name 64 | } 65 | return s 66 | } 67 | 68 | // Resources returns a copy of data package resources. 69 | func (p *Package) Resources() []*Resource { 70 | // NOTE: Ignoring errors because we are not changing anything. Just cloning a valid package descriptor and building 71 | // its resources. 72 | cpy, _ := clone.Descriptor(p.descriptor) 73 | res, _ := buildResources(cpy[resourcePropName], p.basePath, p.valRegistry) 74 | return res 75 | } 76 | 77 | // AddResource adds a new resource to the package, updating its descriptor accordingly. 78 | func (p *Package) AddResource(d map[string]interface{}) error { 79 | resDesc, err := clone.Descriptor(d) 80 | if err != nil { 81 | return err 82 | } 83 | fillResourceDescriptorWithDefaultValues(resDesc) 84 | rSlice, ok := p.descriptor[resourcePropName].([]interface{}) 85 | if !ok { 86 | return fmt.Errorf("invalid resources property:\"%v\"", p.descriptor[resourcePropName]) 87 | } 88 | rSlice = append(rSlice, resDesc) 89 | r, err := buildResources(rSlice, p.basePath, p.valRegistry) 90 | if err != nil { 91 | return err 92 | } 93 | p.descriptor[resourcePropName] = rSlice 94 | p.resources = r 95 | return nil 96 | } 97 | 98 | //RemoveResource removes the resource from the package, updating its descriptor accordingly. 99 | func (p *Package) RemoveResource(name string) { 100 | index := -1 101 | rSlice, ok := p.descriptor[resourcePropName].([]interface{}) 102 | if !ok { 103 | return 104 | } 105 | for i := range rSlice { 106 | r := rSlice[i].(map[string]interface{}) 107 | if r["name"] == name { 108 | index = i 109 | break 110 | } 111 | } 112 | if index > -1 { 113 | newSlice := append(rSlice[:index], rSlice[index+1:]...) 
114 | r, err := buildResources(newSlice, p.basePath, p.valRegistry) 115 | if err != nil { 116 | return 117 | } 118 | p.descriptor[resourcePropName] = newSlice 119 | p.resources = r 120 | } 121 | } 122 | 123 | // Descriptor returns a deep copy of the underlying descriptor which describes the package. 124 | func (p *Package) Descriptor() map[string]interface{} { 125 | // Package cescriptor is always valid. Don't need to make the interface overcomplicated. 126 | c, _ := clone.Descriptor(p.descriptor) 127 | return c 128 | } 129 | 130 | // Update the package with the passed-in descriptor. The package will only be updated if the 131 | // the new descriptor is valid, otherwise the error will be returned. 132 | func (p *Package) Update(newDescriptor map[string]interface{}, loaders ...validator.RegistryLoader) error { 133 | newP, err := New(newDescriptor, p.basePath, loaders...) 134 | if err != nil { 135 | return err 136 | } 137 | *p = *newP 138 | return nil 139 | } 140 | 141 | func (p *Package) write(w io.Writer) error { 142 | b, err := json.MarshalIndent(p.descriptor, "", " ") 143 | if err != nil { 144 | return err 145 | } 146 | _, err = w.Write(b) 147 | if err != nil { 148 | return err 149 | } 150 | return nil 151 | } 152 | 153 | // SaveDescriptor saves the data package descriptor to the passed-in file path. 154 | // It create creates the named file with mode 0666 (before umask), truncating 155 | // it if it already exists. 156 | func (p *Package) SaveDescriptor(path string) error { 157 | f, err := os.Create(path) 158 | if err != nil { 159 | return err 160 | } 161 | defer f.Close() 162 | return p.write(f) 163 | } 164 | 165 | // Zip saves a zip-compressed file containing the package descriptor and all resource data. 166 | // It create creates the named file with mode 0666 (before umask), truncating 167 | // it if it already exists. 
// zipFiles writes a Deflate-compressed zip archive to filename containing every
// path listed in files. Each entry is named after the file's path relative to
// basePath, always with forward slashes, as the zip format requires.
//
// The output file is created with os.Create, so an existing file is truncated.
// The first error hit while adding an entry aborts the operation. Errors from
// finalizing the archive or closing the output file are reported as well,
// because a failure there leaves an unreadable zip behind.
func zipFiles(filename string, basePath string, files []string) (err error) {
	newfile, err := os.Create(filename)
	if err != nil {
		return err
	}
	// Deferred calls run last-in-first-out: the zip writer flushes its central
	// directory first, then the underlying file is closed.
	defer func() {
		if cerr := newfile.Close(); err == nil {
			err = cerr
		}
	}()
	zipWriter := zip.NewWriter(newfile)
	defer func() {
		if cerr := zipWriter.Close(); err == nil {
			err = cerr
		}
	}()
	for _, file := range files {
		if addErr := addFileToZip(zipWriter, basePath, file); addErr != nil {
			return addErr
		}
	}
	return nil
}

// addFileToZip copies a single file into zw. Keeping this in its own function
// lets the input file be closed as soon as it has been copied instead of
// staying open until the whole archive is written.
func addFileToZip(zw *zip.Writer, basePath, file string) error {
	src, err := os.Open(file)
	if err != nil {
		return err
	}
	defer src.Close()
	info, err := src.Stat()
	if err != nil {
		return err
	}
	header, err := zip.FileInfoHeader(info)
	if err != nil {
		return err
	}
	// default is Store 0 (no compression!)
	// see http://golang.org/pkg/archive/zip/#pkg-constants
	header.Method = zip.Deflate
	header.Name = zipEntryName(basePath, file)
	dst, err := zw.CreateHeader(header)
	if err != nil {
		return err
	}
	_, err = io.Copy(dst, src)
	return err
}

// zipEntryName returns the archive name for file: its slash-separated path
// relative to basePath. If file does not live under basePath, it falls back to
// stripping basePath as a plain prefix.
func zipEntryName(basePath, file string) string {
	rel, err := filepath.Rel(basePath, file)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		rel = strings.TrimPrefix(file, basePath)
	}
	return strings.TrimPrefix(filepath.ToSlash(rel), "/")
}
289 | } 290 | 291 | // FromString creates a data package from a string representation of the package descriptor. 292 | func FromString(in string, basePath string, loaders ...validator.RegistryLoader) (*Package, error) { 293 | return FromReader(strings.NewReader(in), basePath, loaders...) 294 | } 295 | 296 | // Load the data package descriptor from the specified URL or file path. 297 | // If path has the ".zip" extension, it will be saved in local filesystem and decompressed before loading. 298 | func Load(path string, loaders ...validator.RegistryLoader) (*Package, error) { 299 | localPath, contents, err := read(path) 300 | if err != nil { 301 | return nil, fmt.Errorf("error reading path contents (%s): %w", path, err) 302 | } 303 | if !strings.HasSuffix(path, ".zip") { 304 | return FromReader(bytes.NewBuffer(contents), getBasepath(path), loaders...) 305 | } 306 | // Special case for zip paths. BasePath will be the temporary directory. 307 | dir, err := ioutil.TempDir("", "datapackage_decompress") 308 | if err != nil { 309 | return nil, fmt.Errorf("error creating temporary directory: %w", err) 310 | } 311 | fNames, err := unzip(localPath, dir) 312 | if err != nil { 313 | return nil, fmt.Errorf("error unzipping path contents (%s): %w", localPath, err) 314 | } 315 | if _, ok := fNames[descriptorFileNameWithinZip]; ok { 316 | return Load(filepath.Join(dir, descriptorFileNameWithinZip), loaders...) 
317 | } 318 | return nil, fmt.Errorf("zip file %s does not contain a file called %s", localPath, descriptorFileNameWithinZip) 319 | } 320 | 321 | func read(path string) (string, []byte, error) { 322 | if strings.HasPrefix(path, "http") { 323 | resp, err := http.Get(path) 324 | if err != nil { 325 | return "", nil, fmt.Errorf("error performing HTTP GET(%s): %w", path, err) 326 | } 327 | defer resp.Body.Close() 328 | buf, err := ioutil.ReadAll(resp.Body) 329 | if err != nil { 330 | return "", nil, fmt.Errorf("error reading response body contents (%s): %w", path, err) 331 | } 332 | // Making sure zip file is materialized. 333 | // This makes debugging easier. 334 | localPath, err := func() (string, error) { 335 | f, err := ioutil.TempFile("", "*.zip") 336 | if err != nil { 337 | return "", fmt.Errorf("error creating temp file to save zip (dir:%s): %w", os.TempDir(), err) 338 | } 339 | defer f.Close() 340 | if _, err := f.Write(buf); err != nil { 341 | return f.Name(), fmt.Errorf("error writing temp file to save zip (%s): %w", f.Name(), err) 342 | } 343 | return f.Name(), nil 344 | }() 345 | return localPath, buf, err 346 | } 347 | buf, err := ioutil.ReadFile(path) 348 | if err != nil { 349 | return "", nil, fmt.Errorf("error reading local file contents (%s): %w", path, err) 350 | } 351 | return path, buf, nil 352 | } 353 | 354 | func unzip(archive, basePath string) (map[string]struct{}, error) { 355 | fileNames := make(map[string]struct{}) 356 | reader, err := zip.OpenReader(archive) 357 | if err != nil { 358 | return nil, fmt.Errorf("error opening zip reader(%s): %w", archive, err) 359 | } 360 | if err := os.MkdirAll(basePath, os.ModePerm); err != nil { 361 | return nil, fmt.Errorf("error creating directory (%s): %w", basePath, err) 362 | } 363 | for _, file := range reader.File { 364 | fileNames[file.Name] = struct{}{} 365 | path := filepath.Join(basePath, file.Name) 366 | if filepath.Dir(file.Name) != "." 
{ 367 | dotDir := filepath.Join(basePath, filepath.Dir(file.Name)) 368 | if err := os.MkdirAll(dotDir, os.ModePerm); err != nil { 369 | return nil, fmt.Errorf("error creating directory (%s): %w", dotDir, err) 370 | } 371 | } 372 | fileReader, err := file.Open() 373 | if err != nil { 374 | return nil, fmt.Errorf("error opening internal zip file (%s, %s): %w", archive, file.Name, err) 375 | } 376 | defer fileReader.Close() 377 | targetFile, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) 378 | if err != nil { 379 | return nil, fmt.Errorf("error opening target external zip file (%s, %s): %w", archive, path, err) 380 | } 381 | defer targetFile.Close() 382 | if _, err := io.Copy(targetFile, fileReader); err != nil { 383 | return nil, fmt.Errorf("error filling target external zip file (%s, %s): %w", archive, path, err) 384 | } 385 | } 386 | return fileNames, nil 387 | } 388 | 389 | func fillPackageDescriptorWithDefaultValues(descriptor map[string]interface{}) { 390 | if descriptor[profilePropName] == nil { 391 | descriptor[profilePropName] = defaultDataPackageProfile 392 | } 393 | rSlice, ok := descriptor[resourcePropName].([]interface{}) 394 | if ok { 395 | for i := range rSlice { 396 | r, ok := rSlice[i].(map[string]interface{}) 397 | if ok { 398 | fillResourceDescriptorWithDefaultValues(r) 399 | } 400 | } 401 | } 402 | } 403 | 404 | func loadPackageSchemas(d map[string]interface{}) error { 405 | var err error 406 | if schStr, ok := d[schemaProp].(string); ok { 407 | d[schemaProp], err = loadSchema(schStr) 408 | if err != nil { 409 | return err 410 | } 411 | } 412 | resources, _ := d[resourcePropName].([]interface{}) 413 | for _, r := range resources { 414 | resMap, _ := r.(map[string]interface{}) 415 | if schStr, ok := resMap[schemaProp].(string); ok { 416 | resMap[schemaProp], err = loadSchema(schStr) 417 | if err != nil { 418 | return err 419 | } 420 | } 421 | } 422 | return nil 423 | } 424 | 425 | func buildResources(resI interface{}, 
basePath string, reg validator.Registry) ([]*Resource, error) { 426 | rSlice, ok := resI.([]interface{}) 427 | if !ok { 428 | return nil, fmt.Errorf("invalid resources property. Value:\"%v\" Type:\"%v\"", resI, reflect.TypeOf(resI)) 429 | } 430 | resources := make([]*Resource, len(rSlice)) 431 | for pos, rInt := range rSlice { 432 | rDesc, ok := rInt.(map[string]interface{}) 433 | if !ok { 434 | return nil, fmt.Errorf("resources must be a json object. got:%v", rInt) 435 | } 436 | r, err := NewResource(rDesc, reg) 437 | if err != nil { 438 | return nil, err 439 | } 440 | r.basePath = basePath 441 | resources[pos] = r 442 | } 443 | return resources, nil 444 | } 445 | -------------------------------------------------------------------------------- /datapackage/package_test.go: -------------------------------------------------------------------------------- 1 | package datapackage 2 | 3 | import ( 4 | "archive/zip" 5 | "bytes" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "io/ioutil" 10 | "net/http" 11 | "net/http/httptest" 12 | "os" 13 | "path/filepath" 14 | "reflect" 15 | "strconv" 16 | "strings" 17 | "testing" 18 | 19 | "github.com/frictionlessdata/datapackage-go/validator" 20 | "github.com/matryer/is" 21 | ) 22 | 23 | var invalidResource = map[string]interface{}{"name": "res1"} 24 | var r1 = map[string]interface{}{"name": "res1", "path": "foo.csv"} 25 | var r1Filled = map[string]interface{}{"name": "res1", "path": "foo.csv", "profile": "data-resource", "encoding": "utf-8"} 26 | var r1Str = `{ 27 | "profile": "data-package", 28 | "resources": [ 29 | { 30 | "encoding": "utf-8", 31 | "name": "res1", 32 | "path": "foo.csv", 33 | "profile": "data-resource" 34 | } 35 | ] 36 | }` 37 | var r2 = map[string]interface{}{"name": "res2", "path": "bar.csv"} 38 | var r2Filled = map[string]interface{}{"name": "res2", "path": "bar.csv", "profile": "data-resource", "encoding": "utf-8"} 39 | 40 | func ExampleLoad_readAll() { 41 | dir, _ := ioutil.TempDir("", "datapackage_exampleload") 
42 | dir = filepath.Clean(dir) // removes possible trailing slashes. 43 | defer os.RemoveAll(dir) 44 | descriptorPath := filepath.Join(dir, "pkg.json") 45 | descriptorContents := `{"resources": [{ 46 | "name": "res1", 47 | "path": "data.csv", 48 | "profile": "tabular-data-resource", 49 | "schema": {"fields": [{"name":"name", "type":"string"}]} 50 | }]}` 51 | ioutil.WriteFile(descriptorPath, []byte(descriptorContents), 0666) 52 | 53 | resPath := filepath.Join(dir, "data.csv") 54 | resContent := []byte("foo\nbar") 55 | ioutil.WriteFile(resPath, resContent, 0666) 56 | 57 | pkg, _ := Load(descriptorPath, validator.InMemoryLoader()) 58 | contents, _ := pkg.GetResource("res1").ReadAll() 59 | fmt.Println(contents) 60 | // Output: [[foo] [bar]] 61 | } 62 | 63 | func ExampleLoad_readRaw() { 64 | dir, _ := ioutil.TempDir("", "datapackage_exampleload") 65 | dir = filepath.Clean(dir) // removes possible trailing slashes. 66 | defer os.RemoveAll(dir) 67 | descriptorPath := filepath.Join(dir, "pkg.json") 68 | descriptorContents := `{"resources": [{ 69 | "name": "res1", 70 | "path": "schemaorg.json", 71 | "format": "application/ld+json", 72 | "profile": "data-resource" 73 | }]}` 74 | ioutil.WriteFile(descriptorPath, []byte(descriptorContents), 0666) 75 | 76 | resPath := filepath.Join(dir, "schemaorg.json") 77 | resContent := []byte(`{"@context": {"@vocab": "http://schema.org/"}}`) 78 | ioutil.WriteFile(resPath, resContent, 0666) 79 | 80 | pkg, _ := Load(descriptorPath, validator.InMemoryLoader()) 81 | rc, _ := pkg.GetResource("res1").RawRead() 82 | defer rc.Close() 83 | contents, _ := ioutil.ReadAll(rc) 84 | fmt.Println(string(contents)) 85 | // Output: {"@context": {"@vocab": "http://schema.org/"}} 86 | } 87 | 88 | func ExampleLoad_readAllRemote() { 89 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 90 | // If the request is for data, returns the content. 
91 | switch { 92 | case r.RequestURI == "/data.csv": 93 | fmt.Fprintf(w, "foo\nbar") 94 | default: 95 | fmt.Fprintf(w, `{"resources": [{ 96 | "name": "res1", 97 | "path": "data.csv", 98 | "profile": "tabular-data-resource", 99 | "schema": {"fields": [{"name":"name", "type":"string"}]} 100 | }]}`) 101 | } 102 | })) 103 | defer ts.Close() 104 | pkg, _ := Load(ts.URL, validator.InMemoryLoader()) 105 | contents, _ := pkg.GetResource("res1").ReadAll() 106 | fmt.Println(contents) 107 | // Output: [[foo] [bar]] 108 | } 109 | 110 | func ExampleLoad_cast() { 111 | dir, _ := ioutil.TempDir("", "datapackage_exampleload") 112 | defer os.RemoveAll(dir) 113 | descriptorPath := filepath.Join(dir, "pkg.json") 114 | descriptorContents := `{"resources": [{ 115 | "name": "res1", 116 | "path": "data.csv", 117 | "profile": "tabular-data-resource", 118 | "schema": {"fields": [{"name":"name", "type":"string"}]} 119 | }]}` 120 | ioutil.WriteFile(descriptorPath, []byte(descriptorContents), 0666) 121 | 122 | resPath := filepath.Join(dir, "data.csv") 123 | resContent := []byte("foo\nbar") 124 | ioutil.WriteFile(resPath, resContent, 0666) 125 | 126 | pkg, _ := Load(descriptorPath, validator.InMemoryLoader()) 127 | res := pkg.GetResource("res1") 128 | people := []struct { 129 | Name string `tableheader:"name"` 130 | }{} 131 | res.Cast(&people) 132 | fmt.Printf("%+v", people) 133 | // Output: [{Name:foo} {Name:bar}] 134 | } 135 | 136 | func TestPackage_GetResource(t *testing.T) { 137 | is := is.New(t) 138 | pkg, err := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 139 | is.NoErr(err) 140 | is.Equal(pkg.GetResource("res1").name, "res1") 141 | is.True(pkg.GetResource("foooooo") == nil) 142 | } 143 | 144 | func TestPackage_AddResource(t *testing.T) { 145 | t.Run("ValidDescriptor", func(t *testing.T) { 146 | is := is.New(t) 147 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 148 | 
is.NoErr(pkg.AddResource(r2)) 149 | 150 | // Checking resources. 151 | is.Equal(len(pkg.resources), 2) 152 | is.Equal(pkg.resources[0].name, "res1") 153 | is.Equal(pkg.resources[1].name, "res2") 154 | 155 | // Checking descriptor. 156 | resDesc := pkg.descriptor["resources"].([]interface{}) 157 | is.Equal(len(resDesc), 2) 158 | is.Equal(resDesc[0], r1Filled) 159 | is.Equal(resDesc[1], r2Filled) 160 | }) 161 | t.Run("InvalidResource", func(t *testing.T) { 162 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 163 | if err := pkg.AddResource(invalidResource); err == nil { 164 | t.Fatalf("want:err got:nil") 165 | } 166 | }) 167 | } 168 | 169 | func TestPackage_RemoveResource(t *testing.T) { 170 | t.Run("Existing", func(t *testing.T) { 171 | is := is.New(t) 172 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1, r2}}, ".", validator.InMemoryLoader()) 173 | pkg.RemoveResource("res1") 174 | 175 | resDesc := pkg.descriptor["resources"].([]interface{}) 176 | is.Equal(len(resDesc), 1) 177 | is.Equal(resDesc[0], r2Filled) 178 | is.Equal(len(pkg.resources), 1) 179 | is.Equal(pkg.resources[0].name, "res2") 180 | }) 181 | t.Run("NonExisting", func(t *testing.T) { 182 | is := is.New(t) 183 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 184 | pkg.RemoveResource("invalid") 185 | 186 | resDesc := pkg.descriptor["resources"].([]interface{}) 187 | is.Equal(len(resDesc), 1) 188 | is.Equal(resDesc[0], r1Filled) 189 | is.Equal(len(pkg.resources), 1) 190 | is.Equal(pkg.resources[0].name, "res1") 191 | }) 192 | } 193 | 194 | func TestPackage_ResourceNames(t *testing.T) { 195 | is := is.New(t) 196 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1, r2}}, ".", validator.InMemoryLoader()) 197 | is.Equal(pkg.ResourceNames(), []string{"res1", "res2"}) 198 | } 199 | 200 | func TestPackage_Resources(t *testing.T) { 201 | is := is.New(t) 202 | pkg, _ 
:= New(map[string]interface{}{"resources": []interface{}{r1, r2}}, ".", validator.InMemoryLoader()) 203 | resources := pkg.Resources() 204 | is.Equal(len(resources), 2) 205 | is.Equal(resources[0].name, "res1") 206 | is.Equal(resources[1].name, "res2") 207 | 208 | // Changing the returned slice must not change the package. 209 | resources = append(resources, &Resource{name: "foo"}) 210 | is.Equal(len(resources), 3) 211 | is.Equal(len(pkg.ResourceNames()), 2) 212 | } 213 | 214 | func TestPackage_Descriptor(t *testing.T) { 215 | is := is.New(t) 216 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 217 | cpy := pkg.Descriptor() 218 | is.Equal(pkg.descriptor, cpy) 219 | } 220 | 221 | func TestPackage_Update(t *testing.T) { 222 | t.Run("ValidResource", func(t *testing.T) { 223 | is := is.New(t) 224 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 225 | newDesc := map[string]interface{}{"resources": []interface{}{r2}} 226 | is.NoErr(pkg.Update(newDesc, validator.InMemoryLoader())) 227 | is.Equal(pkg.Descriptor(), map[string]interface{}{"profile": "data-package", "resources": []interface{}{r2Filled}}) 228 | }) 229 | t.Run("InvalidResource", func(t *testing.T) { 230 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 231 | newDesc := map[string]interface{}{"resources": []interface{}{invalidResource}} 232 | if err := pkg.Update(newDesc, validator.InMemoryLoader()); err == nil { 233 | t.Fatalf("want:err got:nil") 234 | } 235 | }) 236 | } 237 | 238 | func TestFromDescriptor(t *testing.T) { 239 | t.Run("ValidationErrors", func(t *testing.T) { 240 | data := []struct { 241 | desc string 242 | descriptor map[string]interface{} 243 | }{ 244 | {"EmptyMap", map[string]interface{}{}}, 245 | {"InvalidResourcePropertyType", map[string]interface{}{"resources": 10}}, 246 | {"InvalidResource", 
map[string]interface{}{"resources": []interface{}{map[string]interface{}{}}}}, 247 | {"InvalidResourceType", map[string]interface{}{"resources": []interface{}{1}}}, 248 | {"ProfileNotAString", map[string]interface{}{"profile": 1, "resources": []interface{}{r1}}}, 249 | {"ErrorCloning", map[string]interface{}{"profile": [][][]string{}, "resources": []interface{}{r1}}}, 250 | } 251 | for _, d := range data { 252 | t.Run(d.desc, func(t *testing.T) { 253 | if _, err := New(d.descriptor, ".", validator.InMemoryLoader()); err == nil { 254 | t.Fatalf("want:err got:nil") 255 | } 256 | }) 257 | } 258 | }) 259 | t.Run("ValidDescriptor", func(t *testing.T) { 260 | is := is.New(t) 261 | pkg, err := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 262 | is.NoErr(err) 263 | is.Equal(len(pkg.resources), 1) 264 | is.Equal(pkg.resources[0].name, "res1") 265 | resources := pkg.descriptor["resources"].([]interface{}) 266 | is.Equal(len(resources), 1) 267 | is.Equal(resources[0], r1Filled) 268 | }) 269 | } 270 | 271 | func TestPackage_SaveDescriptor(t *testing.T) { 272 | t.Run("Valid", func(t *testing.T) { 273 | is := is.New(t) 274 | 275 | // Creating temporary empty directory and making sure we remove it. 276 | dir, err := ioutil.TempDir("", "datapackage_save") 277 | is.NoErr(err) 278 | defer os.RemoveAll(dir) 279 | fName := filepath.Join(dir, "pkg.json") 280 | 281 | // Saving package descriptor. 282 | pkg, _ := New(map[string]interface{}{"resources": []interface{}{r1}}, ".", validator.InMemoryLoader()) 283 | is.NoErr(pkg.SaveDescriptor(fName)) 284 | 285 | // Checking descriptor contents. 286 | buf, err := ioutil.ReadFile(fName) 287 | is.NoErr(err) 288 | is.Equal(string(buf), r1Str) 289 | }) 290 | } 291 | 292 | func TestPackage_Zip(t *testing.T) { 293 | is := is.New(t) 294 | 295 | // Creating temporary empty file and making sure we remove it. 
296 | dir, err := ioutil.TempDir("", "datapackage_testzip") 297 | is.NoErr(err) 298 | defer os.RemoveAll(dir) 299 | fName := filepath.Join(dir, "pkg.zip") 300 | 301 | // Creating contents and zipping package. 302 | descriptorContents := `{"resources": [{ 303 | "name": "res1", 304 | "path": "data.csv", 305 | "profile": "tabular-data-resource", 306 | "schema": {"fields": [{"name":"name", "type":"string"}]} 307 | }]}` 308 | pkg, _ := FromString(descriptorContents, dir, validator.InMemoryLoader()) 309 | fmt.Println(pkg.Descriptor()) 310 | 311 | resPath := filepath.Join(dir, "data.csv") 312 | resContents := []byte("foo\nbar") 313 | ioutil.WriteFile(resPath, resContents, 0666) 314 | is.NoErr(pkg.Zip(fName)) 315 | 316 | // Checking zip contents. 317 | reader, err := zip.OpenReader(fName) 318 | is.NoErr(err) 319 | defer reader.Close() 320 | is.Equal(2, len(reader.File)) 321 | 322 | var buf bytes.Buffer 323 | descriptor, err := reader.File[0].Open() 324 | is.NoErr(err) 325 | defer descriptor.Close() 326 | io.Copy(&buf, descriptor) 327 | 328 | filledDescriptor := `{ 329 | "profile": "data-package", 330 | "resources": [ 331 | { 332 | "encoding": "utf-8", 333 | "name": "res1", 334 | "path": "data.csv", 335 | "profile": "tabular-data-resource", 336 | "schema": { 337 | "fields": [ 338 | { 339 | "name": "name", 340 | "type": "string" 341 | } 342 | ] 343 | } 344 | } 345 | ] 346 | }` 347 | is.Equal(buf.String(), filledDescriptor) 348 | 349 | buf.Reset() 350 | data, err := reader.File[1].Open() 351 | is.NoErr(err) 352 | defer data.Close() 353 | io.Copy(&buf, data) 354 | is.Equal(buf.String(), string(resContents)) 355 | } 356 | 357 | func TestValidZip_DataInSubDir(t *testing.T) { 358 | is := is.New(t) 359 | 360 | // Creating temporary empty directory and making sure we remove it. 361 | dir, err := ioutil.TempDir("", "datapackage_testzip") 362 | is.NoErr(err) 363 | dir = filepath.Clean(dir) // removes possible trailing slashes. 
364 | defer os.Remove(dir) 365 | 366 | dataDir := filepath.Join(dir, "data") 367 | is.NoErr(os.Mkdir(dataDir, os.ModePerm)) 368 | resPath := filepath.Join(dataDir, "data.csv") 369 | resContents := []byte("foo\nbar") 370 | is.NoErr(ioutil.WriteFile(resPath, resContents, os.ModePerm)) 371 | 372 | // Path to subdir. 373 | resSubdirPath := filepath.Join("data", "data.csv") 374 | 375 | // Creating contents and zipping package. 376 | desc := map[string]interface{}{ 377 | "profile": "data-package", 378 | "resources": []interface{}{ 379 | map[string]interface{}{ 380 | "name": "res1", 381 | "path": resSubdirPath, 382 | "format": "csv", 383 | "profile": "data-resource", 384 | "encoding": "utf-8", 385 | }, 386 | }, 387 | } 388 | pkg, _ := New(desc, dir, validator.InMemoryLoader()) 389 | fName := filepath.Join(dir, "pkg.zip") 390 | is.NoErr(pkg.Zip(fName)) 391 | 392 | // Checking zip contents. 393 | reader, err := zip.OpenReader(fName) 394 | is.NoErr(err) 395 | defer reader.Close() 396 | is.Equal(2, len(reader.File)) 397 | 398 | // Check descriptor. Doing that by not comparing JSON contents 399 | // because backslash "\" is a reserved chat and ends up being 400 | // failing in windows. 401 | func() { 402 | f, err := reader.File[0].Open() 403 | is.NoErr(err) 404 | defer f.Close() 405 | 406 | b, err := ioutil.ReadAll(f) 407 | is.NoErr(err) 408 | var readDesc map[string]interface{} 409 | is.NoErr(json.Unmarshal(b, &readDesc)) 410 | is.True(reflect.DeepEqual(readDesc, desc)) 411 | }() 412 | 413 | // Check data contents. 
414 | func() { 415 | f, err := reader.File[1].Open() 416 | is.NoErr(err) 417 | defer f.Close() 418 | 419 | b, err := ioutil.ReadAll(f) 420 | is.NoErr(err) 421 | is.Equal(b, resContents) 422 | }() 423 | } 424 | 425 | func TestFromReader(t *testing.T) { 426 | t.Run("ValidJSON", func(t *testing.T) { 427 | is := is.New(t) 428 | _, err := FromReader(strings.NewReader(`{"resources":[{"name":"res", "path":"foo.csv"}]}`), ".", validator.InMemoryLoader()) 429 | is.NoErr(err) 430 | }) 431 | t.Run("InvalidJSON", func(t *testing.T) { 432 | is := is.New(t) 433 | _, err := FromReader(strings.NewReader(`{resources}`), ".", validator.InMemoryLoader()) 434 | is.True(err != nil) 435 | }) 436 | } 437 | 438 | func TestLoad(t *testing.T) { 439 | is := is.New(t) 440 | // Creating temporary empty directory and making sure we remove it. 441 | dir, err := ioutil.TempDir("", "datapackage_load") 442 | is.NoErr(err) 443 | dir = filepath.Clean(dir) // removes possible trailing slashes. 444 | defer os.RemoveAll(dir) 445 | 446 | t.Run("Local", func(t *testing.T) { 447 | is := is.New(t) 448 | fName := filepath.Join(dir, "pkg.json") 449 | is.NoErr(ioutil.WriteFile(fName, []byte(r1Str), 0666)) 450 | defer os.Remove(fName) 451 | 452 | pkg, err := Load(fName, validator.InMemoryLoader()) 453 | is.NoErr(err) 454 | res := pkg.GetResource("res1") 455 | is.Equal(res.name, "res1") 456 | is.Equal(res.path, []string{"foo.csv"}) 457 | }) 458 | t.Run("LocalZip", func(t *testing.T) { 459 | is := is.New(t) 460 | pkg, err := Load("test_package.zip", validator.InMemoryLoader()) 461 | is.NoErr(err) 462 | res := pkg.GetResource("books") 463 | is.Equal(res.name, "books") 464 | is.Equal(res.path, []string{"data.csv"}) 465 | }) 466 | t.Run("LocalZipWithSubdirs", func(t *testing.T) { 467 | is := is.New(t) 468 | // Creating a zip file. 
469 | fName := filepath.Join(dir, "pkg.zip") 470 | zipFile, err := os.Create(fName) 471 | is.NoErr(err) 472 | defer zipFile.Close() 473 | 474 | // Adding a datapackage.json file to the zip with proper contents. 475 | w := zip.NewWriter(zipFile) 476 | f, err := w.Create("datapackage.json") 477 | is.NoErr(err) 478 | 479 | osPath := filepath.Join("data", "foo.csv") 480 | jsonPath, _ := json.Marshal(osPath) 481 | is.NoErr(err) 482 | content := fmt.Sprintf(`{ 483 | "profile": "data-package", 484 | "resources": [ 485 | { 486 | "encoding": "utf-8", 487 | "name": "res1", 488 | "path": %s, 489 | "profile": "data-resource" 490 | } 491 | ] 492 | }`, jsonPath) 493 | _, err = f.Write([]byte(content)) 494 | is.NoErr(err) 495 | // Writing a file which is in a subdir. 496 | f1, err := w.Create(osPath) 497 | is.NoErr(err) 498 | _, err = f1.Write([]byte(`foo`)) 499 | is.NoErr(err) 500 | is.NoErr(w.Close()) 501 | 502 | // Load and check package. 503 | pkg, err := Load(fName, validator.InMemoryLoader()) 504 | is.NoErr(err) 505 | res := pkg.GetResource("res1") 506 | is.Equal(res.name, "res1") 507 | is.Equal(res.path, []string{osPath}) 508 | contents, err := res.ReadAll() 509 | is.NoErr(err) 510 | is.Equal(contents[0], []string{"foo"}) 511 | }) 512 | t.Run("Remote", func(t *testing.T) { 513 | is := is.New(t) 514 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 515 | fmt.Fprintln(w, r1Str) 516 | })) 517 | defer ts.Close() 518 | data := []struct { 519 | desc string 520 | pathSuffix string 521 | }{ 522 | {"Empty Path", ""}, 523 | {"Non-EmptyPath", "/datapackage.json"}, 524 | {"EndsInSlash", "/"}, 525 | } 526 | for _, d := range data { 527 | t.Run(d.desc, func(t *testing.T) { 528 | is := is.New(t) 529 | pkg, err := Load(ts.URL+d.pathSuffix, validator.InMemoryLoader()) 530 | is.NoErr(err) 531 | res := pkg.GetResource("res1") 532 | is.Equal(res.name, "res1") 533 | is.Equal(res.path, []string{"foo.csv"}) 534 | is.Equal(res.basePath, ts.URL+"/") 535 | 
}) 536 | } 537 | }) 538 | t.Run("RemoteZip", func(t *testing.T) { 539 | is := is.New(t) 540 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 541 | f, err := os.Open("test_package.zip") 542 | is.NoErr(err) 543 | defer f.Close() 544 | 545 | stat, err := f.Stat() 546 | is.NoErr(err) 547 | w.Header().Set("Content-Type", "application/octet-stream") 548 | w.Header().Set("Content-Length", strconv.FormatInt(stat.Size(), 10)) //Get file size as a string 549 | io.Copy(w, f) 550 | })) 551 | defer ts.Close() 552 | pkg, err := Load(ts.URL+"/package.zip", validator.InMemoryLoader()) 553 | is.NoErr(err) 554 | res := pkg.GetResource("books") 555 | is.Equal(res.name, "books") 556 | is.Equal(res.path, []string{"data.csv"}) 557 | }) 558 | t.Run("InvalidPath", func(t *testing.T) { 559 | _, err := Load("foobar", validator.InMemoryLoader()) 560 | if err == nil { 561 | t.Fatalf("want:err got:nil") 562 | } 563 | }) 564 | t.Run("InvalidZipFile", func(t *testing.T) { 565 | is := is.New(t) 566 | // Creating an empty zip file. 567 | fName := filepath.Join(dir, "pkg.zip") 568 | zipFile, err := os.Create(fName) 569 | is.NoErr(err) 570 | defer zipFile.Close() 571 | // Asserting error. 572 | _, err = Load(fName, validator.InMemoryLoader()) 573 | if err == nil { 574 | t.Fatalf("want:err got:nil") 575 | } 576 | }) 577 | t.Run("InvalidZipFileNameInContent", func(t *testing.T) { 578 | is := is.New(t) 579 | // Creating a zip file. 580 | fName := filepath.Join(dir, "pkg.zip") 581 | zipFile, err := os.Create(fName) 582 | is.NoErr(err) 583 | defer zipFile.Close() 584 | 585 | // Adding a file to the zip with proper contents. 586 | w := zip.NewWriter(zipFile) 587 | f, err := w.Create("otherpackage.json") 588 | is.NoErr(err) 589 | _, err = f.Write([]byte(r1Str)) 590 | is.NoErr(err) 591 | is.NoErr(w.Close()) 592 | 593 | // Asserting error. 
594 | _, err = Load(fName, validator.InMemoryLoader()) 595 | if err == nil { 596 | t.Fatalf("want:err got:nil") 597 | } 598 | }) 599 | } 600 | 601 | func TestLoadPackageSchemas(t *testing.T) { 602 | is := is.New(t) 603 | schStr := `{"fields": [{"name":"name", "type":"string"}]}` 604 | schMap := map[string]interface{}{"fields": []interface{}{map[string]interface{}{"name": "name", "type": "string"}}} 605 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 606 | fmt.Fprintln(w, schStr) 607 | })) 608 | defer ts.Close() 609 | in := fmt.Sprintf(`{ 610 | "schema": "%s", 611 | "resources": [{ 612 | "name": "res1", 613 | "path": "data.csv", 614 | "profile": "tabular-data-resource", 615 | "schema": "%s" 616 | }]}`, ts.URL, ts.URL) 617 | pkg, err := FromString(in, ".", validator.InMemoryLoader()) 618 | is.NoErr(err) 619 | is.Equal(pkg.Descriptor()["schema"], schMap) 620 | is.Equal(pkg.GetResource("res1").Descriptor()["schema"], schMap) 621 | } 622 | 623 | // Issue https://github.com/frictionlessdata/datapackage-go/issues/28 624 | func TestBigNumBytesIsValid(t *testing.T) { 625 | is := is.New(t) 626 | in := 627 | `{ 628 | "created": "2021-11-25T10:11:24Z", 629 | "name": "new_dataset", 630 | "profile": "data-package", 631 | "resources": [ 632 | { 633 | "bytes": 17747417, 634 | "created": "Thursday, 25-Nov-21 11:09:13 UTC", 635 | "description": "", 636 | "filename": "excel_no_spaces_digit_sheets.xlsx", 637 | "modified": "Thursday, 25-Nov-21 11:09:13 UTC", 638 | "name": "732920043605108807", 639 | "path": "https://127.0.0.1:9000/minio/bcodmo-submissions-staging/5711471826818791508/files/excel_no_spaces_digit_sheets.xlsx" 640 | } 641 | ], 642 | "title": "New dataset", 643 | "updated": "2021-11-25T11:09:13Z" 644 | }` 645 | pkg, err := FromString(in, ".", validator.InMemoryLoader()) 646 | is.NoErr(err) 647 | is.Equal(pkg.GetResource("732920043605108807").Descriptor()["bytes"], json.Number("17747417")) 648 | } 649 | 
-------------------------------------------------------------------------------- /datapackage/path.go: -------------------------------------------------------------------------------- 1 | package datapackage 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "path" 7 | "path/filepath" 8 | "strings" 9 | ) 10 | 11 | func parseRemotePath(path string) (*url.URL, bool) { 12 | u, err := url.Parse(path) 13 | return u, err == nil && u.Scheme != "" && u.Host != "" 14 | } 15 | 16 | func joinPaths(basePath, finalPath string) string { 17 | if u, isRemote := parseRemotePath(basePath); isRemote { 18 | u.Path = path.Join(u.Path, finalPath) 19 | return u.String() 20 | } 21 | return filepath.Join(basePath, finalPath) 22 | } 23 | 24 | func getBasepath(p string) string { 25 | // If it is a remote-like URL, should not treat slashs in a system OS-dependent way. 26 | if u, isRemote := parseRemotePath(p); isRemote { 27 | uStr := strings.TrimSuffix(u.String(), "/") 28 | uPath := strings.TrimSuffix(u.Path, "/") 29 | if uPath == "" { 30 | return fmt.Sprintf("%s/", uStr) 31 | } 32 | return strings.TrimSuffix(uStr, path.Base(u.String())) 33 | } 34 | // It local path. 35 | return filepath.Dir(p) 36 | } 37 | -------------------------------------------------------------------------------- /datapackage/resource.go: -------------------------------------------------------------------------------- 1 | package datapackage 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "net/http" 10 | "net/url" 11 | "os" 12 | "path" 13 | "strings" 14 | "sync" 15 | "time" 16 | 17 | "github.com/frictionlessdata/datapackage-go/clone" 18 | "github.com/frictionlessdata/datapackage-go/validator" 19 | "github.com/frictionlessdata/tableschema-go/csv" 20 | "github.com/frictionlessdata/tableschema-go/schema" 21 | "github.com/frictionlessdata/tableschema-go/table" 22 | ) 23 | 24 | // Accepted tabular formats. 
var tabularFormats = map[string]struct{}{
	"csv":  {},
	"tsv":  {},
	"xls":  {},
	"xlsx": {},
}

// Profile name which identifies tabular resources.
const tabularDataResourceProfile = "tabular-data-resource"

// pathType tells the kinds of resource paths apart.
type pathType byte

const (
	urlPath pathType = iota
	relativePath
)

// Names of the descriptor properties (and property values) used by resources.
const (
	schemaProp           = "schema"
	nameProp             = "name"
	formatProp           = "format"
	mediaTypeProp        = "mediatype"
	pathProp             = "path"
	dataProp             = "data"
	jsonFormat           = "json"
	profileProp          = "profile"
	dialectProp          = "dialect"
	delimiterProp        = "delimiter"
	skipInitialSpaceProp = "skipInitialSpace"
	headerProp           = "header"
	doubleQuoteProp      = "doubleQuote"
)

// dialect holds the CSV dialect configuration options.
// http://frictionlessdata.io/specs/csv-dialect/
type dialect struct {
	// Delimiter is the character which separates fields (aka columns).
	Delimiter rune
	// SkipInitialSpace tells how to interpret whitespace which immediately follows a delimiter;
	// if false, whitespace immediately after a delimiter is treated as part of the following field.
	SkipInitialSpace bool
	// Header tells whether the file includes a header row. If true, the first row in the file is a header row, not data.
	Header bool
	// DoubleQuote controls the handling of quotes inside fields. If true, two consecutive quotes are interpreted as one.
	DoubleQuote bool
}

// defaultDialect is the dialect used as starting point when a resource
// descriptor configures its own dialect.
var defaultDialect = dialect{
	Delimiter:        ',',
	SkipInitialSpace: true,
	Header:           true,
	DoubleQuote:      true,
}

// Resource describes a data resource such as an individual file or table.
type Resource struct {
	descriptor map[string]interface{}
	path       []string
	data       interface{}
	name       string
	basePath   string
}

// Name returns the resource name.
90 | func (r *Resource) Name() string { 91 | return r.name 92 | } 93 | 94 | // Descriptor returns a copy of the underlying descriptor which describes the resource. 95 | func (r *Resource) Descriptor() map[string]interface{} { 96 | // Resource cescriptor is always valid. Don't need to make the interface overcomplicated. 97 | c, _ := clone.Descriptor(r.descriptor) 98 | return c 99 | } 100 | 101 | // Update the resource with the passed-in descriptor. The resource will only be updated if the 102 | // the new descriptor is valid, otherwise the error will be returned. 103 | func (r *Resource) Update(d map[string]interface{}, loaders ...validator.RegistryLoader) error { 104 | reg, err := validator.NewRegistry(loaders...) 105 | if err != nil { 106 | return err 107 | } 108 | res, err := NewResource(d, reg) 109 | if err != nil { 110 | return err 111 | } 112 | *r = *res 113 | return nil 114 | } 115 | 116 | // Tabular checks whether the resource is tabular. 117 | func (r *Resource) Tabular() bool { 118 | if pStr, ok := r.descriptor[profileProp].(string); ok && pStr == tabularDataResourceProfile { 119 | return true 120 | } 121 | fStr, _ := r.descriptor[formatProp].(string) 122 | if _, ok := tabularFormats[fStr]; ok { 123 | return true 124 | } 125 | if len(r.path) > 0 && all(r.path, isFileTabular) { 126 | return true 127 | } 128 | return false 129 | } 130 | 131 | func all(strings []string, f func(string) bool) bool { 132 | for _, s := range strings { 133 | if !f(s) { 134 | return false 135 | } 136 | } 137 | return true 138 | } 139 | 140 | func isFileTabular(path string) bool { 141 | for extension := range tabularFormats { 142 | if strings.HasSuffix(path, extension) { 143 | return true 144 | } 145 | } 146 | return false 147 | } 148 | 149 | func dialectOpts(i interface{}) []csv.CreationOpts { 150 | if i == nil { 151 | return []csv.CreationOpts{} 152 | } 153 | d := defaultDialect 154 | // Overriding default setting with valid values. 
155 | dMap, ok := i.(map[string]interface{}) 156 | if ok { 157 | if v, ok := dMap[delimiterProp].(string); ok { 158 | s := []rune(v) 159 | if len(s) > 0 { 160 | d.Delimiter = s[0] 161 | } 162 | } 163 | if v, ok := dMap[skipInitialSpaceProp].(bool); ok { 164 | d.SkipInitialSpace = v 165 | } 166 | if v, ok := dMap[headerProp].(bool); ok { 167 | d.Header = v 168 | } 169 | } 170 | // Mapping dialect to proper csv CreationOpts. 171 | opts := []csv.CreationOpts{csv.Delimiter(d.Delimiter)} 172 | if !d.SkipInitialSpace { 173 | opts = append(opts, csv.ConsiderInitialSpace()) 174 | } 175 | if d.Header { 176 | opts = append(opts, csv.LoadHeaders()) 177 | } 178 | return opts 179 | } 180 | 181 | // GetTable returns a table object to access the data. Returns an error if the resource is not tabular. 182 | func (r *Resource) GetTable(opts ...csv.CreationOpts) (table.Table, error) { 183 | if !r.Tabular() { 184 | return nil, fmt.Errorf("methods iter/read are not supported for non tabular data") 185 | } 186 | fullOpts := append(dialectOpts(r.descriptor[dialectProp]), opts...) 187 | // Inlined resources. 188 | if r.data != nil { 189 | switch r.data.(type) { 190 | case string: 191 | return csv.NewTable(csv.FromString(r.data.(string)), fullOpts...) 192 | default: 193 | return nil, fmt.Errorf("only csv and string is supported for inlining data") 194 | } 195 | } 196 | return csv.NewTable(func() (io.ReadCloser, error) { return loadContents(r.basePath, r.path, csvLoadFunc) }, fullOpts...) 
197 | } 198 | 199 | func csvLoadFunc(p string) func() (io.ReadCloser, error) { 200 | if strings.HasPrefix(p, "http") { 201 | return csv.Remote(p) 202 | } 203 | return csv.FromFile(p) 204 | } 205 | 206 | const ( 207 | remoteFetchTimeout = 15 * time.Second 208 | ) 209 | 210 | var ( 211 | httpClient *http.Client 212 | startHTTPClient sync.Once 213 | ) 214 | 215 | func binaryLoadFunc(p string) func() (io.ReadCloser, error) { 216 | if strings.HasPrefix(p, "http") { 217 | return func() (io.ReadCloser, error) { 218 | startHTTPClient.Do(func() { 219 | httpClient = &http.Client{ 220 | Timeout: remoteFetchTimeout, 221 | } 222 | }) 223 | resp, err := httpClient.Get(p) 224 | if err != nil { 225 | return nil, err 226 | } 227 | return resp.Body, nil 228 | } 229 | } 230 | return func() (io.ReadCloser, error) { 231 | return os.Open(p) 232 | } 233 | } 234 | 235 | type multiReadCloser struct { 236 | io.Reader 237 | rcs []io.ReadCloser 238 | } 239 | 240 | func (m *multiReadCloser) Close() error { 241 | var err error 242 | for _, rc := range m.rcs { 243 | if e := rc.Close(); e != nil { 244 | err = e 245 | } 246 | } 247 | return err 248 | } 249 | 250 | func newMultiReadCloser(rcs []io.ReadCloser) io.ReadCloser { 251 | readers := make([]io.Reader, len(rcs)) 252 | for i := range rcs { 253 | readers[i] = io.Reader(rcs[i]) 254 | } 255 | return &multiReadCloser{io.MultiReader(readers...), rcs} 256 | } 257 | 258 | func loadContents(basePath string, path []string, f func(string) func() (io.ReadCloser, error)) (io.ReadCloser, error) { 259 | var rcs []io.ReadCloser 260 | for _, p := range path { 261 | if basePath != "" { 262 | p = joinPaths(basePath, p) 263 | } 264 | rc, err := f(p)() 265 | if err != nil { 266 | return nil, err 267 | } 268 | rcs = append(rcs, rc) 269 | if len(path) > 1 { 270 | rcs = append(rcs, ioutil.NopCloser(bytes.NewReader([]byte{'\n'}))) 271 | } 272 | } 273 | return newMultiReadCloser(rcs), nil 274 | } 275 | 276 | // ReadAll reads all rows from the table and return it as 
strings. 277 | func (r *Resource) ReadAll(opts ...csv.CreationOpts) ([][]string, error) { 278 | t, err := r.GetTable(opts...) 279 | if err != nil { 280 | return nil, err 281 | } 282 | return t.ReadAll() 283 | } 284 | 285 | // RawRead returns an io.ReaderCloser associated to the resource contents. 286 | // It can be used to access the content of non-tabular resources. 287 | func (r *Resource) RawRead() (io.ReadCloser, error) { 288 | if r.data != nil { 289 | return ioutil.NopCloser(bytes.NewReader([]byte(r.data.(string)))), nil 290 | } 291 | return loadContents(r.basePath, r.path, binaryLoadFunc) 292 | } 293 | 294 | // Iter returns an Iterator to read the tabular resource. Iter returns an error 295 | // if the table physical source can not be iterated. 296 | // The iteration process always start at the beginning of the table. 297 | func (r *Resource) Iter(opts ...csv.CreationOpts) (table.Iterator, error) { 298 | t, err := r.GetTable(opts...) 299 | if err != nil { 300 | return nil, err 301 | } 302 | return t.Iter() 303 | } 304 | 305 | // GetSchema returns the schema associated to the resource, if present. The returned 306 | // schema is based on a copy of the descriptor. Changes to it won't affect the data package 307 | // descriptor structure. 308 | func (r *Resource) GetSchema() (schema.Schema, error) { 309 | if r.descriptor[schemaProp] == nil { 310 | return schema.Schema{}, fmt.Errorf("schema is not declared in the descriptor") 311 | } 312 | buf, err := json.Marshal(r.descriptor[schemaProp]) 313 | if err != nil { 314 | return schema.Schema{}, err 315 | } 316 | var s schema.Schema 317 | if err := json.Unmarshal(buf, &s); err != nil { 318 | return schema.Schema{}, fmt.Errorf("error unmarshaling schema:%w", err) 319 | } 320 | return s, nil 321 | } 322 | 323 | // Cast resource contents. 324 | // The result argument must necessarily be the address for a slice. The slice 325 | // may be nil or previously allocated. 
326 | func (r *Resource) Cast(out interface{}, opts ...csv.CreationOpts) error { 327 | sch, err := r.GetSchema() 328 | if err != nil { 329 | return err 330 | } 331 | tbl, err := r.GetTable(opts...) 332 | if err != nil { 333 | return err 334 | } 335 | return sch.CastTable(tbl, out) 336 | } 337 | 338 | // CastColumn casts a column from tabular resource contents. 339 | // The out argument must necessarily be the address for a slice. The slice 340 | // may be nil or previously allocated. 341 | func (r *Resource) CastColumn(name string, out interface{}, opts ...csv.CreationOpts) error { 342 | sch, err := r.GetSchema() 343 | if err != nil { 344 | return err 345 | } 346 | tab, err := r.GetTable(opts...) 347 | if err != nil { 348 | return err 349 | } 350 | col, err := tab.ReadColumn(name) 351 | if err != nil { 352 | return err 353 | } 354 | return sch.CastColumn(col, name, out) 355 | } 356 | 357 | // NewResourceWithDefaultRegistry creates a new Resource from the passed-in descriptor. 358 | // It uses the default registry to validate the resource descriptor. 359 | func NewResourceWithDefaultRegistry(d map[string]interface{}) (*Resource, error) { 360 | reg, err := validator.NewRegistry() 361 | if err != nil { 362 | return nil, err 363 | } 364 | return NewResource(d, reg) 365 | } 366 | 367 | // NewResource creates a new Resource from the passed-in descriptor, if valid. The 368 | // passed-in validator.Registry will be the source of profiles used in the validation. 
369 | func NewResource(d map[string]interface{}, registry validator.Registry) (*Resource, error) { 370 | cpy, err := clone.Descriptor(d) 371 | if err != nil { 372 | return nil, err 373 | } 374 | if schStr, ok := cpy[schemaProp].(string); ok { 375 | cpy[schemaProp], err = loadSchema(schStr) 376 | if err != nil { 377 | return nil, err 378 | } 379 | } 380 | fillResourceDescriptorWithDefaultValues(cpy) 381 | profile, ok := cpy[profilePropName].(string) 382 | if !ok { 383 | return nil, fmt.Errorf("profile property MUST be a string:\"%s\"", profilePropName) 384 | } 385 | if err := validator.Validate(cpy, profile, registry); err != nil { 386 | return nil, err 387 | } 388 | r := Resource{ 389 | descriptor: cpy, 390 | name: cpy[nameProp].(string), 391 | } 392 | pathI := cpy[pathProp] 393 | if pathI != nil { 394 | p, err := parsePath(pathI, cpy) 395 | if err != nil { 396 | return nil, err 397 | } 398 | r.path = append([]string{}, p...) 399 | return &r, nil 400 | } 401 | dataI := cpy[dataProp] 402 | data, err := parseData(dataI, cpy) 403 | if err != nil { 404 | return nil, err 405 | } 406 | r.data = data 407 | return &r, nil 408 | } 409 | 410 | func fillResourceDescriptorWithDefaultValues(r map[string]interface{}) { 411 | if r[profilePropName] == nil { 412 | r[profilePropName] = defaultResourceProfile 413 | } 414 | if r[encodingPropName] == nil { 415 | r[encodingPropName] = defaultResourceEncoding 416 | } 417 | // Filling up mandatory values with default values if not set. 418 | // That prevents users from the hassle of manually setting up all mandatory values. 
419 | if r[dialectProp] != nil { 420 | if dMap, ok := r[dialectProp].(map[string]interface{}); ok { 421 | if dMap[delimiterProp] == nil { 422 | dMap[delimiterProp] = string(defaultDialect.Delimiter) 423 | } 424 | if dMap[doubleQuoteProp] == nil { 425 | dMap[doubleQuoteProp] = defaultDialect.DoubleQuote 426 | } 427 | } 428 | } 429 | } 430 | 431 | func parseData(dataI interface{}, d map[string]interface{}) (interface{}, error) { 432 | if dataI != nil { 433 | switch dataI.(type) { 434 | case string: 435 | if d[formatProp] == nil && d[mediaTypeProp] == nil { 436 | return nil, fmt.Errorf("format or mediatype properties MUST be provided for JSON data strings. Descriptor:%v", d) 437 | } 438 | return dataI, nil 439 | case []interface{}, map[string]interface{}: 440 | return dataI, nil 441 | } 442 | } 443 | return nil, fmt.Errorf("data property must be either a JSON array/object OR a JSON string. Descriptor:%v", d) 444 | } 445 | 446 | func parsePath(pathI interface{}, d map[string]interface{}) ([]string, error) { 447 | var returned []string 448 | // Parse. 449 | switch path := pathI.(type) { 450 | default: 451 | return nil, fmt.Errorf("path MUST be a string or an array of strings. Descriptor:%v", d) 452 | case string: 453 | returned = append(returned, path) 454 | case []string: 455 | returned = append(returned, path...) 456 | case []interface{}: 457 | for _, p := range pathI.([]interface{}) { 458 | pStr, ok := p.(string) 459 | if !ok { 460 | return nil, fmt.Errorf("path MUST be a string or an array of strings. Descriptor:%v", d) 461 | } 462 | returned = append(returned, pStr) 463 | } 464 | } 465 | var lastType, currType pathType 466 | // Validation. 467 | for index, p := range returned { 468 | // Check if it is a relative path. 469 | u, err := url.Parse(p) 470 | if err != nil || u.Scheme == "" { 471 | if path.IsAbs(p) || strings.HasPrefix(path.Clean(p), "..") { 472 | return nil, fmt.Errorf("absolute paths (/) and relative parent paths (../) MUST NOT be used. 
Descriptor:%v", d) 473 | } 474 | currType = relativePath 475 | } else { // Check if it is a valid URL. 476 | if u.Scheme != "http" && u.Scheme != "https" { 477 | return nil, fmt.Errorf("URLs MUST be fully qualified. MUST be using either http or https scheme. Descriptor:%v", d) 478 | } 479 | currType = urlPath 480 | } 481 | if index > 0 { 482 | if currType != lastType { 483 | return nil, fmt.Errorf("it is NOT permitted to mix fully qualified URLs and relative paths in a single resource. Descriptor:%v", d) 484 | } 485 | } 486 | lastType = currType 487 | } 488 | return returned, nil 489 | } 490 | 491 | // NewUncheckedResource returns an Resource instance based on the descriptor without any verification. The returned Resource might 492 | // not be valid. 493 | func NewUncheckedResource(d map[string]interface{}) *Resource { 494 | r := &Resource{descriptor: d} 495 | nI, ok := d["name"] 496 | if ok { 497 | nStr, ok := nI.(string) 498 | if ok { 499 | r.name = nStr 500 | } 501 | } 502 | pI, ok := d["path"] 503 | if ok { 504 | r.path = pI.([]string) 505 | } 506 | return r 507 | } 508 | 509 | // NewResourceFromString creates a new Resource from the passed-in JSON descriptor, if valid. The 510 | // passed-in validator.Registry will be the source of profiles used in the validation. 
511 | func NewResourceFromString(res string, registry validator.Registry) (*Resource, error) { 512 | var d map[string]interface{} 513 | if err := json.Unmarshal([]byte(res), &d); err != nil { 514 | return nil, err 515 | } 516 | return NewResource(d, registry) 517 | } 518 | -------------------------------------------------------------------------------- /datapackage/resource_test.go: -------------------------------------------------------------------------------- 1 | package datapackage 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "net/http" 7 | "net/http/httptest" 8 | "os" 9 | "reflect" 10 | "testing" 11 | 12 | "github.com/frictionlessdata/datapackage-go/validator" 13 | "github.com/frictionlessdata/tableschema-go/csv" 14 | "github.com/frictionlessdata/tableschema-go/schema" 15 | "github.com/matryer/is" 16 | ) 17 | 18 | func ExampleResource_CastColumn() { 19 | resStr := ` 20 | { 21 | "name": "col", 22 | "data": "name,age\nfoo,42\nbar,84", 23 | "format": "csv", 24 | "profile": "tabular-data-resource", 25 | "schema": {"fields": [{"name": "name", "type": "string"},{"name": "age", "type": "integer"}]} 26 | }` 27 | res, _ := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 28 | var ages []float64 29 | res.CastColumn("age", &ages, csv.LoadHeaders()) 30 | fmt.Println(ages) 31 | // Output: [42 84] 32 | } 33 | 34 | func TestNewResourceWithDefaultRegistry(t *testing.T) { 35 | res, _ := NewResourceWithDefaultRegistry(r1) 36 | fmt.Println(res.Name()) 37 | // Output: res1 38 | } 39 | 40 | func TestNew(t *testing.T) { 41 | t.Run("Invalid", func(t *testing.T) { 42 | data := []struct { 43 | desc string 44 | d map[string]interface{} 45 | }{ 46 | {"EmptyDescriptor", map[string]interface{}{}}, 47 | {"NoPathOrData", map[string]interface{}{"name": "foo"}}, 48 | {"PathObject", map[string]interface{}{"name": "foo", "path": map[string]string{"foo": "bar"}}}, 49 | {"AbsolutePath", map[string]interface{}{"name": "foo", "path": "/bar"}}, 50 | {"InvalidRelativePath", 
map[string]interface{}{"name": "foo", "path": "../bar"}}, 51 | {"InvalidSchemeURL", map[string]interface{}{"name": "foo", "path": "myscheme://bar"}}, 52 | {"MixedPaths", map[string]interface{}{"name": "foo", "path": []string{"https://bar", "bar"}}}, 53 | {"PathAndData", map[string]interface{}{"name": "foo", "data": "foo", "path": "foo"}}, 54 | {"InvalidJSONStringData", map[string]interface{}{"name": "foo", "data": "invalidJSONObjectString"}}, 55 | {"InvalidJSONType", map[string]interface{}{"name": "foo", "data": 1}}, 56 | {"UpperCaseName", map[string]interface{}{"name": "UP", "path": "http://url.com"}}, 57 | {"NameInvalidChar", map[string]interface{}{"name": "u*p", "path": "http://url.com"}}, 58 | {"NameWithSpace", map[string]interface{}{"name": "u p", "path": "http://url.com"}}, 59 | {"NameIsNotString", map[string]interface{}{"name": 1, "path": "http://url.com"}}, 60 | {"SchemaAsInt", map[string]interface{}{"name": "name", "schema": 1, "path": "http://url.com"}}, 61 | {"SchemaInvalidPath", map[string]interface{}{"name": "name", "schema": "/bar", "path": "http://url.com"}}, 62 | {"InvalidProfile", map[string]interface{}{"name": "foo", "path": "foo.csv", "profile": 1}}, 63 | {"DataAsStringNoMediatype", map[string]interface{}{"name": "foo", "data": "1,2\n3,4"}}, 64 | {"DataInvalidType", map[string]interface{}{"name": "foo", "data": 1}}, 65 | } 66 | for _, d := range data { 67 | t.Run(d.desc, func(t *testing.T) { 68 | t.Parallel() 69 | is := is.New(t) 70 | _, err := NewResource(d.d, validator.MustInMemoryRegistry()) 71 | is.True(err != nil) 72 | }) 73 | } 74 | }) 75 | t.Run("ValidNames", func(t *testing.T) { 76 | data := []struct { 77 | testDescription string 78 | descriptor map[string]interface{} 79 | want string 80 | }{ 81 | {"NoPunctuation", map[string]interface{}{"name": "up", "path": "foo.csv"}, "up"}, 82 | {"WithPunctuation", map[string]interface{}{"name": "u.p_d.o.w.n", "path": "foo.csv"}, "u.p_d.o.w.n"}, 83 | } 84 | for _, d := range data { 85 | 
t.Run(d.testDescription, func(t *testing.T) { 86 | t.Parallel() 87 | is := is.New(t) 88 | r, err := NewResource(d.descriptor, validator.MustInMemoryRegistry()) 89 | is.NoErr(err) 90 | is.True(r.name == d.want) 91 | }) 92 | } 93 | }) 94 | t.Run("ValidPaths", func(t *testing.T) { 95 | data := []struct { 96 | testDescription string 97 | descriptor map[string]interface{} 98 | want []string 99 | }{ 100 | {"URL", map[string]interface{}{"name": "foo", "url": "http://data/foo.csv"}, []string{"http://url.com"}}, 101 | {"FilePath", map[string]interface{}{"name": "foo", "path": "data/foo.csv"}, []string{"data/foo.csv"}}, 102 | {"SlicePath", map[string]interface{}{"name": "foo", "path": []string{"https://foo.csv", "http://data/bar.csv"}}, []string{"https://foo.csv", "http://data/bar.csv"}}, 103 | {"SlicePath", map[string]interface{}{"name": "foo", "path": []interface{}{"https://foo.csv", "http://data/bar.csv"}}, []string{"https://foo.csv", "http://data/bar.csv"}}, 104 | } 105 | for _, d := range data { 106 | t.Run(d.testDescription, func(t *testing.T) { 107 | t.Parallel() 108 | is := is.New(t) 109 | r, err := NewResource(d.descriptor, validator.MustInMemoryRegistry()) 110 | is.NoErr(err) 111 | is.True(reflect.DeepEqual(d.want, r.path)) 112 | }) 113 | } 114 | }) 115 | t.Run("ValidData", func(t *testing.T) { 116 | data := []struct { 117 | testDescription string 118 | descriptor map[string]interface{} 119 | want interface{} 120 | }{ 121 | { 122 | "JSONObject", 123 | map[string]interface{}{"name": "foo", "data": map[string]interface{}{"a": 1, "b": 2}}, 124 | map[string]interface{}{"a": 1, "b": 2}, 125 | }, 126 | { 127 | "JSONArray", 128 | map[string]interface{}{"name": "foo", "data": []interface{}{map[string]interface{}{"a": 1}, map[string]interface{}{"b": 2}}}, 129 | []interface{}{map[string]interface{}{"a": 1}, map[string]interface{}{"b": 2}}, 130 | }, 131 | { 132 | "String", 133 | map[string]interface{}{"name": "foo", "data": "A,B,C\n1,2,3\n4,5,6", "format": "csv"}, 134 | 
"A,B,C\n1,2,3\n4,5,6", 135 | }, 136 | { 137 | "Table", 138 | map[string]interface{}{"name": "foo", "data": []interface{}{[]string{"A", "B"}, []string{"a", "b"}}}, 139 | []interface{}{[]string{"A", "B"}, []string{"a", "b"}}, 140 | }, 141 | } 142 | for _, d := range data { 143 | t.Run(d.testDescription, func(t *testing.T) { 144 | t.Parallel() 145 | is := is.New(t) 146 | r, err := NewResource(d.descriptor, validator.MustInMemoryRegistry()) 147 | is.NoErr(err) 148 | if !reflect.DeepEqual(reflect.ValueOf(d.want).Interface(), r.data) { 149 | t.Fatalf("want:%v type:%v got:%v type:%v", d.want, reflect.TypeOf(d.want), r.data, reflect.TypeOf(r.data)) 150 | } 151 | }) 152 | } 153 | }) 154 | t.Run("DelimiterDefaultValues", func(t *testing.T) { 155 | is := is.New(t) 156 | r, err := NewResource( 157 | map[string]interface{}{"name": "foo", "path": "foo.csv", "dialect": map[string]interface{}{}}, 158 | validator.MustInMemoryRegistry()) 159 | is.NoErr(err) 160 | is.Equal(r.descriptor["dialect"], map[string]interface{}{"delimiter": ",", "doubleQuote": true}) 161 | }) 162 | t.Run("SchemaLoading", func(t *testing.T) { 163 | t.Run("ValidRemote", func(t *testing.T) { 164 | is := is.New(t) 165 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 166 | fmt.Fprintln(w, `{"fields": [{"name": "name","type": "string"}]}`) 167 | })) 168 | defer ts.Close() 169 | r, err := NewResource( 170 | map[string]interface{}{"name": "foo", "path": "foo.csv", "schema": ts.URL}, 171 | validator.MustInMemoryRegistry(), 172 | ) 173 | is.NoErr(err) // Resource should be created successfully. 
174 | sch, err := r.GetSchema() 175 | is.NoErr(err) 176 | is.Equal(sch.Fields[0].Type, schema.StringType) 177 | }) 178 | t.Run("InvalidRemote", func(t *testing.T) { 179 | _, err := NewResource( 180 | map[string]interface{}{"name": "foo", "path": "foo.csv", "schema": "http://foobar"}, 181 | validator.MustInMemoryRegistry(), 182 | ) 183 | if err == nil { 184 | t.Fatalf("want:err got:nil") 185 | } 186 | }) 187 | t.Run("ValidLocal", func(t *testing.T) { 188 | is := is.New(t) 189 | f, err := ioutil.TempFile("", "resourceNewValidLocal") 190 | is.NoErr(err) 191 | defer os.Remove(f.Name()) 192 | is.NoErr(ioutil.WriteFile(f.Name(), []byte(`{"fields": [{"name": "name","type": "string"}]}`), 0666)) 193 | r, err := NewResource( 194 | map[string]interface{}{"name": "foo", "path": "foo.csv", "schema": f.Name()}, 195 | validator.MustInMemoryRegistry(), 196 | ) 197 | is.NoErr(err) 198 | sch, err := r.GetSchema() 199 | is.NoErr(err) 200 | is.Equal(sch.Fields[0].Type, schema.StringType) 201 | }) 202 | t.Run("InvalidLocal", func(t *testing.T) { 203 | _, err := NewResource( 204 | map[string]interface{}{"name": "foo", "path": "foo.csv", "schema": "foobarbez"}, 205 | validator.MustInMemoryRegistry(), 206 | ) 207 | if err == nil { 208 | t.Fatalf("want:err got:nil") 209 | } 210 | }) 211 | }) 212 | } 213 | 214 | func TestResource_Descriptor(t *testing.T) { 215 | is := is.New(t) 216 | r, err := NewResource(r1, validator.MustInMemoryRegistry()) 217 | is.NoErr(err) 218 | 219 | cpy := r.Descriptor() 220 | is.Equal(r.descriptor, cpy) 221 | 222 | // Checking if modifying the copy would not affect the source. 
223 | cpy["foo"] = "bar" 224 | if reflect.DeepEqual(r.descriptor, cpy) { 225 | t.Fatalf("%+v == %+v", r.descriptor, cpy) 226 | } 227 | } 228 | 229 | func TestResource_Update(t *testing.T) { 230 | t.Run("Valid", func(t *testing.T) { 231 | is := is.New(t) 232 | r, err := NewResource(r1, validator.MustInMemoryRegistry()) 233 | is.NoErr(err) 234 | is.NoErr(r.Update(r2, validator.InMemoryLoader())) 235 | is.Equal(r.Descriptor(), r2Filled) 236 | }) 237 | t.Run("Invalid", func(t *testing.T) { 238 | is := is.New(t) 239 | r, err := NewResource(r1, validator.MustInMemoryRegistry()) 240 | is.NoErr(err) 241 | if err := r.Update(invalidResource, validator.InMemoryLoader()); err == nil { 242 | t.Fatalf("want:err got:nil") 243 | } 244 | }) 245 | } 246 | func TestResource_Tabular(t *testing.T) { 247 | is := is.New(t) 248 | r := NewUncheckedResource(map[string]interface{}{"profile": "tabular-data-resource"}) 249 | is.True(r.Tabular()) 250 | r1 := NewUncheckedResource(map[string]interface{}{"profile": "data-resource"}) 251 | is.True(!r1.Tabular()) 252 | r2 := NewUncheckedResource(map[string]interface{}{"format": "csv"}) 253 | is.True(r2.Tabular()) 254 | r3 := NewUncheckedResource(map[string]interface{}{"path": []string{"boo.csv"}}) 255 | is.True(r3.Tabular()) 256 | } 257 | 258 | func TestResource_ReadAll(t *testing.T) { 259 | t.Run("LoadData", func(t *testing.T) { 260 | is := is.New(t) 261 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 262 | fmt.Fprintln(w, "name\nfoo") 263 | })) 264 | defer ts.Close() 265 | resStr := fmt.Sprintf(` 266 | { 267 | "name": "names", 268 | "path": "%s/data.csv", 269 | "profile": "tabular-data-resource", 270 | "schema": {"fields": [{"name": "name","type": "string"}]} 271 | }`, ts.URL) 272 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 273 | is.NoErr(err) 274 | contents, err := res.ReadAll() 275 | is.NoErr(err) 276 | is.Equal(contents, [][]string{{"name"}, {"foo"}}) 277 | }) 278 | 
t.Run("InlineData", func(t *testing.T) { 279 | is := is.New(t) 280 | resStr := ` 281 | { 282 | "name": "names", 283 | "data": "name\nfoo", 284 | "format": "csv", 285 | "profile": "tabular-data-resource", 286 | "schema": {"fields": [{"name": "name", "type": "string"}]} 287 | }` 288 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 289 | is.NoErr(err) 290 | contents, err := res.ReadAll() 291 | is.NoErr(err) 292 | is.Equal(contents, [][]string{{"name"}, {"foo"}}) 293 | }) 294 | t.Run("InvalidProfileType", func(t *testing.T) { 295 | r1 := NewUncheckedResource(map[string]interface{}{"profile": "data-resource"}) 296 | _, err := r1.ReadAll() 297 | if err == nil { 298 | t.Fatalf("want:nil got:err") 299 | } 300 | }) 301 | t.Run("Dialect", func(t *testing.T) { 302 | t.Run("Valid", func(t *testing.T) { 303 | is := is.New(t) 304 | r, err := NewResource( 305 | map[string]interface{}{ 306 | "name": "foo", 307 | "data": "name;age\n foo;42", 308 | "format": "csv", 309 | "dialect": map[string]interface{}{"delimiter": ";", "skipInitialSpace": false, "header": true}}, 310 | validator.MustInMemoryRegistry(), 311 | ) 312 | is.NoErr(err) 313 | contents, err := r.ReadAll() 314 | is.NoErr(err) 315 | is.Equal(contents, [][]string{{" foo", "42"}}) 316 | }) 317 | t.Run("EmptyDelimiter", func(t *testing.T) { 318 | is := is.New(t) 319 | r, err := NewResource( 320 | map[string]interface{}{ 321 | "name": "foo", 322 | "data": "name,age\nfoo,42", 323 | "format": "csv", 324 | "dialect": map[string]interface{}{"delimiter": ""}}, 325 | validator.MustInMemoryRegistry(), 326 | ) 327 | is.NoErr(err) 328 | contents, err := r.ReadAll() 329 | is.NoErr(err) 330 | is.Equal(contents, [][]string{{"foo", "42"}}) 331 | }) 332 | t.Run("Multipart", func(t *testing.T) { 333 | is := is.New(t) 334 | schemaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 335 | fmt.Fprintln(w, `{"fields": [{"name": "name","type": "string"}]}`) 336 | })) 337 | defer 
schemaServer.Close() 338 | res1Server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 339 | fmt.Fprintln(w, "name\nFoo") 340 | })) 341 | defer res1Server.Close() 342 | res2Server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 343 | fmt.Fprintln(w, "Bar") 344 | })) 345 | defer res2Server.Close() 346 | r, err := NewResource( 347 | map[string]interface{}{"name": "foo", "format": "csv", "path": []interface{}{res1Server.URL, res2Server.URL}, "schema": schemaServer.URL}, 348 | validator.MustInMemoryRegistry(), 349 | ) 350 | is.NoErr(err) 351 | contents, err := r.ReadAll() 352 | is.NoErr(err) 353 | fmt.Println(contents, [][]string{{"name"}, {"Foo"}, {"Bar"}}) 354 | }) 355 | }) 356 | } 357 | 358 | func TestResource_Iter(t *testing.T) { 359 | is := is.New(t) 360 | resStr := ` 361 | { 362 | "name": "iter", 363 | "data": "name", 364 | "format": "csv", 365 | "profile": "tabular-data-resource", 366 | "schema": {"fields": [{"name": "foo", "type": "string"}]} 367 | }` 368 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 369 | is.NoErr(err) 370 | iter, err := res.Iter() 371 | is.NoErr(err) 372 | is.True(iter.Next()) 373 | is.Equal(iter.Row(), []string{"name"}) 374 | is.True(!iter.Next()) 375 | } 376 | 377 | func TestResource_GetSchema(t *testing.T) { 378 | t.Run("Valid", func(t *testing.T) { 379 | is := is.New(t) 380 | resStr := ` 381 | { 382 | "name": "iter", 383 | "data": "32", 384 | "format": "csv", 385 | "profile": "tabular-data-resource", 386 | "schema": {"fields": [{"name": "Age", "type": "integer"}]} 387 | }` 388 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 389 | is.NoErr(err) 390 | sch, err := res.GetSchema() 391 | is.NoErr(err) 392 | row := struct { 393 | Age int 394 | }{} 395 | sch.CastRow([]string{"32"}, &row) 396 | is.Equal(row.Age, 32) 397 | }) 398 | t.Run("NoSchema", func(t *testing.T) { 399 | res := 
NewUncheckedResource(map[string]interface{}{}) 400 | _, err := res.GetSchema() 401 | if err == nil { 402 | t.Fatal("want:err got:nil") 403 | } 404 | }) 405 | t.Run("InvalidSchemaUnmarshal", func(t *testing.T) { 406 | is := is.New(t) 407 | // fields must be an array. 408 | resStr := ` 409 | { 410 | "name": "iter", 411 | "data": "32", 412 | "format": "csv", 413 | "schema": {"fields": [{"name": "Age", "type": "integer"}], "foreignKeys":{}} 414 | }` 415 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 416 | is.NoErr(err) 417 | if _, err := res.GetSchema(); err == nil { 418 | t.Fatal("want:err got:nil") 419 | } 420 | }) 421 | } 422 | 423 | func TestResource_Cast(t *testing.T) { 424 | resStr := ` 425 | { 426 | "name": "iter", 427 | "data": "32", 428 | "format": "csv", 429 | "profile": "tabular-data-resource", 430 | "schema": {"fields": [{"name": "Age", "type": "integer"}]} 431 | }` 432 | rows := []struct { 433 | Age int 434 | }{} 435 | t.Run("Valid", func(t *testing.T) { 436 | is := is.New(t) 437 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 438 | is.NoErr(err) 439 | is.NoErr(res.Cast(&rows)) 440 | is.Equal(rows[0].Age, 32) 441 | }) 442 | t.Run("NoSchema", func(t *testing.T) { 443 | res := NewUncheckedResource(map[string]interface{}{}) 444 | if res.Cast(&rows) == nil { 445 | t.Fatal("want:err got:nil") 446 | } 447 | }) 448 | t.Run("NoData", func(t *testing.T) { 449 | res := NewUncheckedResource(map[string]interface{}{ 450 | "schema": map[string]interface{}{}, 451 | }) 452 | if res.Cast(&rows) == nil { 453 | t.Fatal("want:err got:nil") 454 | } 455 | }) 456 | } 457 | 458 | func TestResource_RawRead(t *testing.T) { 459 | t.Run("Remote", func(t *testing.T) { 460 | is := is.New(t) 461 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 462 | fmt.Fprint(w, "1234") 463 | })) 464 | defer ts.Close() 465 | resStr := fmt.Sprintf(` 466 | { 467 | "name": "ids", 468 | "path": "%s/id1", 
469 | "profile": "data-resource" 470 | }`, ts.URL) 471 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 472 | is.NoErr(err) 473 | rc, err := res.RawRead() 474 | is.NoErr(err) 475 | defer rc.Close() 476 | contents, err := ioutil.ReadAll(rc) 477 | is.NoErr(err) 478 | is.Equal(string(contents), "1234") 479 | }) 480 | t.Run("Inline", func(t *testing.T) { 481 | is := is.New(t) 482 | resStr := ` 483 | { 484 | "name": "ids", "data": "{\"foo\":\"1234\"}", "profile":"data-resource", "mediatype":"application/json" 485 | }` 486 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 487 | is.NoErr(err) 488 | rc, err := res.RawRead() 489 | is.NoErr(err) 490 | defer rc.Close() 491 | contents, err := ioutil.ReadAll(rc) 492 | is.NoErr(err) 493 | is.Equal(string(contents), "{\"foo\":\"1234\"}") 494 | }) 495 | } 496 | 497 | func TestResource_ReadColumn(t *testing.T) { 498 | resStr := ` 499 | { 500 | "name": "col", 501 | "data": "name,age\nfoo,42\nbar,84", 502 | "format": "csv", 503 | "profile": "tabular-data-resource", 504 | "schema": {"fields": [{"name": "name", "type": "string"},{"name": "age", "type": "integer"}]} 505 | }` 506 | t.Run("Valid", func(t *testing.T) { 507 | is := is.New(t) 508 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 509 | is.NoErr(err) 510 | var ages []float64 511 | is.NoErr(res.CastColumn("age", &ages, csv.LoadHeaders())) 512 | is.Equal(float64(42), ages[0]) 513 | is.Equal(float64(84), ages[1]) 514 | }) 515 | t.Run("NoSchema", func(t *testing.T) { 516 | res := NewUncheckedResource(map[string]interface{}{}) 517 | var ages []float64 518 | if res.CastColumn("age", &ages) == nil { 519 | t.Fatal("want:err got:nil") 520 | } 521 | }) 522 | t.Run("NoData", func(t *testing.T) { 523 | res := NewUncheckedResource(map[string]interface{}{ 524 | "schema": map[string]interface{}{}, 525 | }) 526 | var ages []float64 527 | if res.CastColumn("age", &ages) == nil { 528 | t.Fatal("want:err 
got:nil") 529 | } 530 | }) 531 | t.Run("HeaderNotFound", func(t *testing.T) { 532 | is := is.New(t) 533 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 534 | is.NoErr(err) 535 | var ages []float64 536 | if res.CastColumn("foo", &ages) == nil { 537 | t.Fatal("want:err got:nil") 538 | } 539 | }) 540 | t.Run("FieldNotFound", func(t *testing.T) { 541 | is := is.New(t) 542 | resStr := ` 543 | { 544 | "name": "col", 545 | "data": "name,age\nfoo,42\nbar,84", 546 | "format": "csv", 547 | "profile": "tabular-data-resource", 548 | "schema": {"fields": [{"name": "name", "type": "string"},{"name": "Age", "type": "integer"}]} 549 | }` 550 | res, err := NewResourceFromString(resStr, validator.MustInMemoryRegistry()) 551 | is.NoErr(err) 552 | var ages []float64 553 | if res.CastColumn("age", &ages) == nil { 554 | t.Fatal("want:err got:nil") 555 | } 556 | }) 557 | } 558 | -------------------------------------------------------------------------------- /datapackage/schema.go: -------------------------------------------------------------------------------- 1 | package datapackage 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io" 7 | "io/ioutil" 8 | "net/http" 9 | "os" 10 | "strings" 11 | 12 | "github.com/frictionlessdata/tableschema-go/schema" 13 | ) 14 | 15 | func loadSchema(p string) (map[string]interface{}, error) { 16 | var reader io.Reader 17 | if strings.HasPrefix(p, "http") { 18 | resp, err := http.Get(p) 19 | if err != nil { 20 | return nil, err 21 | } 22 | defer resp.Body.Close() 23 | reader = resp.Body 24 | } else { 25 | f, err := os.Open(p) 26 | if err != nil { 27 | return nil, err 28 | } 29 | defer f.Close() 30 | reader = f 31 | } 32 | buf, err := ioutil.ReadAll(reader) 33 | if err != nil { 34 | return nil, err 35 | } 36 | _, err = schema.Read(bytes.NewBuffer(buf)) 37 | if err != nil { 38 | return nil, err 39 | } 40 | var ret map[string]interface{} 41 | if err := json.Unmarshal(buf, &ret); err != nil { 42 | return nil, err 43 | } 44 
| return ret, nil 45 | } 46 | -------------------------------------------------------------------------------- /datapackage/test_package.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/datapackage-go/fe7db5853f890619fb378222bf7c1b2c9a288192/datapackage/test_package.zip -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package datapackage is official Go respository of libraries to deal with frictionless data 2 | // packages. 3 | package datapackage 4 | -------------------------------------------------------------------------------- /examples/build_package/cities.csv: -------------------------------------------------------------------------------- 1 | city,year,population 2 | london,2017,8780000 3 | paris,2017,2240000 4 | rome,2017,2860000 -------------------------------------------------------------------------------- /examples/build_package/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/frictionlessdata/datapackage-go/datapackage" 7 | "github.com/frictionlessdata/datapackage-go/validator" 8 | ) 9 | 10 | func main() { 11 | descriptor := map[string]interface{}{ 12 | "resources": []interface{}{ 13 | map[string]interface{}{ 14 | "name": "books", 15 | "path": "books.csv", 16 | "format": "csv", 17 | "profile": "tabular-data-resource", 18 | "schema": map[string]interface{}{ 19 | "fields": []interface{}{ 20 | map[string]interface{}{"name": "author", "type": "string"}, 21 | map[string]interface{}{"name": "title", "type": "string"}, 22 | map[string]interface{}{"name": "year", "type": "integer"}, 23 | }, 24 | }, 25 | }, 26 | }, 27 | } 28 | pkg, err := datapackage.New(descriptor, ".", validator.InMemoryLoader()) 29 | if err != nil { 30 | panic(err) 31 | } 32 
| // Removing resource. 33 | pkg.RemoveResource("books") 34 | 35 | // Adding new resource. 36 | pkg.AddResource(map[string]interface{}{ 37 | "name": "cities", 38 | "path": "cities.csv", 39 | "format": "csv", 40 | "profile": "tabular-data-resource", 41 | "schema": map[string]interface{}{ 42 | "fields": []interface{}{ 43 | map[string]interface{}{"name": "city", "type": "string"}, 44 | map[string]interface{}{"name": "year", "type": "integer"}, 45 | map[string]interface{}{"name": "population", "type": "integer"}, 46 | }, 47 | }, 48 | }) 49 | 50 | // Printing resource contents. 51 | cities, _ := pkg.GetResource("cities").ReadAll() 52 | fmt.Println("## Cities: ", cities) 53 | } 54 | -------------------------------------------------------------------------------- /examples/inline/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remote_datapackage", 3 | "resources": [ 4 | { 5 | "name": "books", 6 | "format": "csv", 7 | "data": "author,title,year\nRobert M. 
Pirsig,Zen and the Art of Motorcycle Maintenance,1974\nLast and First Men: A Story of the Near and Far Future,Olaf Sapledon,1930\nSolaris,Stanisław Lem,1961", 8 | "profile":"tabular-data-resource", 9 | "schema": { 10 | "fields": [ 11 | { 12 | "name": "author", 13 | "type": "string" 14 | }, 15 | { 16 | "name": "title", 17 | "type": "string" 18 | }, 19 | { 20 | "name": "year", 21 | "type": "integer" 22 | } 23 | ] 24 | } 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /examples/inline/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/frictionlessdata/datapackage-go/datapackage" 8 | ) 9 | 10 | func main() { 11 | pkg, err := datapackage.Load("datapackage.json") 12 | if err != nil { 13 | panic(err) 14 | } 15 | fmt.Printf("Data package \"%s\" successfully created.\n", pkg.Descriptor()["name"]) 16 | 17 | fmt.Printf("## Resources ##") 18 | for _, res := range pkg.Resources() { 19 | b, _ := json.MarshalIndent(res.Descriptor(), "", " ") 20 | fmt.Println(string(b)) 21 | } 22 | 23 | fmt.Println("## Contents ##") 24 | books := pkg.GetResource("books") 25 | contents, _ := books.ReadAll() 26 | fmt.Println(contents) 27 | } 28 | -------------------------------------------------------------------------------- /examples/load/data.csv: -------------------------------------------------------------------------------- 1 | author;title;year 2 | Robert M. 
Pirsig;Zen and the Art of Motorcycle Maintenance;1974 3 | Last and First Men: A Story of the Near and Far Future;Olaf Sapledon;1930 4 | Solaris;Stanisław Lem;1961 -------------------------------------------------------------------------------- /examples/load/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remote_datapackage", 3 | "resources": [ 4 | { 5 | "name": "books", 6 | "format": "csv", 7 | "path": "data.csv", 8 | "profile":"tabular-data-resource", 9 | "schema": "schema.json", 10 | "dialect":{ 11 | "delimiter":";" 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /examples/load/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/frictionlessdata/datapackage-go/datapackage" 8 | "github.com/frictionlessdata/tableschema-go/csv" 9 | ) 10 | 11 | func main() { 12 | pkg, err := datapackage.Load("datapackage.json") 13 | if err != nil { 14 | panic(err) 15 | } 16 | fmt.Printf("Data package \"%s\" successfully created.\n", pkg.Descriptor()["name"]) 17 | 18 | fmt.Printf("\n## Resources ##") 19 | for _, res := range pkg.Resources() { 20 | b, _ := json.MarshalIndent(res.Descriptor(), "", " ") 21 | fmt.Println(string(b)) 22 | } 23 | 24 | fmt.Println("\n## Raw Content ##") 25 | books := pkg.GetResource("books") 26 | contents, _ := books.ReadAll() 27 | fmt.Println(contents) 28 | 29 | fmt.Println("\n## Cast Content ##") 30 | book := struct { 31 | Author string `tableheader:"author"` 32 | Title string `tableheader:"title"` 33 | Year int `tableheader:"year"` 34 | }{} 35 | sch, _ := books.GetSchema() 36 | iter, _ := books.Iter(csv.LoadHeaders()) 37 | for iter.Next() { 38 | sch.CastRow(iter.Row(), &book) 39 | fmt.Printf("%+v\n", book) 40 | } 41 | } 42 | 
-------------------------------------------------------------------------------- /examples/load/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "fields": [ 3 | { 4 | "name": "author", 5 | "type": "string" 6 | }, 7 | { 8 | "name": "title", 9 | "type": "string" 10 | }, 11 | { 12 | "name": "year", 13 | "type": "integer" 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /examples/load_zip/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/frictionlessdata/datapackage-go/datapackage" 8 | "github.com/frictionlessdata/tableschema-go/csv" 9 | ) 10 | 11 | func main() { 12 | pkg, err := datapackage.Load("package.zip") 13 | if err != nil { 14 | panic(err) 15 | } 16 | fmt.Printf("Data package \"%s\" successfully created.\n", pkg.Descriptor()["name"]) 17 | 18 | fmt.Printf("\n## Resources ##") 19 | for _, res := range pkg.Resources() { 20 | b, _ := json.MarshalIndent(res.Descriptor(), "", " ") 21 | fmt.Println(string(b)) 22 | } 23 | 24 | fmt.Println("\n## Raw Content ##") 25 | books := pkg.GetResource("books") 26 | contents, _ := books.ReadAll() 27 | fmt.Println(contents) 28 | 29 | fmt.Println("\n## Cast Content ##") 30 | book := struct { 31 | Author string `tableheader:"author"` 32 | Title string `tableheader:"title"` 33 | Year int `tableheader:"year"` 34 | }{} 35 | sch, _ := books.GetSchema() 36 | iter, _ := books.Iter(csv.LoadHeaders()) 37 | for iter.Next() { 38 | sch.CastRow(iter.Row(), &book) 39 | fmt.Printf("%+v\n", book) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/load_zip/package.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/frictionlessdata/datapackage-go/fe7db5853f890619fb378222bf7c1b2c9a288192/examples/load_zip/package.zip -------------------------------------------------------------------------------- /examples/multipart/data.csv: -------------------------------------------------------------------------------- 1 | author;title;year 2 | Robert M. Pirsig;Zen and the Art of Motorcycle Maintenance;1974 3 | Last and First Men: A Story of the Near and Far Future;Olaf Sapledon;1930 4 | Solaris;Stanisław Lem;1961 -------------------------------------------------------------------------------- /examples/multipart/data1.csv: -------------------------------------------------------------------------------- 1 | 1984;George Orwell;1974 2 | The Hitchhiker's Guide to the Galaxy;Douglas Adams;1979 -------------------------------------------------------------------------------- /examples/multipart/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remote_datapackage", 3 | "resources": [ 4 | { 5 | "name": "books", 6 | "format": "csv", 7 | "path": ["data.csv","data1.csv"], 8 | "profile":"tabular-data-resource", 9 | "schema": { 10 | "fields": [ 11 | { 12 | "name": "author", 13 | "type": "string" 14 | }, 15 | { 16 | "name": "title", 17 | "type": "string" 18 | }, 19 | { 20 | "name": "year", 21 | "type": "integer" 22 | } 23 | ] 24 | }, 25 | "dialect":{ 26 | "delimiter":";" 27 | } 28 | } 29 | ] 30 | } -------------------------------------------------------------------------------- /examples/multipart/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/frictionlessdata/datapackage-go/datapackage" 8 | "github.com/frictionlessdata/tableschema-go/csv" 9 | ) 10 | 11 | func main() { 12 | pkg, err := datapackage.Load("datapackage.json") 13 | if err != nil { 14 | panic(err) 15 | } 16 | 
fmt.Printf("Data package \"%s\" successfully created.\n", pkg.Descriptor()["name"]) 17 | 18 | fmt.Printf("\n## Resources ##") 19 | for _, res := range pkg.Resources() { 20 | b, _ := json.MarshalIndent(res.Descriptor(), "", " ") 21 | fmt.Println(string(b)) 22 | } 23 | 24 | fmt.Println("\n## Raw Content ##") 25 | books := pkg.GetResource("books") 26 | contents, _ := books.ReadAll() 27 | fmt.Println(contents) 28 | 29 | fmt.Println("\n## Cast Content ##") 30 | book := struct { 31 | Author string `tableheader:"author"` 32 | Title string `tableheader:"title"` 33 | Year int `tableheader:"year"` 34 | }{} 35 | sch, _ := books.GetSchema() 36 | iter, _ := books.Iter(csv.LoadHeaders()) 37 | for iter.Next() { 38 | sch.CastRow(iter.Row(), &book) 39 | fmt.Printf("%+v\n", book) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/remote/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/frictionlessdata/datapackage-go/datapackage" 7 | "github.com/frictionlessdata/tableschema-go/csv" 8 | ) 9 | 10 | type element struct { 11 | Number int `tableheader:"atomic number"` 12 | Symbol string `tableheader:"symbol"` 13 | Name string `tableheader:"name"` 14 | Mass float64 `tableheader:"atomic mass"` 15 | Metal string `tableheader:"metal or nonmetal?"` 16 | } 17 | 18 | func main() { 19 | pkg, err := datapackage.Load("https://raw.githubusercontent.com/frictionlessdata/example-data-packages/master/periodic-table/datapackage.json") 20 | if err != nil { 21 | panic(err) 22 | } 23 | resource := pkg.GetResource("data") 24 | var elements []element 25 | if err := resource.Cast(&elements, csv.LoadHeaders()); err != nil { 26 | panic(err) 27 | } 28 | for _, e := range elements { 29 | fmt.Printf("%+v\n", e) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/zip/.gitignore: 
-------------------------------------------------------------------------------- 1 | package.zip 2 | -------------------------------------------------------------------------------- /examples/zip/data/data.csv: -------------------------------------------------------------------------------- 1 | author,title,year 2 | Robert M. Pirsig,Zen and the Art of Motorcycle Maintenance,1974 3 | Last and First Men: A Story of the Near and Far Future,Olaf Sapledon,1930 4 | Solaris,Stanisław Lem,1961 -------------------------------------------------------------------------------- /examples/zip/datapackage.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remote_datapackage", 3 | "resources": [ 4 | { 5 | "name": "books", 6 | "format": "csv", 7 | "path": "data/data.csv", 8 | "profile":"tabular-data-resource", 9 | "schema": { 10 | "fields": [ 11 | { 12 | "name": "author", 13 | "type": "string" 14 | }, 15 | { 16 | "name": "title", 17 | "type": "string" 18 | }, 19 | { 20 | "name": "year", 21 | "type": "integer" 22 | } 23 | ] 24 | } 25 | } 26 | ] 27 | } 28 | 29 | -------------------------------------------------------------------------------- /examples/zip/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/frictionlessdata/datapackage-go/datapackage" 7 | ) 8 | 9 | func main() { 10 | pkg, err := datapackage.Load("datapackage.json") 11 | if err != nil { 12 | panic(err) 13 | } 14 | fmt.Printf("Data package \"%s\" successfully created.\n", pkg.Descriptor()["name"]) 15 | if err := pkg.Zip("package.zip"); err != nil { 16 | panic(err) 17 | } 18 | fmt.Println("Zip package.zip created successfully.") 19 | } 20 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/frictionlessdata/datapackage-go 2 | 3 
| go 1.18 4 | 5 | require ( 6 | github.com/frictionlessdata/tableschema-go v1.1.4-0.20220401172006-6cc5f3b2411c 7 | github.com/matryer/is v1.2.0 8 | github.com/santhosh-tekuri/jsonschema v1.2.4 9 | ) 10 | 11 | require ( 12 | github.com/satori/go.uuid v1.2.0 // indirect 13 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/frictionlessdata/tableschema-go v1.1.4-0.20220401172006-6cc5f3b2411c h1:7S5F4VDf8vkLL3egYLWobmq1FbZb7ig33IbliL7Tr/M= 2 | github.com/frictionlessdata/tableschema-go v1.1.4-0.20220401172006-6cc5f3b2411c/go.mod h1:B+DhLlwjCf6p6FqVkqpdYyAIy7L8jHCaxa2wFaqpYdc= 3 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 4 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 5 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 6 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 7 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 8 | github.com/matryer/is v0.0.0-20170112134659-c0323ceb4e99/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA= 9 | github.com/matryer/is v1.2.0 h1:92UTHpy8CDwaJ08GqLDzhhuixiBUUD1p3AU6PHddz4A= 10 | github.com/matryer/is v1.2.0/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA= 11 | github.com/santhosh-tekuri/jsonschema v1.2.4 h1:hNhW8e7t+H1vgY+1QeEQpveR6D4+OwKPXCfD2aieJis= 12 | github.com/santhosh-tekuri/jsonschema v1.2.4/go.mod h1:TEAUOeZSmIxTTuHatJzrvARHiuO9LYd+cIxzgEHCQI4= 13 | github.com/satori/go.uuid v1.1.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= 14 | github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= 15 | github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= 16 | gopkg.in/check.v1 
// jsonSchema is a descriptor validator that wraps a *jsonschema.Schema and
// delegates validation to it.
type jsonSchema struct {
	// schema is the JSON Schema descriptors are validated against.
	schema *jsonschema.Schema
}

// Validate checks the passed-in descriptor against the wrapped JSON Schema.
// It returns the error reported by schema.ValidateInterface unchanged; a nil
// result means that call reported no problem.
func (v *jsonSchema) Validate(descriptor map[string]interface{}) error {
	return v.schema.ValidateInterface(descriptor)
}
The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 16 | "type": "string", 17 | "examples": [ 18 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 19 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 20 | ] 21 | }, 22 | "name": { 23 | "propertyOrder": 20, 24 | "title": "Name", 25 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 26 | "type": "string", 27 | "pattern": "^([-a-z0-9._/])+$", 28 | "context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 29 | "examples": [ 30 | "{\n \"name\": \"my-nice-name\"\n}\n" 31 | ] 32 | }, 33 | "id": { 34 | "propertyOrder": 30, 35 | "title": "ID", 36 | "description": "A property reserved for globally unique identifiers. Examples of identifiers that are unique include UUIDs and DOIs.", 37 | "context": "A common usage pattern for Data Packages is as a packaging format within the bounds of a system or platform. In these cases, a unique identifier for a package is desired for common data handling workflows, such as updating an existing package. While at the level of the specification, global uniqueness cannot be validated, consumers using the `id` property `MUST` ensure identifiers are globally unique.", 38 | "type": "string", 39 | "examples": [ 40 | "{\n \"id\": \"b03ec84-77fd-4270-813b-0c698943f7ce\"\n}\n", 41 | "{\n \"id\": \"http://dx.doi.org/10.1594/PANGAEA.726855\"\n}\n" 42 | ] 43 | }, 44 | "title": { 45 | "propertyOrder": 40, 46 | "title": "Title", 47 | "description": "A human-readable title.", 48 | "type": "string", 49 | "examples": [ 50 | "{\n \"title\": \"My Package Title\"\n}\n" 51 | ] 52 | }, 53 | "description": { 54 | "propertyOrder": 50, 55 | "title": "Description", 56 | "description": "A text description. 
Markdown is encouraged.", 57 | "type": "string", 58 | "examples": [ 59 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 60 | ] 61 | }, 62 | "homepage": { 63 | "propertyOrder": 60, 64 | "title": "Home Page", 65 | "description": "The home on the web that is related to this data package.", 66 | "type": "string", 67 | "format": "uri", 68 | "examples": [ 69 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 70 | ] 71 | }, 72 | "created": { 73 | "propertyOrder": 70, 74 | "title": "Created", 75 | "description": "The datetime on which this descriptor was created.", 76 | "context": "The datetime must conform to the string formats for datetime as described in [RFC3339](https://tools.ietf.org/html/rfc3339#section-5.6)", 77 | "type": "string", 78 | "format": "date-time", 79 | "examples": [ 80 | "{\n \"created\": \"1985-04-12T23:20:50.52Z\"\n}\n" 81 | ] 82 | }, 83 | "contributors": { 84 | "propertyOrder": 80, 85 | "title": "Contributors", 86 | "description": "The contributors to this descriptor.", 87 | "type": "array", 88 | "minItems": 1, 89 | "items": { 90 | "title": "Contributor", 91 | "description": "A contributor to this descriptor.", 92 | "properties": { 93 | "title": { 94 | "title": "Title", 95 | "description": "A human-readable title.", 96 | "type": "string", 97 | "examples": [ 98 | "{\n \"title\": \"My Package Title\"\n}\n" 99 | ] 100 | }, 101 | "path": { 102 | "title": "Path", 103 | "description": "A fully qualified URL, or a POSIX file path..", 104 | "type": "string", 105 | "examples": [ 106 | "{\n \"path\": \"file.csv\"\n}\n", 107 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 108 | ], 109 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
110 | }, 111 | "email": { 112 | "title": "Email", 113 | "description": "An email address.", 114 | "type": "string", 115 | "format": "email", 116 | "examples": [ 117 | "{\n \"email\": \"example@example.com\"\n}\n" 118 | ] 119 | }, 120 | "organisation": { 121 | "title": "Organization", 122 | "description": "An organizational affiliation for this contributor.", 123 | "type": "string" 124 | }, 125 | "role": { 126 | "type": "string", 127 | "enum": [ 128 | "publisher", 129 | "author", 130 | "maintainer", 131 | "wrangler", 132 | "contributor" 133 | ], 134 | "default": "contributor" 135 | } 136 | }, 137 | "required": [ 138 | "title" 139 | ], 140 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 141 | }, 142 | "examples": [ 143 | "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\"\n }\n ]\n}\n", 144 | "{\n \"contributors\": [\n {\n \"title\": \"Joe Bloggs\",\n \"email\": \"joe@example.com\",\n \"role\": \"author\"\n }\n ]\n}\n" 145 | ] 146 | }, 147 | "keywords": { 148 | "propertyOrder": 90, 149 | "title": "Keywords", 150 | "description": "A list of keywords that describe this package.", 151 | "type": "array", 152 | "minItems": 1, 153 | "items": { 154 | "type": "string" 155 | }, 156 | "examples": [ 157 | "{\n \"keywords\": [\n \"data\",\n \"fiscal\",\n \"transparency\"\n ]\n}\n" 158 | ] 159 | }, 160 | "image": { 161 | "propertyOrder": 100, 162 | "title": "Image", 163 | "description": "A image to represent this package.", 164 | "type": "string", 165 | "examples": [ 166 | "{\n \"image\": \"http://example.com/image.jpg\"\n}\n", 167 | "{\n \"image\": \"relative/to/image.jpg\"\n}\n" 168 | ] 169 | }, 170 | "licenses": { 171 | "propertyOrder": 110, 172 | "title": "Licenses", 173 | "description": "The license(s) under which this package is published.", 174 | "type": "array", 175 | "minItems": 1, 176 | "items": { 177 | "title": 
"License", 178 | "description": "A license for this descriptor.", 179 | "type": "object", 180 | "properties": { 181 | "name": { 182 | "title": "Open Definition license identifier", 183 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 184 | "type": "string", 185 | "pattern": "^([-a-zA-Z0-9._])+$" 186 | }, 187 | "path": { 188 | "title": "Path", 189 | "description": "A fully qualified URL, or a POSIX file path..", 190 | "type": "string", 191 | "examples": [ 192 | "{\n \"path\": \"file.csv\"\n}\n", 193 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 194 | ], 195 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 196 | }, 197 | "title": { 198 | "title": "Title", 199 | "description": "A human-readable title.", 200 | "type": "string", 201 | "examples": [ 202 | "{\n \"title\": \"My Package Title\"\n}\n" 203 | ] 204 | } 205 | }, 206 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
207 | }, 208 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 209 | "examples": [ 210 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 211 | ] 212 | }, 213 | "resources": { 214 | "propertyOrder": 120, 215 | "title": "Data Resources", 216 | "description": "An `array` of Data Resource objects, each compliant with the [Data Resource](/data-resource/) specification.", 217 | "type": "array", 218 | "minItems": 1, 219 | "items": { 220 | "title": "Data Resource", 221 | "description": "Data Resource.", 222 | "type": "object", 223 | "oneOf": [ 224 | { 225 | "required": [ 226 | "name", 227 | "data" 228 | ] 229 | }, 230 | { 231 | "required": [ 232 | "name", 233 | "path" 234 | ] 235 | } 236 | ], 237 | "properties": { 238 | "profile": { 239 | "propertyOrder": 10, 240 | "default": "data-resource", 241 | "title": "Profile", 242 | "description": "The profile of this descriptor.", 243 | "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 244 | "type": "string", 245 | "examples": [ 246 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 247 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 248 | ] 249 | }, 250 | "name": { 251 | "propertyOrder": 20, 252 | "title": "Name", 253 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 254 | "type": "string", 255 | "pattern": "^([-a-z0-9._/])+$", 256 | "context": "This is ideally a url-usable and human-readable name. 
Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 257 | "examples": [ 258 | "{\n \"name\": \"my-nice-name\"\n}\n" 259 | ] 260 | }, 261 | "path": { 262 | "propertyOrder": 30, 263 | "title": "Path", 264 | "description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings. of valid URIs.", 265 | "oneOf": [ 266 | { 267 | "title": "Path", 268 | "description": "A fully qualified URL, or a POSIX file path..", 269 | "type": "string", 270 | "examples": [ 271 | "{\n \"path\": \"file.csv\"\n}\n", 272 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 273 | ], 274 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 275 | }, 276 | { 277 | "type": "array", 278 | "minItems": 1, 279 | "items": { 280 | "title": "Path", 281 | "description": "A fully qualified URL, or a POSIX file path..", 282 | "type": "string", 283 | "examples": [ 284 | "{\n \"path\": \"file.csv\"\n}\n", 285 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 286 | ], 287 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 288 | }, 289 | "examples": [ 290 | "[ \"file.csv\" ]\n", 291 | "[ \"http://example.com/file.csv\" ]\n" 292 | ] 293 | } 294 | ], 295 | "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. 
For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.", 296 | "examples": [ 297 | "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n", 298 | "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n", 299 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 300 | ] 301 | }, 302 | "data": { 303 | "propertyOrder": 230, 304 | "title": "Data", 305 | "description": "Inline data for this resource." 306 | }, 307 | "schema": { 308 | "propertyOrder": 40, 309 | "title": "Schema", 310 | "description": "A schema for this resource.", 311 | "type": "object" 312 | }, 313 | "title": { 314 | "propertyOrder": 50, 315 | "title": "Title", 316 | "description": "A human-readable title.", 317 | "type": "string", 318 | "examples": [ 319 | "{\n \"title\": \"My Package Title\"\n}\n" 320 | ] 321 | }, 322 | "description": { 323 | "propertyOrder": 60, 324 | "title": "Description", 325 | "description": "A text description. 
Markdown is encouraged.", 326 | "type": "string", 327 | "examples": [ 328 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 329 | ] 330 | }, 331 | "homepage": { 332 | "propertyOrder": 70, 333 | "title": "Home Page", 334 | "description": "The home on the web that is related to this data package.", 335 | "type": "string", 336 | "format": "uri", 337 | "examples": [ 338 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 339 | ] 340 | }, 341 | "sources": { 342 | "propertyOrder": 140, 343 | "options": { 344 | "hidden": true 345 | }, 346 | "title": "Sources", 347 | "description": "The raw sources for this resource.", 348 | "type": "array", 349 | "minItems": 1, 350 | "items": { 351 | "title": "Source", 352 | "description": "A source file.", 353 | "type": "object", 354 | "required": [ 355 | "title" 356 | ], 357 | "properties": { 358 | "title": { 359 | "title": "Title", 360 | "description": "A human-readable title.", 361 | "type": "string", 362 | "examples": [ 363 | "{\n \"title\": \"My Package Title\"\n}\n" 364 | ] 365 | }, 366 | "path": { 367 | "title": "Path", 368 | "description": "A fully qualified URL, or a POSIX file path..", 369 | "type": "string", 370 | "examples": [ 371 | "{\n \"path\": \"file.csv\"\n}\n", 372 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 373 | ], 374 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
375 | }, 376 | "email": { 377 | "title": "Email", 378 | "description": "An email address.", 379 | "type": "string", 380 | "format": "email", 381 | "examples": [ 382 | "{\n \"email\": \"example@example.com\"\n}\n" 383 | ] 384 | } 385 | } 386 | }, 387 | "examples": [ 388 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 389 | ] 390 | }, 391 | "licenses": { 392 | "description": "The license(s) under which the resource is published.", 393 | "propertyOrder": 150, 394 | "options": { 395 | "hidden": true 396 | }, 397 | "title": "Licenses", 398 | "type": "array", 399 | "minItems": 1, 400 | "items": { 401 | "title": "License", 402 | "description": "A license for this descriptor.", 403 | "type": "object", 404 | "properties": { 405 | "name": { 406 | "title": "Open Definition license identifier", 407 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 408 | "type": "string", 409 | "pattern": "^([-a-zA-Z0-9._])+$" 410 | }, 411 | "path": { 412 | "title": "Path", 413 | "description": "A fully qualified URL, or a POSIX file path..", 414 | "type": "string", 415 | "examples": [ 416 | "{\n \"path\": \"file.csv\"\n}\n", 417 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 418 | ], 419 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 420 | }, 421 | "title": { 422 | "title": "Title", 423 | "description": "A human-readable title.", 424 | "type": "string", 425 | "examples": [ 426 | "{\n \"title\": \"My Package Title\"\n}\n" 427 | ] 428 | } 429 | }, 430 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
431 | }, 432 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 433 | "examples": [ 434 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 435 | ] 436 | }, 437 | "format": { 438 | "propertyOrder": 80, 439 | "title": "Format", 440 | "description": "The file format of this resource.", 441 | "context": "`csv`, `xls`, `json` are examples of common formats.", 442 | "type": "string", 443 | "examples": [ 444 | "{\n \"format\": \"xls\"\n}\n" 445 | ] 446 | }, 447 | "mediatype": { 448 | "propertyOrder": 90, 449 | "title": "Media Type", 450 | "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).", 451 | "type": "string", 452 | "pattern": "^(.+)/(.+)$", 453 | "examples": [ 454 | "{\n \"mediatype\": \"text/csv\"\n}\n" 455 | ] 456 | }, 457 | "encoding": { 458 | "propertyOrder": 100, 459 | "title": "Encoding", 460 | "description": "The file encoding of this resource.", 461 | "type": "string", 462 | "default": "utf-8", 463 | "examples": [ 464 | "{\n \"encoding\": \"utf-8\"\n}\n" 465 | ] 466 | }, 467 | "bytes": { 468 | "propertyOrder": 110, 469 | "options": { 470 | "hidden": true 471 | }, 472 | "title": "Bytes", 473 | "description": "The size of this resource in bytes.", 474 | "type": "integer", 475 | "examples": [ 476 | "{\n \"bytes\": 2082\n}\n" 477 | ] 478 | }, 479 | "hash": { 480 | "propertyOrder": 120, 481 | "options": { 482 | "hidden": true 483 | }, 484 | "title": "Hash", 485 | "type": "string", 486 | "description": "The MD5 hash of this resource. 
Indicate other hashing algorithms with the {algorithm}:{hash} format.", 487 | "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$", 488 | "examples": [ 489 | "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n", 490 | "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n" 491 | ] 492 | } 493 | } 494 | }, 495 | "examples": [ 496 | "{\n \"resources\": [\n {\n \"name\": \"my-data\",\n \"data\": [\n \"data.csv\"\n ],\n \"mediatype\": \"text/csv\"\n }\n ]\n}\n" 497 | ] 498 | }, 499 | "sources": { 500 | "propertyOrder": 200, 501 | "options": { 502 | "hidden": true 503 | }, 504 | "title": "Sources", 505 | "description": "The raw sources for this resource.", 506 | "type": "array", 507 | "minItems": 1, 508 | "items": { 509 | "title": "Source", 510 | "description": "A source file.", 511 | "type": "object", 512 | "required": [ 513 | "title" 514 | ], 515 | "properties": { 516 | "title": { 517 | "title": "Title", 518 | "description": "A human-readable title.", 519 | "type": "string", 520 | "examples": [ 521 | "{\n \"title\": \"My Package Title\"\n}\n" 522 | ] 523 | }, 524 | "path": { 525 | "title": "Path", 526 | "description": "A fully qualified URL, or a POSIX file path..", 527 | "type": "string", 528 | "examples": [ 529 | "{\n \"path\": \"file.csv\"\n}\n", 530 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 531 | ], 532 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
533 | }, 534 | "email": { 535 | "title": "Email", 536 | "description": "An email address.", 537 | "type": "string", 538 | "format": "email", 539 | "examples": [ 540 | "{\n \"email\": \"example@example.com\"\n}\n" 541 | ] 542 | } 543 | } 544 | }, 545 | "examples": [ 546 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 547 | ] 548 | } 549 | } 550 | } -------------------------------------------------------------------------------- /validator/profile_cache/data-resource.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "Data Resource", 4 | "description": "Data Resource.", 5 | "type": "object", 6 | "oneOf": [ 7 | { 8 | "required": [ 9 | "name", 10 | "data" 11 | ] 12 | }, 13 | { 14 | "required": [ 15 | "name", 16 | "path" 17 | ] 18 | } 19 | ], 20 | "properties": { 21 | "profile": { 22 | "propertyOrder": 10, 23 | "default": "data-resource", 24 | "title": "Profile", 25 | "description": "The profile of this descriptor.", 26 | "context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `data-package` for Package and `data-resource` for Resource.", 27 | "type": "string", 28 | "examples": [ 29 | "{\n \"profile\": \"tabular-data-package\"\n}\n", 30 | "{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n" 31 | ] 32 | }, 33 | "name": { 34 | "propertyOrder": 20, 35 | "title": "Name", 36 | "description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.", 37 | "type": "string", 38 | "pattern": "^([-a-z0-9._/])+$", 39 | "context": "This is ideally a url-usable and human-readable name. 
Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.", 40 | "examples": [ 41 | "{\n \"name\": \"my-nice-name\"\n}\n" 42 | ] 43 | }, 44 | "path": { 45 | "propertyOrder": 30, 46 | "title": "Path", 47 | "description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings. of valid URIs.", 48 | "oneOf": [ 49 | { 50 | "title": "Path", 51 | "description": "A fully qualified URL, or a POSIX file path..", 52 | "type": "string", 53 | "examples": [ 54 | "{\n \"path\": \"file.csv\"\n}\n", 55 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 56 | ], 57 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 58 | }, 59 | { 60 | "type": "array", 61 | "minItems": 1, 62 | "items": { 63 | "title": "Path", 64 | "description": "A fully qualified URL, or a POSIX file path..", 65 | "type": "string", 66 | "examples": [ 67 | "{\n \"path\": \"file.csv\"\n}\n", 68 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 69 | ], 70 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 71 | }, 72 | "examples": [ 73 | "[ \"file.csv\" ]\n", 74 | "[ \"http://example.com/file.csv\" ]\n" 75 | ] 76 | } 77 | ], 78 | "context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.", 79 | "examples": [ 80 | "{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n", 81 | "{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n", 82 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 83 | ] 84 | }, 85 | "data": { 86 | "propertyOrder": 230, 87 | "title": "Data", 88 | "description": "Inline data for this resource." 
89 | }, 90 | "schema": { 91 | "propertyOrder": 40, 92 | "title": "Schema", 93 | "description": "A schema for this resource.", 94 | "type": "object" 95 | }, 96 | "title": { 97 | "propertyOrder": 50, 98 | "title": "Title", 99 | "description": "A human-readable title.", 100 | "type": "string", 101 | "examples": [ 102 | "{\n \"title\": \"My Package Title\"\n}\n" 103 | ] 104 | }, 105 | "description": { 106 | "propertyOrder": 60, 107 | "title": "Description", 108 | "description": "A text description. Markdown is encouraged.", 109 | "type": "string", 110 | "examples": [ 111 | "{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n" 112 | ] 113 | }, 114 | "homepage": { 115 | "propertyOrder": 70, 116 | "title": "Home Page", 117 | "description": "The home on the web that is related to this data package.", 118 | "type": "string", 119 | "format": "uri", 120 | "examples": [ 121 | "{\n \"homepage\": \"http://example.com/\"\n}\n" 122 | ] 123 | }, 124 | "sources": { 125 | "propertyOrder": 140, 126 | "options": { 127 | "hidden": true 128 | }, 129 | "title": "Sources", 130 | "description": "The raw sources for this resource.", 131 | "type": "array", 132 | "minItems": 1, 133 | "items": { 134 | "title": "Source", 135 | "description": "A source file.", 136 | "type": "object", 137 | "required": [ 138 | "title" 139 | ], 140 | "properties": { 141 | "title": { 142 | "title": "Title", 143 | "description": "A human-readable title.", 144 | "type": "string", 145 | "examples": [ 146 | "{\n \"title\": \"My Package Title\"\n}\n" 147 | ] 148 | }, 149 | "path": { 150 | "title": "Path", 151 | "description": "A fully qualified URL, or a POSIX file path..", 152 | "type": "string", 153 | "examples": [ 154 | "{\n \"path\": \"file.csv\"\n}\n", 155 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 156 | ], 157 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 
158 | }, 159 | "email": { 160 | "title": "Email", 161 | "description": "An email address.", 162 | "type": "string", 163 | "format": "email", 164 | "examples": [ 165 | "{\n \"email\": \"example@example.com\"\n}\n" 166 | ] 167 | } 168 | } 169 | }, 170 | "examples": [ 171 | "{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n" 172 | ] 173 | }, 174 | "licenses": { 175 | "description": "The license(s) under which the resource is published.", 176 | "propertyOrder": 150, 177 | "options": { 178 | "hidden": true 179 | }, 180 | "title": "Licenses", 181 | "type": "array", 182 | "minItems": 1, 183 | "items": { 184 | "title": "License", 185 | "description": "A license for this descriptor.", 186 | "type": "object", 187 | "properties": { 188 | "name": { 189 | "title": "Open Definition license identifier", 190 | "description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/", 191 | "type": "string", 192 | "pattern": "^([-a-zA-Z0-9._])+$" 193 | }, 194 | "path": { 195 | "title": "Path", 196 | "description": "A fully qualified URL, or a POSIX file path..", 197 | "type": "string", 198 | "examples": [ 199 | "{\n \"path\": \"file.csv\"\n}\n", 200 | "{\n \"path\": \"http://example.com/file.csv\"\n}\n" 201 | ], 202 | "context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly." 203 | }, 204 | "title": { 205 | "title": "Title", 206 | "description": "A human-readable title.", 207 | "type": "string", 208 | "examples": [ 209 | "{\n \"title\": \"My Package Title\"\n}\n" 210 | ] 211 | } 212 | }, 213 | "context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself." 
214 | }, 215 | "context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.", 216 | "examples": [ 217 | "{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n" 218 | ] 219 | }, 220 | "format": { 221 | "propertyOrder": 80, 222 | "title": "Format", 223 | "description": "The file format of this resource.", 224 | "context": "`csv`, `xls`, `json` are examples of common formats.", 225 | "type": "string", 226 | "examples": [ 227 | "{\n \"format\": \"xls\"\n}\n" 228 | ] 229 | }, 230 | "mediatype": { 231 | "propertyOrder": 90, 232 | "title": "Media Type", 233 | "description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).", 234 | "type": "string", 235 | "pattern": "^(.+)/(.+)$", 236 | "examples": [ 237 | "{\n \"mediatype\": \"text/csv\"\n}\n" 238 | ] 239 | }, 240 | "encoding": { 241 | "propertyOrder": 100, 242 | "title": "Encoding", 243 | "description": "The file encoding of this resource.", 244 | "type": "string", 245 | "default": "utf-8", 246 | "examples": [ 247 | "{\n \"encoding\": \"utf-8\"\n}\n" 248 | ] 249 | }, 250 | "bytes": { 251 | "propertyOrder": 110, 252 | "options": { 253 | "hidden": true 254 | }, 255 | "title": "Bytes", 256 | "description": "The size of this resource in bytes.", 257 | "type": "integer", 258 | "examples": [ 259 | "{\n \"bytes\": 2082\n}\n" 260 | ] 261 | }, 262 | "hash": { 263 | "propertyOrder": 120, 264 | "options": { 265 | "hidden": true 266 | }, 267 | "title": "Hash", 268 | "type": "string", 269 | "description": "The MD5 hash of this resource. 
Indicate other hashing algorithms with the {algorithm}:{hash} format.", 270 | "pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$", 271 | "examples": [ 272 | "{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n", 273 | "{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n" 274 | ] 275 | } 276 | } 277 | } -------------------------------------------------------------------------------- /validator/profile_cache/registry.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "data-package", 4 | "title": "Data Package", 5 | "schema": "/data-package.json", 6 | "schema_path": "/data-package.json", 7 | "specification": "https://specs.frictionlessdata.io/data-package/" 8 | }, 9 | { 10 | "id": "tabular-data-package", 11 | "title": "Tabular Data Package", 12 | "schema": "/tabular-data-package.json", 13 | "schema_path": "/tabular-data-package.json", 14 | "specification": "http://specs.frictionlessdata.io/tabular-data-package/" 15 | }, 16 | { 17 | "id": "fiscal-data-package", 18 | "title": "Fiscal Data Package", 19 | "schema": "/fiscal-data-package.json", 20 | "schema_path": "/fiscal-data-package.json", 21 | "specification": "https://specs.frictionlessdata.io/fiscal-data-package/" 22 | }, 23 | { 24 | "id": "data-resource", 25 | "title": "Data Resource", 26 | "schema": "/data-resource.json", 27 | "schema_path": "/data-resource.json", 28 | "specification": "https://specs.frictionlessdata.io/data-resource" 29 | }, 30 | { 31 | "id": "tabular-data-resource", 32 | "title": "Tabular Data Resource", 33 | "schema": "/tabular-data-resource.json", 34 | "schema_path": "/tabular-data-resource.json", 35 | "specification": "https://specs.frictionlessdata.io/tabular-data-resource" 36 | }, 37 | { 38 | "id": "table-schema", 39 | "title": "Table Schema", 40 | "schema": "/table-schema.json", 41 | "schema_path": "/table-schema.json", 42 | "specification": 
"https://specs.frictionlessdata.io/table-schema/" 43 | } 44 | ] -------------------------------------------------------------------------------- /validator/registry.go: -------------------------------------------------------------------------------- 1 | package validator 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io/ioutil" 8 | "net/http" 9 | "strings" 10 | 11 | "github.com/frictionlessdata/datapackage-go/validator/profile_cache" 12 | "github.com/santhosh-tekuri/jsonschema" 13 | 14 | _ "github.com/santhosh-tekuri/jsonschema/httploader" // This import alows jsonschema to load urls. 15 | _ "github.com/santhosh-tekuri/jsonschema/loader" // This import alows jsonschema to load filepaths. 16 | ) 17 | 18 | // RegistryLoader loads a registry. 19 | type RegistryLoader func() (Registry, error) 20 | 21 | // Registry represents a set of registered validators, which could be loaded locally or remotelly. 22 | type Registry interface { 23 | GetValidator(profile string) (DescriptorValidator, error) 24 | } 25 | 26 | type profileSpec struct { 27 | ID string `json:"id,omitempty"` 28 | Title string `json:"title,omitempty"` 29 | Schema string `json:"schema,omitempty"` 30 | SchemaPath string `json:"schema_path,omitempty"` 31 | Specification string `json:"specification,omitempty"` 32 | } 33 | 34 | type localRegistry struct { 35 | registry map[string]profileSpec 36 | inMemoryOnly bool 37 | } 38 | 39 | func (local *localRegistry) GetValidator(profile string) (DescriptorValidator, error) { 40 | spec, ok := local.registry[profile] 41 | if !ok { 42 | return nil, fmt.Errorf("invalid profile:%s", profile) 43 | } 44 | b, err := profile_cache.FSByte(!local.inMemoryOnly, spec.Schema) 45 | if err != nil { 46 | return nil, err 47 | } 48 | c := jsonschema.NewCompiler() 49 | c.AddResource(profile, bytes.NewReader(b)) // Adding in-memory resource. 
50 | schema, err := c.Compile(profile) 51 | if err != nil { 52 | return nil, err 53 | } 54 | return &jsonSchema{schema: schema}, nil 55 | } 56 | 57 | // LocalRegistryLoader creates a new registry, which is based on the local file system (or in-memory cache) 58 | // to locate json schema profiles. Setting inMemoryOnly to true will make sure only the in-memory 59 | // cache (registry_cache Go package) is accessed, thus avoiding access the filesystem. 60 | func LocalRegistryLoader(localRegistryPath string, inMemoryOnly bool) RegistryLoader { 61 | return func() (Registry, error) { 62 | buf, err := profile_cache.FSByte(!inMemoryOnly, localRegistryPath) 63 | if err != nil { 64 | return nil, err 65 | } 66 | m, err := unmarshalRegistryContents(buf) 67 | if err != nil { 68 | return nil, err 69 | } 70 | return &localRegistry{registry: m, inMemoryOnly: inMemoryOnly}, nil 71 | } 72 | } 73 | 74 | type remoteRegistry struct { 75 | registry map[string]profileSpec 76 | } 77 | 78 | func (remote *remoteRegistry) GetValidator(profile string) (DescriptorValidator, error) { 79 | spec, ok := remote.registry[profile] 80 | if !ok { 81 | return nil, fmt.Errorf("invalid profile:%s", profile) 82 | } 83 | c := jsonschema.NewCompiler() 84 | c.AddResource(profile, strings.NewReader(spec.Schema)) // Adding in-memory resource. 85 | schema, err := c.Compile(spec.Schema) 86 | if err != nil { 87 | return nil, err 88 | } 89 | return &jsonSchema{schema: schema}, nil 90 | } 91 | 92 | // RemoteRegistryLoader loads the schema registry map from the passed-in URL. 
93 | func RemoteRegistryLoader(url string) RegistryLoader { 94 | return func() (Registry, error) { 95 | resp, err := http.Get(url) 96 | if err != nil { 97 | return nil, fmt.Errorf("error fetching remote profile cache registry from %s: %q", url, err) 98 | } 99 | defer resp.Body.Close() 100 | buf, err := ioutil.ReadAll(resp.Body) 101 | if err != nil { 102 | return nil, fmt.Errorf("error reading remote profile cache registry from %s: %q", url, err) 103 | } 104 | m, err := unmarshalRegistryContents(buf) 105 | if err != nil { 106 | return nil, err 107 | } 108 | return &remoteRegistry{registry: m}, nil 109 | } 110 | } 111 | 112 | // FallbackRegistryLoader returns the first passed-in registry loaded successfully. 113 | // It returns an error if there is no successfully loaded registry. 114 | func FallbackRegistryLoader(loaders ...RegistryLoader) RegistryLoader { 115 | return func() (Registry, error) { 116 | if len(loaders) == 0 { 117 | return nil, fmt.Errorf("there should be at least one registry loader to fallback") 118 | } 119 | var registry Registry 120 | var errors []error 121 | for _, loader := range loaders { 122 | reg, err := loader() 123 | if err != nil { 124 | errors = append(errors, err) 125 | continue 126 | } 127 | registry = reg 128 | break 129 | } 130 | if registry == nil { 131 | var erroMsg string 132 | for _, err := range errors { 133 | erroMsg += fmt.Sprintln(err.Error()) 134 | } 135 | return nil, fmt.Errorf(erroMsg) 136 | } 137 | return registry, nil 138 | } 139 | } 140 | 141 | func unmarshalRegistryContents(buf []byte) (map[string]profileSpec, error) { 142 | var specs []profileSpec 143 | if err := json.Unmarshal(buf, &specs); err != nil { 144 | return nil, fmt.Errorf("error parsing profile cache registry. Contents:\"%s\". 
Err:\"%q\"", string(buf), err) 145 | } 146 | m := make(map[string]profileSpec, len(specs)) 147 | for _, s := range specs { 148 | m[s.ID] = s 149 | } 150 | return m, nil 151 | } 152 | -------------------------------------------------------------------------------- /validator/registry_test.go: -------------------------------------------------------------------------------- 1 | package validator 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | 9 | "github.com/matryer/is" 10 | ) 11 | 12 | const simpleSchema = `{ 13 | "$schema": "http://json-schema.org/draft-04/schema#", 14 | "type": "object", 15 | "oneOf": [{"required": ["name"]}] 16 | }` 17 | 18 | var localLoader = LocalRegistryLoader(localRegistryPath, true /* in memory only*/) 19 | 20 | func TestDescriptorValidator_IsValid(t *testing.T) { 21 | t.Run("ValidProfile", func(t *testing.T) { 22 | is := is.New(t) 23 | v, err := New("data-package", localLoader) 24 | is.NoErr(err) 25 | is.NoErr(v.Validate(map[string]interface{}{"resources": []interface{}{map[string]interface{}{"name": "res1", "path": "foo.csv"}}})) 26 | }) 27 | t.Run("InvalidProfile", func(t *testing.T) { 28 | is := is.New(t) 29 | v, err := New("data-package", localLoader) 30 | is.NoErr(err) 31 | is.True(v.Validate(map[string]interface{}{"resources": []interface{}{map[string]interface{}{"name": "res1"}}}) != nil) 32 | }) 33 | } 34 | 35 | func TestNew(t *testing.T) { 36 | t.Run("ThirdPartyRemoteSchema", func(t *testing.T) { 37 | is := is.New(t) 38 | ts := serverForTests(simpleSchema) 39 | defer ts.Close() 40 | 41 | v, err := New(ts.URL) 42 | is.NoErr(err) 43 | is.NoErr(v.Validate(map[string]interface{}{"name": "foo"})) 44 | }) 45 | t.Run("RemoteSchemaRegistry", func(t *testing.T) { 46 | is := is.New(t) 47 | schServer := serverForTests(simpleSchema) 48 | defer schServer.Close() 49 | regServer := serverForTests(fmt.Sprintf(`[{"id":"schemaID", "schema":"%s"}]`, schServer.URL)) 50 | defer regServer.Close() 51 | 52 | v, err := 
New("schemaID", RemoteRegistryLoader(regServer.URL)) 53 | is.NoErr(err) 54 | is.NoErr(v.Validate(map[string]interface{}{"name": "foo"})) 55 | 56 | _, err = New("foo", RemoteRegistryLoader(regServer.URL)) 57 | if err == nil { 58 | t.Fatalf("want:err got:nil") 59 | } 60 | }) 61 | t.Run("LocalRegistry", func(t *testing.T) { 62 | is := is.New(t) 63 | profiles := []string{ 64 | "data-package", 65 | "data-resource", 66 | "fiscal-data-package", 67 | "table-schema", 68 | "tabular-data-package", 69 | "tabular-data-resource", 70 | } 71 | loader, err := localLoader() 72 | is.NoErr(err) 73 | for _, p := range profiles { 74 | _, err := loader.GetValidator(p) 75 | is.NoErr(err) 76 | } 77 | }) 78 | t.Run("LocalInvalidProfile", func(t *testing.T) { 79 | _, err := New("boo", localLoader) 80 | if err == nil { 81 | t.Fatalf("want:err got:nil") 82 | } 83 | }) 84 | t.Run("InvalidRegistryJSON", func(t *testing.T) { 85 | ts := serverForTests(`123`) 86 | defer ts.Close() 87 | _, err := RemoteRegistryLoader(ts.URL)() 88 | if err == nil { 89 | t.Fatalf("want:err got:nil") 90 | } 91 | }) 92 | t.Run("InvalidRemoteRegistryURL", func(t *testing.T) { 93 | _, err := RemoteRegistryLoader("http://127.0.0.1/bar")() 94 | if err == nil { 95 | t.Fatalf("want:err got:nil") 96 | } 97 | }) 98 | } 99 | 100 | type neverValidValidator struct{} 101 | 102 | func (v neverValidValidator) Validate(map[string]interface{}) error { return fmt.Errorf("never valid") } 103 | 104 | type neverValidRegistry struct{} 105 | 106 | func (v neverValidRegistry) GetValidator(profile string) (DescriptorValidator, error) { 107 | return &neverValidValidator{}, nil 108 | } 109 | 110 | func TestFallbackRegistryLoader(t *testing.T) { 111 | t.Run("FallingBackOnLocal", func(t *testing.T) { 112 | is := is.New(t) 113 | loader, err := FallbackRegistryLoader(RemoteRegistryLoader("http://127.0.0.1/bar"), localLoader)() 114 | is.NoErr(err) 115 | v, err := loader.GetValidator("data-package") 116 | is.NoErr(err) 117 | 
is.NoErr(v.Validate(map[string]interface{}{"resources": []interface{}{map[string]interface{}{"name": "res1", "path": "foo.csv"}}})) 118 | }) 119 | t.Run("TwoValidsShouldPickFirst", func(t *testing.T) { 120 | is := is.New(t) 121 | loader, err := FallbackRegistryLoader(InMemoryLoader(), func() (Registry, error) { return &neverValidRegistry{}, nil })() 122 | is.NoErr(err) 123 | v, err := loader.GetValidator("data-package") 124 | is.NoErr(err) 125 | is.NoErr(v.Validate(map[string]interface{}{"resources": []interface{}{map[string]interface{}{"name": "res1", "path": "foo.csv"}}})) 126 | }) 127 | t.Run("NoLoader", func(t *testing.T) { 128 | _, err := FallbackRegistryLoader()() 129 | if err == nil { 130 | t.Fatalf("want:err got:nil") 131 | } 132 | }) 133 | t.Run("AllErrors", func(t *testing.T) { 134 | _, err := FallbackRegistryLoader(RemoteRegistryLoader("http://127.0.0.1/bar"))() 135 | if err == nil { 136 | t.Fatalf("want:err got:nil") 137 | } 138 | }) 139 | } 140 | 141 | func serverForTests(contents string) *httptest.Server { 142 | return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 143 | fmt.Fprintln(w, contents) 144 | })) 145 | } 146 | -------------------------------------------------------------------------------- /validator/validator.go: -------------------------------------------------------------------------------- 1 | package validator 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/santhosh-tekuri/jsonschema" 8 | ) 9 | 10 | // DescriptorValidator validates a Data-Package or Resource descriptor. 11 | type DescriptorValidator interface { 12 | Validate(map[string]interface{}) error 13 | } 14 | 15 | const localRegistryPath = "/registry.json" 16 | const remoteRegistryURL = "http://frictionlessdata.io/schemas/registry.json" 17 | 18 | // NewRegistry returns a registry where users could get descriptor validators. 19 | func NewRegistry(loaders ...RegistryLoader) (Registry, error) { 20 | // Default settings. 
21 | if len(loaders) == 0 { 22 | loaders = append( 23 | loaders, 24 | InMemoryLoader(), 25 | LocalRegistryLoader(localRegistryPath, false /* inMemoryOnly*/), 26 | RemoteRegistryLoader(remoteRegistryURL)) 27 | } 28 | registry, err := FallbackRegistryLoader(loaders...)() 29 | if err != nil { 30 | return nil, fmt.Errorf("could not load registry:%q", err) 31 | } 32 | return registry, nil 33 | } 34 | 35 | // New returns a new descriptor validator for the passed-in profile. 36 | func New(profile string, loaders ...RegistryLoader) (DescriptorValidator, error) { 37 | // If it is a third-party schema. Directly referenced from the internet or local file. 38 | if strings.HasPrefix(profile, "http") || strings.HasPrefix(profile, "file") { 39 | schema, err := jsonschema.Compile(profile) 40 | if err != nil { 41 | return nil, err 42 | } 43 | return &jsonSchema{schema: schema}, nil 44 | } 45 | registry, err := NewRegistry(loaders...) 46 | if err != nil { 47 | return nil, err 48 | } 49 | return registry.GetValidator(profile) 50 | } 51 | 52 | // Validate checks whether the descriptor the descriptor is valid against the passed-in profile/registry. 53 | // If the validation process generates multiple errors, their messages are coalesced. 54 | // It is a syntax-sugar around getting the validator from the registry and coalescing errors. 55 | func Validate(descriptor map[string]interface{}, profile string, registry Registry) error { 56 | validator, err := registry.GetValidator(profile) 57 | if err != nil { 58 | return fmt.Errorf("invalid Schema (Profile:%s): %q", profile, err) 59 | } 60 | return validator.Validate(descriptor) 61 | } 62 | 63 | // MustInMemoryRegistry returns the local cache registry, which is shipped with the library. 64 | // It panics if there are errors retrieving the registry. 
65 | func MustInMemoryRegistry() Registry { 66 | reg, err := InMemoryLoader()() 67 | if err != nil { 68 | panic(err) 69 | } 70 | return reg 71 | } 72 | 73 | // InMemoryLoader returns a loader which points tothe local cache registry. 74 | func InMemoryLoader() RegistryLoader { 75 | return LocalRegistryLoader(localRegistryPath, true /* in memory only*/) 76 | } 77 | -------------------------------------------------------------------------------- /validator/validator_test.go: -------------------------------------------------------------------------------- 1 | package validator 2 | 3 | import "fmt" 4 | 5 | func ExampleValidate() { 6 | resource := map[string]interface{}{"name": "foo", "path": "foo.csv"} 7 | fmt.Print(Validate(resource, "data-resource", MustInMemoryRegistry())) 8 | // Output: 9 | } 10 | 11 | func ExampleNewRegistry() { 12 | registry, _ := NewRegistry(LocalRegistryLoader(localRegistryPath, true /* in memory only*/)) 13 | validator, _ := registry.GetValidator("data-resource") 14 | fmt.Println(validator.Validate(map[string]interface{}{"name": "res1", "path": "foo.csv"})) 15 | // Output: 16 | } 17 | --------------------------------------------------------------------------------