├── .github
    ├── dependabot.yml
    └── workflows
    │   └── push.yml
├── .gitignore
├── .ruby-version
├── CKAN.md
├── Gemfile
├── Gemfile.lock
├── LICENSE.md
├── README.md
├── Rakefile
├── bin
    └── data_kitten
├── data_kitten.gemspec
├── lib
    ├── data_kitten.rb
    └── data_kitten
    │   ├── agent.rb
    │   ├── dataset.rb
    │   ├── distribution.rb
    │   ├── distribution_format.rb
    │   ├── fetcher.rb
    │   ├── hosts.rb
    │   ├── hosts
    │       ├── bitbucket.rb
    │       ├── gist.rb
    │       └── github.rb
    │   ├── license.rb
    │   ├── origins.rb
    │   ├── origins
    │       ├── git.rb
    │       ├── html.rb
    │       ├── json.rb
    │       ├── linked_data.rb
    │       └── web_service.rb
    │   ├── publishing_formats.rb
    │   ├── publishing_formats
    │       ├── ckan.rb
    │       ├── datapackage.rb
    │       ├── linked_data.rb
    │       └── rdfa.rb
    │   ├── rights.rb
    │   ├── source.rb
    │   ├── temporal.rb
    │   ├── utils
    │       ├── ckan3_hash.rb
    │       └── guessable_lookup.rb
    │   └── version.rb
└── spec
    ├── ckan3_hash_spec.rb
    ├── ckan_fakeweb.rb
    ├── dataset_spec.rb
    ├── distribution_format_spec.rb
    ├── distribution_spec.rb
    ├── fetcher_spec.rb
    ├── fixtures
        ├── basic-dcat-rdfa.html
        ├── ckan
        │   ├── organization-show-peterborough.json
        │   ├── organization_show-ecology.json
        │   ├── organization_show-ni-spatial.json
        │   ├── package-show-frozen-animals.json
        │   ├── package_show-toilets.json
        │   ├── rest-dataset-cadastral.json
        │   ├── rest-dataset-defence.json
        │   ├── rest-dataset-frozen-animals.json
        │   ├── rest-dataset-pollinator.json
        │   ├── rest-dataset-toilets.json
        │   ├── rest-frozen-animals.json
        │   ├── rest-organization-defence.json
        │   └── rest-organization-health.json
        ├── datapackage.json
        ├── dcat-odrs-rdfa.html
        └── odrs-datapackage.json
    ├── guessable_lookup_spec.rb
    ├── hosts
        └── github_spec.rb
    ├── license_spec.rb
    ├── origins
        └── linked_data_spec.rb
    ├── publishing_format
        ├── ckan_spec.rb
        ├── datapackage_spec.rb
        ├── linked_data_spec.rb
        └── rdfa_spec.rb
    └── spec_helper.rb


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: bundler
4 |   directory: "/"
5 |   schedule:
6 |     interval: daily
7 |   open-pull-requests-limit: 10
8 | 


--------------------------------------------------------------------------------
/.github/workflows/push.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches: [ main ]
 5 |   pull_request:
 6 |     branches: [ main ]
 7 | jobs:
 8 |   test:
 9 |     runs-on: ubuntu-latest
10 |     strategy:
11 |       matrix:
12 |         ruby-version: ['2.5']
13 |       fail-fast: false
14 |     steps:
15 |       - uses: actions/checkout@v2
16 |       - uses: ruby/setup-ruby@v1
17 |         with:
18 |           bundler-cache: true
19 |           ruby-version: ${{ matrix.ruby-version }}
20 |       - name: Install dependencies
21 |         run: bundle install
22 |       - name: Run the tests
23 |         run: bundle exec rake
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.gem
3 | 
4 | /coverage/
5 | /tmp/
6 | /.bundle/
7 | Gemfile.lock
8 | 


--------------------------------------------------------------------------------
/.ruby-version:
--------------------------------------------------------------------------------
1 | 2.5.8
2 | 


--------------------------------------------------------------------------------
/CKAN.md:
--------------------------------------------------------------------------------
 1 | # Notes on CKAN API
 2 | 
 3 | ## General
 4 | 
 5 | There are 3 versions of the API (1, 2 and 3), documented [here](http://docs.ckan.org/en/latest/api/index.html). The endpoints we mostly use (at the time of writing):
 6 | 
 7 | ---
 8 | 
 9 | ##### `/api/3/action/package_show?id={package_id}`
10 | Provides all metadata for a package/dataset. We only use this _sometimes_ to get the `id` of the dataset, and then request all data via the old `/api/2/rest/package`.
11 | 
12 | ---
13 | 
14 | ##### `/api/2/rest/package/{package_id}`
15 | An older version and slightly different.
16 | 
17 | ###### Differences between v2 & v3:
18 | 
19 | 1. Metadata in v3 is in the `result` field, whereas in v2 metadata is the response itself
20 | 2. `extras` field contains an array of key-value hashes in v3 (`[{ key: "id", value: 1 }]`), whereas in v2 it is a hash (`{id: 1}`)
21 | 
22 | ---
23 | 
24 | ##### `/api/3/action/organization_show?id={organization_id}`
25 | Provides all metadata for an organization.
26 | 
27 | ## Gotchas?
28 | 
29 | The value of `name` field can be used as the `id` parameter in requests.
30 | 
31 | In version 3, the `extras` field is an array populated with key-value objects, as in:
32 | 
33 | ```javascript
34 | [{key: 'language', value: 'en'}, {key: ..., value: ...}]
35 | ```
36 | 
37 | which makes it a bit more difficult to parse. Version 1 & 2 store it "the proper way", with the keys being actual keys.
38 | 
39 | ## Portal specific
40 | 
41 | There is a [ckan-api-inspector](http://theodi.github.io/ckan-api-inspector) that helps see all the metadata fields that occur and their values. It helps see whether a field follows a pattern or can be populated by arbitrary values (which, surprisingly, many are).
42 | 
43 | ### data.gov
44 | 
45 | - needs a POST request to access `/api/3/action/organization_show?id={organization_id}`, but provides a painless way to get organization metadata via `api/2/rest/group/{organization_id}`. We haven't found any other CKAN site that provides organization metadata through API v2
46 | - doesn't support `package_list` ([Github issue](https://github.com/GSA/data.gov/issues/295))
47 | - one of the few sites that _tries_ to follow a schema https://project-open-data.cio.gov/v1.1/schema/ but often field values would not match it
48 | - Provides some harvesting metadata which tends to follow a standard schema (["As part of Project Open Data most government offices are transitioning to make all of their metadata available via a standard schema"](https://www.data.gov/developers/harvesting)).
49 | 
50 | ### data.gov.uk
51 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 | 
3 | gemspec
4 | 
5 | group :development do
6 |   gem "fakeweb", github: "chrisk/fakeweb"
7 | end
8 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
  1 | GIT
  2 |   remote: https://github.com/chrisk/fakeweb.git
  3 |   revision: 2b08c1ff2714ec13a12f3497d67fcefce95c2cbe
  4 |   specs:
  5 |     fakeweb (1.3.0)
  6 | 
  7 | PATH
  8 |   remote: .
  9 |   specs:
 10 |     data_kitten (1.3.4)
 11 |       datapackage (~> 0.0.0)
 12 |       git (~> 1.7)
 13 |       json (~> 2.5)
 14 |       linkeddata (>= 2, < 4)
 15 |       nokogiri (~> 1.6)
 16 |       rake
 17 |       rest-client (~> 2.1)
 18 | 
 19 | GEM
 20 |   remote: https://rubygems.org/
 21 |   specs:
 22 |     addressable (2.8.6)
 23 |       public_suffix (>= 2.0.2, < 6.0)
 24 |     amazing_print (1.4.0)
 25 |     ast (2.4.2)
 26 |     bcp47 (0.3.3)
 27 |       i18n
 28 |     builder (3.2.4)
 29 |     coderay (1.1.3)
 30 |     colorize (0.8.1)
 31 |     concurrent-ruby (1.2.2)
 32 |     connection_pool (2.2.5)
 33 |     coveralls (0.8.23)
 34 |       json (>= 1.8, < 3)
 35 |       simplecov (~> 0.16.1)
 36 |       term-ansicolor (~> 1.3)
 37 |       thor (>= 0.19.4, < 2.0)
 38 |       tins (~> 1.6)
 39 |     datapackage (0.0.4)
 40 |       colorize
 41 |       json
 42 |       json-schema
 43 |       rest-client
 44 |     diff-lcs (1.5.0)
 45 |     docile (1.4.0)
 46 |     domain_name (0.5.20190701)
 47 |       unf (>= 0.0.5, < 1.0.0)
 48 |     ebnf (2.2.1)
 49 |       amazing_print (~> 1.2)
 50 |       htmlentities (~> 4.3)
 51 |       rdf (~> 3.1)
 52 |       scanf (~> 1.0)
 53 |       sxp (~> 1.1)
 54 |       unicode-types (~> 1.6)
 55 |     equivalent-xml (0.6.0)
 56 |       nokogiri (>= 1.4.3)
 57 |     git (1.19.1)
 58 |       addressable (~> 2.8)
 59 |       rchardet (~> 1.8)
 60 |     haml (5.2.2)
 61 |       temple (>= 0.8.0)
 62 |       tilt
 63 |     hamster (3.0.0)
 64 |       concurrent-ruby (~> 1.0)
 65 |     htmlentities (4.3.4)
 66 |     http-accept (1.7.0)
 67 |     http-cookie (1.0.4)
 68 |       domain_name (~> 0.5)
 69 |     i18n (1.12.0)
 70 |       concurrent-ruby (~> 1.0)
 71 |     json (2.7.4)
 72 |     json-canonicalization (0.2.1)
 73 |     json-ld (3.1.10)
 74 |       htmlentities (~> 4.3)
 75 |       json-canonicalization (~> 0.2)
 76 |       link_header (~> 0.0, >= 0.0.8)
 77 |       multi_json (~> 1.14)
 78 |       rack (~> 2.0)
 79 |       rdf (~> 3.1)
 80 |     json-ld-preloaded (3.1.6)
 81 |       json-ld (~> 3.1)
 82 |       rdf (~> 3.1)
 83 |     json-schema (2.8.1)
 84 |       addressable (>= 2.4)
 85 |     ld-patch (3.1.3)
 86 |       ebnf (~> 2.1)
 87 |       rdf (~> 3.1)
 88 |       rdf-xsd (~> 3.1)
 89 |       sparql (~> 3.1)
 90 |       sxp (~> 1.1)
 91 |     link_header (0.0.8)
 92 |     linkeddata (3.1.6)
 93 |       equivalent-xml (~> 0.6)
 94 |       json-ld (~> 3.1, >= 3.1.10)
 95 |       json-ld-preloaded (~> 3.1, >= 3.1.6)
 96 |       ld-patch (~> 3.1, >= 3.1.3)
 97 |       nokogiri (~> 1.12)
 98 |       rdf (~> 3.1, >= 3.1.15)
 99 |       rdf-aggregate-repo (~> 3.1)
100 |       rdf-isomorphic (~> 3.1, >= 3.1.1)
101 |       rdf-json (~> 3.1)
102 |       rdf-microdata (~> 3.1, >= 3.1.4)
103 |       rdf-n3 (~> 3.1, >= 3.1.2)
104 |       rdf-normalize (~> 0.4)
105 |       rdf-ordered-repo (~> 3.1, >= 3.1.1)
106 |       rdf-rdfa (~> 3.1, >= 3.1.3)
107 |       rdf-rdfxml (~> 3.1, >= 3.1.1)
108 |       rdf-reasoner (~> 0.7, >= 0.7.2)
109 |       rdf-tabular (~> 3.1, >= 3.1.1)
110 |       rdf-trig (~> 3.1, >= 3.1.2)
111 |       rdf-trix (~> 3.1, >= 3.1.1)
112 |       rdf-turtle (~> 3.1, >= 3.1.3)
113 |       rdf-vocab (~> 3.1, >= 3.1.14)
114 |       rdf-xsd (~> 3.1, >= 3.1.1)
115 |       shacl (~> 0.1, >= 0.1.1)
116 |       shex (~> 0.6, >= 0.6.4)
117 |       sparql (~> 3.1, >= 3.1.8)
118 |       sparql-client (~> 3.1, >= 3.1.2)
119 |     logger (1.5.1)
120 |     method_source (1.0.0)
121 |     mime-types (3.4.1)
122 |       mime-types-data (~> 3.2015)
123 |     mime-types-data (3.2022.0105)
124 |     mini_portile2 (2.6.1)
125 |     multi_json (1.15.0)
126 |     net-http-persistent (4.0.1)
127 |       connection_pool (~> 2.2)
128 |     netrc (0.11.0)
129 |     nokogiri (1.12.5)
130 |       mini_portile2 (~> 2.6.1)
131 |       racc (~> 1.4)
132 |     parallel (1.22.1)
133 |     parser (3.1.2.0)
134 |       ast (~> 2.4.1)
135 |     pry (0.14.2)
136 |       coderay (~> 1.1)
137 |       method_source (~> 1.0)
138 |     public_suffix (4.0.7)
139 |     racc (1.6.0)
140 |     rack (2.2.8.1)
141 |     rainbow (3.1.1)
142 |     rake (13.2.1)
143 |     rchardet (1.8.0)
144 |     rdf (3.1.15)
145 |       hamster (~> 3.0)
146 |       link_header (~> 0.0, >= 0.0.8)
147 |     rdf-aggregate-repo (3.1.0)
148 |       rdf (~> 3.1)
149 |     rdf-isomorphic (3.1.1)
150 |       rdf (~> 3.1)
151 |     rdf-json (3.1.0)
152 |       rdf (~> 3.1)
153 |     rdf-microdata (3.1.4)
154 |       htmlentities (~> 4.3)
155 |       nokogiri (~> 1.12)
156 |       rdf (~> 3.1, >= 3.1.13)
157 |       rdf-rdfa (~> 3.1, >= 3.1.3)
158 |       rdf-xsd (~> 3.1)
159 |     rdf-n3 (3.1.2)
160 |       ebnf (~> 2.1)
161 |       rdf (~> 3.1, >= 3.1.8)
162 |       sparql (~> 3.1, >= 3.1.4)
163 |       sxp (~> 1.1)
164 |     rdf-normalize (0.4.0)
165 |       rdf (~> 3.1)
166 |     rdf-ordered-repo (3.1.1)
167 |       rdf (~> 3.1)
168 |     rdf-rdfa (3.1.3)
169 |       haml (~> 5.2)
170 |       htmlentities (~> 4.3)
171 |       rdf (~> 3.1, >= 3.1.13)
172 |       rdf-aggregate-repo (~> 3.1)
173 |       rdf-vocab (~> 3.1, >= 3.1.11)
174 |       rdf-xsd (~> 3.1)
175 |     rdf-rdfxml (3.1.1)
176 |       htmlentities (~> 4.3)
177 |       rdf (~> 3.1)
178 |       rdf-rdfa (~> 3.1)
179 |       rdf-xsd (~> 3.1)
180 |     rdf-reasoner (0.7.2)
181 |       rdf (~> 3.1, >= 3.1.12)
182 |       rdf-xsd (~> 3.1)
183 |     rdf-tabular (3.1.1)
184 |       addressable (~> 2.3)
185 |       bcp47 (~> 0.3, >= 0.3.3)
186 |       json-ld (~> 3.1)
187 |       rdf (~> 3.1)
188 |       rdf-vocab (~> 3.1)
189 |       rdf-xsd (~> 3.1)
190 |     rdf-trig (3.1.2)
191 |       ebnf (~> 2.1)
192 |       rdf (~> 3.1)
193 |       rdf-turtle (~> 3.1)
194 |     rdf-trix (3.1.1)
195 |       rdf (~> 3.1)
196 |       rdf-xsd (~> 3.1)
197 |     rdf-turtle (3.1.3)
198 |       ebnf (~> 2.1)
199 |       rdf (~> 3.1, >= 3.1.8)
200 |     rdf-vocab (3.1.14)
201 |       rdf (~> 3.1, >= 3.1.12)
202 |     rdf-xsd (3.1.1)
203 |       rdf (~> 3.1)
204 |       rexml (~> 3.2)
205 |     regexp_parser (2.3.1)
206 |     rest-client (2.1.0)
207 |       http-accept (>= 1.7.0, < 2.0)
208 |       http-cookie (>= 1.0.2, < 2.0)
209 |       mime-types (>= 1.16, < 4.0)
210 |       netrc (~> 0.8)
211 |     rexml (3.3.9)
212 |     rspec (3.11.0)
213 |       rspec-core (~> 3.11.0)
214 |       rspec-expectations (~> 3.11.0)
215 |       rspec-mocks (~> 3.11.0)
216 |     rspec-core (3.11.0)
217 |       rspec-support (~> 3.11.0)
218 |     rspec-expectations (3.11.0)
219 |       diff-lcs (>= 1.2.0, < 2.0)
220 |       rspec-support (~> 3.11.0)
221 |     rspec-mocks (3.11.1)
222 |       diff-lcs (>= 1.2.0, < 2.0)
223 |       rspec-support (~> 3.11.0)
224 |     rspec-support (3.11.0)
225 |     rubocop (1.25.0)
226 |       parallel (~> 1.10)
227 |       parser (>= 3.1.0.0)
228 |       rainbow (>= 2.2.2, < 4.0)
229 |       regexp_parser (>= 1.8, < 3.0)
230 |       rexml
231 |       rubocop-ast (>= 1.15.1, < 2.0)
232 |       ruby-progressbar (~> 1.7)
233 |       unicode-display_width (>= 1.4.0, < 3.0)
234 |     rubocop-ast (1.17.0)
235 |       parser (>= 3.1.1.0)
236 |     rubocop-performance (1.13.2)
237 |       rubocop (>= 1.7.0, < 2.0)
238 |       rubocop-ast (>= 0.4.0)
239 |     ruby-progressbar (1.11.0)
240 |     scanf (1.0.0)
241 |     shacl (0.1.1)
242 |       json-ld (~> 3.1, >= 3.1.7)
243 |       rdf (~> 3.1, >= 3.1.8)
244 |       sparql (~> 3.1)
245 |       sxp (~> 1.1)
246 |     shex (0.6.4)
247 |       ebnf (~> 2.1, >= 2.2)
248 |       htmlentities (~> 4.3)
249 |       json-ld (~> 3.1)
250 |       json-ld-preloaded (~> 3.1)
251 |       rdf (~> 3.1)
252 |       rdf-xsd (~> 3.1)
253 |       sparql (~> 3.1)
254 |       sxp (~> 1.1)
255 |     simplecov (0.16.1)
256 |       docile (~> 1.1)
257 |       json (>= 1.8, < 3)
258 |       simplecov-html (~> 0.10.0)
259 |     simplecov-html (0.10.2)
260 |     sparql (3.1.8)
261 |       builder (~> 3.2)
262 |       ebnf (~> 2.1)
263 |       logger (~> 1.4)
264 |       rdf (~> 3.1, >= 3.1.14)
265 |       rdf-aggregate-repo (~> 3.1)
266 |       rdf-xsd (~> 3.1)
267 |       sparql-client (~> 3.1, >= 3.1.2)
268 |       sxp (~> 1.1)
269 |     sparql-client (3.1.2)
270 |       net-http-persistent (~> 4.0, >= 4.0.1)
271 |       rdf (~> 3.1)
272 |     standard (1.7.0)
273 |       rubocop (= 1.25.0)
274 |       rubocop-performance (= 1.13.2)
275 |     sxp (1.1.0)
276 |       rdf (~> 3.1)
277 |     sync (0.5.0)
278 |     temple (0.8.2)
279 |     term-ansicolor (1.7.1)
280 |       tins (~> 1.0)
281 |     thor (1.2.1)
282 |     tilt (2.0.10)
283 |     tins (1.31.0)
284 |       sync
285 |     unf (0.1.4)
286 |       unf_ext
287 |     unf_ext (0.0.8.1)
288 |     unicode-display_width (2.1.0)
289 |     unicode-types (1.7.0)
290 | 
291 | PLATFORMS
292 |   ruby
293 | 
294 | DEPENDENCIES
295 |   coveralls
296 |   data_kitten!
297 |   fakeweb!
298 |   pry
299 |   rspec
300 |   standard
301 | 
302 | BUNDLED WITH
303 |    2.1.4
304 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright 2013 The Open Data Institute
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build Status](http://img.shields.io/travis/theodi/data_kitten.svg)](https://travis-ci.org/theodi/data_kitten)
 2 | [![Dependency Status](http://img.shields.io/gemnasium/theodi/data_kitten.svg)](https://gemnasium.com/theodi/data_kitten)
 3 | [![Coverage Status](http://img.shields.io/coveralls/theodi/data_kitten.svg)](https://coveralls.io/r/theodi/data_kitten)
 4 | [![Code Climate](http://img.shields.io/codeclimate/github/theodi/data_kitten.svg)](https://codeclimate.com/github/theodi/data_kitten)
 5 | [![Gem Version](http://img.shields.io/gem/v/data_kitten.svg)](https://rubygems.org/gems/data_kitten)
 6 | [![License](http://img.shields.io/:license-mit-blue.svg)](http://theodi.mit-license.org)
 7 | [![Badges](http://img.shields.io/:badges-7/7-ff6799.svg)](https://github.com/pikesley/badger)
 8 | 
 9 | # data_kitten
10 | 
11 | ![DATAS - I HAZ THEM](https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/67399f2b335ef62d562dc9eb41c0db16/tumblr_mmy9g7rA8M1s4aj1ho1_500.jpg)
12 | 
13 | A collection of classes that represent Datasets and other concepts, modeled on [DCAT](http://www.w3.org/TR/vocab-dcat/)
14 | 
15 | The module is designed to automatically interrogate data sources and give back data 
16 | and metadata in a consistent format. The best starting place is probably by having a look at `Dataset`.
17 | 
18 | It is designed to handle data from multiple `Origins` (such as git repositories, local files, remote URLs), 
19 | `Hosts` (GitHub, etc), and `PublishingFormats` (DataPackage, RDFa, microdata, DSPL, etc).
20 | 
21 | Currently supports Datapackages in git repositories (including but not limited to GitHub repos). 
22 | Wider support will follow.
23 | 
24 | # Documentation
25 | 
26 | Full YARD documentation is available on [Rubydoc.info](http://rubydoc.info/github/theodi/data_kitten/master/frames).
27 | 
28 | # Licence
29 | 
30 | This code is open source under the MIT license. See the LICENSE.md file for full details.
31 | 
32 | # Requirements
33 | 
34 | * Git ~> 1.2.6
35 | 
36 | # Usage
37 | 
38 | Pop the gem into your Gemfile:
39 | 
40 |         gem 'data_kitten', :git => "https://github.com/theodi/data_kitten.git"
41 | 
42 | Require if you need to:
43 | 
44 | 	require 'data_kitten'
45 | 	
46 | Request a dataset:
47 | 	
48 | 	dataset = DataKitten::Dataset.new("https://github.com/theodi/dataset-mod-disposals.git")
49 | 	
50 | Use the results:
51 | 
52 | 	dataset.supported?
53 | 	dataset.origin
54 | 	dataset.host
55 | 	dataset.data_title
56 | 	dataset.documentation_url
57 | 	dataset.release_type
58 | 	dataset.time_sensitive?
59 | 	dataset.publishing_format
60 | 	dataset.maintainers
61 | 	dataset.publishers
62 | 	dataset.licenses
63 | 	dataset.contributors
64 | 	dataset.crowdsourced?
65 | 	dataset.contributor_agreement_url
66 | 	dataset.distributions
67 | 	dataset.change_history
68 | 	
69 | 	# And more to come!
70 | 
71 | See example usage in a Rails project at [https://github.com/theodi/git-data-viewer](https://github.com/theodi/git-data-viewer)
72 | 
73 | ![actual_data_kitten](http://i.imgur.com/wXZEkh7.gif)
74 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | $:.unshift File.join(File.dirname(__FILE__), "lib")
2 | 
3 | require "rspec/core/rake_task"
4 | 
5 | RSpec::Core::RakeTask.new(:spec)
6 | 
7 | task default: :spec
8 | 


--------------------------------------------------------------------------------
/bin/data_kitten:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | $:.unshift File.join( File.dirname(__FILE__), "..", "lib")
 3 | 
 4 | require 'data_kitten'
 5 | require 'pp'
 6 | # Command-line entry point: given a dataset access URL, print each public method of the Dataset with its value.
 7 | if ARGV.length == 0
 8 |   puts "Usage: data_kitten <access_url>"
 9 |   exit 1
10 | end
11 | # Only the first argument is used; any further arguments are ignored.
12 | dataset = DataKitten::Dataset.new(ARGV[0])
13 | # Without a publishing format there is no metadata to report, so exit with a failure status.
14 | if dataset.publishing_format == nil
15 |   puts "Unable to determine format for dataset metadata"
16 |   exit 1
17 | end
18 | # Take the dataset's public methods minus everything Object.public_methods lists, drop names containing "=", and call each one with no arguments.
19 | (dataset.public_methods - Object.public_methods).sort.delete_if {|x| x.to_s =~ /=/ }.each do |method|
20 |     puts "#{method}: #{dataset.send(method).pretty_inspect}"
21 | end
22 | 
23 | 


--------------------------------------------------------------------------------
/data_kitten.gemspec:
--------------------------------------------------------------------------------
 1 | lib = File.expand_path("../lib/", __FILE__)
 2 | $:.unshift lib unless $:.include?(lib)
 3 | 
 4 | # Maintain your gem's version:
 5 | require "data_kitten/version"
 6 | 
 7 | # Describe your gem and declare its dependencies:
 8 | Gem::Specification.new do |s|
 9 |   s.name = "data_kitten"
10 |   s.version = DataKitten::VERSION
11 |   s.authors = ["James Smith", "Stuart Harrison"]
12 |   s.email = ["tech@theodi.org"]
13 |   s.homepage = "http://github.com/data-kitten" # NOTE(review): README links to github.com/theodi/data_kitten - confirm this URL
14 |   s.summary = "Get dataset metadata in a consistent format - no matter what you throw at it"
15 |   s.license = "MIT"
16 |   # Package everything under app/, config/, db/ and lib/ that exists, plus the licence and README.
17 |   s.files = Dir["{app,config,db,lib}/**/*"] + ["LICENSE.md", "README.md"]
18 |   s.executables << "data_kitten"
19 |   # "~> 2.5" allows >= 2.5 and < 3.0; combined with "< 2.6" only the Ruby 2.5.x series is accepted.
20 |   s.required_ruby_version = ["~> 2.5", "< 2.6"]
21 |   # Runtime dependencies.
22 |   s.add_dependency "rake"
23 |   s.add_dependency "git", "~> 1.7"
24 |   s.add_dependency "json", "~> 2.5"
25 |   s.add_dependency "rest-client", "~> 2.1"
26 |   s.add_dependency "linkeddata", ">= 2", "< 4"
27 |   s.add_dependency "nokogiri", "~> 1.6"
28 |   s.add_dependency "datapackage", "~> 0.0.0"
29 |   # Development-only dependencies.
30 |   s.add_development_dependency "rspec"
31 |   s.add_development_dependency "coveralls"
32 |   s.add_development_dependency "fakeweb", ["~> 1.3"]
33 |   s.add_development_dependency "pry"
34 |   s.add_development_dependency "standard"
35 | end
36 | 


--------------------------------------------------------------------------------
/lib/data_kitten.rb:
--------------------------------------------------------------------------------
 1 | require "csv"
 2 | require "uri"
 3 | require "cgi"
 4 | require "git"
 5 | require "json"
 6 | require "rest-client"
 7 | require "rdf"
 8 | require "linkeddata"
 9 | require "nokogiri"
10 | require "uri"
11 | require "datapackage"
12 | 
13 | require "data_kitten/license"
14 | require "data_kitten/rights"
15 | require "data_kitten/agent"
16 | require "data_kitten/source"
17 | require "data_kitten/temporal"
18 | require "data_kitten/dataset"
19 | require "data_kitten/distribution_format"
20 | require "data_kitten/distribution"
21 | require "data_kitten/fetcher"
22 | 
23 | # A collection of classes that represent Datasets and other concepts, modeled on {http://www.w3.org/TR/vocab-dcat/ DCAT}.
24 | #
25 | # The module is designed to automatically interrogate data sources and give back data and metadata in a consistent
26 | # format. The best starting place is probably by having a look at {Dataset}.
27 | #
28 | # It is designed to handle data from multiple {Origins} (such as git repositories, local files, remote URLs),
29 | # {Hosts} (GitHub, etc), and {PublishingFormats} (DataPackage, RDFa, microdata, DSPL, etc).
30 | #
31 | # Currently supports Datapackages in git repositories (including but not limited to GitHub repos). Wider support will follow.
32 | #
33 | # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/67399f2b335ef62d562dc9eb41c0db16/tumblr_mmy9g7rA8M1s4aj1ho1_500.jpg
34 | #
35 | # @example Load a Dataset from a git repository
36 | #   dataset = Dataset.new(access_url: 'git://github.com/theodi/dataset-metadata-survey.git')
37 | #   dataset.supported?                # => true
38 | #   dataset.origin                    # => :git
39 | #   dataset.host                      # => :github
40 | #   dataset.publishing_format         # => :datapackage
41 | #   dataset.distributions             # => [Distribution<#1>, Distribution<#2>]
42 | #   dataset.distributions[0].headers  # => ['col1', 'col2']
43 | #   dataset.distributions[0].data[0]  # => {'col1' => 'value_1', 'col2' => 'value_2'}
44 | 


--------------------------------------------------------------------------------
/lib/data_kitten/agent.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   # A person or organisation.
 3 |   #
 4 |   # Naming is based on {http://xmlns.com/foaf/spec/#term_Agent foaf:Agent}, but with useful aliases for other vocabularies.
 5 |   class Agent
 6 |     # Create a new Agent
 7 |     #
 8 |     # @param [Hash] options the details of the Agent.
 9 |     # @option options [String] :name The Agent's name
10 |     # @option options [String] :homepage The homepage URL for the Agent
11 |     # @option options [String] :mbox Email address for the Agent
12 |     # @note Options are looked up by symbol key; with a plain Hash, an absent key leaves that attribute nil.
13 |     def initialize(options)
14 |       @name = options[:name]
15 |       @homepage = options[:homepage]
16 |       @mbox = options[:mbox]
17 |     end
18 | 
19 |     # @!attribute name
20 |     #   @return [String] the name of the Agent
21 |     attr_accessor :name
22 | 
23 |     # @!attribute homepage
24 |     #   @return [String] the homepage URL of the Agent
25 |     attr_accessor :homepage
26 |     alias url homepage # reader aliases only: no url= or uri= writer is defined
27 |     alias uri homepage
28 | 
29 |     # @!attribute mbox
30 |     #   @return [String] the email address of the Agent
31 |     attr_accessor :mbox
32 |     alias email mbox
33 |     # Value equality: true only for another Agent whose name, homepage and mbox all match.
34 |     def ==(agent)
35 |       agent.is_a?(Agent) && ([name, homepage, mbox] == [agent.name, agent.homepage, agent.mbox])
36 |     end
37 |   end
38 | end
39 | 


--------------------------------------------------------------------------------
/lib/data_kitten/dataset.rb:
--------------------------------------------------------------------------------
  1 | require "data_kitten/origins"
  2 | require "data_kitten/hosts"
  3 | require "data_kitten/publishing_formats"
  4 | 
  5 | module DataKitten
  6 |   # Represents a single dataset from some origin (see {http://www.w3.org/TR/vocab-dcat/#class-dataset dcat:Dataset}
  7 |   # for relevant vocabulary).
  8 |   #
  9 |   # Designed to be created with a URI to the dataset, and then to work out metadata from there.
 10 |   #
 11 |   # Currently supports Datasets hosted in Git (and optionally on GitHub), and which
 12 |   # use the Datapackage metadata format.
 13 |   #
 14 |   # @example Load a Dataset from a git repository
 15 |   #   dataset = Dataset.new('git://github.com/theodi/dataset-metadata-survey.git')
 16 |   #   dataset.supported?         # => true
 17 |   #   dataset.origin             # => :git
 18 |   #   dataset.host               # => :github
 19 |   #   dataset.publishing_format  # => :datapackage
 20 |   #
 21 |   class Dataset
 22 |     include DataKitten::Origins
 23 |     include DataKitten::Hosts
 24 |     include DataKitten::PublishingFormats
 25 | 
 26 |     # @!attribute access_url
 27 |     #   @return [String] the URL that gives access to the dataset
 28 |     attr_accessor :access_url
 29 | 
 30 |     # Create a new Dataset object
 31 |     #
 32 |     # The class will attempt to auto-load metadata from this URL.
 33 |     #
 34 |     # @overload new(url)
 35 |     #   @param [String] url A URL that can be used to access the Dataset
 36 |     #
 37 |     # @overload new(options)
 38 |     #   @param [Hash] options the details of the Dataset.
 39 |     #   @option options [String] :access_url A URL that can be used to access the Dataset.
 40 |     #
 41 |     def initialize(url_or_options, base_url = nil) # base_url is optional and is stored as @base_uri when present
 42 |       url = case url_or_options
 43 |       when Hash
 44 |         base_url ||= url_or_options[:base_url] # an explicit base_url argument wins over options[:base_url]
 45 |         url_or_options[:access_url]
 46 |       else
 47 |         url_or_options
 48 |       end
 49 |       @access_url = DataKitten::Fetcher.wrap(url) # the wrapped value backs #uri, #url and #source
 50 |       @base_uri = URI(base_url) if base_url
 51 |       # Detection hooks run in this order; they are not defined in this part of the file.
 52 |       detect_origin
 53 |       detect_host
 54 |       detect_publishing_format
 55 |     end
 56 | 
 57 |     def uri
 58 |       URI(@access_url.to_s)
 59 |     end
 60 | 
 61 |     def base_uri # the base URI given at construction, or a fallback derived from the access URL
 62 |       @base_uri || uri.merge("/") # fallback resolves "/" against the access URL, i.e. the root of its host
 63 |     end
 64 | 
 65 |     def url
 66 |       @access_url.to_s
 67 |     end
 68 | 
 69 |     def source # memoised @access_url.as_json; nil whenever @access_url.ok? is false
 70 |       @source ||= @access_url.as_json if @access_url.ok? # the `if` wraps the whole `||=`, so a value set via source= is also hidden while ok? is false
 71 |     end
 72 | 
 73 |     attr_writer :source
 74 | 
 75 |     # Can metadata be loaded for this Dataset?
 76 |     #
 77 |     # @return [Boolean] true if metadata can be loaded, false if it's
 78 |     #                   an unknown origin type, or has an unknown metadata format.
 79 |     def supported?
 80 |       !(origin.nil? || publishing_format.nil?)
 81 |     end
 82 | 
 83 |     # The origin type of the dataset.
 84 |     #
 85 |     # @return [Symbol] The origin type. For instance, datasets loaded from git
 86 |     #                  repositories will return +:git+. If no origin type is
 87 |     #                  identified, will return +nil+.
 88 |     def origin
 89 |       nil
 90 |     end
 91 | 
 92 |     # Where the dataset is hosted.
 93 |     #
 94 |     # @return [Symbol] The host. For instance, data loaded from github repositories
 95 |     #                  will return +:github+. This can be used to control extra host-specific
 96 |     #                  behaviour if required. If no host type is identified, will return +nil+.
 97 |     def host
 98 |       nil
 99 |     end
100 | 
101 |     # A unique identifier of the dataset.
102 |     #
103 |     # @return [String] the identifier of the dataset
104 |     #
105 |     attr_accessor :identifier
106 | 
107 |     # The human-readable title of the dataset.
108 |     #
109 |     # @return [String] the title of the dataset.
110 |     def data_title
111 |       nil
112 |     end
113 | 
114 |     # A brief description of the dataset
115 |     #
116 |     # @return [String] the description of the dataset.
117 |     def description
118 |       nil
119 |     end
120 | 
121 |     # Keywords for the dataset
122 |     #
123 |     # @return [Array<String>] an array of keywords
124 |     def keywords
125 |       []
126 |     end
127 | 
128 |     # Human-readable documentation for the dataset.
129 |     #
130 |     # @return [String] the URL of the documentation.
131 |     def documentation_url
132 |       nil
133 |     end
134 | 
135 |     # What type of dataset is this?
136 |     # Options are: +:web_service+ for API-accessible data, or +:one_off+ for downloadable data dumps.
137 |     #
138 |     # @return [Symbol] the release type.
    def release_type
      # NOTE(review): this default is +false+, although the documented return
      # type is a Symbol and the sibling "unknown" defaults in this class return
      # +nil+. Both are falsy, but +release_type.nil?+ is false here - confirm
      # whether callers depend on that before changing it.
      false
    end
142 | 
143 |     # Date the dataset was released
144 |     #
145 |     # @return [Date] the release date of the dataset
    def issued
      # Base default: always nil at this level.
      # Presumably overridden by a mixed-in publishing-format module - confirm.
      nil
    end
    # +release_date+ is an alternative name for #issued. The alias is bound to
    # this base implementation at class-definition time.
    alias release_date issued
150 | 
151 |     # Date the dataset was last modified
152 |     #
153 |     # @return [Date] the dataset's last modified date
    def modified
      # Base default: always nil at this level.
      # Presumably overridden by a mixed-in publishing-format module - confirm.
      nil
    end
157 | 
158 |     # A web page that can be used to gain access to the dataset, its distributions and/or additional information.
159 |     #
160 |     # @return [String] The URL to the dataset
    def landing_page
      # Base default: always nil at this level.
      # Presumably overridden by a mixed-in publishing-format module - confirm.
      nil
    end
164 | 
165 |     # The temporal coverage of the dataset
166 |     #
167 |     # @return [Object<Temporal>] the start and end dates of the dataset's temporal coverage
    def temporal
      # Base default: always nil at this level (no temporal coverage known).
      nil
    end
171 | 
172 |     # Where the data is sourced from
173 |     #
174 |     # @return [Array<Source>] the sources of the data, each as a Source object.
    def sources
      # Base default: always an empty array (no sources known).
      []
    end
178 | 
179 |     # Is the information time-sensitive?
180 |     #
181 |     # @return [Boolean] whether the information will go out of date.
    def time_sensitive?
      # Base default: always false at this level.
      false
    end
185 | 
186 |     # The publishing format for the dataset.
187 |     #
188 |     # @return [Symbol] The format. For instance, datasets that publish metadata in
189 |     #                  Datapackage format will return +:datapackage+. If no format
190 |     #                  is identified, will return +nil+.
    def publishing_format
      # Base default: no format identified. The PublishingFormats modules define
      # their own #publishing_format (CKAN, for instance, defines one documented
      # as +:ckan+), which takes precedence once extended onto the object.
      nil
    end
194 | 
195 |     # A list of maintainers
196 |     #
197 |     # @return [Array<Agent>] An array of maintainers, each as an Agent object.
    def maintainers
      # Base default: always an empty array (no maintainers known).
      []
    end
201 | 
202 |     # A list of publishers
203 |     #
204 |     # @return [Array<Agent>] An array of publishers, each as an Agent object.
    def publishers
      # Base default: always an empty array (no publishers known).
      []
    end
208 | 
209 |     # A list of licenses
210 |     #
211 |     # @return [Array<License>] An array of licenses, each as a License object.
    def licenses
      # Base default: always an empty array (no licenses known).
      []
    end
215 | 
    # The rights statement for the data
217 |     #
218 |     # @return [Object<Rights>] How the content and data can be used, as well as copyright notice and attribution URL
    def rights
      # Base default: always nil at this level (no rights statement known).
      nil
    end
222 | 
223 |     # A list of contributors
224 |     #
225 |     # @return [Array<Agent>] An array of contributors to the dataset, each as an Agent object.
    def contributors
      # Base default: always an empty array (no contributors known).
      []
    end
229 | 
230 |     # The language of the dataset.
231 |     #
232 |     # @return [String] the language of the dataset
    def language
      # Base default: always nil at this level (language unknown).
      nil
    end
236 | 
237 |     # The main category the dataset belongs to.
238 |     #
239 |     # @return [String]
    def theme
      # Base default: always nil at this level (no category known).
      nil
    end
243 | 
244 |     # Has the data been crowdsourced?
245 |     #
246 |     # @return [Boolean] Whether the data has been crowdsourced or not.
    def crowdsourced?
      # Base default: always false at this level.
      false
    end
250 | 
251 |     # The URL of the contributor license agreement
252 |     #
253 |     # @return [String] A URL for the agreement that contributors accept.
    def contributor_agreement_url
      # Base default: always nil at this level (no agreement URL known).
      nil
    end
257 | 
258 |     # A list of distributions. Has aliases for popular alternative vocabularies.
259 |     #
260 |     # @return [Array<Distribution>] An array of Distribution objects.
    def distributions
      # Base default: always an empty array (no distributions known).
      []
    end
    # +files+ and +resources+ are alternative names for #distributions. The
    # aliases are bound to this base implementation at class-definition time.
    alias files distributions
    alias resources distributions
266 | 
267 |     # How frequently the data is updated.
268 |     #
269 |     # @return [String] The frequency of update expressed as a dct:Frequency.
    def update_frequency
      # Base default: always nil at this level (update frequency unknown).
      nil
    end
273 | 
274 |     # A history of changes to the Dataset
275 |     #
276 |     # @return [Array] An array of changes. Exact format depends on the origin and publishing format.
    def change_history
      # Base default: always an empty array. Origins::Git defines its own
      # #change_history built from the repository log, which takes precedence
      # once that module has been extended onto the object.
      []
    end
280 | 
281 |     # Spatial coverage of the dataset
282 |     #
283 |     # @return [GeoJSON Geometry] A GeoJSON geometry object of the spatial coverage
    def spatial
      # Base default: always nil at this level (no spatial coverage known).
      nil
    end
287 | 
288 |     attr_accessor :metadata
289 |   end
290 | end
291 | 


--------------------------------------------------------------------------------
/lib/data_kitten/distribution.rb:
--------------------------------------------------------------------------------
  1 | module DataKitten
  2 |   # A specific available form of a dataset, such as a CSV file, an API, or an RSS feed.
  3 |   #
  4 |   # Based on {http://www.w3.org/TR/vocab-dcat/#class-distribution dcat:Distribution}, but
  5 |   # with useful aliases for other vocabularies.
  6 |   #
  7 |   class Distribution
  8 |     # @!attribute format
  9 |     #   @return [DistributionFormat] the file format of the distribution.
 10 |     attr_accessor :format
 11 | 
 12 |     # @!attribute access_url
 13 |     #   @return [String] a URL to access the distribution.
 14 |     attr_accessor :access_url
 15 | 
 16 |     # @!attribute download_url
 17 |     #   @return [String] a URL to the file of the distribution.
 18 |     attr_accessor :download_url
 19 |     alias uri download_url
 20 | 
 21 |     # @!attribute path
 22 |     #   @return [String] the path of the distribution within the source, if appropriate
 23 |     attr_accessor :path
 24 | 
 25 |     # @!attribute title
 26 |     #   @return [String] A usable name for the distribution, unique within the {Dataset}.
 27 |     attr_accessor :title
 28 |     alias name title
 29 | 
 30 |     # @!attribute description
 31 |     #   @return [String] a textual description
 32 |     attr_accessor :description
 33 | 
 34 |     # @!attribute issued
 35 |     #   @return [Date] date created
 36 |     attr_accessor :issued
 37 | 
 38 |     # @!attribute modified
 39 |     #   @return [Date] date modified
 40 |     attr_accessor :modified
 41 | 
 42 |     # @!attribute byte_size
 43 |     #   @return [Integer] size of file in bytes
 44 |     attr_accessor :byte_size
 45 | 
 46 |     # @!attribute media_type
 47 |     #   @return [String] the IANA media type (MIME type) of the distribution
 48 |     attr_accessor :media_type
 49 | 
 50 |     # @!attribute schema
 51 |     #   @return [Hash] a hash representing the schema of the data within the distribution. Will
 52 |     #                  change to a more structured object later.
 53 |     attr_accessor :schema
 54 | 
 55 |     # @!attribute extension
 56 |     #   @return [String] the file extension of the distribution
 57 |     attr_accessor :extension
 58 | 
 59 |     # Create a new Distribution. Currently only loads from Datapackage +resource+ hashes.
 60 |     #
 61 |     # @param dataset [Dataset] the {Dataset} that this is a part of.
 62 |     # @param options [Hash] A set of options with which to initialise the distribution.
 63 |     # @option options [String] :datapackage_resource the +resource+ section of a Datapackage
 64 |     #                                                representation to load information from.
 65 |     def initialize(dataset, options)
 66 |       # Store dataset
 67 |       @dataset = dataset
 68 |       # Parse datapackage
 69 |       if (r = options[:datapackage_resource])
 70 |         # Load basics
 71 |         @description = r["description"]
 72 |         # Work out format
 73 |         @format = begin
 74 |           @extension = r["format"]
 75 |           if @extension.nil?
 76 |             @extension = r["path"].is_a?(String) ? r["path"].split(".").last.upcase : nil
 77 |           end
 78 |           @extension ? DistributionFormat.new(self) : nil
 79 |         end
 80 |         # Get CSV dialect
 81 |         @dialect = r["dialect"]
 82 |         # Extract schema
 83 |         @schema = r["schema"]
 84 |         # Get path
 85 |         @path = r["path"]
 86 |         @download_url = r["url"]
 87 |         # Set title
 88 |         @title = @path || @uri
 89 |       elsif (r = options[:dcat_resource])
 90 |         @title = r[:title]
 91 |         @description = r[:title]
 92 |         @access_url = r[:accessURL]
 93 |       elsif (r = options[:ckan_resource])
 94 |         @title = r[:title]
 95 |         @description = r[:title]
 96 |         @issued = r[:issued]
 97 |         @modified = r[:modified]
 98 |         @access_url = r[:accessURL]
 99 |         @download_url = r[:downloadURL]
100 |         @byte_size = r[:byteSize]
101 |         @media_type = r[:mediaType]
102 |         @extension = r[:format]
103 |         # Load HTTP Response for further use
104 |         @format = r[:format] ? DistributionFormat.new(self) : nil
105 |       end
106 |       # Set default CSV dialect
107 |       @dialect ||= {
108 |         "delimiter" => ","
109 |       }
110 | 
111 |       @download = Fetcher.wrap(@download_url)
112 |     end
113 | 
114 |     # An array of column headers for the distribution. Loaded from the schema, or from the file directly if no
115 |     # schema is present.
116 |     #
117 |     # @return [Array<String>] an array of column headers, as strings.
118 |     def headers
119 |       @headers ||= begin
120 |         if @schema
121 |           @schema["fields"].map { |x| x["id"] }
122 |         else
123 |           data.headers
124 |         end
125 |       end
126 |     end
127 | 
128 |     # Whether the file that the distribution represents actually exists
129 |     #
130 |     # @return [Boolean] whether the HTTP response returns a success code or not
131 |     def exists?
132 |       @download.exists?
133 |     end
134 | 
135 |     # A CSV object representing the loaded data.
136 |     #
137 |     # @return [Array<Array<String>>] an array of arrays of strings, representing each row.
138 |     def data
139 |       @data ||= begin
140 |         if @path
141 |           datafile = @dataset.send(:load_file, @path)
142 |         elsif @download.ok?
143 |           datafile = @download.body
144 |         end
145 |         if datafile
146 |           case format.extension
147 |           when :csv
148 |             CSV.parse(
149 |               datafile,
150 |               headers: true,
151 |               col_sep: @dialect["delimiter"]
152 |             )
153 |           end
154 |         end
155 |                 rescue
156 |                   nil
157 |       end
158 |     end
159 |   end
160 | end
161 | 


--------------------------------------------------------------------------------
/lib/data_kitten/distribution_format.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   # A file format for a distribution
 3 |   #
 4 |   # For instance CSV, XML, etc.
 5 |   #
 6 |   class DistributionFormat
 7 |     FORMATS = {
 8 |       csv: {structured: true, open: true},
 9 |       xls: {structured: true, open: false},
10 |       xlsx: {structured: true, open: true},
11 |       rdf: {structured: true, open: true},
12 |       xml: {structured: true, open: true},
13 |       wms: {structured: true, open: true},
14 |       ods: {structured: true, open: true},
15 |       rdfa: {structured: true, open: true},
16 |       kml: {structured: true, open: true},
17 |       rss: {structured: true, open: true},
18 |       json: {structured: true, open: true},
19 |       ical: {structured: true, open: true},
20 |       sparql: {structured: true, open: true},
21 |       georss: {structured: true, open: true},
22 |       geojson: {structured: true, open: true},
23 |       shp: {structured: true, open: true},
24 |       html: {structured: false, open: true},
25 |       doc: {structured: false, open: false},
26 |       pdf: {structured: false, open: true}
27 |     }
28 |     FORMATS.default = {}
29 | 
30 |     # @!attribute extension
31 |     # @return [Symbol] a symbol for the file extension. For instance, :csv.
32 |     attr_reader :extension
33 | 
34 |     # Create a new DistributionFormat object with the relevant extension
35 |     #
36 |     # @param distribution [Distribution] the distribution for the format
37 |     def initialize(distribution)
38 |       @distribution = distribution
39 |       # Store extension as a lowercase symbol
40 |       @extension = distribution.extension.to_s.downcase.to_sym
41 |     end
42 | 
43 |     # Is this a structured format?
44 |     #
45 |     # @return [Boolean] whether the format is machine-readable or not.
46 |     def structured?
47 |       FORMATS[extension][:structured]
48 |     end
49 | 
50 |     # Is this an open format?
51 |     #
52 |     # @return [Boolean] whether the format is open or not
53 |     def open?
54 |       FORMATS[extension][:open]
55 |     end
56 | 
57 |     # Whether the format of the file matches the extension given by the data
58 |     #
59 |     # @return [Boolean] whether the MIME type given in the HTTP response matches the data or not
60 |     def matches?
61 |       mimes = []
62 |       MIME::Types.type_for(@extension.to_s).each { |i| mimes << i.content_type }
63 |       !!(@distribution.http_head.content_type =~ /#{mimes.join('|')}/) || false
64 |     rescue
65 |       nil
66 |     end
67 |   end
68 | end
69 | 


--------------------------------------------------------------------------------
/lib/data_kitten/fetcher.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   class Fetcher
 3 |     attr_reader :url
 4 | 
 5 |     def self.wrap(url_or_fetcher)
 6 |       if url_or_fetcher.is_a?(self)
 7 |         url_or_fetcher
 8 |       else
 9 |         new(url_or_fetcher)
10 |       end
11 |     end
12 | 
13 |     def initialize(url)
14 |       @url = url
15 |     end
16 | 
17 |     def exists?
18 |       if @requested
19 |         ok?
20 |       else
21 |         RestClient.head(url).code == 200
22 |       end
23 |     rescue RestClient::ExceptionWithResponse => _error
24 |       false
25 |     end
26 | 
27 |     def ok?
28 |       code == 200
29 |     end
30 | 
31 |     def code
32 |       response ? response.code : @code
33 |     end
34 | 
35 |     def body
36 |       response
37 |     end
38 | 
39 |     def as_json
40 |       JSON.parse(body) if response
41 |     rescue JSON::ParserError
42 |       nil
43 |     end
44 | 
45 |     def content_type
46 |       response.headers[:content_type] if response
47 |     end
48 | 
49 |     def content_type_format
50 |       if (val = content_type)
51 |         val.split(";").first
52 |       end
53 |     end
54 | 
55 |     def to_s
56 |       url.to_s
57 |     end
58 | 
59 |     def html?
60 |       !!(content_type_format =~ %r{^text/html}i)
61 |     end
62 | 
63 |     def json?
64 |       !!(content_type_format =~ %r{^application/json}i)
65 |     end
66 | 
67 |     private
68 | 
69 |     def response
70 |       unless @requested
71 |         @requested = true
72 |         begin
73 |           @response = RestClient.get(url)
74 |         rescue RestClient::ExceptionWithResponse => error
75 |           @error = error.response
76 |           @code = @error.code
77 |         end
78 |       end
79 |       @response
80 |     end
81 |   end
82 | end
83 | 


--------------------------------------------------------------------------------
/lib/data_kitten/hosts.rb:
--------------------------------------------------------------------------------
 1 | require "data_kitten/hosts/github"
 2 | require "data_kitten/hosts/bitbucket"
 3 | require "data_kitten/hosts/gist"
 4 | 
 5 | module DataKitten
 6 |   module Hosts
 7 |     private
 8 | 
 9 |     def detect_host
10 |       [
11 |         DataKitten::Hosts::Github,
12 |         DataKitten::Hosts::Bitbucket,
13 |         DataKitten::Hosts::Gist
14 |       ].each do |host|
15 |         extend host if host.supported?(url)
16 |         break
17 |       end
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/data_kitten/hosts/bitbucket.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Hosts
 3 |     # Bitbucket host module. Automatically mixed into {Dataset} for datasets that are loaded from Bitbucket.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module Bitbucket
 8 |       def self.supported?(uri)
 9 |         uri =~ /\A(git|https?):\/\/[^\/]*bitbucket\.org\//
10 |       end
11 | 
12 |       # Where the dataset is hosted.
13 |       # @return [Symbol] +:bitbucket+
14 |       # @see Dataset#host
15 |       def host
16 |         :bitbucket
17 |       end
18 | 
19 |       # Helper for generating Bitbucket URLs
20 |       #
21 |       # @param path [String] The path to append to the Bitbucket base URL.
22 |       #
23 |       # @return [String] The supplied path with the Bitbucket base URL prepended
24 |       #
25 |       # @example
26 |       #   dataset = Dataset.new('https://bitbucket.org/floppy/hot-drinks.git')
27 |       #   dataset.bitbucket_path           # => 'https://bitbucket.org/floppy/hot-drinks/'
28 |       #   dataset.bitbucket_path('pull-requests') # => 'https://bitbucket.org/floppy/hot-drinks/pull-requests'
29 |       def bitbucket_path(path = "")
30 |         "https://bitbucket.org/#{bitbucket_user_name}/#{bitbucket_repository_name}/#{path}"
31 |       end
32 | 
33 |       private
34 | 
35 |       def bitbucket_user_name
36 |         @bitbucket_user_name ||= uri.split("/")[-2]
37 |       end
38 | 
39 |       def bitbucket_repository_name
40 |         @bitbucket_repository_name ||= uri.split("/")[-1].split(".")[0]
41 |       end
42 |     end
43 |   end
44 | end
45 | 


--------------------------------------------------------------------------------
/lib/data_kitten/hosts/gist.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Hosts
 3 |     # Gist host module. Automatically mixed into {Dataset} for datasets that are loaded from Gist.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module Gist
 8 |       def self.supported?(uri)
 9 |         uri =~ /\A(git|https?):\/\/gist\.github\.com\//
10 |       end
11 | 
12 |       # Where the dataset is hosted.
13 |       # @return [Symbol] +:gist+
14 |       # @see Dataset#host
15 |       def host
16 |         :gist
17 |       end
18 | 
19 |       # Helper for generating Gist URLs
20 |       #
21 |       # @param path [String] The path to append to the Gist base URL.
22 |       #
23 |       # @return [String] The supplied path with the Gist base URL prepended
24 |       #
25 |       # @example
26 |       #   dataset = Dataset.new('git://gist.github.com/5633865.git')
27 |       #   dataset.gist_path           # => 'https://gist.github.com/5633865'
28 |       #   dataset.gist_path('download') # => 'https://gist.github.com/5633865/download'
29 |       def gist_path(path = "")
30 |         "https://gist.github.com/#{gist_repository_name}/#{path}"
31 |       end
32 | 
33 |       private
34 | 
35 |       def gist_repository_name
36 |         @gist_repository_name ||= uri.split("/")[-1].split(".")[0]
37 |       end
38 |     end
39 |   end
40 | end
41 | 


--------------------------------------------------------------------------------
/lib/data_kitten/hosts/github.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Hosts
 3 |     # GitHub host module. Automatically mixed into {Dataset} for datasets that are loaded from GitHub.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module Github
 8 |       def self.supported?(uri)
 9 |         uri =~ /\A(git|https?):\/\/github\.com\//
10 |       end
11 | 
12 |       # Where the dataset is hosted.
13 |       # @return [Symbol] +:github+
14 |       # @see Dataset#host
15 |       def host
16 |         :github
17 |       end
18 | 
19 |       # Helper for generating GitHub URLs
20 |       #
21 |       # @param path [String] The path to append to the GitHub base URL.
22 |       #
23 |       # @return [String] The supplied path with the GitHub base URL prepended
24 |       #
25 |       # @example
26 |       #   dataset = Dataset.new('git://github.com/theodi/dataset-metadata-survey.git')
27 |       #   dataset.github_path           # => 'https://github.com/theodi/dataset-metadata-survey/'
28 |       #   dataset.github_path('issues') # => 'https://github.com/theodi/dataset-metadata-survey/issues'
29 |       def github_path(path = "")
30 |         "https://github.com/#{github_user_name}/#{github_repository_name}/#{path}"
31 |       end
32 | 
33 |       private
34 | 
35 |       def github_user_name
36 |         @github_user_name ||= uri.split("/")[-2]
37 |       end
38 | 
39 |       def github_repository_name
40 |         @github_repository_name ||= uri.split("/")[-1].split(".")[0]
41 |       end
42 |     end
43 |   end
44 | end
45 | 


--------------------------------------------------------------------------------
/lib/data_kitten/license.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   # A license for a {Dataset} or {Distribution}
 3 |   #
 4 |   class License
 5 |     LICENSES = {
 6 |       /opendatacommons.org.*\/by(\/|$)/ => "odc-by",
 7 |       /opendatacommons.org.*\/odbl(\/|$)/ => "odc-odbl",
 8 |       /opendatacommons.org.*\/pddl(\/|$)/ => "odc-pddl",
 9 |       /opendefinition.org.*\/odc-by(\/|$)/ => "odc-by",
10 |       /opendefinition.org.*\/odc-pddl(\/|$)/ => "odc-pddl",
11 |       /opendefinition.org.*\/cc-zero(\/|$)/ => "cc-zero",
12 |       /opendefinition.org.*\/cc-by(\/|$)/ => "cc-by",
13 |       /opendefinition.org.*\/cc-by-sa(\/|$)/ => "cc-by-sa",
14 |       /opendefinition.org.*\/gfdl(\/|$)/ => "gfdl",
15 |       /creativecommons.org.*\/zero(\/|$)/ => "cc-zero",
16 |       /creativecommons.org.*\/by-sa(\/|$)/ => "cc-by-sa",
17 |       /creativecommons.org.*\/by(\/|$)/ => "cc-by",
18 |       /(data|nationalarchives).gov.uk.*\/open-government-licence(\/|$)/ => "ogl-uk",
19 |       /usa.gov\/publicdomain(\/|$)/ => "us-pd"
20 |     }
21 | 
22 |     # @!attribute is
23 |     #   @return [String] a short ID that identifies the license.
24 |     attr_accessor :id
25 | 
26 |     # @!attribute name
27 |     #   @return [String] the human name of the license.
28 |     attr_accessor :name
29 | 
30 |     # @!attribute uri
31 |     #   @return [String] the URI for the license text.
32 |     attr_accessor :uri
33 | 
34 |     # @!attribute type
35 |     #   @return [String] the type of information this license applies to. Could be +:data+ or +:content+.
36 |     attr_accessor :type
37 | 
38 |     # @!attribute abbr
39 |     #   @return [String] the license abbreviation
40 |     attr_accessor :abbr
41 | 
42 |     # Create a new License object.
43 |     #
44 |     # @param options [Hash] A set of options with which to initialise the license.
45 |     # @option options [String] :id the short ID for the license
46 |     # @option options [String] :name the human name for the license
47 |     # @option options [String] :uri the URI of the license text
48 |     # @option options [String] :type the type of information covered by this license.
49 |     def initialize(options)
50 |       @id = options[:id]
51 |       @name = options[:name]
52 |       @uri = options[:uri]
53 |       @type = options[:type]
54 |       @abbr = get_license_abbr(@uri) if @uri
55 |     end
56 | 
57 |     def get_license_abbr(uri)
58 |       license = LICENSES.find { |regex, abbr| uri =~ regex }
59 |       license&.last
60 |     end
61 |   end
62 | end
63 | 


--------------------------------------------------------------------------------
/lib/data_kitten/origins.rb:
--------------------------------------------------------------------------------
 1 | require "data_kitten/origins/git"
 2 | require "data_kitten/origins/web_service"
 3 | require "data_kitten/origins/html"
 4 | require "data_kitten/origins/json"
 5 | require "data_kitten/origins/linked_data"
 6 | 
 7 | module DataKitten
 8 |   module Origins
 9 |     private
10 | 
11 |     def detect_origin
12 |       [
13 |         DataKitten::Origins::Git,
14 |         DataKitten::Origins::HTML,
15 |         DataKitten::Origins::JSON,
16 |         DataKitten::Origins::WebService,
17 |         DataKitten::Origins::LinkedData
18 |       ].each do |origin|
19 |         if origin.supported?(@access_url)
20 |           extend origin
21 |           break
22 |         end
23 |       end
24 |     end
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/lib/data_kitten/origins/git.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Origins
 3 |     # Git origin module. Automatically mixed into {Dataset} for datasets that are loaded from Git repositories.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module Git
 8 |       def self.supported?(resource)
 9 |         resource.to_s =~ /\A(git|https?):\/\/.*\.git\Z/
10 |       end
11 | 
12 |       # The origin type of the dataset.
13 |       # @return [Symbol] +:git+
14 |       # @see Dataset#origin
15 |       def origin
16 |         :git
17 |       end
18 | 
19 |       # A history of changes to the Dataset, taken from the full git changelog
20 |       # @see Dataset#change_history
21 |       def change_history
22 |         @change_history ||= begin
23 |           repository.log.map { |commit| commit }
24 |         end
25 |       end
26 | 
27 |       protected
28 | 
29 |       def load_file(path)
30 |         # Make sure we have a working copy
31 |         repository
32 |         # read file
33 |         File.read(File.join(working_copy_path, path))
34 |       end
35 | 
36 |       private
37 | 
38 |       def working_copy_path
39 |         # Create holding directory
40 |         FileUtils.mkdir_p(File.join(File.dirname(__FILE__), "..", "..", "..", "tmp", "repositories"))
41 |         # generate working copy dir
42 |         File.join(File.dirname(__FILE__), "..", "..", "..", "tmp", "repositories", @access_url.tr("/", "-"))
43 |       end
44 | 
45 |       def repository
46 |         @repository ||= begin
47 |           repo = ::Git.open(working_copy_path)
48 |           repo.pull("origin", "master")
49 |           repo
50 |                         rescue ArgumentError
51 |                           ::Git.clone(@access_url, working_copy_path)
52 |         end
53 |       end
54 |     end
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/lib/data_kitten/origins/html.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Origins
 3 |     # HTML origin module. Automatically mixed into {Dataset} for datasets that are accessed through an API.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module HTML
 8 |       def self.supported?(resource)
 9 |         resource.html?
10 |       end
11 | 
12 |       # The origin type of the dataset.
13 |       # @return [Symbol] +:html+
14 |       # @see Dataset#origin
15 |       def origin
16 |         :html
17 |       end
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/data_kitten/origins/json.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Origins
 3 |     # JSON origin module. Automatically mixed into {Dataset} for datasets that are accessed through an API.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module JSON
 8 |       def self.supported?(resource)
 9 |         resource.json?
10 |       end
11 | 
12 |       # The origin type of the dataset.
13 |       # @return [Symbol] +:html+
14 |       # @see Dataset#origin
15 |       def origin
16 |         :json
17 |       end
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/data_kitten/origins/linked_data.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Origins
 3 |     # Linked Data origin module. Automatically mixed into {Dataset} for datasets that are accessed through an API.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module LinkedData
 8 |       def self.supported?(resource)
 9 |         if (type = resource.content_type_format)
10 |           RDF::Format.content_types.key?(type)
11 |         end
12 |       end
13 | 
14 |       # The origin type of the dataset.
15 |       # @return [Symbol] +:linkeddata+
16 |       # @see Dataset#origin
17 |       def origin
18 |         :linkeddata
19 |       end
20 |     end
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/lib/data_kitten/origins/web_service.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module Origins
 3 |     # Web service origin module. Automatically mixed into {Dataset} for datasets that are accessed through an API.
 4 |     #
 5 |     # @see Dataset
 6 |     #
 7 |     module WebService
 8 |       def self.supported?(uri)
 9 |         false
10 |       end
11 | 
12 |       # The origin type of the dataset.
13 |       # @return [Symbol] +:web_service+
14 |       # @see Dataset#origin
15 |       def origin
16 |         :web_service
17 |       end
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/data_kitten/publishing_formats.rb:
--------------------------------------------------------------------------------
 1 | require "data_kitten/publishing_formats/datapackage"
 2 | require "data_kitten/publishing_formats/rdfa"
 3 | require "data_kitten/publishing_formats/linked_data"
 4 | require "data_kitten/publishing_formats/ckan"
 5 | 
 6 | module DataKitten
 7 |   module PublishingFormats
 8 |     private
 9 | 
10 |     def detect_publishing_format
11 |       [
12 |         DataKitten::PublishingFormats::Datapackage,
13 |         DataKitten::PublishingFormats::CKAN,
14 |         DataKitten::PublishingFormats::RDFa,
15 |         DataKitten::PublishingFormats::LinkedData
16 |       ].each do |format|
17 |         if format.supported?(self)
18 |           extend format
19 |           break
20 |         end
21 |       end
22 |     end
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/lib/data_kitten/publishing_formats/ckan.rb:
--------------------------------------------------------------------------------
  1 | require "data_kitten/utils/guessable_lookup.rb"
  2 | require "data_kitten/utils/ckan3_hash.rb"
  3 | 
  4 | module DataKitten
  5 |   module PublishingFormats
  6 |     module CKAN
      # Decide whether +instance+ points at a CKAN dataset and, if so, load its metadata.
      #
      # As a side effect this sets +instance.identifier+, +instance.metadata+
      # (extended with GuessableLookup) and +instance.source+. Several HTTP
      # requests may be made.
      #
      # @param instance [Dataset] object providing +uri+, +base_uri+ and writers
      #   for +identifier+, +metadata+ and +source+.
      # @return [Boolean] +true+ when metadata was loaded; +false+ if anything
      #   raised along the way (the bare +rescue+ swallows every StandardError,
      #   including network and JSON parse failures).
      def self.supported?(instance)
        uri = instance.uri
        base_uri = instance.base_uri
        # Split the path into its leading segments and the final segment.
        *base, package = uri.path.split("/")
        if %r{api/\d+/action/package_show/?$}.match?(uri.path)
          # The URI is itself an action-API package_show call: fetch it directly.
          result = JSON.parse(RestClient.get(uri.to_s))["result"]

          instance.identifier = result["id"]
          # Wrap the "extras" and "tags" lists with CKAN3Hash; for tags only the values are kept.
          result["extras"] = CKAN3Hash.new(result["extras"], "key", "value")
          result["tags"] = CKAN3Hash.new(result["tags"], "name", "display_name").values
          instance.metadata = result
        elsif %r{api/\d+/rest/dataset/}.match?(uri.path)
          # The URI is a REST-API dataset call: the response body is the metadata.
          result = JSON.parse(RestClient.get(uri.to_s))
          instance.identifier = result["id"]
          instance.metadata = result
        else
          # If the 2nd to last element in the path is 'dataset' then it's probably
          # the CKAN dataset view page, the last element will be the dataset id
          # or name
          if base.last == "dataset"
            instance.identifier = package
            # build a base URI ending with a /
            base_uri = get_base(uri)
          # If the package is a UUID - it's more than likely to be a CKAN ID
          # NOTE(review): the pattern is unanchored, so any segment merely
          # containing a UUID also takes this branch - confirm that is intended.
          elsif /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/.match?(package)
            instance.identifier = package
          else
            # Otherwise resolve the last path segment to an id: try the v3
            # action API first and fall back to the v2 REST API on failure.
            results = begin
              RestClient.get base_uri.merge("api/3/action/package_show").to_s, {params: {id: package}}
                      rescue RestClient::Exception
                        RestClient.get base_uri.merge("api/2/rest/dataset/#{package}").to_s
            end

            result = JSON.parse results
            # The action API nests the payload under "result"; the REST API does not.
            instance.identifier = result.fetch("result", result)["id"]
          end
          # With the identifier known, load the metadata from the REST package endpoint.
          instance.metadata = JSON.parse RestClient.get base_uri.merge("api/rest/package/#{instance.identifier}").to_s
        end
        instance.metadata.extend(GuessableLookup)
        instance.source = instance.metadata
        true
      rescue
        false
      end
 51 | 
 52 |       def self.get_base(uri)
 53 |         *base, _package = uri.path.split("/")
 54 |         if base.last == "dataset"
 55 |           uri.merge(base[0...-1].join("/") + "/")
 56 |         else
 57 |           uri.merge("/")
 58 |         end
 59 |       end
 60 | 
 61 |       # The publishing format for the dataset.
 62 |       # @return [Symbol] +:ckan+
 63 |       # @see Dataset#publishing_format
 64 |       def publishing_format
 65 |         :ckan
 66 |       end
 67 | 
 68 |       # The human-readable title of the dataset.
 69 |       #
 70 |       # @see Dataset#data_title
 71 |       def data_title
 72 |         metadata.lookup("title")
 73 |       end
 74 | 
 75 |       # A brief description of the dataset
 76 |       #
 77 |       # @see Dataset#description
 78 |       def description
 79 |         metadata.lookup("notes") || metadata.lookup("description")
 80 |       rescue
 81 |         nil
 82 |       end
 83 | 
 84 |       # An identifier for the dataset
 85 |       #
 86 |       # @see Dataset#identifier
 87 |       def identifier
 88 |         metadata.lookup("name") || @identifier
 89 |       end
 90 | 
 91 |       # A web page which can be used to gain access to the dataset
 92 |       #
 93 |       # @see Dataset#landing_page
 94 |       def landing_page
 95 |         metadata.lookup("extras", "landing_page") ||
 96 |           metadata.lookup("url") ||
 97 |           metadata.lookup("ckan_url")
 98 |       end
 99 | 
100 |       # Keywords for the dataset
101 |       #
102 |       # @see Dataset#keywords
103 |       def keywords
104 |         keywords = []
105 |         metadata.lookup("tags").each do |tag|
106 |           keywords << tag
107 |         end
108 |         keywords
109 |       rescue
110 |         []
111 |       end
112 | 
113 |       # A list of publishers.
114 |       #
115 |       # @see Dataset#publishers
116 |       def publishers
117 |         org = fetch_organization
118 |         result = if org
119 |           [org]
120 |         elsif (group_id = metadata.lookup("groups", 0, "id"))
121 |           [fetch_publisher(group_id)]
122 |         else
123 |           []
124 |         end
125 |         result.compact
126 |       end
127 | 
128 |       def maintainers
129 |         extract_agent("maintainer", "maintainer_email")
130 |       end
131 | 
132 |       def contributors
133 |         extract_agent("author", "author_email")
134 |       end
135 | 
136 |       # A list of licenses.
137 |       #
138 |       # @see Dataset#licenses
139 |       def licenses
140 |         id = metadata.lookup("license_id")
141 |         uri = metadata.lookup("license_url") || metadata.lookup("extras", "licence_url")
142 |         name = metadata.lookup("license_title") || metadata.lookup("extras", "licence_url_title")
143 |         if [id, uri, name].any?
144 |           [License.new(id: id, uri: uri, name: name)]
145 |         else
146 |           []
147 |         end
148 |       end
149 | 
150 |       # A list of distributions, referred to as +resources+ by Datapackage.
151 |       #
152 |       # @see Dataset#distributions
153 |       def distributions
154 |         distributions = []
155 |         metadata.lookup("resources").each do |resource|
156 |           distribution = {
157 |             title: resource["description"],
158 |             accessURL: landing_page,
159 |             downloadURL: resource["url"],
160 |             format: resource["format"],
161 |             mediaType: resource["mimetype"] || resource["content_type"]
162 |           }
163 |           distribution[:issued] = begin
164 |                                     Date.parse(resource["created"])
165 |                                   rescue
166 |                                     nil
167 |                                   end
168 |           distribution[:modified] = begin
169 |                                       Date.parse(resource["last_modified"] || resource["revision_timestamp"])
170 |                                     rescue
171 |                                       nil
172 |                                     end
173 |           distribution[:byteSize] = begin
174 |                                       Integer(resource["size"])
175 |                                     rescue
176 |                                       nil
177 |                                     end
178 |           distributions << Distribution.new(self, ckan_resource: distribution)
179 |         end
180 |         distributions
181 |       rescue
182 |         nil
183 |       end
184 | 
185 |       # How frequently the data is updated.
186 |       #
187 |       # @see Dataset#update_frequency
188 |       def update_frequency
189 |         metadata.lookup("extras", "update_frequency") ||
190 |           metadata.lookup("extras", "frequency-of-update") ||
191 |           metadata.lookup("extras", "accrual_periodicity")
192 |       rescue
193 |         nil
194 |       end
195 | 
196 |       # Date the dataset was released
197 |       #
198 |       # @see Dataset#issued
199 |       def issued
200 |         Date.parse metadata.lookup("metadata_created")
201 |       rescue
202 |         nil
203 |       end
204 | 
205 |       # Date the dataset was modified
206 |       #
207 |       # @see Dataset#modified
208 |       def modified
209 |         Date.parse metadata.lookup("metadata_modified")
210 |       rescue
211 |         nil
212 |       end
213 | 
214 |       # The temporal coverage of the dataset
215 |       #
216 |       # @see Dataset#temporal
217 |       def temporal
218 |         from = metadata.lookup("extras", "temporal_coverage-from") ||
219 |           metadata.lookup("extras", "temporal-extent-begin")
220 |         to = metadata.lookup("extras", "temporal_coverage-to") ||
221 |           metadata.lookup("extras", "temporal-extent-end")
222 |         start_date = begin
223 |                        Date.parse from
224 |                      rescue
225 |                        nil
226 |                      end
227 |         end_date = begin
228 |                      Date.parse to
229 |                    rescue
230 |                      nil
231 |                    end
232 |         Temporal.new(start: start_date, end: end_date)
233 |       end
234 | 
235 |       # The language of the dataset
236 |       #
237 |       # @see Dataset#language
238 |       def language
239 |         metadata.lookup("language") ||
240 |           metadata.lookup("metadata_language") ||
241 |           metadata.lookup("extras", "metadata_language") ||
242 |           metadata.lookup("extras", "language", 0) ||
243 |           metadata.lookup("extras", "language")
244 |       end
245 | 
246 |       # The main category of the dataset
247 |       #
248 |       # @see Dataset#theme
249 |       def theme
250 |         metadata.lookup("extras", "theme", 0) ||
251 |           metadata.lookup("extras", "theme-primary") ||
252 |           metadata.lookup("groups", 0, "name") ||
253 |           metadata.lookup("groups", 0)
254 |       end
255 | 
256 |       # Spatial coverage of the dataset
257 |       #
258 |       # @see Dataset#spatial
259 |       def spatial
260 |         extract_spatial || extract_bbox
261 |       end
262 | 
263 |       def base_uri
264 |         DataKitten::PublishingFormats::CKAN.get_base(uri)
265 |       end
266 | 
267 |       private
268 | 
269 |       def without_empty_values(h)
270 |         h.reject { |k, v| v.nil? || v.empty? }
271 |       end
272 | 
273 |       def select_extras(group, key)
274 |         extra = begin
275 |                   group["extras"][key]
276 |                 rescue
277 |                   ""
278 |                 end
279 |         if extra == ""
280 |           extra = begin
281 |                     group["result"]["extras"].find { |e| e["key"] == key }["value"]
282 |                   rescue
283 |                     ""
284 |                   end
285 |         end
286 |         extra
287 |       end
288 | 
289 |       def extract_spatial
290 |         geometry = JSON.parse metadata.lookup("extras", "spatial")
291 |         return geometry unless geometry["type"].nil?
292 |       rescue
293 |         nil
294 |       end
295 | 
296 |       def extract_bbox
297 |         west = Float(metadata.lookup("extras", "bbox-west-long"))
298 |         east = Float(metadata.lookup("extras", "bbox-east-long"))
299 |         north = Float(metadata.lookup("extras", "bbox-north-lat"))
300 |         south = Float(metadata.lookup("extras", "bbox-south-lat"))
301 | 
302 |         {"type" => "Polygon", "coordinates" => [
303 |           [
304 |             [west, north],
305 |             [east, north],
306 |             [east, south],
307 |             [west, south],
308 |             [west, north]
309 |           ]
310 |         ]}
311 |       rescue
312 |         nil
313 |       end
314 | 
315 |       def fetch_organization
316 |         if (org = metadata["organization"])
317 |           begin
318 |             uri = base_uri.merge("api/3/action/organization_show")
319 |             result = RestClient.get(uri.to_s, params: {id: org["id"]})
320 |             org_data = JSON.parse(result)["result"]
321 |             extras = CKAN3Hash.new(without_empty_values(org_data["extras"]), "key", "value")
322 |           rescue
323 |             uri = base_uri.merge("api/rest/group/#{org["id"]}")
324 |             result = RestClient.get(uri.to_s)
325 |             org_data = JSON.parse(result)
326 |             extras = without_empty_values(org_data["extras"])
327 |           end
328 |           Agent.new(
329 |             name: org_data["title"],
330 |             mbox: (org_data["contact-email"] || extras["contact-email"]),
331 |             homepage: extras["website-url"] || base_uri.to_s
332 |           )
333 |         end
334 |       rescue
335 |         nil
336 |       end
337 | 
338 |       def fetch_publisher(id)
339 |         [
340 |           "api/3/action/organization_show?id=#{id}",
341 |           "api/3/action/group_show?id=#{id}",
342 |           "api/rest/group/#{id}"
343 |         ].each do |uri|
344 |           begin
345 |             @group = JSON.parse RestClient.get base_uri.merge(uri).to_s
346 |             break
347 |           rescue
348 |             # FakeWeb raises FakeWeb::NetConnectNotAllowedError, whereas
349 |             # RestClient raises RestClient::ResourceNotFound in the "real world".
350 |             nil
351 |           end
352 |         end
353 | 
354 |         if @group
355 |           Agent.new(name: @group["display_name"] || @group["result"]["title"],
356 |                     homepage: select_extras(@group, "website-url"),
357 |                     mbox: select_extras(@group, "contact-email"))
358 |         end
359 |       end
360 | 
361 |       def parsed_uri
362 |         URI(uri)
363 |       end
364 | 
365 |       def extract_agent(name_field, email_field)
366 |         name = metadata.lookup(name_field)
367 |         email = metadata.lookup(email_field)
368 |         if [name, email].any?
369 |           [Agent.new(name: name, mbox: email)]
370 |         else
371 |           []
372 |         end
373 |       end
374 |     end
375 |   end
376 | end
377 | 


--------------------------------------------------------------------------------
/lib/data_kitten/publishing_formats/datapackage.rb:
--------------------------------------------------------------------------------
  1 | module DataKitten
  2 |   module PublishingFormats
  3 |     # Datapackage metadata format module. Automatically mixed into {Dataset} for datasets that include a +datapackage.json+.
  4 |     #
  5 |     # @see Dataset
  6 |     #
  7 |     module Datapackage
  8 |       def self.supported?(instance)
  9 |         if instance.send(:origin) == :git
 10 |           metadata = instance.send(:load_file, "datapackage.json")
 11 |           datapackage = DataPackage::Package.new(JSON.parse(metadata))
 12 | 
 13 |         else
 14 |           datapackage = DataPackage::Package.new(instance.url)
 15 |         end
 16 |         !datapackage.datapackage_version.nil?
 17 |       rescue => _e
 18 |         false
 19 |       end
 20 | 
 21 |       # The publishing format for the dataset.
 22 |       # @return [Symbol] +:datapackage+
 23 |       # @see Dataset#publishing_format
 24 |       def publishing_format
 25 |         :datapackage
 26 |       end
 27 | 
 28 |       # A list of maintainers.
 29 |       #
 30 |       # @see Dataset#maintainers
 31 |       def maintainers
 32 |         package.maintainers.map do |x|
 33 |           Agent.new(name: x["name"], uri: x["web"], email: x["email"])
 34 |         end
 35 |       end
 36 | 
 37 |       # A list of publishers.
 38 |       #
 39 |       # @see Dataset#publishers
 40 |       def publishers
 41 |         package.publisher.map do |x|
 42 |           Agent.new(name: x["name"], uri: x["web"], email: x["email"])
 43 |         end
 44 |       end
 45 | 
 46 |       # A list of licenses.
 47 |       #
 48 |       # @see Dataset#licenses
 49 |       def licenses
 50 |         package.licenses.map do |x|
 51 |           License.new(id: x["id"], uri: x["url"], name: x["name"])
 52 |         end
 53 |       end
 54 | 
 55 |       def rights
 56 |         if package.property("rights")
 57 |           Rights.new(package.property("rights", []).each_with_object({}) { |(k, v), h| h[k.to_sym] = v })
 58 |         end
 59 |       end
 60 | 
 61 |       # A list of contributors.
 62 |       #
 63 |       # @see Dataset#contributors
 64 |       def contributors
 65 |         package.contributors.map do |x|
 66 |           Agent.new(name: x["name"], uri: x["web"], email: x["email"])
 67 |         end
 68 |       end
 69 | 
 70 |       # A list of distributions, referred to as +resources+ by Datapackage.
 71 |       #
 72 |       # @see Dataset#distributions
 73 |       def distributions
 74 |         package.resources.map { |resource| Distribution.new(self, datapackage_resource: resource) }
 75 |       end
 76 | 
 77 |       # The human-readable title of the dataset.
 78 |       #
 79 |       # @see Dataset#data_title
 80 |       def data_title
 81 |         package.title || package.name
 82 |       end
 83 | 
 84 |       # A brief description of the dataset
 85 |       #
 86 |       # @see Dataset#description
 87 |       def description
 88 |         package.description
 89 |       end
 90 | 
 91 |       # Keywords for the dataset
 92 |       #
 93 |       # @see Dataset#keywords
 94 |       def keywords
 95 |         package.keywords
 96 |       end
 97 | 
 98 |       # Where the data is sourced from
 99 |       #
100 |       # @see Dataset#sources
101 |       def sources
102 |         package.sources.map do |x|
103 |           Source.new(label: x["name"], resource: x["web"])
104 |         end
105 |       end
106 | 
107 |       # Date the dataset was modified
108 |       def modified
109 |         package.last_modified
110 |       end
111 | 
112 |       # A history of changes to the Dataset.
113 |       #
114 |       # If {Dataset#source} is +:git+, this is the git changelog for the actual distribution files, rather
115 |       # then the full unfiltered log.
116 |       #
117 |       # @return [Array] An array of changes. Exact format depends on the source.
118 |       #
119 |       # @see Dataset#change_history
120 |       def change_history
121 |         @change_history ||= begin
122 |           if origin == :git
123 |             # Get a log for each file in the local repo
124 |             logs = distributions.map { |file|
125 |               if file.path
126 |                 log = repository.log.path(file.path)
127 |                 # Convert to list of commits
128 |                 log.map { |commit| commit }
129 |               else
130 |                 []
131 |               end
132 |             }
133 |             # combine all logs, make unique, and re-sort in date order
134 |             logs.flatten.uniq.sort_by { |x| x.committer.date }.reverse
135 |           else
136 |             []
137 |           end
138 |         end
139 |       end
140 | 
141 |       private
142 | 
143 |       def package
144 |         unless @datapackage
145 |           if origin == :git
146 |             metadata = load_file("datapackage.json")
147 |             @datapackage = DataPackage::Package.new(JSON.parse(metadata))
148 |           else
149 |             @datapackage = DataPackage::Package.new(url)
150 |           end
151 |         end
152 |         @datapackage
153 |       end
154 |     end
155 |   end
156 | end
157 | 


--------------------------------------------------------------------------------
/lib/data_kitten/publishing_formats/linked_data.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   module PublishingFormats
 3 |     module LinkedData
 4 |       ACCEPT_HEADER = "text/turtle, application/n-triples, application/ld+json; q=1.0,application/rdf+xml; q=0.8, */*; q=0.5"
 5 | 
 6 |       include RDFa
 7 | 
 8 |       # Find first resource with one of the specified RDF types
 9 |       def self.first_of_type(graph, classes)
10 |         term = nil
11 |         classes.each do |clazz|
12 |           term = graph.first_subject(
13 |             RDF::Query::Pattern.new(nil, RDF.type, clazz)
14 |           )
15 |           break if term
16 |         end
17 |         term
18 |       end
19 | 
20 |       # Attempt to create an RDF graph for this object
21 |       #
22 |       # Supports content negotiation for various RDF serializations. Attempts "dataset autodiscovery" if it receives
23 |       # an HTML response. This leaves the RDFa Publishing Format to just parse RDFa responses
24 |       def self.create_graph(uri)
25 |         resp = RestClient.get uri,
26 |           accept: ACCEPT_HEADER
27 |         return false if resp.code != 200
28 | 
29 |         if /text\/html/.match?(resp.headers[:content_type])
30 |           doc = Nokogiri::HTML(resp.body)
31 |           link = doc.search("link[rel=alternate]").detect { |n| n[:type] == "application/rdf+xml" }
32 |           if link
33 |             resp = RestClient.get link["href"],
34 |               accept: ACCEPT_HEADER
35 |             return false if resp.code != 200
36 |           else
37 |             return false
38 |           end
39 |         end
40 | 
41 |         reader = RDF::Reader.for(content_type: resp.headers[:content_type])
42 | 
43 |         unless reader
44 |           extension = File.extname(uri).delete(".")
45 |           reader = RDF::Reader.for(file_extension: extension) if extension != ""
46 |         end
47 |         return false unless reader
48 | 
49 |         graph = RDF::Graph.new
50 |         graph << reader.new(StringIO.new(resp.body))
51 | 
52 |         graph
53 |       rescue
54 |         nil
55 |       end
56 | 
57 |       # Can we create an RDF graph for this object containing the description of a dataset?
58 |       def self.supported?(instance)
59 |         graph = create_graph(instance.url)
60 |         return false unless graph
61 |         return true if first_of_type(graph,
62 |           [RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset,
63 |             RDF::Vocabulary.new("http://rdfs.org/ns/void#").Dataset])
64 |         false
65 |       end
66 | 
67 |       # The publishing format for the dataset.
68 |       # @return [Symbol] +:rdfa+
69 |       # @see Dataset#publishing_format
70 |       def publishing_format
71 |         :rdf
72 |       end
73 | 
74 |       private
75 | 
76 |       def dataset_uri
77 |         url
78 |       end
79 | 
80 |       def graph
81 |         @graph ||= LinkedData.create_graph(dataset_uri)
82 |       end
83 |     end
84 |   end
85 | end
86 | 


--------------------------------------------------------------------------------
/lib/data_kitten/publishing_formats/rdfa.rb:
--------------------------------------------------------------------------------
  1 | module DataKitten
  2 |   module PublishingFormats
  3 |     module RDFa
  4 |       def self.supported?(instance)
  5 |         graph = RDF::Graph.load(instance.uri, format: :rdfa)
  6 | 
  7 |         query = RDF::Query.new({
  8 |           dataset: {
  9 |             RDF.type => RDF::Vocabulary.new("http://www.w3.org/ns/dcat#").Dataset
 10 |           }
 11 |         })
 12 | 
 13 |         query.execute(graph)[0][:dataset].to_s
 14 |       rescue
 15 |         false
 16 |       end
 17 | 
 18 |       # The publishing format for the dataset.
 19 |       # @return [Symbol] +:rdfa+
 20 |       # @see Dataset#publishing_format
 21 |       def publishing_format
 22 |         :rdfa
 23 |       end
 24 | 
 25 |       # A list of maintainers.
 26 |       #
 27 |       # @see Dataset#maintainers
 28 |       def maintainers
 29 |         []
 30 |       end
 31 | 
 32 |       # A list of publishers.
 33 |       #
 34 |       # @see Dataset#publishers
 35 |       def publishers
 36 |         publishers = []
 37 |         uris = metadata[dataset_uri][RDF::Vocab::DC.publisher.to_s]
 38 |         uris.each do |publisher_uri|
 39 |           publishers << Agent.new(name: first_value(publisher_uri, RDF::Vocab::FOAF.name),
 40 |                                   homepage: first_value(publisher_uri, RDF::Vocab::FOAF.homepage),
 41 |                                   mbox: first_value(publisher_uri, RDF::Vocab::FOAF.mbox))
 42 |         end
 43 |         publishers
 44 |       rescue
 45 |         []
 46 |       end
 47 | 
 48 |       # The rights statment for the data
 49 |       #
 50 |       # @see Dataset#rights
 51 |       def rights
 52 |         rights_uri = metadata[dataset_uri][RDF::Vocab::DC.rights.to_s][0]
 53 |         if !metadata[rights_uri]
 54 |           Rights.new(uri: rights_uri)
 55 |         else
 56 |           Rights.new(uri: uri,
 57 |                      dataLicense: first_value(rights_uri, odrs.dataLicense),
 58 |                      contentLicense: first_value(rights_uri, odrs.contentLicense),
 59 |                      copyrightNotice: first_value(rights_uri, odrs.copyrightNotice),
 60 |                      attributionURL: first_value(rights_uri, odrs.attributionURL),
 61 |                      attributionText: first_value(rights_uri, odrs.attributionText),
 62 |                      copyrightHolder: first_value(rights_uri, odrs.copyrightHolder),
 63 |                      databaseRightHolder: first_value(rights_uri, odrs.databaseRightHolder),
 64 |                      copyrightYear: first_value(rights_uri, odrs.copyrightYear),
 65 |                      databaseRightYear: first_value(rights_uri, odrs.databaseRightYear),
 66 |                      copyrightStatement: first_value(rights_uri, odrs.copyrightStatement),
 67 |                      databaseRightStatement: first_value(rights_uri, odrs.databaseRightStatement))
 68 |         end
 69 |       rescue => _e
 70 |         # puts e
 71 |         # puts e.backtrace
 72 |         nil
 73 |       end
 74 | 
 75 |       # A list of licenses.
 76 |       #
 77 |       # @see Dataset#licenses
 78 |       def licenses
 79 |         licenses = []
 80 |         uris = metadata[dataset_uri][RDF::Vocab::DC.license.to_s]
 81 |         if uris.nil?
 82 |           []
 83 |         else
 84 |           uris.each do |license_uri|
 85 |             licenses << License.new(uri: license_uri, name: first_value(license_uri, RDF::Vocab::DC.title))
 86 |           end
 87 |           licenses
 88 |         end
 89 |       rescue => _e
 90 |         []
 91 |       end
 92 | 
 93 |       # A list of contributors.
 94 |       #
 95 |       # @see Dataset#contributors
 96 |       def contributors
 97 |         []
 98 |       end
 99 | 
100 |       # A list of distributions, referred to as +resources+ by Datapackage.
101 |       #
102 |       # @see Dataset#distributions
103 |       def distributions
104 |         distributions = []
105 |         uris = metadata[dataset_uri][dcat.distribution.to_s]
106 |         uris.each do |distribution_uri|
107 |           distribution = {
108 |             title: first_value(distribution_uri, RDF::Vocab::DC.title),
109 |             accessURL: first_value(distribution_uri, dcat.accessURL)
110 |           }
111 |           distributions << Distribution.new(self, dcat_resource: distribution)
112 |         end
113 |         distributions
114 |       rescue
115 |         []
116 |       end
117 | 
118 |       # The human-readable title of the dataset.
119 |       #
120 |       # @see Dataset#data_title
121 |       def data_title
122 |         metadata[dataset_uri][dct.title.to_s][0]
123 |       rescue
124 |         nil
125 |       end
126 | 
127 |       # A brief description of the dataset
128 |       #
129 |       # @see Dataset#description
130 |       def description
131 |         metadata[dataset_uri][dct.description.to_s][0]
132 |       rescue
133 |         nil
134 |       end
135 | 
136 |       # Keywords for the dataset
137 |       #
138 |       # @see Dataset#keywords
139 |       def keywords
140 |         keywords = []
141 |         metadata[dataset_uri][dcat.keyword.to_s].each do |k|
142 |           keywords << k
143 |         end
144 |       rescue
145 |         []
146 |       end
147 | 
148 |       # Where the data is sourced from
149 |       #
150 |       # @see Dataset#sources
151 |       def sources
152 |         []
153 |       end
154 | 
155 |       # How frequently the data is updated.
156 |       #
157 |       # @see Dataset#update_frequency
158 |       def update_frequency
159 |         first_value(dataset_uri, dcat.accrualPeriodicity)
160 |       end
161 | 
162 |       def issued
163 |         date = first_value(dataset_uri, RDF::Vocab::DC.issued) ||
164 |           first_value(dataset_uri, RDF::Vocab::DC.created)
165 |         if date
166 |           return Date.parse(date)
167 |         end
168 |         nil
169 |       end
170 | 
171 |       def modified
172 |         date = first_value(dataset_uri, RDF::Vocab::DC.modified)
173 |         if date
174 |           return Date.parse(date)
175 |         end
176 |         nil
177 |       end
178 | 
179 |       private
180 | 
181 |       def graph
182 |         @graph ||= RDF::Graph.load(uri, format: :rdfa)
183 |       end
184 | 
185 |       def first_value(resource, property, default = nil)
186 |         if metadata[resource] && metadata[resource][property.to_s]
187 |           return metadata[resource][property.to_s][0]
188 |         end
189 |         default
190 |       end
191 | 
192 |       def metadata
193 |         @metadata ||= {}
194 | 
195 |         # This is UGLY, and exists solely to make getting data out of the graph easier. We will probably change this later
196 |         graph.triples.each do |triple|
197 |           @metadata[triple[0].to_s] ||= {}
198 |           @metadata[triple[0].to_s][triple[1].to_s] ||= []
199 |           @metadata[triple[0].to_s][triple[1].to_s] << triple[2].to_s unless @metadata[triple[0].to_s][triple[1].to_s].include? triple[2].to_s
200 |         end
201 | 
202 |         @metadata
203 |       end
204 | 
205 |       def dataset_uri
206 |         query = RDF::Query.new({
207 |           dataset: {
208 |             RDF.type => dcat.Dataset
209 |           }
210 |         })
211 | 
212 |         query.execute(graph)[0][:dataset].to_s
213 |       end
214 | 
215 |       def dcat
216 |         RDF::Vocabulary.new("http://www.w3.org/ns/dcat#")
217 |       end
218 | 
219 |       def dct
220 |         RDF::Vocabulary.new("http://purl.org/dc/terms/")
221 |       end
222 | 
223 |       def odrs
224 |         RDF::Vocabulary.new("http://schema.theodi.org/odrs#")
225 |       end
226 | 
227 |       def void
228 |         RDF::Vocabulary.new("http://rdfs.org/ns/void#")
229 |       end
230 |     end
231 |   end
232 | end
233 | 


--------------------------------------------------------------------------------
/lib/data_kitten/rights.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   # A rights statement for a {Dataset} or {Distribution}
 3 |   #
 4 |   class Rights
 5 |     # @!attribute uri
 6 |     #   @return [String] the URI for the rights statement
 7 |     attr_accessor :uri
 8 | 
 9 |     # @!attribute data_license
10 |     #   @return [String] the license for the data in the dataset.
11 |     attr_accessor :data_license
12 |     alias dataLicense data_license
13 | 
14 |     # @!attribute content_license
15 |     #   @return [String] the license for the content in the dataset.
16 |     attr_accessor :content_license
17 |     alias contentLicense content_license
18 | 
19 |     # @!attribute copyrightNotice
20 |     #   @return [String] the copyright notice for the dataset.
21 |     attr_accessor :copyright_notice
22 |     alias copyrightNotice copyright_notice
23 | 
24 |     # @!attribute attribution_url
25 |     #   @return [String] the attribution URL for the dataset.
26 |     attr_accessor :attribution_url
27 |     alias attributionURL attribution_url
28 | 
29 |     # @!attribute attribution_text
30 |     #   @return [String] the attribution text for the dataset.
31 |     attr_accessor :attribution_text
32 |     alias attributionText attribution_text
33 | 
34 |     # @!attribute copyright_holder
35 |     #   @return [String] the URI of the organization that holds copyright for this dataset
36 |     attr_accessor :copyright_holder
37 |     alias copyrightHolder copyright_holder
38 | 
39 |     # @!attribute database_right_holder
40 |     #   @return [String] the URI of the organization that owns the database rights for this dataset
41 |     attr_accessor :database_right_holder
42 |     alias databaseRightHolder database_right_holder
43 | 
44 |     # @!attribute copyright_year
45 |     #   @return [String] the year in which copyright is claimed
46 |     attr_accessor :copyright_year
47 |     alias copyrightYear copyright_year
48 | 
49 |     # @!attribute database_right_year
50 |     #   @return [String] the year in which copyright is claimed
51 |     attr_accessor :database_right_year
52 |     alias databaseRightYear database_right_year
53 | 
54 |     # @!attribute copyright_statement
55 |     #   @return [String] the URL of a copyright statement for the dataset
56 |     attr_accessor :copyright_statement
57 |     alias copyrightStatement copyright_statement
58 | 
59 |     # @!attribute database_right_statement
60 |     #   @return [String] the URL of a database right statement for the dataset
61 |     attr_accessor :database_right_statement
62 |     alias databaseRightStatement database_right_statement
63 | 
64 |     # Create a new Rights object.
65 |     #
66 |     # @param options [Hash] A set of options with which to initialise the rights.
67 |     # @option options [String] :dataLicense the license for the data in the dataset
68 |     # @option options [String] :contentLicense the license for the content in the dataset
69 |     # @option options [String] :copyrightNotice the copyright notice for the dataset
70 |     # @option options [String] :attributionURL the attribution URL for the dataset
71 |     # @option options [String] :attributionText the attribution text for the dataset
72 |     def initialize(options)
73 |       @uri = options[:uri]
74 |       @data_license = options[:dataLicense]
75 |       @content_license = options[:contentLicense]
76 |       @copyright_notice = options[:copyrightNotice]
77 |       @attribution_url = options[:attributionURL]
78 |       @attribution_text = options[:attributionText]
79 |       @copyright_holder = options[:copyrightHolder]
80 |       @database_right_holder = options[:databaseRightHolder]
81 |       @copyright_year = options[:copyrightYear]
82 |       @database_right_year = options[:databaseRightYear]
83 |       @copyright_statement = options[:copyrightStatement]
84 |       @database_right_statement = options[:databaseRightStatement]
85 |     end
86 |   end
87 | end
88 | 


--------------------------------------------------------------------------------
/lib/data_kitten/source.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   # Where the data has been sourced from
 3 |   # Follows the pattern of {http://purl.org/dc/terms/source} with a {http://www.w3.org/2000/01/rdf-schema#label} and a {http://www.w3.org/1999/02/22-rdf-syntax-ns#resource}, and with useful aliases for other vocabularies
 4 | 
 5 |   class Source
 6 |     # Create a new Source
 7 |     #
 8 |     # @param [Hash] options the details of the Source.
 9 |     # @option options [String] :label The name of the Source
10 |     # @option options [String] :resource The URI of the Source
11 |     #
12 |     def initialize(options)
13 |       @label = options[:label]
14 |       @resource = options[:resource]
15 |     end
16 | 
17 |     # @!attribute label
18 |     #   @return [String] the name of the Source
19 |     attr_accessor :label
20 |     alias name label
21 | 
22 |     # @!attribute label
23 |     #   @return [String] the URI of the Source
24 |     attr_accessor :resource
25 |     alias web resource
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/lib/data_kitten/temporal.rb:
--------------------------------------------------------------------------------
 1 | module DataKitten
 2 |   # The temporal coverage of a {Dataset} or {Distribution}
 3 |   #
 4 |   class Temporal
 5 |     # @!attribute start
 6 |     #   @return [Date] the start date of the temporal coverage
 7 |     attr_accessor :start
 8 | 
 9 |     # @!attribute end
10 |     #   @return [Date] the end date of the temporal coverage
11 |     attr_accessor :end
12 | 
13 |     # Create a new Temporal object.
14 |     #
15 |     # @param options [Hash] A set of options with which to initialise the temporal coverage.
16 |     # @option options [Date] :start the start date of the temporal coverage
17 |     # @option options [Date] :end the end date of the temporal coverage
18 |     def initialize(options)
19 |       @start = options[:start]
20 |       @end = options[:end]
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/data_kitten/utils/ckan3_hash.rb:
--------------------------------------------------------------------------------
1 | class CKAN3Hash < Hash
2 |   def initialize(list, key_name, value_name)
3 |     super()
4 |     (list || []).each do |item|
5 |       self[item[key_name]] = item[value_name]
6 |     end
7 |   end
8 | end
9 | 


--------------------------------------------------------------------------------
/lib/data_kitten/utils/guessable_lookup.rb:
--------------------------------------------------------------------------------
 1 | module GuessableLookup
 2 |   def lookup(*path)
 3 |     data = self
 4 |     path.each { |key| data = guess_key(data, key) }
 5 |     data
 6 |   rescue
 7 |     nil
 8 |   end
 9 | 
10 |   private
11 | 
12 |   # Guesses which key you want from a hash and returns the value of it.
13 |   #
14 |   # It returns the value of the original key if it exists in the hash, otherwise
15 |   # tries to find a similar key, and if it fails it returns nil.
16 |   # Similar keys are ones which use '_', '-' or '' as word separators & are
17 |   # case-insensitive.
18 |   #
19 |   # @example
20 |   #   guess_key({:a_key => true}, 'a_key')  # => true
21 |   #   guess_key({:aKey => true}, 'a_key')   # => true
22 |   #   guess_key({"a-KEY" => true}, 'a_key') # => true
23 |   #
24 |   # @param data [Hash]
25 |   # @param key [String] The desired key
26 |   # @return The value of the guessed key
27 |   #
28 |   def guess_key(data, key)
29 |     return data[key] if key.is_a?(Integer) || data.key?(key)
30 |     like_key = key.gsub(/[_\-]/, "[\_\-]?")
31 |     key = data.keys.find { |k| k =~ /^#{like_key}$/i }
32 |     data[key]
33 |   rescue
34 |     nil
35 |   end
36 | end
37 | 


--------------------------------------------------------------------------------
/lib/data_kitten/version.rb:
--------------------------------------------------------------------------------
1 | module DataKitten
2 |   VERSION = "1.3.4" # version string exposed as DataKitten::VERSION
3 | end
4 | 


--------------------------------------------------------------------------------
/spec/ckan3_hash_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe CKAN3Hash do
 4 |   subject(:hash) { CKAN3Hash.new(data, "name", "display_name") }
 5 | 
 6 |   it "has keys based on provided data" do
 7 |     keys = %w[transportation planned_roadworks highways_agency roadworks]
 8 |     expect(hash.keys).to contain_exactly(*keys)
 9 |   end
10 | 
11 |   it "returns the value_key value" do
12 |     expect(hash["transportation"]).to eq "Transportation"
13 |     expect(hash["highways_agency"]).to eq "Highways Agency"
14 |     expect(hash["roadworks"]).to eq "Roadworks"
15 |     expect(hash["planned_roadworks"]).to eq "Planned Roadworks"
16 |   end
17 | 
18 |   it "returns nil for unknown key" do
19 |     expect(hash["mystery"]).to be_nil
20 |   end
21 | 
22 |   it "can be constructed with nil data" do
23 |     expect { CKAN3Hash.new(nil, "key", "value") }.to_not raise_error
24 |   end
25 | 
26 |   it "maps values" do
27 |     values = ["Transportation", "Highways Agency", "Roadworks", "Planned Roadworks"]
28 |     expect(hash.values).to contain_exactly(*values)
29 |   end
30 | 
31 |   let(:data) do
32 |     [
33 |       {
34 |         "vocabulary_id" => nil,
35 |         "display_name" => "Transportation",
36 |         "name" => "transportation",
37 |         "revision_timestamp" => "2012-06-29T10:29:59.119372",
38 |         "state" => "active",
39 |         "id" => "423aad62-c714-45b6-9f9b-1b8fe4933ae1"
40 |       },
41 |       {
42 |         "vocabulary_id" => nil,
43 |         "display_name" => "Highways Agency",
44 |         "name" => "highways_agency",
45 |         "revision_timestamp" => "2011-10-25T15:58:51.324189",
46 |         "state" => "active",
47 |         "id" => "37c942e9-a9a7-4409-a46b-7b941e1591dc"
48 |       },
49 |       {
50 |         "vocabulary_id" => nil,
51 |         "display_name" => "Planned Roadworks",
52 |         "name" => "planned_roadworks",
53 |         "revision_timestamp" => "2011-10-25T15:58:51.324189",
54 |         "state" => "active",
55 |         "id" => "97b6c453-1ba1-416d-be24-1f484a1e80e0"
56 |       },
57 |       {
58 |         "vocabulary_id" => nil,
59 |         "display_name" => "Roadworks",
60 |         "name" => "roadworks",
61 |         "revision_timestamp" => "2011-10-25T15:58:51.324189",
62 |         "state" => "active",
63 |         "id" => "7fa99a87-10e8-41fe-b02b-f518cb8da1ed"
64 |       }
65 |     ]
66 |   end
67 | end
68 | 


--------------------------------------------------------------------------------
/spec/ckan_fakeweb.rb:
--------------------------------------------------------------------------------
  1 | module CKANFakeweb
  2 |   module_function
  3 | 
  4 |   def register_defence_dataset(base = "http://example.org/")
  5 |     data = {
  6 |       body: load_fixture("ckan/rest-dataset-defence.json"),
  7 |       content_type: "application/json"
  8 |     }
  9 |     register_urls(URI(base), {
 10 |       "dataset/defence" => {
 11 |         body: "",
 12 |         content_type: "text/html"
 13 |       },
 14 |       "api/3/action/package_show?id=defence" => {
 15 |         body: "",
 16 |         content_type: "application/json"
 17 |       },
 18 |       "api/2/rest/dataset/defence" => data,
 19 |       "api/2/search/dataset?q=defence" => data,
 20 |       "api/rest/package/47f7438a-506d-49c9-b565-7573f8df031e" => data,
 21 |       "api/rest/package/defence" => data,
 22 |       "api/rest/group/a3969e37-3ac3-42fe-8317-c8575a9f5317" => {
 23 |         body: load_fixture("ckan/rest-organization-defence.json"),
 24 |         content_type: "application/json"
 25 |       }
 26 |     })
 27 |   end
 28 | 
 29 |   def register_toilets_dataset
 30 |     html = {
 31 |       body: "",
 32 |       content_type: "text/html"
 33 |     }
 34 |     data = {
 35 |       body: load_fixture("ckan/rest-dataset-toilets.json"),
 36 |       content_type: "application/json"
 37 |     }
 38 |     register_urls(URI("http://example.org/"), {
 39 |       "/dataset/toilets" => html,
 40 |       "/dataset/62766308-cb4f-4275-b4a4-937f52a978c5" => html,
 41 |       "/api/3/action/package_show?id=toilets" => {
 42 |         body: load_fixture("ckan/package_show-toilets.json"),
 43 |         content_type: "application/json"
 44 |       },
 45 |       "/api/2/rest/dataset/toilets" => data,
 46 |       "/api/2/search/dataset?q=toilets" => data,
 47 |       "/api/rest/package/553b3049-2b8b-46a2-95e6-640d7986a8c1" => data,
 48 |       "/api/rest/package/62766308-cb4f-4275-b4a4-937f52a978c5" => data,
 49 |       "/api/rest/package/toilets" => data,
 50 |       "/api/rest/group/2df7090e-2ebb-416e-8994-6de43d820d5c" => {
 51 |         body: load_fixture("ckan/rest-organization-health.json"),
 52 |         content_type: "application/json"
 53 |       }
 54 |     })
 55 |   end
 56 | 
 57 |   def register_cadastral_dataset
 58 |     register_urls(URI("http://example.org/"), {
 59 |       "/api/rest/package/65493c4b-46d5-4125-b7d4-fc1df2b33349" => {
 60 |         body: load_fixture("ckan/rest-dataset-cadastral.json"),
 61 |         content_type: "application/json"
 62 |       },
 63 |       "/api/3/action/organization_show?id=cd937140-1310-4e2a-b211-5de8bebd910d" => {
 64 |         body: load_fixture("ckan/organization_show-ni-spatial.json"),
 65 |         content_type: "application/json"
 66 |       }
 67 |     })
 68 |   end
 69 | 
 70 |   def register_pollinator_dataset
 71 |     register_urls(URI("http://example.org/"), {
 72 |       "/api/rest/package/10d394fd-88b9-489f-9552-b7b567f927e2" => {
 73 |         body: load_fixture("ckan/rest-dataset-pollinator.json"),
 74 |         content_type: "application/json"
 75 |       },
 76 |       "/api/3/action/organization_show?id=866f4088-ae4f-43b8-ba8c-6d3141a327f2" => {
 77 |         body: load_fixture("ckan/organization_show-ecology.json"),
 78 |         content_type: "application/json"
 79 |       }
 80 |     })
 81 |   end
 82 | 
 83 |   def register_frozen_animals_dataset(base = "http://example.org/")
 84 |     register_urls(URI(base), {
 85 |       "api/3/action/package_show?id=frozen-animals" => {
 86 |         body: load_fixture("ckan/package-show-frozen-animals.json"),
 87 |         content_type: "application/json"
 88 |       },
 89 |       "api/3/action/organization_show?id=e70862ec-8167-48e6-a27c-a0e9db1ebc87" => {
 90 |         body: load_fixture("ckan/organization-show-peterborough.json"),
 91 |         content_type: "application/json"
 92 |       }
 93 |     })
 94 |   end
 95 | 
 96 |   def register_dataset(base_uri, name, fixture)
 97 |     data = {
 98 |       body: fixture, content_type: "application/json"
 99 |     }
100 |     {
101 |       "dataset/#{name}" => {
102 |         body: "",
103 |         content_type: "text/html"
104 |       },
105 |       "api/3/action/package_show?id=#{name}" => {
106 |         body: "",
107 |         content_type: "application/json"
108 |       },
109 |       "api/2/rest/dataset/#{name}" => data,
110 |       "api/2/search/dataset?q=#{name}" => data,
111 |       "api/rest/package/#{name}" => data,
112 |       "api/rest/package/#{fixture["id"]}" => data
113 |     }.each do |path, options|
114 |       FakeWeb.register_uri(:get, (base_uri + path).to_s, options)
115 |     end
116 | 
117 |     (base_uri + "dataset/#{name}").to_s
118 |   end
119 | 
120 |   def register_urls(base_uri, urls)
121 |     urls.each do |path, options|
122 |       FakeWeb.register_uri(:get, base_uri + path, options)
123 |     end
124 |   end
125 | end
126 | 


--------------------------------------------------------------------------------
/spec/dataset_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | require "ckan_fakeweb"
 3 | 
 4 | describe DataKitten::Dataset do
 5 |   before :each do
 6 |     FakeWeb.clean_registry
 7 |   end
 8 | 
 9 |   describe "constructing a dataset" do
10 |     before { CKANFakeweb.register_defence_dataset }
11 |     let(:url) { "http://example.org/dataset/defence" }
12 |     let(:base) { "http://example.org/" }
13 | 
14 |     it "accepts access_url symbol option" do
15 |       dataset = DataKitten::Dataset.new(access_url: url)
16 |       expect(dataset.publishing_format).to eql(:ckan)
17 |     end
18 | 
19 |     it "accepts url option" do
20 |       dataset = DataKitten::Dataset.new(url)
21 |       expect(dataset.publishing_format).to eql(:ckan)
22 |     end
23 | 
24 |     it "finds default base_uri" do
25 |       dataset = DataKitten::Dataset.new(url)
26 |       expect(dataset.base_uri).to eql(URI("http://example.org/"))
27 |     end
28 | 
29 |     it "accepts a url and base url" do
30 |       dataset = DataKitten::Dataset.new(url, base)
31 |       expect(dataset.uri).to eql(URI(url))
32 |       expect(dataset.base_uri).to eql(URI(base))
33 |     end
34 | 
35 |     it "accepts access_url and base_url options" do
36 |       dataset = DataKitten::Dataset.new(access_url: url, base_url: base)
37 |       expect(dataset.uri).to eql(URI(url))
38 |       expect(dataset.base_uri).to eql(URI(base))
39 |     end
40 |   end
41 | 
42 |   describe "with a supported format" do
43 |     it "returns the original source" do
44 |       datapackage = load_fixture("datapackage.json")
45 |       FakeWeb.register_uri(:get, "http://example.org/datapackage.json", body: datapackage, content_type: "application/json")
46 |       dataset = DataKitten::Dataset.new("http://example.org/datapackage.json")
47 |       source = JSON.parse(datapackage)
48 |       expect(dataset.source).to eql(source)
49 |     end
50 | 
51 |     it "returns the ckan api source after lookup" do
52 |       CKANFakeweb.register_defence_dataset
53 |       data = JSON.parse(load_fixture("ckan/rest-dataset-defence.json"))
54 |       dataset = DataKitten::Dataset.new("http://example.org/dataset/defence")
55 |       expect(dataset.source).to eql(data)
56 |     end
57 |   end
58 | 
59 |   describe "with an unsupported format" do
60 |     before do
61 |       FakeWeb.register_uri(:get, "http://example.org/something.html", body: "", content_type: "text/html")
62 |       @dataset = DataKitten::Dataset.new("http://example.org/something.html")
63 |     end
64 | 
65 |     it "returns nil" do
66 |       expect(@dataset.source).to be_nil
67 |     end
68 |   end
69 | 
70 |   describe "when resource does not exist" do
71 |     before do
72 |       FakeWeb.register_uri(:get, "http://example.org/something.html", body: "Not found", status: [404, "Not found"])
73 |       @dataset = DataKitten::Dataset.new("http://example.org/something.html")
74 |     end
75 | 
76 |     it "returns nil" do
77 |       expect(@dataset.source).to be_nil
78 |     end
79 |   end
80 | end
81 | 


--------------------------------------------------------------------------------
/spec/distribution_format_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::DistributionFormat do
 4 |   def distribution(extension)
 5 |     DataKitten::Distribution.new(nil,
 6 |       ckan_resource: {
 7 |         format: extension
 8 |       })
 9 |   end
10 | 
11 |   describe "#structured?" do
12 |     %w[csv xls xlsx rdf xml wms ods rdfa kml rss json ical sparql kml georss geojson shp].each do |ext|
13 |       it "considers #{ext} structured" do
14 |         expect(distribution(ext).format).to be_structured
15 |       end
16 |     end
17 | 
18 |     %w[html doc pdf unknown].each do |ext|
19 |       it "considers #{ext} not structured" do
20 |         expect(distribution(ext).format).to_not be_structured
21 |       end
22 |     end
23 |   end
24 | 
25 |   describe "#open?" do
26 |     %w[csv xlsx rdf xml wms ods rdfa kml rss json ical sparql kml georss geojson shp html pdf].each do |ext|
27 |       it "considers #{ext} open" do
28 |         expect(distribution(ext).format).to be_open
29 |       end
30 |     end
31 | 
32 |     %w[doc unknown].each do |ext|
33 |       it "considers #{ext} not open" do
34 |         expect(distribution(ext).format).to_not be_open
35 |       end
36 |     end
37 |   end
38 | end
39 | 


--------------------------------------------------------------------------------
/spec/distribution_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | require "ckan_fakeweb"
 3 | 
 4 | describe DataKitten::Distribution do
 5 |   before(:each) do
 6 |     FakeWeb.clean_registry
 7 |     CKANFakeweb.register_defence_dataset
 8 |   end
 9 | 
10 |   let(:dataset) do
11 |     DataKitten::Dataset.new("http://example.org/dataset/defence")
12 |   end
13 | 
14 |   subject(:distribution) { dataset.distributions[0] }
15 | 
16 |   it { expect(distribution).to_not be_nil }
17 | 
18 |   describe "exists?" do
19 |     it "exists when available" do
20 |       FakeWeb.register_uri(:head, "https://www.gov.uk/government/publications/disposal-database-house-of-commons-report", body: "hi")
21 | 
22 |       expect(distribution).to be_exists
23 |     end
24 | 
25 |     it "does not exist when missing" do
26 |       FakeWeb.register_uri(:head, "https://www.gov.uk/government/publications/disposal-database-house-of-commons-report", status: 404)
27 | 
28 |       expect(distribution).to_not be_exists
29 |     end
30 |   end
31 | 
32 |   describe "data" do
33 |     it "fetches csv data" do
34 |       csv = CSV.generate { |c|
35 |         c << %w[one two three]
36 |         c << %w[1 2 3]
37 |       }
38 |       FakeWeb.register_uri(:get, "https://www.gov.uk/government/publications/disposal-database-house-of-commons-report", body: csv)
39 | 
40 |       expect(distribution.data).to eq(CSV.parse(csv, headers: true))
41 |     end
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/spec/fetcher_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::Fetcher do
 4 |   before(:each) do
 5 |     FakeWeb.clean_registry
 6 |   end
 7 | 
 8 |   describe "wrapping returns same instance" do
 9 |     before do
10 |       FakeWeb.register_uri(:get, "http://example.org/resource", body: "<p>text</p>", content_type: "text/html; encoding=utf-8")
11 |     end
12 |     subject(:resource) { described_class.new("http://example.org/resource") }
13 | 
14 |     it { should eq(DataKitten::Fetcher.wrap(resource)) }
15 |     it "should not request again" do
16 |       expect(resource).to be_ok
17 |       new_resource = DataKitten::Fetcher.wrap(resource)
18 |       FakeWeb.clean_registry
19 |       expect(new_resource).to be_ok
20 |     end
21 |   end
22 | 
23 |   it "follows redirects" do
24 |     FakeWeb.register_uri(:get, "http://example.org/resource", status: 301, location: "http://example.org/dataset")
25 |     FakeWeb.register_uri(:get, "http://example.org/dataset", body: "<p>text</p>")
26 | 
27 |     resource = DataKitten::Fetcher.wrap("http://example.org/resource")
28 |     expect(resource.body).to eq("<p>text</p>")
29 |   end
30 | 
31 |   describe "existence checks" do
32 |     subject(:resource) { described_class.new("http://example.org/dataset") }
33 | 
34 |     it "makes a head request by default" do
35 |       FakeWeb.register_uri(:head, "http://example.org/dataset", body: "<p>text</p>")
36 |       expect(resource).to be_exists
37 |     end
38 | 
39 |     it "reuses response code if already fetched" do
40 |       FakeWeb.register_uri(:get, "http://example.org/dataset", body: "<p>text</p>")
41 |       resource.ok?
42 |       expect(resource).to be_exists
43 |     end
44 | 
45 |     it "handles a not found" do
46 |       FakeWeb.register_uri(:head, "http://example.org/dataset", status: 404)
47 |       expect(resource).to_not be_exists
48 |     end
49 |   end
50 | 
51 |   describe "present resource" do
52 |     before do
53 |       FakeWeb.register_uri(:get, "http://example.org/resource", body: "<p>text</p>", content_type: "text/html; encoding=utf-8")
54 |     end
55 |     subject(:resource) { described_class.new("http://example.org/resource") }
56 | 
57 |     it { should be_ok }
58 |     it { expect(resource.code).to eq(200) }
59 |     it { expect(resource.body).to eq("<p>text</p>") }
60 |     it { expect(resource.as_json).to be_nil }
61 |     it { should be_html }
62 |     it { expect(resource.content_type).to eq("text/html; encoding=utf-8") }
63 |     it { expect(resource.content_type_format).to eq("text/html") }
64 |     it { expect(resource.to_s).to eq("http://example.org/resource") }
65 |   end
66 | 
67 |   describe "present json resource" do
68 |     before do
69 |       FakeWeb.register_uri(:get, "http://example.org/resource", body: '{"hi":"there"}', content_type: "application/json; encoding=utf-8")
70 |     end
71 |     subject(:resource) { described_class.new("http://example.org/resource") }
72 | 
73 |     it { should be_ok }
74 |     it { expect(resource.code).to eq(200) }
75 |     it { should_not be_html }
76 |     it { should be_json }
77 |     it { expect(resource.as_json).to eq({"hi" => "there"}) }
78 |     it { expect(resource.content_type).to eq("application/json; encoding=utf-8") }
79 |     it { expect(resource.content_type_format).to eq("application/json") }
80 |     it { expect(resource.to_s).to eq("http://example.org/resource") }
81 |   end
82 | 
83 |   describe "not found" do
84 |     before do
85 |       FakeWeb.register_uri(:get, "http://example.org/not-found", status: ["404", "Not Found"])
86 |     end
87 |     subject(:resource) { described_class.new("http://example.org/not-found") }
88 | 
89 |     it { should_not be_ok }
90 |     it { expect(resource.code).to eq(404) }
91 |   end
92 | end
93 | 


--------------------------------------------------------------------------------
/spec/fixtures/basic-dcat-rdfa.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html prefix="dct: http://purl.org/dc/terms/
 3 |               rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
 4 |               dcat: http://www.w3.org/ns/dcat#
 5 |               foaf: http://xmlns.com/foaf/0.1/
 6 | 			  odrs: http://schema.theodi.org/odrs#">
 7 |     <head>
 8 |         <title>DCAT in RDFa</title>
 9 | 	</head>
10 | 	<body>
11 | 		<div typeof="dcat:Dataset" resource="http://gov.example.org/dataset/finances">
12 |             <h1 property="dct:title">Example DCAT Dataset</h1>	
13 |             
14 |               <p property="dct:created" content='2010-10-25T09:00:00+00:00' datatype='xsd:dateTime'>25th October 2010</p>
15 |               <p property="dct:modified" content='2013-05-10T13:39:36+00:00' datatype='xsd:dateTime'>10th March 2013</p>
16 |               
17 |               <p property="dct:description">This is the description.<p>
18 |               					
19 |             <div property="dct:license" 
20 |                  resource="http://reference.data.gov.uk/id/open-government-licence">
21 | 				<a href="http://reference.data.gov.uk/id/open-government-licence">
22 |                 <span property="dct:title">UK Open Government Licence (OGL)</span>
23 |                 </a>
24 |             </div>
25 | 
26 |             <div property="dct:publisher" 
27 |                  resource="http://example.org/publisher">
28 |                 <a href="http://example.org/publisher" about="http://example.org/publisher" property="foaf:homepage">
29 |                     <span property="foaf:name">Example Publisher</span>
30 |                 </a>
31 |             </div>
32 |             
33 |             <div>
34 |                 <span property="dcat:keyword">Examples</span>, <span property="dcat:keyword">DCAT</span>
35 |             </div>
36 |             
37 |             <div>
38 |                 <a href="http://purl.org/linked-data/sdmx/2009/code#freq-W" property="dcat:accrualPeriodicity">Weekly</a>
39 |             </div>
40 |             
41 | 		    <div property='dcat:distribution' typeof='dcat:Distribution'>
42 | 		        <span property="dct:title">CSV download</span>
43 | 		        <ul>
44 | 		            <li><strong>Format</strong> <span content='text/csv' property='dcat:mediaType'>CSV</span></li>
45 | 		            <li><strong>Size</strong> <span content='240585277' datatype='xsd:decimal' property='dcat:byteSize'>1024MB</span></li>
46 | 		            <li><strong>Issues</strong> <span property='dct:issued'>2012-01-01</span></li>
47 | 		        </ul>        
48 | 		        <p><a class='btn btn-primary' href='http://example.org/distribution.csv.zip' property='dcat:accessURL'>Download the full dataset</a></p>
49 | 		    </div>            
50 |             
51 |     </body>
52 | </html>
53 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/organization-show-peterborough.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "help": "http://data.gov.uk/api/3/action/help_show?name=organization_show",
  3 |     "success": true,
  4 |     "result": {
  5 |         "contact-phone": "",
  6 |         "abbreviation": "",
  7 |         "foi-email": "",
  8 |         "id": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
  9 |         "users": [
 10 |             {
 11 |                 "capacity": "admin",
 12 |                 "name": "user_d105921"
 13 |             },
 14 |             {
 15 |                 "capacity": "editor",
 16 |                 "name": "user_d354621"
 17 |             },
 18 |             {
 19 |                 "capacity": "editor",
 20 |                 "name": "user_d734"
 21 |             }
 22 |         ],
 23 |         "category": "local-council",
 24 |         "replaced_by": [],
 25 |         "approval_status": "pending",
 26 |         "title": "Peterborough City Council",
 27 |         "foi-web": "",
 28 |         "description": "",
 29 |         "contact-email": "",
 30 |         "tags": [],
 31 |         "foi-name": "",
 32 |         "groups": [
 33 |             {
 34 |                 "capacity": "public",
 35 |                 "name": "local-authorities"
 36 |             }
 37 |         ],
 38 |         "packages": [
 39 |             {
 40 |                 "title": "Number of pupils on roll at primary and secondary schools by year",
 41 |                 "id": "9828048a-31d2-4387-bc11-ffe35bc5e7d7",
 42 |                 "name": "number-of-pupils-on-roll-at-primary-and-secondary-schools-by-year-pcc"
 43 |             },
 44 |             {
 45 |                 "title": "Peterborough City Council - Public toilets",
 46 |                 "id": "1de61ff0-cc13-4744-8f53-8b60debca6cd",
 47 |                 "name": "peterborough-city-council-public-toilets"
 48 |             },
 49 |             {
 50 |                 "title": "Peterborough City Council - Payments over \u00a3500",
 51 |                 "id": "b5f46f62-872f-4d92-9cb7-f7f1570b914a",
 52 |                 "name": "peterborough-city-council-payments-over-500"
 53 |             },
 54 |             {
 55 |                 "title": "Public toilets",
 56 |                 "id": "729e1ebe-7bc5-4039-aaab-f34f66481f02",
 57 |                 "name": "public-toilets-pcc"
 58 |             },
 59 |             {
 60 |                 "title": "Primary school places, schools oversubscribed and %s of preferences met",
 61 |                 "id": "d3b6b18a-2e27-4272-a589-7c40f7d335e1",
 62 |                 "name": "primary-school-places-schools-oversubscribed-and-s-of-preferences-met-pcc"
 63 |             },
 64 |             {
 65 |                 "title": "Transparency Code - Tenders and Contracts",
 66 |                 "id": "fb10c68e-0dc2-4837-abc3-d78c8bbb551d",
 67 |                 "name": "transparency-code-tenders-and-contracts-pcc"
 68 |             },
 69 |             {
 70 |                 "title": "Planning applications",
 71 |                 "id": "a32a08d1-1760-47ee-b345-0f6345266657",
 72 |                 "name": "planning-applications-pcc"
 73 |             },
 74 |             {
 75 |                 "title": "Price for older person's placements",
 76 |                 "id": "201fb13d-35e4-4c7b-abf4-8db96ef9d6f4",
 77 |                 "name": "price-for-older-persons-placements-pcc"
 78 |             },
 79 |             {
 80 |                 "title": "Peterborough City Council - Social and affordable housing allocations by nationality",
 81 |                 "id": "793d5e4c-b656-4ff0-b121-d2478441fed1",
 82 |                 "name": "peterborough-city-council-social-and-affordable-housing-allocations-by-nationality"
 83 |             },
 84 |             {
 85 |                 "title": "Discretionary Housing Payments",
 86 |                 "id": "1b611b09-3e2a-44e1-8d51-a9e78677d9eb",
 87 |                 "name": "discretionary-housing-payments-pcc"
 88 |             },
 89 |             {
 90 |                 "title": "Animals frozen, including type and quantity",
 91 |                 "id": "999a2ee6-14c6-4b6d-b2a2-8c1c0a790e38",
 92 |                 "name": "animals-frozen-including-type-and-quantity-pcc"
 93 |             },
 94 |             {
 95 |                 "title": "Peterborough City Council - Primary schools",
 96 |                 "id": "e02f2a8e-53d9-48b1-95b5-2b8449c4ef76",
 97 |                 "name": "peterborough-city-council-primary-schools-oversubscribed"
 98 |             },
 99 |             {
100 |                 "title": "Senior Salaries",
101 |                 "id": "95d70853-b678-451d-b399-caff9a21f4f9",
102 |                 "name": "senior-salaries-pcc"
103 |             },
104 |             {
105 |                 "title": "Transparency Code - Payments over \u00a3500",
106 |                 "id": "a3af6580-2c8c-498f-a528-89fac84d59dd",
107 |                 "name": "transparency-code-payments-over-500-pcc"
108 |             },
109 |             {
110 |                 "title": "Fraud investigation",
111 |                 "id": "467bdc57-c9fd-401a-a0da-a569beb9b54b",
112 |                 "name": "fraud-investigation-pcc"
113 |             },
114 |             {
115 |                 "title": "LA budget spend on older person's services",
116 |                 "id": "ad19cf54-9775-4e42-96df-84ce2a68dd1a",
117 |                 "name": "la-budget-spend-on-older-persons-services-pcc"
118 |             },
119 |             {
120 |                 "title": "Mobile food vendors/Street traders",
121 |                 "id": "4baa85a8-46e4-403c-a27c-97558080a5d2",
122 |                 "name": "mobile-food-vendors-street-traders-pcc"
123 |             },
124 |             {
125 |                 "title": "Parking account",
126 |                 "id": "be1821b1-e888-4440-8804-5d3510faad54",
127 |                 "name": "parking-account-pcc"
128 |             },
129 |             {
130 |                 "title": "Pet shops",
131 |                 "id": "d56091a7-ef2d-461c-b0aa-125f100cd850",
132 |                 "name": "pet-shops-pcc"
133 |             },
134 |             {
135 |                 "title": "Premises licence",
136 |                 "id": "b9fc9a66-2937-43c4-9b93-4268e2fb19b1",
137 |                 "name": "premises-licence-pcc"
138 |             },
139 |             {
140 |                 "title": "Firework licence-registration holders - Type 3 and 4 explosives",
141 |                 "id": "1d35f6fc-4c68-4d53-9590-2735e8296f41",
142 |                 "name": "firework-licence-registration-holders-type-3-and-4-explosives-pcc"
143 |             },
144 |             {
145 |                 "title": "Hereditament Properties",
146 |                 "id": "00b5f003-0195-4530-bd44-9a7a65cb696f",
147 |                 "name": "hereditament-properties-pcc"
148 |             },
149 |             {
150 |                 "title": "Land and Property Assets",
151 |                 "id": "22f8cebe-e46d-404f-8dd7-6baf79890058",
152 |                 "name": "land-and-property-assets-pcc"
153 |             },
154 |             {
155 |                 "title": "Older people placed and funded by council",
156 |                 "id": "9bf02263-0767-4724-b47a-47deaf558b82",
157 |                 "name": "older-people-placed-and-funded-by-council-pcc"
158 |             },
159 |             {
160 |                 "title": "Parking places",
161 |                 "id": "841e12c0-477c-4a2d-ac98-01f637d43133",
162 |                 "name": "parking-places-pcc"
163 |             },
164 |             {
165 |                 "title": "Percent of older person's placements more expensive for LAs than basic contract",
166 |                 "id": "0f7b0e79-7a25-4c56-8d39-d8d29153618a",
167 |                 "name": "percent-of-older-persons-placements-more-expensive-for-las-than-basic-contract-pcc"
168 |             },
169 |             {
170 |                 "title": "Peterborough City Council - Budget spend on older person services",
171 |                 "id": "8ac572a3-d7c9-4287-b29c-603b6ff4e265",
172 |                 "name": "peterborough-city-council-budget-spend-on-older-person-services"
173 |             },
174 |             {
175 |                 "title": "Peterborough City Council - Discretionary housing payments",
176 |                 "id": "1fb200e9-df3e-459d-9b2e-e5501d558b94",
177 |                 "name": "peterborough-city-council-discretionary-housing-payments"
178 |             },
179 |             {
180 |                 "title": "Peterborough City Council - Firework licence holders",
181 |                 "id": "e91059b9-aa51-4e82-a3b0-7a6e38da9e2d",
182 |                 "name": "peterborough-city-council-firework-licence-holders"
183 |             },
184 |             {
185 |                 "title": "Peterborough City Council - Frozen animals",
186 |                 "id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
187 |                 "name": "peterborough-city-council-frozen-animals"
188 |             },
189 |             {
190 |                 "title": "Peterborough City Council - Older people placed and funded",
191 |                 "id": "f1817c3a-a980-499d-b5c4-da73052a40e0",
192 |                 "name": "peterborough-city-council-older-people-placed-and-funded"
193 |             },
194 |             {
195 |                 "title": "Peterborough City Council - Percentage of older people in expensive placements",
196 |                 "id": "02da0b39-1025-40ea-a83f-ed776154465f",
197 |                 "name": "peterborough-city-council-percentage-of-older-people-in-expensive-placements"
198 |             },
199 |             {
200 |                 "title": "Peterborough City Council - Price for older person placements",
201 |                 "id": "2a8efb51-79d6-4c05-bc46-c63ed8d23676",
202 |                 "name": "peterborough-city-council-price-for-older-person-placements"
203 |             },
204 |             {
205 |                 "title": "Primary schools oversubscribed and last child in",
206 |                 "id": "bec56f94-81ed-48fe-9b97-7c27b433c5f0",
207 |                 "name": "primary-schools-oversubscribed-and-last-child-in-pcc"
208 |             },
209 |             {
210 |                 "title": "Social and affordable housing allocations by nationality",
211 |                 "id": "c7233282-1236-45a3-bc6c-dec52af4569a",
212 |                 "name": "social-and-affordable-housing-allocations-by-nationality-pcc"
213 |             }
214 |         ],
215 |         "contact-name": "",
216 |         "name": "peterborough-city-council",
217 |         "image_display_url": "",
218 |         "type": "organization",
219 |         "is_organization": true,
220 |         "extras": [
221 |             {
222 |                 "value": "",
223 |                 "key": "abbreviation"
224 |             },
225 |             {
226 |                 "value": "local-council",
227 |                 "key": "category"
228 |             },
229 |             {
230 |                 "value": "",
231 |                 "key": "contact-email"
232 |             },
233 |             {
234 |                 "value": "",
235 |                 "key": "contact-name"
236 |             },
237 |             {
238 |                 "value": "",
239 |                 "key": "contact-phone"
240 |             },
241 |             {
242 |                 "value": "",
243 |                 "key": "foi-email"
244 |             },
245 |             {
246 |                 "value": "",
247 |                 "key": "foi-name"
248 |             },
249 |             {
250 |                 "value": "",
251 |                 "key": "foi-phone"
252 |             },
253 |             {
254 |                 "value": "",
255 |                 "key": "foi-web"
256 |             }
257 |         ],
258 |         "image_url": "",
259 |         "foi-phone": ""
260 |     }
261 | }


--------------------------------------------------------------------------------
/spec/fixtures/ckan/organization_show-ecology.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "help": "Return the details of a organization.\n\n    :param id: the id or name of the organization\n    :type id: string\n    :param include_datasets: include a list of the organization's datasets\n         (optional, default: ``True``)\n    :type id: boolean\n\n    :rtype: dictionary\n\n    .. note:: Only its first 1000 datasets are returned\n    ",
 3 |     "success": true,
 4 |     "result": {
 5 |         "groups": [{
 6 |             "capacity": "public",
 7 |             "name": "natural-environment-research-council"
 8 |         }],
 9 |         "abbreviation": "CEH",
10 |         "foi-email": "foi@ceh.ac.uk",
11 |         "id": "866f4088-ae4f-43b8-ba8c-6d3141a327f2",
12 |         "users": [],
13 |         "category": "",
14 |         "packages": [],
15 |         "approval_status": "pending",
16 |         "title": "Centre for Ecology & Hydrology",
17 |         "closed": false,
18 |         "foi-web": "http://www.ceh.ac.uk/",
19 |         "description": "The <a href=\"http://www.ceh.ac.uk/data/\" target=\"_blank\">Centre for Ecology &amp; Hydrology</a> is the UK's Centre of  Excellence for integrated research in terrestrial and freshwater  ecosystems and their interaction with the atmosphere. As part of the <a href=\"http://www.nerc.ac.uk/\" target=\"_blank\">Natural Environment Research Council</a>,  we provide National Capability based on innovative, independent and  interdisciplinary science and long-term environmental monitoring,  forming an integral part of NERC's vision and strategy.</p> <p>Working in partnership with the research community,  policy-makers, industry and society, we deliver world-class solutions to  the most complex environmental challenges facing humankind. CEH is a member of <a href=\"http://www.peer.eu/\">PEER</a> (the Partnership for European Environmental Research).",
20 |         "contact-email": "enquiries@ceh.ac.uk",
21 |         "tags": [],
22 |         "foi-name": "Centre for Ecology & Hydrology enquiry service",
23 |         "contact-phone": "",
24 |         "replaced_by": [],
25 |         "contact-name": "Centre for Ecology & Hydrology enquiry service",
26 |         "name": "centre-for-ecology-hydrology",
27 |         "image_display_url": "",
28 |         "type": "organization",
29 |         "is_organization": true,
30 |         "extras": [{
31 |             "value": "CEH",
32 |             "key": "abbreviation"
33 |         }, {
34 |             "value": "",
35 |             "key": "category"
36 |         }, {
37 |             "value": "false",
38 |             "key": "closed"
39 |         }, {
40 |             "value": "enquiries@ceh.ac.uk",
41 |             "key": "contact-email"
42 |         }, {
43 |             "value": "Centre for Ecology & Hydrology enquiry service",
44 |             "key": "contact-name"
45 |         }, {
46 |             "value": "",
47 |             "key": "contact-phone"
48 |         }, {
49 |             "value": "foi@ceh.ac.uk",
50 |             "key": "foi-email"
51 |         }, {
52 |             "value": "Centre for Ecology & Hydrology enquiry service",
53 |             "key": "foi-name"
54 |         }, {
55 |             "value": "01491 692371",
56 |             "key": "foi-phone"
57 |         }, {
58 |             "value": "http://www.ceh.ac.uk/",
59 |             "key": "foi-web"
60 |         }, {
61 |             "value": "[]",
62 |             "key": "replaced_by"
63 |         }, {
64 |             "value": "Centre for Ecology & Hydrology",
65 |             "key": "website-name"
66 |         }, {
67 |             "value": "http://www.ceh.ac.uk/",
68 |             "key": "website-url"
69 |         }],
70 |         "image_url": "",
71 |         "foi-phone": "01491 692371"
72 |     }
73 | }
74 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/package-show-frozen-animals.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "help": "http://data.gov.uk/api/3/action/help_show?name=package_show",
  3 |     "success": true,
  4 |     "result": {
  5 |         "license_title": "UK Open Government Licence (OGL)",
  6 |         "maintainer": null,
  7 |         "groups": [],
  8 |         "temporal_coverage-from": "1/1/2012",
  9 |         "relationships_as_object": [],
 10 |         "data_dict": "{\"license_title\": \"UK Open Government Licence (OGL)\", \"maintainer\": null, \"relationships_as_object\": [], \"private\": false, \"maintainer_email\": null, \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"metadata_created\": \"2014-07-10T13:01:55.052900\", \"metadata_modified\": \"2014-07-10T13:01:55.066211\", \"author\": null, \"author_email\": null, \"state\": \"active\", \"version\": null, \"creator_user_id\": \"61e179a9-a9e5-4286-831d-42f688a5e22b\", \"type\": \"dataset\", \"resources\": [{\"resource_group_id\": \"a8a50585-fd1f-4871-b454-1bd87f31c62a\", \"cache_last_updated\": null, \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"webstore_last_updated\": null, \"id\": \"3fe95f4b-90b7-4fba-ad8c-258e4ed078f2\", \"size\": null, \"state\": \"active\", \"last_modified\": null, \"hash\": \"\", \"description\": \"Frozen animals\", \"format\": \"HTML\", \"tracking_summary\": {\"total\": 0, \"recent\": 0}, \"mimetype_inner\": null, \"url_type\": null, \"mimetype\": null, \"cache_url\": null, \"name\": null, \"created\": \"2014-07-10T14:01:55.091967\", \"url\": \"http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity\", \"webstore_url\": null, \"position\": 0, \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"resource_type\": \"file\"}, {\"resource_group_id\": \"a8a50585-fd1f-4871-b454-1bd87f31c62a\", \"cache_last_updated\": null, \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"webstore_last_updated\": null, \"id\": \"03635365-29a2-4752-9e50-d68ae50192c2\", \"size\": null, \"state\": \"active\", \"last_modified\": null, \"hash\": \"\", \"description\": \"Frozen animals\", \"format\": \"CSV\", \"tracking_summary\": {\"total\": 0, \"recent\": 0}, \"mimetype_inner\": null, \"url_type\": null, \"mimetype\": null, \"cache_url\": null, \"name\": null, \"created\": 
\"2014-07-10T14:01:55.091988\", \"url\": \"http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity#\", \"webstore_url\": null, \"position\": 1, \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"resource_type\": \"file\"}], \"num_resources\": 2, \"tags\": [], \"tracking_summary\": {\"total\": 0, \"recent\": 0}, \"groups\": [], \"license_id\": \"uk-ogl\", \"relationships_as_subject\": [], \"num_tags\": 0, \"organization\": {\"description\": \"\", \"title\": \"Peterborough City Council\", \"created\": \"2014-01-31T16:33:33.959076\", \"approval_status\": \"pending\", \"revision_timestamp\": \"2014-01-31T16:33:33.911635\", \"is_organization\": true, \"state\": \"active\", \"image_url\": \"\", \"revision_id\": \"bd38de89-3194-4c84-b758-a8cd78242549\", \"type\": \"organization\", \"id\": \"e70862ec-8167-48e6-a27c-a0e9db1ebc87\", \"name\": \"peterborough-city-council\"}, \"name\": \"peterborough-city-council-frozen-animals\", \"isopen\": true, \"url\": null, \"notes\": \"Animals frozen, including type and quantity\", \"owner_org\": \"e70862ec-8167-48e6-a27c-a0e9db1ebc87\", \"extras\": [{\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"contact-email\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"674b1a0a-4c56-4390-a8ff-914f6c1a3597\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"contact-name\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"9cf1a5fa-e802-4c1d-acc0-17faca55b41d\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"contact-phone\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", 
\"id\": \"cd3a8460-85ef-4f92-aa75-e0ece311e838\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"foi-email\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"59ddf3f1-35c7-40e5-bac8-7f4db84e2f13\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"foi-name\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"c8553399-7869-4107-bed2-faee3d6dadb3\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"foi-phone\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"dcc3c9bb-c7f5-4514-a419-86c4f2eac960\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"foi-web\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"b644d641-9903-487c-b51a-37487c9df0e1\"}, {\"state\": \"active\", \"value\": \"100000: England\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"geographic_coverage\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"f9146159-5471-47dd-9953-d068053c4178\"}, {\"state\": \"active\", \"value\": \"{\\\"status\\\": \\\"final\\\", \\\"source\\\": \\\"Automatically awarded by ODI\\\", \\\"certification_type\\\": \\\"automatically awarded\\\", \\\"level\\\": \\\"raw\\\", \\\"title\\\": \\\"Peterborough City Council - Frozen animals\\\", \\\"created_at\\\": \\\"2014-10-29T16:28:04Z\\\", \\\"jurisdiction\\\": \\\"GB\\\", \\\"certificate_url\\\": \\\"https://certificates.theodi.org/datasets/10677/certificates/23119\\\", \\\"badge_url\\\": 
\\\"https://certificates.theodi.org/datasets/10677/certificates/23119/badge.png\\\", \\\"cert_title\\\": \\\"Basic Level Certificate\\\"}\", \"revision_timestamp\": \"2014-11-12T01:53:13.148283\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"odi-certificate\", \"revision_id\": \"94654551-437e-41cf-ac25-ba31d60014a9\", \"id\": \"c6e4d449-b9a5-4da5-9451-52fcadbd7168\"}, {\"state\": \"active\", \"value\": \"2012-01-01\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"temporal_coverage-from\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"4a42ad18-7c37-41b6-b6c1-1f1f962efa58\"}, {\"state\": \"active\", \"value\": \"2014-01-01\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"temporal_coverage-to\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"57241bae-7b88-4082-a44e-0f52c81acf3b\"}, {\"state\": \"active\", \"value\": \"Environment\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"theme-primary\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"4589ea2d-d521-499b-9b49-c796ff1af8a3\"}, {\"state\": \"active\", \"value\": \"false\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"unpublished\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"f80ed0be-8e6d-489c-b9a2-8ab1a5a97d02\"}, {\"state\": \"active\", \"value\": \"\", \"revision_timestamp\": \"2014-07-10T13:01:55.052900\", \"package_id\": \"20695709-1cfb-414e-b90a-9b358aa0ca07\", \"key\": \"update_frequency\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\", \"id\": \"7d56647a-6e70-4fd8-9a03-40dfe32b7ca7\"}], \"license_url\": 
\"http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/\", \"title\": \"Peterborough City Council - Frozen animals\", \"revision_id\": \"682a3b2f-966b-4307-972d-4887f4465ebf\"}",
 11 |         "private": false,
 12 |         "maintainer_email": null,
 13 |         "revision_timestamp": "2014-07-10T13:01:55.052900",
 14 |         "foi-email": "",
 15 |         "id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
 16 |         "metadata_created": "2014-07-10T13:01:55.052900",
 17 |         "metadata_modified": "2014-07-10T13:01:55.066211",
 18 |         "author": null,
 19 |         "author_email": null,
 20 |         "geographic_coverage": [
 21 |             "england"
 22 |         ],
 23 |         "state": "active",
 24 |         "version": null,
 25 |         "temporal_coverage-to": "1/1/2014",
 26 |         "license_id": "uk-ogl",
 27 |         "foi-web": "",
 28 |         "unpublished": "false",
 29 |         "resources": [
 30 |             {
 31 |                 "hash": "",
 32 |                 "description": "Frozen animals",
 33 |                 "created": "2014-07-10T14:01:55.091967",
 34 |                 "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity",
 35 |                 "format": "HTML",
 36 |                 "tracking_summary": {
 37 |                     "total": 0,
 38 |                     "recent": 0
 39 |                 },
 40 |                 "position": 0,
 41 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
 42 |                 "id": "3fe95f4b-90b7-4fba-ad8c-258e4ed078f2",
 43 |                 "resource_type": "file"
 44 |             },
 45 |             {
 46 |                 "hash": "",
 47 |                 "description": "Frozen animals",
 48 |                 "created": "2014-07-10T14:01:55.091988",
 49 |                 "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity#",
 50 |                 "format": "CSV",
 51 |                 "tracking_summary": {
 52 |                     "total": 0,
 53 |                     "recent": 0
 54 |                 },
 55 |                 "position": 1,
 56 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
 57 |                 "id": "03635365-29a2-4752-9e50-d68ae50192c2",
 58 |                 "resource_type": "file"
 59 |             }
 60 |         ],
 61 |         "num_resources": 2,
 62 |         "contact-email": "",
 63 |         "tags": [
 64 |           {
 65 |             "vocabulary_id": null,
 66 |             "display_name": "Environment",
 67 |             "name": "environment",
 68 |             "revision_timestamp": "2013-02-11T10:40:55.330719",
 69 |             "state": "active",
 70 |             "id": "f5624e90-3567-4554-a582-cf5eeca87dab"
 71 |           }
 72 |         ],
 73 |         "title": "Peterborough City Council - Frozen animals",
 74 |         "foi-name": "",
 75 |         "tracking_summary": {
 76 |             "total": 0,
 77 |             "recent": 0
 78 |         },
 79 |         "contact-phone": "",
 80 |         "creator_user_id": "61e179a9-a9e5-4286-831d-42f688a5e22b",
 81 |         "relationships_as_subject": [],
 82 |         "num_tags": 0,
 83 |         "organization": {
 84 |             "description": "",
 85 |             "created": "2014-01-31T16:33:33.959076",
 86 |             "title": "Peterborough City Council",
 87 |             "name": "peterborough-city-council",
 88 |             "revision_timestamp": "2014-01-31T16:33:33.911635",
 89 |             "is_organization": true,
 90 |             "state": "active",
 91 |             "image_url": "",
 92 |             "revision_id": "bd38de89-3194-4c84-b758-a8cd78242549",
 93 |             "type": "organization",
 94 |             "id": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
 95 |             "approval_status": "pending"
 96 |         },
 97 |         "contact-name": "",
 98 |         "name": "peterborough-city-council-frozen-animals",
 99 |         "isopen": true,
100 |         "url": null,
101 |         "type": "dataset",
102 |         "notes": "Animals frozen, including type and quantity",
103 |         "owner_org": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
104 |         "extras": [
105 |             {
106 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
107 |                 "value": "",
108 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
109 |                 "state": "active",
110 |                 "key": "contact-email",
111 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
112 |                 "id": "674b1a0a-4c56-4390-a8ff-914f6c1a3597"
113 |             },
114 |             {
115 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
116 |                 "value": "",
117 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
118 |                 "state": "active",
119 |                 "key": "contact-name",
120 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
121 |                 "id": "9cf1a5fa-e802-4c1d-acc0-17faca55b41d"
122 |             },
123 |             {
124 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
125 |                 "value": "",
126 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
127 |                 "state": "active",
128 |                 "key": "contact-phone",
129 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
130 |                 "id": "cd3a8460-85ef-4f92-aa75-e0ece311e838"
131 |             },
132 |             {
133 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
134 |                 "value": "",
135 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
136 |                 "state": "active",
137 |                 "key": "foi-email",
138 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
139 |                 "id": "59ddf3f1-35c7-40e5-bac8-7f4db84e2f13"
140 |             },
141 |             {
142 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
143 |                 "value": "",
144 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
145 |                 "state": "active",
146 |                 "key": "foi-name",
147 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
148 |                 "id": "c8553399-7869-4107-bed2-faee3d6dadb3"
149 |             },
150 |             {
151 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
152 |                 "value": "",
153 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
154 |                 "state": "active",
155 |                 "key": "foi-phone",
156 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
157 |                 "id": "dcc3c9bb-c7f5-4514-a419-86c4f2eac960"
158 |             },
159 |             {
160 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
161 |                 "value": "",
162 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
163 |                 "state": "active",
164 |                 "key": "foi-web",
165 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
166 |                 "id": "b644d641-9903-487c-b51a-37487c9df0e1"
167 |             },
168 |             {
169 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
170 |                 "value": "100000: England",
171 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
172 |                 "state": "active",
173 |                 "key": "geographic_coverage",
174 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
175 |                 "id": "f9146159-5471-47dd-9953-d068053c4178"
176 |             },
177 |             {
178 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
179 |                 "value": "{\"status\": \"final\", \"source\": \"Automatically awarded by ODI\", \"certification_type\": \"automatically awarded\", \"level\": \"raw\", \"title\": \"Peterborough City Council - Frozen animals\", \"created_at\": \"2014-10-29T16:28:04Z\", \"jurisdiction\": \"GB\", \"certificate_url\": \"https://certificates.theodi.org/datasets/10677/certificates/23119\", \"badge_url\": \"https://certificates.theodi.org/datasets/10677/certificates/23119/badge.png\", \"cert_title\": \"Basic Level Certificate\"}",
180 |                 "revision_timestamp": "2014-11-12T01:53:13.148283",
181 |                 "state": "active",
182 |                 "key": "odi-certificate",
183 |                 "revision_id": "94654551-437e-41cf-ac25-ba31d60014a9",
184 |                 "id": "c6e4d449-b9a5-4da5-9451-52fcadbd7168"
185 |             },
186 |             {
187 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
188 |                 "value": "2012-01-01",
189 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
190 |                 "state": "active",
191 |                 "key": "temporal_coverage-from",
192 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
193 |                 "id": "4a42ad18-7c37-41b6-b6c1-1f1f962efa58"
194 |             },
195 |             {
196 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
197 |                 "value": "2014-01-01",
198 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
199 |                 "state": "active",
200 |                 "key": "temporal_coverage-to",
201 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
202 |                 "id": "57241bae-7b88-4082-a44e-0f52c81acf3b"
203 |             },
204 |             {
205 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
206 |                 "value": "Environment",
207 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
208 |                 "state": "active",
209 |                 "key": "theme-primary",
210 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
211 |                 "id": "4589ea2d-d521-499b-9b49-c796ff1af8a3"
212 |             },
213 |             {
214 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
215 |                 "value": "false",
216 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
217 |                 "state": "active",
218 |                 "key": "unpublished",
219 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
220 |                 "id": "f80ed0be-8e6d-489c-b9a2-8ab1a5a97d02"
221 |             },
222 |             {
223 |                 "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
224 |                 "value": "",
225 |                 "revision_timestamp": "2014-07-10T13:01:55.052900",
226 |                 "state": "active",
227 |                 "key": "update_frequency",
228 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
229 |                 "id": "7d56647a-6e70-4fd8-9a03-40dfe32b7ca7"
230 |             }
231 |         ],
232 |         "license_url": "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/",
233 |         "individual_resources": [
234 |             {
235 |                 "hash": "",
236 |                 "description": "Frozen animals",
237 |                 "created": "2014-07-10T14:01:55.091967",
238 |                 "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity",
239 |                 "format": "HTML",
240 |                 "tracking_summary": {
241 |                     "total": 0,
242 |                     "recent": 0
243 |                 },
244 |                 "position": 0,
245 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
246 |                 "id": "3fe95f4b-90b7-4fba-ad8c-258e4ed078f2",
247 |                 "resource_type": "file"
248 |             },
249 |             {
250 |                 "hash": "",
251 |                 "description": "Frozen animals",
252 |                 "created": "2014-07-10T14:01:55.091988",
253 |                 "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity#",
254 |                 "format": "CSV",
255 |                 "tracking_summary": {
256 |                     "total": 0,
257 |                     "recent": 0
258 |                 },
259 |                 "position": 1,
260 |                 "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
261 |                 "id": "03635365-29a2-4752-9e50-d68ae50192c2",
262 |                 "resource_type": "file"
263 |             }
264 |         ],
265 |         "update_frequency": "",
266 |         "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf",
267 |         "foi-phone": "",
268 |         "theme-primary": "Environment"
269 |     }
270 | }
271 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-dataset-cadastral.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "license_title": null,
 3 |     "maintainer": null,
 4 |     "private": false,
 5 |     "maintainer_email": null,
 6 |     "id": "65493c4b-46d5-4125-b7d4-fc1df2b33349",
 7 |     "metadata_created": "2012-12-21T11:41:36.523040",
 8 |     "relationships": [],
 9 |     "license": null,
10 |     "metadata_modified": "2014-02-18T16:38:37.394178",
11 |     "author": null,
12 |     "author_email": null,
13 |     "state": "active",
14 |     "version": null,
15 |     "creator_user_id": null,
16 |     "type": "dataset",
17 |     "resources": [],
18 |     "num_resources": 0,
19 |     "tags": ["Cadastral", "Cadastral Parcels", "Cadastral parcels", "Freehold", "Freehold Land Title", "INSPIRE", "LPS", "Land", "Land Register", "Land and Property Services", "Metadata", "NI", "NIMA", "Northern Ireland", "OSNI", "Parcels", "Property Land Registry", "land register"],
20 |     "groups": [],
21 |     "license_id": null,
22 |     "num_tags": 18,
23 |     "organization": {
24 |         "description": "",
25 |         "title": "Northern Ireland Spatial Data Infrastructure",
26 |         "created": "2012-06-27T14:55:27.012776",
27 |         "approval_status": "approved",
28 |         "revision_timestamp": "2012-06-27T13:48:36.948477",
29 |         "is_organization": true,
30 |         "state": "active",
31 |         "image_url": "",
32 |         "revision_id": "a1f4375a-4afe-4dad-88cd-03f2bd6adaaf",
33 |         "type": "organization",
34 |         "id": "cd937140-1310-4e2a-b211-5de8bebd910d",
35 |         "name": "northern-ireland-spatial-data-infrastructure"
36 |     },
37 |     "name": "lps-cadastral-parcels-ni-metadata",
38 |     "isopen": false,
39 |     "notes_rendered": "<p>The dataset contains the boundaries of each individual freehold title to land.\n</p>",
40 |     "url": null,
41 |     "ckan_url": "http://data.gov.uk/dataset/lps-cadastral-parcels-ni-metadata",
42 |     "notes": "The dataset contains the boundaries of each individual freehold title to land.",
43 |     "owner_org": "cd937140-1310-4e2a-b211-5de8bebd910d",
44 |     "ratings_average": null,
45 |     "extras": {
46 |         "bbox-east-long": "-5.2563",
47 |         "temporal_coverage-from": "[\"2012-11-30\"]",
48 |         "resource-type": "dataset",
49 |         "bbox-north-lat": "55.5369",
50 |         "harvest_source_reference": "{2C499079-DE9B-48A0-84C5-4D2DE8607E34}",
51 |         "coupled-resource": "[]",
52 |         "guid": "{2C499079-DE9B-48A0-84C5-4D2DE8607E34}",
53 |         "bbox-south-lat": "53.8869",
54 |         "temporal_coverage-to": "[\"2012-11-30\"]",
55 |         "spatial-reference-system": "4326",
56 |         "spatial": "{\"type\":\"Polygon\",\"coordinates\":[[[-5.2563, 53.8869],[-5.2563, 55.5369], [-8.1906, 55.5369], [-8.1906, 53.8869], [-5.2563, 53.8869]]]}",
57 |         "access_constraints": "[\"http://www.gistrategyni.gov.uk/index/spatialni/spatial_ni_licences.htm\"]",
58 |         "contact-email": "enquires@lrni.gov.uk",
59 |         "bbox-west-long": "-8.1906",
60 |         "metadata-date": "2014-02-14",
61 |         "dataset-reference-date": "[{\"type\": \"revision\", \"value\": \"2010-07-16\"}]",
62 |         "frequency-of-update": "biannually",
63 |         "licence": "[\"Conditions Apply\"]",
64 |         "harvest_object_id": "a710e52f-b0c5-4464-9a91-d8ec6d1fecf6",
65 |         "responsible-party": "Land & Property Services (LPS) Operations Directorate (Registrations) (owner)",
66 |         "UKLP": "True",
67 |         "spatial-data-service-type": "",
68 |         "metadata-language": "eng",
69 |         "provider": "Land & Property Services (LPS) Operations Directorate (Registrations)",
70 |         "theme-primary": "Mapping"
71 |     },
72 |     "ratings_count": 0,
73 |     "title": "LPS Cadastral Parcels NI (Metadata)",
74 |     "revision_id": "21da630a-8ac9-4d57-9059-324b4153270e"
75 | }
76 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-dataset-defence.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "license_title": "UK Open Government Licence (OGL)",
 3 |     "maintainer": "Mx Maintainer",
 4 |     "private": false,
 5 |     "maintainer_email": "mx@maintainer.org",
 6 |     "num_tags": 6,
 7 |     "id": "47f7438a-506d-49c9-b565-7573f8df031e",
 8 |     "metadata_created": "2012-10-05T13:51:55.812923",
 9 |     "relationships": [],
10 |     "license": "UK Open Government Licence (OGL)",
11 |     "metadata_modified": "2013-11-16T02:37:42.408267",
12 |     "author": "Mx Author",
13 |     "author_email": "mx@author.org",
14 |     "download_url": "https://www.gov.uk/government/publications/disposal-database-house-of-commons-report",
15 |     "state": "active",
16 |     "version": null,
17 |     "license_id": "uk-ogl",
18 |     "type": "dataset",
19 |     "resources": [{
20 |         "resource_group_id": "f0c99934-1a9f-4aec-938f-d9a526715f41",
21 |         "cache_last_updated": "2013-06-19T01:43:51.480908",
22 |         "package_id": "47f7438a-506d-49c9-b565-7573f8df031e",
23 |         "webstore_last_updated": null,
24 |         "id": "583cf50b-deb6-45c9-9add-b9b0e27ebbeb",
25 |         "size": "23806",
26 |         "cache_filepath": "/mnt/shared/ckan_resource_cache/58/583cf50b-deb6-45c9-9add-b9b0e27ebbeb/disposal-database-house-of-commons-report",
27 |         "last_modified": "2013-11-16T02:37:37.294479",
28 |         "hash": "85c1ecad7813f97139802584c7a897963aa5e562",
29 |         "description": "Disposals Database House of Commons Report January 2013",
30 |         "format": "CSV",
31 |         "tracking_summary": {
32 |             "total": 0,
33 |             "recent": 0
34 |         },
35 |         "mimetype_inner": null,
36 |         "date": "8/3/2013",
37 |         "mimetype": "text/html",
38 |         "cache_url": "http://data.gov.uk/data/resource_cache/58/583cf50b-deb6-45c9-9add-b9b0e27ebbeb/disposal-database-house-of-commons-report",
39 |         "name": null,
40 |         "created": "2012-11-23T12:34:54.297808",
41 |         "url": "https://www.gov.uk/government/publications/disposal-database-house-of-commons-report",
42 |         "webstore_url": null,
43 |         "position": 0,
44 |         "resource_type": "file"
45 |     }],
46 |     "num_resources": 4,
47 |     "tags": ["Defence", "Government", "Land and Property", "Property", "disposals", "house of commons"],
48 |     "tracking_summary": {
49 |         "total": 0,
50 |         "recent": 0
51 |     },
52 |     "groups": [],
53 |     "organization": {
54 |         "description": "",
55 |         "title": "Defence Infrastructure Organisation",
56 |         "created": "2012-06-27T14:54:58.477573",
57 |         "approval_status": "approved",
58 |         "revision_timestamp": "2012-06-27T13:48:36.948477",
59 |         "is_organization": true,
60 |         "state": "active",
61 |         "image_url": "",
62 |         "revision_id": "a1f4375a-4afe-4dad-88cd-03f2bd6adaaf",
63 |         "type": "organization",
64 |         "id": "a3969e37-3ac3-42fe-8317-c8575a9f5317",
65 |         "name": "defence-infrastructure-organisation"
66 |     },
67 |     "name": "defence-infrastructure-organisation-disposals-database-house-of-commons-report",
68 |     "isopen": true,
69 |     "notes_rendered": "<p>MoD present and future disposal properties that are in the public domain that is provided for reference in the House of Commons library\n</p>",
70 |     "url": null,
71 |     "ckan_url": "http://data.gov.uk/dataset/defence-infrastructure-organisation-disposals-database-house-of-commons-report",
72 |     "notes": "MoD present and future disposal properties that are in the public domain that is provided for reference in the House of Commons library\r\n",
73 |     "owner_org": "a3969e37-3ac3-42fe-8317-c8575a9f5317",
74 |     "ratings_average": null,
75 |     "extras": {
76 |         "geographic_coverage": "111100: United Kingdom (England, Scotland, Wales, Northern Ireland)",
77 |         "contact-name": "",
78 |         "contact-phone": "",
79 |         "contact-email": "",
80 |         "foi-web": "",
81 |         "foi-name": "",
82 |         "temporal_coverage-to": "2013-10-31",
83 |         "theme-secondary": "Government",
84 |         "foi-email": "",
85 |         "core-dataset": "False",
86 |         "mandate": "",
87 |         "date": "4/11/2013",
88 |         "update_frequency": "bi-monthly",
89 |         "foi-phone": "",
90 |         "unpublished": "false",
91 |         "temporal_coverage-from": "2012-11-01",
92 |         "theme-primary": "Defence"
93 |     },
94 |     "license_url": "http://reference.data.gov.uk/id/open-government-licence",
95 |     "ratings_count": 0,
96 |     "title": "Defence Infrastructure Organisation Disposals Database House of Commons Report",
97 |     "revision_id": "6f1493f6-4f71-4c6e-871a-a60c4bfda8c3"
98 | }
99 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-dataset-frozen-animals.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "license_title": "UK Open Government Licence (OGL)",
  3 |     "maintainer": null,
  4 |     "private": false,
  5 |     "maintainer_email": null,
  6 |     "id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
  7 |     "metadata_created": "2014-07-10T13:01:55.052900",
  8 |     "relationships": [],
  9 |     "license": "UK Open Government Licence (OGL)",
 10 |     "metadata_modified": "2014-07-10T13:01:55.066211",
 11 |     "author": null,
 12 |     "author_email": null,
 13 |     "state": "active",
 14 |     "version": null,
 15 |     "creator_user_id": "61e179a9-a9e5-4286-831d-42f688a5e22b",
 16 |     "type": "dataset",
 17 |     "resources": [
 18 |         {
 19 |             "resource_group_id": "a8a50585-fd1f-4871-b454-1bd87f31c62a",
 20 |             "cache_last_updated": null,
 21 |             "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
 22 |             "webstore_last_updated": null,
 23 |             "id": "3fe95f4b-90b7-4fba-ad8c-258e4ed078f2",
 24 |             "size": null,
 25 |             "last_modified": null,
 26 |             "hash": "",
 27 |             "description": "Frozen animals",
 28 |             "format": "HTML",
 29 |             "mimetype_inner": null,
 30 |             "url_type": null,
 31 |             "mimetype": null,
 32 |             "cache_url": null,
 33 |             "name": null,
 34 |             "created": "2014-07-10T14:01:55.091967",
 35 |             "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity",
 36 |             "webstore_url": null,
 37 |             "position": 0,
 38 |             "resource_type": "file"
 39 |         },
 40 |         {
 41 |             "resource_group_id": "a8a50585-fd1f-4871-b454-1bd87f31c62a",
 42 |             "cache_last_updated": null,
 43 |             "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
 44 |             "webstore_last_updated": null,
 45 |             "id": "03635365-29a2-4752-9e50-d68ae50192c2",
 46 |             "size": null,
 47 |             "last_modified": null,
 48 |             "hash": "",
 49 |             "description": "Frozen animals",
 50 |             "format": "CSV",
 51 |             "mimetype_inner": null,
 52 |             "url_type": null,
 53 |             "mimetype": null,
 54 |             "cache_url": null,
 55 |             "name": null,
 56 |             "created": "2014-07-10T14:01:55.091988",
 57 |             "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity#",
 58 |             "webstore_url": null,
 59 |             "position": 1,
 60 |             "resource_type": "file"
 61 |         }
 62 |     ],
 63 |     "num_resources": 2,
 64 |     "tags": [],
 65 |     "groups": [],
 66 |     "license_id": "uk-ogl",
 67 |     "num_tags": 0,
 68 |     "organization": {
 69 |         "description": "",
 70 |         "title": "Peterborough City Council",
 71 |         "created": "2014-01-31T16:33:33.959076",
 72 |         "approval_status": "pending",
 73 |         "revision_timestamp": "2014-01-31T16:33:33.911635",
 74 |         "is_organization": true,
 75 |         "state": "active",
 76 |         "image_url": "",
 77 |         "revision_id": "bd38de89-3194-4c84-b758-a8cd78242549",
 78 |         "type": "organization",
 79 |         "id": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
 80 |         "name": "peterborough-city-council"
 81 |     },
 82 |     "name": "peterborough-city-council-frozen-animals",
 83 |     "isopen": true,
 84 |     "notes_rendered": "<p>Animals frozen, including type and quantity\n</p>",
 85 |     "url": null,
 86 |     "ckan_url": "https://data.gov.uk/dataset/peterborough-city-council-frozen-animals",
 87 |     "notes": "Animals frozen, including type and quantity",
 88 |     "owner_org": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
 89 |     "ratings_average": null,
 90 |     "extras": {
 91 |         "geographic_coverage": "100000: England",
 92 |         "contact-name": "",
 93 |         "temporal_coverage-from": "2012-01-01",
 94 |         "contact-email": "",
 95 |         "foi-web": "",
 96 |         "foi-name": "",
 97 |         "foi-email": "",
 98 |         "contact-phone": "",
 99 |         "temporal_coverage-to": "2014-01-01",
100 |         "update_frequency": "",
101 |         "odi-certificate": "{\"status\": \"final\", \"source\": \"Automatically awarded by ODI\", \"certification_type\": \"automatically awarded\", \"level\": \"raw\", \"title\": \"Peterborough City Council - Frozen animals\", \"created_at\": \"2014-10-29T16:28:04Z\", \"jurisdiction\": \"GB\", \"certificate_url\": \"https://certificates.theodi.org/datasets/10677/certificates/23119\", \"badge_url\": \"https://certificates.theodi.org/datasets/10677/certificates/23119/badge.png\", \"cert_title\": \"Basic Level Certificate\"}",
102 |         "foi-phone": "",
103 |         "unpublished": "false",
104 |         "theme-primary": "Environment"
105 |     },
106 |     "license_url": "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/",
107 |     "ratings_count": 0,
108 |     "title": "Peterborough City Council - Frozen animals",
109 |     "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf"
110 | }


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-dataset-pollinator.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "license_title": null,
  3 |     "maintainer": null,
  4 |     "private": false,
  5 |     "maintainer_email": null,
  6 |     "id": "10d394fd-88b9-489f-9552-b7b567f927e2",
  7 |     "metadata_created": "2014-08-11T08:29:37.215826",
  8 |     "relationships": [],
  9 |     "license": null,
 10 |     "metadata_modified": "2015-08-17T15:29:04.733151",
 11 |     "author": null,
 12 |     "author_email": null,
 13 |     "state": "active",
 14 |     "version": null,
 15 |     "creator_user_id": "60e687bf-a6d8-43e2-a50e-efab84b27952",
 16 |     "type": "dataset",
 17 |     "resources": [{
 18 |         "resource_group_id": "ab6aceb1-3ca9-4f62-99eb-54200a44d932",
 19 |         "cache_last_updated": null,
 20 |         "package_id": "10d394fd-88b9-489f-9552-b7b567f927e2",
 21 |         "webstore_last_updated": null,
 22 |         "id": "d277b163-1a8b-49e8-bc8e-c1bf05e487f2",
 23 |         "size": null,
 24 |         "last_modified": null,
 25 |         "hash": "",
 26 |         "description": "Supporting information available to assist in re-use of this dataset",
 27 |         "format": "",
 28 |         "mimetype_inner": null,
 29 |         "url_type": null,
 30 |         "resource_locator_protocol": "",
 31 |         "mimetype": null,
 32 |         "cache_url": null,
 33 |         "name": "Supporting Information",
 34 |         "created": "2015-08-17T16:29:04.843110",
 35 |         "url": "http://eidc.ceh.ac.uk/metadata/d7b25308-3ec7-4cff-8eed-fe20b815f964/zip_export",
 36 |         "webstore_url": null,
 37 |         "resource_locator_function": "information",
 38 |         "position": 0,
 39 |         "resource_type": null
 40 |     }, {
 41 |         "resource_group_id": "ab6aceb1-3ca9-4f62-99eb-54200a44d932",
 42 |         "cache_last_updated": null,
 43 |         "package_id": "10d394fd-88b9-489f-9552-b7b567f927e2",
 44 |         "webstore_last_updated": null,
 45 |         "id": "5281bf83-e117-4227-a8c6-c9fc2a8fda10",
 46 |         "size": null,
 47 |         "last_modified": null,
 48 |         "hash": "",
 49 |         "description": "Order a copy of the dataset",
 50 |         "format": "",
 51 |         "mimetype_inner": null,
 52 |         "url_type": null,
 53 |         "resource_locator_protocol": "",
 54 |         "mimetype": null,
 55 |         "cache_url": null,
 56 |         "name": "Online ordering",
 57 |         "created": "2015-08-17T16:29:04.843131",
 58 |         "url": "https://catalogue.ceh.ac.uk/download?fileIdentifier=d7b25308-3ec7-4cff-8eed-fe20b815f964",
 59 |         "webstore_url": null,
 60 |         "resource_locator_function": "order",
 61 |         "position": 1,
 62 |         "resource_type": null
 63 |     }],
 64 |     "num_resources": 2,
 65 |     "tags": ["bibionidae", "bumblebees", "cambridgeshire", "environmental-monitoring-facilities", "hoverflies", "lincolnshire", "oilseed-rape", "oxfordshire", "pollinators", "solitary-bees", "suffolk", "sustainable-land-management"],
 66 |     "groups": [],
 67 |     "license_id": null,
 68 |     "num_tags": 12,
 69 |     "organization": {
 70 |         "description": "The <a href=\"http://www.ceh.ac.uk/data/\" target=\"_blank\">Centre for Ecology &amp; Hydrology</a> is the UK's Centre of  Excellence for integrated research in terrestrial and freshwater  ecosystems and their interaction with the atmosphere. As part of the <a href=\"http://www.nerc.ac.uk/\" target=\"_blank\">Natural Environment Research Council</a>,  we provide National Capability based on innovative, independent and  interdisciplinary science and long-term environmental monitoring,  forming an integral part of NERC's vision and strategy.</p> <p>Working in partnership with the research community,  policy-makers, industry and society, we deliver world-class solutions to  the most complex environmental challenges facing humankind. CEH is a member of <a href=\"http://www.peer.eu/\">PEER</a> (the Partnership for European Environmental Research).",
 71 |         "title": "Centre for Ecology & Hydrology",
 72 |         "created": "2012-06-27T14:54:40.370760",
 73 |         "approval_status": "pending",
 74 |         "revision_timestamp": "2015-08-03T08:15:17.564823",
 75 |         "is_organization": true,
 76 |         "state": "active",
 77 |         "image_url": "",
 78 |         "revision_id": "b86a7a82-2de4-464a-8b32-cbc7cf738d4b",
 79 |         "type": "organization",
 80 |         "id": "866f4088-ae4f-43b8-ba8c-6d3141a327f2",
 81 |         "name": "centre-for-ecology-hydrology"
 82 |     },
 83 |     "name": "pollinator-visitation-data-on-oilseed-rape-varieties",
 84 |     "isopen": true,
 85 |     "notes_rendered": "<p>This dataset contains counts of pollinators visiting different varieties of oilseed rape (OSR). Data were collected from four trial sites in the UK in May 2012. The trial sites comprised of 20 varieties (plots) replicated in three blocks on each farm but only 2 of the blocks at each site were used for pollinator observations. Pollinator observations were also only made where there were greater than 30 percent of OSR plants in flower in the plot and only when weather conditions were within standardised limits. For each plot per site a six minute observation period was made during which the number of pollinators within the following taxon groups were counted: bumblebees to the species level, solitary bees identified to general body forms (Lasiglossum to genus level; Osmia separated to bicolour and rufa; Andrena separated to body forms typical of dorsata, carantonica, nigroaenea, haemorrhoa, fulva, flavipies, nitida, cineraria, bicolour and minuta), large hoverflies (&gt; 12 mm), small hoverflies (&lt; 11 mm), and Bibionidae. Each variety was observed for two separate six minute periods to reduce the impacts of minor fluctuations in weather that may reduce pollinator observations within single six minute periods. The dataset was collected as part of a project which aimed to identify key pollinators for OSR and identify if there are feeding preferences for individual varieties.\n</p>",
 86 |     "url": null,
 87 |     "ckan_url": "http://data.gov.uk/dataset/pollinator-visitation-data-on-oilseed-rape-varieties",
 88 |     "notes": "This dataset contains counts of pollinators visiting different varieties of oilseed rape (OSR). Data were collected from four trial sites in the UK in May 2012. The trial sites comprised of 20 varieties (plots) replicated in three blocks on each farm but only 2 of the blocks at each site were used for pollinator observations. Pollinator observations were also only made where there were greater than 30 percent of OSR plants in flower in the plot and only when weather conditions were within standardised limits. For each plot per site a six minute observation period was made during which the number of pollinators within the following taxon groups were counted: bumblebees to the species level, solitary bees identified to general body forms (Lasiglossum to genus level; Osmia separated to bicolour and rufa; Andrena separated to body forms typical of dorsata, carantonica, nigroaenea, haemorrhoa, fulva, flavipies, nitida, cineraria, bicolour and minuta), large hoverflies (> 12 mm), small hoverflies (< 11 mm), and Bibionidae. Each variety was observed for two separate six minute periods to reduce the impacts of minor fluctuations in weather that may reduce pollinator observations within single six minute periods. The dataset was collected as part of a project which aimed to identify key pollinators for OSR and identify if there are feeding preferences for individual varieties.",
 89 |     "owner_org": "866f4088-ae4f-43b8-ba8c-6d3141a327f2",
 90 |     "ratings_average": null,
 91 |     "extras": {
 92 |         "licence_url_title": "This resource is made available under the terms of the Open Government Licence",
 93 |         "bbox-east-long": "1.5329",
 94 |         "temporal_coverage-from": "[\"2012-05-01\"]",
 95 |         "resource-type": "dataset",
 96 |         "bbox-north-lat": "53.206",
 97 |         "harvest_source_reference": "https://catalogue.ceh.ac.uk/documents/gemini/waf/d7b25308-3ec7-4cff-8eed-fe20b815f964.xml",
 98 |         "coupled-resource": "[]",
 99 |         "import_source": "harvest",
100 |         "guid": "d7b25308-3ec7-4cff-8eed-fe20b815f964",
101 |         "bbox-south-lat": "51.616",
102 |         "temporal_coverage-to": "[\"2012-05-31\"]",
103 |         "spatial-reference-system": "27700",
104 |         "provider": "Syngenta",
105 |         "access_constraints": "[\"This resource is made available under the terms of the Open Government Licence [http://eidc.ceh.ac.uk/administration-folder/tools/ceh-standard-licence-texts/ceh-open-government-licence/plain]\", \"This resource is made available under the terms of the Open Government Licence\"]",
106 |         "licence_url": "http://eidc.ceh.ac.uk/administration-folder/tools/ceh-standard-licence-texts/ceh-open-government-licence/plain",
107 |         "contact-email": "enquiries@ceh.ac.uk",
108 |         "bbox-west-long": "-1.095",
109 |         "metadata-date": "2015-08-17",
110 |         "dataset-reference-date": "[{\"type\": \"publication\", \"value\": \"2014-05-07\"}]",
111 |         "frequency-of-update": "notPlanned",
112 |         "licence": "[\"If you reuse this data, you must cite: Woodcock, B.A., Edwards, M., Meek, W.R., Nuttall, P., Falk, S., Pywell, R.F. (2014). Pollinator visitation data on oilseed rape varieties. NERC Environmental Information Data Centre 10.5285/d7b25308-3ec7-4cff-8eed-fe20b815f964\"]",
113 |         "harvest_object_id": "d3d27a48-e4d6-426f-bf9e-fc32210b1405",
114 |         "responsible-party": "EIDC (custodian); Syngenta (owner); Mike Edwards Ecological Services Ltd (author); Pywell Section (resourceProvider); Centre for Ecology & Hydrology (pointOfContact, author); NERC Environmental Information Data Centre (publisher)",
115 |         "UKLP": "True",
116 |         "spatial-data-service-type": "",
117 |         "metadata-language": "eng",
118 |         "theme-secondary": "[]",
119 |         "theme-primary": "Environment"
120 |     },
121 |     "ratings_count": 0,
122 |     "title": "Pollinator visitation data on oilseed rape varieties",
123 |     "revision_id": "fcd89efb-a3da-42ee-b54d-4739bfc88864"
124 | }
125 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-dataset-toilets.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "license_title": "Creative Commons Attribution 3.0 Australia",
 3 |     "maintainer": "Department of Health and Ageing",
 4 |     "private": false,
 5 |     "maintainer_email": null,
 6 |     "id": "553b3049-2b8b-46a2-95e6-640d7986a8c1",
 7 |     "metadata_created": "2013-05-12T08:42:38.802401",
 8 |     "relationships": [],
 9 |     "license": "Creative Commons Attribution 3.0 Australia",
10 |     "metadata_modified": "2014-03-02T21:47:57.189910",
11 |     "author": "Department of Health and Ageing",
12 |     "author_email": null,
13 |     "download_url": "http://data.gov.au/storage/f/2013-11-14T05%3A41%3A12.200Z/toiletmapexport-131112-042111.zip",
14 |     "state": "active",
15 |     "version": null,
16 |     "creator_user_id": null,
17 |     "type": "dataset",
18 |     "resources": [{
19 |         "resource_group_id": "36677fd8-7419-4f3c-84aa-76d65524f02f",
20 |         "cache_last_updated": null,
21 |         "package_id": "553b3049-2b8b-46a2-95e6-640d7986a8c1",
22 |         "webstore_last_updated": null,
23 |         "id": "fc4d55de-55a6-483e-924a-093639d95aed",
24 |         "size": "1112225",
25 |         "last_modified": "2013-12-10T00:35:29.489574",
26 |         "hash": "7f73183d310a7ce221cf883162f6174cf5f32e8a",
27 |         "description": "Toilet Map",
28 |         "format": "ZIP",
29 |         "mimetype_inner": null,
30 |         "url_type": null,
31 |         "mimetype": "application/zip",
32 |         "cache_url": null,
33 |         "name": "Toiletmap.zip",
34 |         "created": "2013-05-12T08:42:48.397216",
35 |         "url": "http://data.gov.au/storage/f/2013-11-14T05%3A41%3A12.200Z/toiletmapexport-131112-042111.zip",
36 |         "webstore_url": null,
37 |         "position": 0,
38 |         "resource_type": "file.upload"
39 |     }],
40 |     "num_resources": 14,
41 |     "tags": ["health", "toilet"],
42 |     "groups": ["community", "health", "tourism"],
43 |     "license_id": "cc-by",
44 |     "num_tags": 2,
45 |     "organization": {
46 |         "description": "Department of Health and Ageing",
47 |         "title": "Department of Health and Ageing",
48 |         "created": "2013-05-12T08:42:37.139796",
49 |         "approval_status": "approved",
50 |         "revision_timestamp": "2013-05-12T08:42:37.101157",
51 |         "is_organization": true,
52 |         "state": "active",
53 |         "image_url": "",
54 |         "revision_id": "61c4cd6a-2ca2-4bec-919d-33ddcc4223dc",
55 |         "type": "organization",
56 |         "id": "2df7090e-2ebb-416e-8994-6de43d820d5c",
57 |         "name": "departmentofhealthandageing"
58 |     },
59 |     "name": "national-public-toilet-map",
60 |     "isopen": true,
61 |     "notes_rendered": "Here are some notes",
62 |     "url": "http://www.toiletmap.gov.au/default.aspx",
63 |     "ckan_url": "http://data.gov.au/dataset/national-public-toilet-map",
64 |     "notes": "Here are some notes",
65 |     "owner_org": "2df7090e-2ebb-416e-8994-6de43d820d5c",
66 |     "ratings_average": null,
67 |     "extras": {
68 |         "spatial_coverage": "Australia",
69 |         "jurisdiction": "Commonwealth of Australia",
70 |         "temporal_coverage": "Not specified",
71 |         "agency_program": "The National Public Toilet Map",
72 |         "granularity": "(a)\ttoilet name;(b)\taddress;(c)\tlatitude and longitude;(d)\tgeneral toilet features;(e)\tlocation;(f)\taccessibility;(g)\topening hours;(h)\tadditional features (e.g. showers, baby change facilities etc);(i)\tnotes (e.g. coin operated showers etc).",
73 |         "data_state": "active",
74 |         "contact_point": "project@toiletmap.gov.au",
75 |         "update_freq": "Not specified"
76 |     },
77 |     "license_url": "http://creativecommons.org/licenses/by/3.0/au/",
78 |     "ratings_count": 0,
79 |     "title": "National Public Toilet Map",
80 |     "revision_id": "38b6dfd7-c3a9-4d67-b28f-89cccc799fed"
81 | }
82 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-frozen-animals.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "license_title": "UK Open Government Licence (OGL)",
  3 |     "maintainer": null,
  4 |     "private": false,
  5 |     "maintainer_email": null,
  6 |     "id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
  7 |     "metadata_created": "2014-07-10T13:01:55.052900",
  8 |     "relationships": [],
  9 |     "license": "UK Open Government Licence (OGL)",
 10 |     "metadata_modified": "2014-07-10T13:01:55.066211",
 11 |     "author": null,
 12 |     "author_email": null,
 13 |     "download_url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity",
 14 |     "state": "active",
 15 |     "version": null,
 16 |     "creator_user_id": "61e179a9-a9e5-4286-831d-42f688a5e22b",
 17 |     "type": "dataset",
 18 |     "resources": [
 19 |         {
 20 |             "resource_group_id": "a8a50585-fd1f-4871-b454-1bd87f31c62a",
 21 |             "cache_last_updated": null,
 22 |             "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
 23 |             "webstore_last_updated": null,
 24 |             "id": "3fe95f4b-90b7-4fba-ad8c-258e4ed078f2",
 25 |             "size": null,
 26 |             "last_modified": null,
 27 |             "hash": "",
 28 |             "description": "Frozen animals",
 29 |             "format": "HTML",
 30 |             "mimetype_inner": null,
 31 |             "url_type": null,
 32 |             "mimetype": null,
 33 |             "cache_url": null,
 34 |             "name": null,
 35 |             "created": "2014-07-10T14:01:55.091967",
 36 |             "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity",
 37 |             "webstore_url": null,
 38 |             "position": 0,
 39 |             "resource_type": "file"
 40 |         },
 41 |         {
 42 |             "resource_group_id": "a8a50585-fd1f-4871-b454-1bd87f31c62a",
 43 |             "cache_last_updated": null,
 44 |             "package_id": "20695709-1cfb-414e-b90a-9b358aa0ca07",
 45 |             "webstore_last_updated": null,
 46 |             "id": "03635365-29a2-4752-9e50-d68ae50192c2",
 47 |             "size": null,
 48 |             "last_modified": null,
 49 |             "hash": "",
 50 |             "description": "Frozen animals",
 51 |             "format": "CSV",
 52 |             "mimetype_inner": null,
 53 |             "url_type": null,
 54 |             "mimetype": null,
 55 |             "cache_url": null,
 56 |             "name": null,
 57 |             "created": "2014-07-10T14:01:55.091988",
 58 |             "url": "http://data.peterborough.gov.uk/View/environmental-protection-animal-welfare/animals-frozen-including-type-and-quantity#",
 59 |             "webstore_url": null,
 60 |             "position": 1,
 61 |             "resource_type": "file"
 62 |         }
 63 |     ],
 64 |     "num_resources": 2,
 65 |     "tags": [],
 66 |     "groups": [],
 67 |     "license_id": "uk-ogl",
 68 |     "num_tags": 0,
 69 |     "organization": {
 70 |         "description": "",
 71 |         "title": "Peterborough City Council",
 72 |         "created": "2014-01-31T16:33:33.959076",
 73 |         "approval_status": "pending",
 74 |         "revision_timestamp": "2014-01-31T16:33:33.911635",
 75 |         "is_organization": true,
 76 |         "state": "active",
 77 |         "image_url": "",
 78 |         "revision_id": "bd38de89-3194-4c84-b758-a8cd78242549",
 79 |         "type": "organization",
 80 |         "id": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
 81 |         "name": "peterborough-city-council"
 82 |     },
 83 |     "name": "peterborough-city-council-frozen-animals",
 84 |     "isopen": true,
 85 |     "notes_rendered": "<p>Animals frozen, including type and quantity\n</p>",
 86 |     "url": null,
 87 |     "ckan_url": "https://data.gov.uk/dataset/peterborough-city-council-frozen-animals",
 88 |     "notes": "Animals frozen, including type and quantity",
 89 |     "owner_org": "e70862ec-8167-48e6-a27c-a0e9db1ebc87",
 90 |     "ratings_average": null,
 91 |     "extras": {
 92 |         "geographic_coverage": "100000: England",
 93 |         "contact-name": "",
 94 |         "temporal_coverage-from": "2012-01-01",
 95 |         "contact-email": "",
 96 |         "foi-web": "",
 97 |         "foi-name": "",
 98 |         "foi-email": "",
 99 |         "contact-phone": "",
100 |         "temporal_coverage-to": "2014-01-01",
101 |         "update_frequency": "",
102 |         "odi-certificate": "{\"status\": \"final\", \"source\": \"Automatically awarded by ODI\", \"certification_type\": \"automatically awarded\", \"level\": \"raw\", \"title\": \"Peterborough City Council - Frozen animals\", \"created_at\": \"2014-10-29T16:28:04Z\", \"jurisdiction\": \"GB\", \"certificate_url\": \"https://certificates.theodi.org/datasets/10677/certificates/23119\", \"badge_url\": \"https://certificates.theodi.org/datasets/10677/certificates/23119/badge.png\", \"cert_title\": \"Basic Level Certificate\"}",
103 |         "foi-phone": "",
104 |         "unpublished": "false",
105 |         "theme-primary": "Environment"
106 |     },
107 |     "license_url": "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/",
108 |     "ratings_count": 0,
109 |     "title": "Peterborough City Council - Frozen animals",
110 |     "revision_id": "682a3b2f-966b-4307-972d-4887f4465ebf"
111 | }


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-organization-defence.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "users": [{
 3 |         "openid": null,
 4 |         "about": null,
 5 |         "capacity": "editor",
 6 |         "name": "user_d9495",
 7 |         "created": "2012-06-28T20:06:35.561800",
 8 |         "sysadmin": false,
 9 |         "activity_streams_email_notifications": false,
10 |         "email_hash": "6d577e5a083d77d2e78834e37eeff6d1",
11 |         "number_of_edits": 199,
12 |         "number_administered_packages": 4,
13 |         "display_name": "dio",
14 |         "fullname": "dio",
15 |         "id": "91d4079f-7617-47bd-b0bc-5cfa76fa6740"
16 |     }, {
17 |         "openid": null,
18 |         "about": null,
19 |         "capacity": "editor",
20 |         "name": "user_d9508",
21 |         "created": "2012-06-28T20:06:35.610197",
22 |         "sysadmin": false,
23 |         "activity_streams_email_notifications": false,
24 |         "email_hash": "9b396164e630928c8ac7fbff0f534493",
25 |         "number_of_edits": 0,
26 |         "number_administered_packages": 0,
27 |         "display_name": "DOI 1",
28 |         "fullname": "DOI 1",
29 |         "id": "f7145f08-0d85-4c9b-9b6c-24a7774739a9"
30 |     }],
31 |     "display_name": "Defence Infrastructure Organisation",
32 |     "description": "",
33 |     "title": "Defence Infrastructure Organisation",
34 |     "package_count": 21,
35 |     "created": "2012-06-27T14:54:58.477573",
36 |     "approval_status": "approved",
37 |     "is_organization": true,
38 |     "state": "active",
39 |     "extras": {
40 |         "contact-name": "",
41 |         "contact-email": "foo@example.com",
42 |         "website-url": "http://www.example.com",
43 |         "foi-web": "http://www.whatdotheyknow.com/body/dio",
44 |         "foi-name": "",
45 |         "abbreviation": "",
46 |         "foi-email": "",
47 |         "website-name": "",
48 |         "contact-phone": "",
49 |         "foi-phone": ""
50 |     },
51 |     "image_url": "",
52 |     "groups": [{
53 |         "capacity": "public",
54 |         "description": "We protect the security, independence and interests of our country at home and abroad. We work with our allies and partners whenever possible. Our aim is to ensure that the armed forces have the training, equipment and support necessary for their work, and that we keep within budget.\r\n\r\nMOD is a ministerial department, supported by 28 agencies and public bodies.\r\n\r\nhttps://www.gov.uk/government/organisations/ministry-of-defence\r\n\r\n",
55 |         "title": "Ministry of Defence",
56 |         "approval_status": "pending",
57 |         "is_organization": true,
58 |         "state": "active",
59 |         "image_url": "",
60 |         "display_name": "Ministry of Defence",
61 |         "revision_id": "b270ec2f-1112-4c1a-87fe-cd5a67f972e0",
62 |         "packages": 311,
63 |         "type": "organization",
64 |         "id": "5db6e904-ea2f-42a7-93bd-a61da059246f",
65 |         "name": "ministry-of-defence"
66 |     }],
67 |     "revision_id": "a1f4375a-4afe-4dad-88cd-03f2bd6adaaf",
68 |     "packages": ["business-plan-command-plan", "byelaws", "byelaws-ministry-of-defence-site-review-programme", "cost-of-ownership", "deed-information", "deer-management", "defence-infrastructure-organisation-disposals-database-house-of-commons-report", "dio-operations-accommodation-customer-satisfaction-key-performance-indicators", "estates-information", "headcount-data", "ict-expenditure", "land-quality-assessment", "licenses", "mod_estate_information", "revoked-byelaws-mod-estate", "safeguarding", "sustainability", "training-on-private-land", "tregantle-firing-times", "utilities-management", "value-of-the-estate"],
69 |     "type": "organization",
70 |     "id": "a3969e37-3ac3-42fe-8317-c8575a9f5317",
71 |     "tags": [],
72 |     "name": "defence-infrastructure-organisation"
73 | }
74 | 


--------------------------------------------------------------------------------
/spec/fixtures/ckan/rest-organization-health.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "users": [{
 3 |         "openid": null,
 4 |         "about": "",
 5 |         "capacity": "admin",
 6 |         "name": "custodian",
 7 |         "created": "2013-04-17T08:00:59.243065",
 8 |         "sysadmin": true,
 9 |         "activity_streams_email_notifications": false,
10 |         "email_hash": "99e31a948195fdebcba0afc9313801e2",
11 |         "number_of_edits": 2925,
12 |         "number_administered_packages": 1118,
13 |         "display_name": "data.gov.au Custodian",
14 |         "fullname": "data.gov.au Custodian",
15 |         "id": "91af4eef-efb1-4e4e-87f2-171d81977ae0"
16 |     }],
17 |     "display_name": "Department of Health and Ageing",
18 |     "description": "Department of Health and Ageing",
19 |     "title": "Department of Health and Ageing",
20 |     "package_count": 1,
21 |     "created": "2013-05-12T08:42:37.139796",
22 |     "approval_status": "approved",
23 |     "is_organization": true,
24 |     "state": "active",
25 |     "extras": {
26 |         "website-url": "http://www.example.com",
27 |         "contact-email": "foo@example.com"
28 |     },
29 |     "image_url": "",
30 |     "groups": [],
31 |     "revision_id": "61c4cd6a-2ca2-4bec-919d-33ddcc4223dc",
32 |     "packages": ["national-public-toilet-map"],
33 |     "type": "organization",
34 |     "id": "2df7090e-2ebb-416e-8994-6de43d820d5c",
35 |     "tags": [],
36 |     "name": "departmentofhealthandageing"
37 | }
38 | 


--------------------------------------------------------------------------------
/spec/fixtures/datapackage.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "identifier",
 3 |     "title": "Test Dataset",
 4 |     "datapackage_version": "1.0.0",
 5 |     "description": "This is a test dataset",
 6 |     "licenses": [{
 7 |         "id": "odc-pddl",
 8 |         "url": "http://opendatacommons.org/licenses/pddl/"
 9 |     }],
10 |     "sources": [{
11 |         "name": "Somewhere Else",
12 |         "web": "http://data.example.org/123"
13 |     }],
14 |     "keywords": ["data", "finances", "spending"],
15 |     "last_modified": "2010-11-19T00:06:58.482877"
16 | }


--------------------------------------------------------------------------------
/spec/fixtures/dcat-odrs-rdfa.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html prefix="dct: http://purl.org/dc/terms/
 3 |               rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
 4 |               dcat: http://www.w3.org/ns/dcat#
 5 | 			  odrs: http://schema.theodi.org/odrs#">
 6 |     <head>
 7 |         <title>ODRS in RDFa</title>
 8 | 	</head>
 9 | 	<body>
10 | 		<div typeof="dcat:Dataset" resource="http://gov.example.org/dataset/finances">
11 |             <h1 property="dct:title">Example ODRS Dataset</h1>	
12 |             
13 | 			  <div property="dct:rights" resource="#rights">
14 | 				<div resource="#rights">
15 | 					<h2 property="rdfs:label">Rights Statement</h2>
16 | 					<ul>
17 | 						<li>Data Licence: <a href="http://reference.data.gov.uk/id/open-government-licence" 
18 |                                              property="odrs:dataLicense">UK Open Government Licence (OGL)</a>
19 |                         </li>
20 | 					  	<li>Content Licence: <a href="http://reference.data.gov.uk/id/open-government-licence" 
21 |                                                 property="odrs:contentLicense">UK Open Government Licence (OGL)</a>
22 |                         </li>
23 | 					</ul>
24 | 					<p>
25 |                     When re-using this data please preserve the following copyright notice: 
26 |                     "<span property="odrs:copyrightNotice">Contains public sector information licensed under the Open Government Licence v1.0</span>".
27 |                     </p>
28 |                     
29 |                     <p>
30 |                     Copyright <span property="odrs:copyrightYear odrs:databaseRightYear">2013</span>. 
31 |                         <a href="http://example.org" property="odrs:copyrightHolder odrs:databaseRightHolder">Example Org</a>.
32 |                     </p>
33 |                     
34 |                     <p>    
35 |                     <a href="http://example.org/statement" property="odrs:copyrightStatement odrs:databaseRightStatement">Detailed copyright statement</a>.
36 |                     </p>
37 |                                      
38 | 			        <p>
39 |                     If you would like to attribute your use of this dataset, please use a link similar to the following: 
40 |                     <a href="http://gov.example.org/dataset/finances" 
41 |                        property="odrs:attributionURL">
42 |                         <span property="odrs:attributionText">Example Department</span>
43 |                     </a>.
44 |                     </p>
45 |                 </div>
46 | 					
47 |             </div>
48 | 
49 |             <div property="dct:license" 
50 |                  resource="http://reference.data.gov.uk/id/open-government-licence">
51 | 				<a href="http://reference.data.gov.uk/id/open-government-licence">
52 |                 <span property="dct:title">UK Open Government Licence (OGL)</span>
53 |                 </a>
54 |             </div>
55 | 
56 |             <!-- additional markup with further description of dataset -->
57 | 
58 |     </body>
59 | </html>
60 | 


--------------------------------------------------------------------------------
/spec/fixtures/odrs-datapackage.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "identifier",
 3 |     "title": "ODRS Example",
 4 |     "datapackage_version": "1.0.0",
 5 |     "rights": {
 6 |         "contentLicense": "http://reference.data.gov.uk/id/open-government-licence",
 7 |         "dataLicense": "http://reference.data.gov.uk/id/open-government-licence",
 8 |         "copyrightNotice": "© Crown copyright 2013",
 9 |         "attributionText": "Example Department",
10 |         "attributionURL": "http://gov.example.org/dataset/example"
11 |     }        
12 | }


--------------------------------------------------------------------------------
/spec/guessable_lookup_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | require "data_kitten/utils/guessable_lookup"
  3 | 
  4 | describe GuessableLookup do
  5 |   context "with a hash that has an exact key" do
  6 |     before do
  7 |       @hash = {
  8 |         "some_key" => "some_key",
  9 |         "another_key" => "another_key"
 10 |       }.extend(GuessableLookup)
 11 |     end
 12 | 
 13 |     it "returns exact key" do
 14 |       expect(@hash.lookup("some_key")).to eq("some_key")
 15 |     end
 16 |   end
 17 | 
 18 |   context "with a hash that has a similar key" do
 19 |     before do
 20 |       @hash = {
 21 |         "someKey" => "someKey"
 22 |       }.extend(GuessableLookup)
 23 |     end
 24 | 
 25 |     it "returns similar key" do
 26 |       expect(@hash.lookup("some_key")).to eq("someKey")
 27 |       expect(@hash.lookup("some-key")).to eq("someKey")
 28 |       expect(@hash.lookup("somekey")).to eq("someKey")
 29 |     end
 30 |   end
 31 | 
 32 |   context "with a hash that has an exact and similar key" do
 33 |     before do
 34 |       @hash = {
 35 |         "someKey" => "someKey",
 36 |         "some-key" => "some-key",
 37 |         "some_key" => "some_key"
 38 |       }.extend(GuessableLookup)
 39 |     end
 40 | 
 41 |     it "returns exact key" do
 42 |       expect(@hash.lookup("someKey")).to eq("someKey")
 43 |       expect(@hash.lookup("some_key")).to eq("some_key")
 44 |       expect(@hash.lookup("some-key")).to eq("some-key")
 45 |     end
 46 |   end
 47 | 
 48 |   context "with a hash that doesn't have an exact or similar key" do
 49 |     before do
 50 |       @hash = {
 51 |         "someKeyy" => "someKeyy",
 52 |         "ssomeKey" => "ssomeKey",
 53 |         "some_keyy" => "some_keyy"
 54 |       }.extend(GuessableLookup)
 55 |     end
 56 | 
 57 |     it "returns nil" do
 58 |       expect(@hash.lookup("some_key")).to be_nil
 59 |       expect(@hash.lookup("some-key")).to be_nil
 60 |       expect(@hash.lookup("someKey")).to be_nil
 61 |       expect(@hash.lookup("somekey")).to be_nil
 62 |     end
 63 |   end
 64 | 
 65 |   context "with a nested hash" do
 66 |     before do
 67 |       @hash = {
 68 |         "some_key" => {
 69 |           "anotherKey" => true
 70 |         }
 71 |       }.extend(GuessableLookup)
 72 |     end
 73 | 
 74 |     it "returns nested key" do
 75 |       expect(@hash.lookup("some_key", "anotherKey")).to be true
 76 |       expect(@hash.lookup("some-key", "another_key")).to be true
 77 |     end
 78 | 
 79 |     it "returns nil for missing key" do
 80 |       expect(@hash.lookup("some-key", "anothey_key", "third_key")).to be_nil
 81 |       expect(@hash.lookup("a", "b", "c")).to be_nil
 82 |       expect(@hash.lookup("a", 0, "c")).to be_nil
 83 |     end
 84 |   end
 85 | 
 86 |   context "with a hash containing array" do
 87 |     before do
 88 |       @hash = {
 89 |         "some_key" => [0, 1, 2]
 90 |       }.extend(GuessableLookup)
 91 |     end
 92 | 
 93 |     it "returns array value" do
 94 |       expect(@hash.lookup("some_key", 0)).to eq(0)
 95 |     end
 96 | 
 97 |     it "returns nil for missing key" do
 98 |       expect(@hash.lookup("some-key", 3)).to be_nil
 99 |       expect(@hash.lookup("some-key", 3, "another_key")).to be_nil
100 |       expect(@hash.lookup("some-key", "another_key")).to be_nil
101 |     end
102 |   end
103 | 
104 |   context "with a hash containing an array containig a hash" do
105 |     before do
106 |       @hash = {
107 |         "some_key" => [{
108 |           "another_key" => true
109 |         }]
110 |       }.extend(GuessableLookup)
111 |     end
112 | 
113 |     it "returns key of hash within array" do
114 |       expect(@hash.lookup("some_key", 0, "another_key")).to be true
115 |     end
116 | 
117 |     it "returns nil for missing key" do
118 |       expect(@hash.lookup("some_key", 3)).to be_nil
119 |       expect(@hash.lookup("some_key", 0, "some_key")).to be_nil
120 |       expect(@hash.lookup("some_key", 0, "another_key", 1)).to be_nil
121 |     end
122 |   end
123 | end
124 | 


--------------------------------------------------------------------------------
/spec/hosts/github_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::Dataset do
 4 |   context "with data on github" do
 5 |     def access_url(protocol)
 6 |       "#{protocol}://github.com/theodi/github-viewer-test-data.git"
 7 |     end
 8 | 
 9 |     %w[https http git].each do |protocol|
10 |       it "correctly identified #{protocol} URLs p" do
11 |         FakeWeb.register_uri(:get, access_url(protocol), body: "")
12 |         dataset = DataKitten::Dataset.new(access_url(protocol))
13 |         expect(dataset.host).to eq(:github)
14 |       end
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/spec/license_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::License do
 4 |   describe "with known license URIs" do
 5 |     known_licenses = {
 6 |       "http://www.opendefinition.org/licenses/cc-by" => "cc-by",
 7 |       "http://www.opendefinition.org/licenses/cc-by/" => "cc-by",
 8 |       "http://www.opendefinition.org/licenses/cc-by-sa" => "cc-by-sa",
 9 |       "http://www.opendefinition.org/licenses/gfdl" => "gfdl",
10 |       "http://www.opendefinition.org/licenses/odc-pddl" => "odc-pddl",
11 |       "http://www.opendefinition.org/licenses/cc-zero" => "cc-zero",
12 |       "http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/" => "ogl-uk",
13 |       "http://reference.data.gov.uk/id/open-government-licence" => "ogl-uk"
14 |     }
15 | 
16 |     it "should supply abbreviation" do
17 |       known_licenses.each do |uri, abbr|
18 |         expect(described_class.new(uri: uri).abbr).to eq(abbr)
19 |       end
20 |     end
21 |   end
22 | 
23 |   describe "with an unknown license URI" do
24 |     it "should not provide an abbreviation" do
25 |       expect(described_class.new(uri: "http://made-up-cc-by-sa.com/cc-by").abbr).to be_nil
26 |     end
27 |   end
28 | 
29 |   describe "with no license URI" do
30 |     it "should not provide an abbreviation" do
31 |       expect(described_class.new({}).abbr).to be_nil
32 |     end
33 |   end
34 | end
35 | 


--------------------------------------------------------------------------------
/spec/origins/linked_data_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::Origins::LinkedData do
 4 |   context "when detecting origin" do
 5 |     it "should ignore errors" do
 6 |       FakeWeb.register_uri(:get, "http://example.org/not-found", status: ["404", "Not Found"])
 7 |       d = DataKitten::Dataset.new("http://example.org/not-found")
 8 |       expect(d.origin).to eql(nil)
 9 |     end
10 | 
11 |     it "should support turtle" do
12 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset", body: "", content_type: "text/turtle")
13 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset")
14 |       expect(d.origin).to eql(:linkeddata)
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/spec/publishing_format/ckan_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | require "ckan_fakeweb"
  3 | 
  4 | describe DataKitten::PublishingFormats::CKAN do
  5 |   before(:each) do
  6 |     FakeWeb.clean_registry
  7 |   end
  8 | 
  9 |   context "With a CKAN 2 endpoint" do
 10 |     before { CKANFakeweb.register_defence_dataset }
 11 | 
 12 |     it "should detect CKAN Datasets" do
 13 |       d = DataKitten::Dataset.new("http://example.org/dataset/defence")
 14 |       expect(d.publishing_format).to eql(:ckan)
 15 |       expect(d.supported?).to eql(true)
 16 |     end
 17 | 
 18 |     it "can have 2 instances in memory at the same time" do
 19 |       CKANFakeweb.register_toilets_dataset
 20 |       d1 = DataKitten::Dataset.new("http://example.org/dataset/defence")
 21 |       d2 = DataKitten::Dataset.new("http://example.org/dataset/toilets")
 22 |       expect(d1.data_title).to eq("Defence Infrastructure Organisation Disposals Database House of Commons Report")
 23 |       expect(d2.data_title).to eq("National Public Toilet Map")
 24 |     end
 25 | 
 26 |     context "when parsing CKAN" do
 27 |       before(:each) do
 28 |         @dataset = DataKitten::Dataset.new("http://example.org/dataset/defence")
 29 |       end
 30 | 
 31 |       it "should get the title" do
 32 |         expect(@dataset.data_title).to eql("Defence Infrastructure Organisation Disposals Database House of Commons Report")
 33 |       end
 34 | 
 35 |       it "should get the description" do
 36 |         expect(@dataset.description).to eql("MoD present and future disposal properties that are in the public domain that is provided for reference in the House of Commons library\r\n")
 37 |       end
 38 | 
 39 |       it "should get the identifier" do
 40 |         expect(@dataset.identifier).to eql("defence-infrastructure-organisation-disposals-database-house-of-commons-report")
 41 |       end
 42 | 
 43 |       it "should get the landing page" do
 44 |         expect(@dataset.landing_page).to eql("http://data.gov.uk/dataset/defence-infrastructure-organisation-disposals-database-house-of-commons-report")
 45 |       end
 46 | 
 47 |       it "should get the licence" do
 48 |         expect(@dataset.licenses.length).to eql(1)
 49 |         licence = @dataset.licenses.first
 50 |         expect(licence.uri).to eql("http://reference.data.gov.uk/id/open-government-licence")
 51 |         expect(licence.name).to eql("UK Open Government Licence (OGL)")
 52 |         expect(licence.id).to eql("uk-ogl")
 53 |       end
 54 | 
 55 |       it "should get the keywords" do
 56 |         expect(@dataset.keywords.length).to eql(6)
 57 |         expect(@dataset.keywords[0]).to eql("Defence")
 58 |         expect(@dataset.keywords[1]).to eql("Government")
 59 |         expect(@dataset.keywords[2]).to eql("Land and Property")
 60 |         expect(@dataset.keywords[3]).to eql("Property")
 61 |         expect(@dataset.keywords[4]).to eql("disposals")
 62 |         expect(@dataset.keywords[5]).to eql("house of commons")
 63 |       end
 64 | 
 65 |       it "should get the publisher" do
 66 |         expect(@dataset.publishers.length).to eql(1)
 67 |         publisher = @dataset.publishers.first
 68 |         expect(publisher.name).to eql("Defence Infrastructure Organisation")
 69 |         expect(publisher.uri).to eql("http://www.example.com")
 70 |         expect(publisher.mbox).to eql("foo@example.com")
 71 |       end
 72 | 
 73 |       it "gets the maintainer" do
 74 |         expect(@dataset.maintainers).to eq([DataKitten::Agent.new(name: "Mx Maintainer", mbox: "mx@maintainer.org")])
 75 |       end
 76 | 
 77 |       it "gets the author as a contributor" do
 78 |         expect(@dataset.contributors).to eq([DataKitten::Agent.new(name: "Mx Author", mbox: "mx@author.org")])
 79 |       end
 80 | 
 81 |       it "should list the distributions" do
 82 |         expect(@dataset.distributions.length).to eql(1)
 83 | 
 84 |         expect(@dataset.distributions.first.description).to eql("Disposals Database House of Commons Report January 2013")
 85 |         expect(@dataset.distributions.first.issued).to eql(Date.parse("2012-11-23T12:34:54.297808"))
 86 |         expect(@dataset.distributions.first.modified).to eql(Date.parse("2013-11-16T02:37:37.294479"))
 87 |         expect(@dataset.distributions.first.access_url).to eql("http://data.gov.uk/dataset/defence-infrastructure-organisation-disposals-database-house-of-commons-report")
 88 |         expect(@dataset.distributions.first.download_url).to eql("https://www.gov.uk/government/publications/disposal-database-house-of-commons-report")
 89 |         expect(@dataset.distributions.first.byte_size).to eql(23806)
 90 |         expect(@dataset.distributions.first.media_type).to eql("text/html")
 91 |       end
 92 | 
 93 |       it "should get the update frequency" do
 94 |         expect(@dataset.update_frequency).to eql("bi-monthly")
 95 |       end
 96 | 
 97 |       it "should get the issued date" do
 98 |         expect(@dataset.issued).to eql(Date.parse("2012-10-05T13:51:55.812923"))
 99 |       end
100 | 
101 |       it "should get the modified date" do
102 |         expect(@dataset.modified).to eql(Date.parse("2013-11-16T02:37:42.408267"))
103 |       end
104 | 
105 |       it "should get the temporal coverage" do
106 |         temporal = @dataset.temporal
107 |         expect(temporal.start).to eql(Date.parse("2012-11-01"))
108 |         expect(temporal.end).to eql(Date.parse("2013-10-31"))
109 |       end
110 | 
111 |       it "should get the theme" do
112 |         expect(@dataset.theme).to eql("Defence")
113 |       end
114 |     end
115 | 
116 |     context "and CKAN is not running on the root of the domain" do
117 |       it "loads the dataset" do
118 |         url = CKANFakeweb.register_dataset(
119 |           URI("http://other.org/some/path/"),
120 |           "defence",
121 |           load_fixture("ckan/rest-dataset-defence.json")
122 |         )
123 | 
124 |         dataset = DataKitten::Dataset.new(url)
125 |         expect(dataset.publishing_format).to eq(:ckan)
126 |       end
127 |     end
128 |   end
129 | 
130 |   context "With a CKAN 3 endpoint" do
131 |     before { CKANFakeweb.register_toilets_dataset }
132 | 
133 |     it "should detect CKAN Datasets" do
134 |       d = DataKitten::Dataset.new("http://example.org/dataset/toilets")
135 |       expect(d.publishing_format).to eql(:ckan)
136 |       expect(d.supported?).to eql(true)
137 |     end
138 | 
139 |     context "when the dataset has a UUID" do
140 |       before(:each) do
141 |         @dataset = DataKitten::Dataset.new("http://example.org/dataset/62766308-cb4f-4275-b4a4-937f52a978c5")
142 |       end
143 | 
144 |       it "should get the title" do
145 |         expect(@dataset.data_title).to eql("National Public Toilet Map")
146 |       end
147 | 
148 |       it "should get the description" do
149 |         expect(@dataset.description).to eql("Here are some notes")
150 |       end
151 | 
152 |       it "should get the identifier" do
153 |         expect(@dataset.identifier).to eql("national-public-toilet-map")
154 |       end
155 | 
156 |       it "should get the landing page" do
157 |         expect(@dataset.landing_page).to eql("http://www.toiletmap.gov.au/default.aspx")
158 |       end
159 | 
160 |       it "should get the licence" do
161 |         expect(@dataset.licenses.length).to eql(1)
162 |         licence = @dataset.licenses.first
163 |         expect(licence.uri).to eql("http://creativecommons.org/licenses/by/3.0/au/")
164 |         expect(licence.name).to eql("Creative Commons Attribution 3.0 Australia")
165 |         expect(licence.id).to eql("cc-by")
166 |       end
167 | 
168 |       it "should get the keywords" do
169 |         expect(@dataset.keywords.length).to eql(2)
170 |         expect(@dataset.keywords[0]).to eql("health")
171 |         expect(@dataset.keywords[1]).to eql("toilet")
172 |       end
173 | 
174 |       it "should get the publisher" do
175 |         expect(@dataset.publishers.length).to eql(1)
176 |         publisher = @dataset.publishers.first
177 |         expect(publisher.name).to eql("Department of Health and Ageing")
178 |         expect(publisher.uri).to eql("http://www.example.com")
179 |         expect(publisher.mbox).to eql("foo@example.com")
180 |       end
181 | 
182 |       it "should list the distributions" do
183 |         expect(@dataset.distributions.length).to eql(1)
184 | 
185 |         expect(@dataset.distributions.first.description).to eql("Toilet Map")
186 |         expect(@dataset.distributions.first.issued).to eql(Date.parse("2013-05-12T08:42:48.397216"))
187 |         expect(@dataset.distributions.first.modified).to eql(Date.parse("2013-12-10T00:35:29.489574"))
188 |         expect(@dataset.distributions.first.access_url).to eql("http://www.toiletmap.gov.au/default.aspx")
189 |         expect(@dataset.distributions.first.download_url).to eql("http://data.gov.au/storage/f/2013-11-14T05%3A41%3A12.200Z/toiletmapexport-131112-042111.zip")
190 |         expect(@dataset.distributions.first.byte_size).to eql(1112225)
191 |         expect(@dataset.distributions.first.media_type).to eql("application/zip")
192 |       end
193 | 
194 |       it "should get the issued date" do
195 |         expect(@dataset.issued).to eql(Date.parse("2013-05-12T08:42:38.802401"))
196 |       end
197 | 
198 |       it "should get the modified date" do
199 |         expect(@dataset.modified).to eql(Date.parse("2014-03-02T05:44:59.497920"))
200 |       end
201 | 
202 |       it "should get the theme" do
203 |         expect(@dataset.theme).to eql("community")
204 |       end
205 |     end
206 | 
207 |     context "when parsing CKAN" do
208 |       before(:each) do
209 |         @dataset = DataKitten::Dataset.new("http://example.org/dataset/toilets")
210 |       end
211 | 
212 |       it "should get the title" do
213 |         expect(@dataset.data_title).to eql("National Public Toilet Map")
214 |       end
215 | 
216 |       it "should get the description" do
217 |         expect(@dataset.description).to eql("Here are some notes")
218 |       end
219 | 
220 |       it "should get the licence" do
221 |         expect(@dataset.licenses.length).to eql(1)
222 |         licence = @dataset.licenses.first
223 |         expect(licence.uri).to eql("http://creativecommons.org/licenses/by/3.0/au/")
224 |         expect(licence.name).to eql("Creative Commons Attribution 3.0 Australia")
225 |         expect(licence.id).to eql("cc-by")
226 |       end
227 | 
228 |       it "should get the keywords" do
229 |         expect(@dataset.keywords.length).to eql(2)
230 |         expect(@dataset.keywords[0]).to eql("health")
231 |         expect(@dataset.keywords[1]).to eql("toilet")
232 |       end
233 | 
234 |       it "should get the publisher" do
235 |         expect(@dataset.publishers.length).to eql(1)
236 |         publisher = @dataset.publishers.first
237 |         expect(publisher.name).to eql("Department of Health and Ageing")
238 |         expect(publisher.uri).to eql("http://www.example.com")
239 |         expect(publisher.mbox).to eql("foo@example.com")
240 |       end
241 | 
242 |       it "should list the distributions" do
243 |         expect(@dataset.distributions.length).to eql(1)
244 | 
245 |         expect(@dataset.distributions.first.description).to eql("Toilet Map")
246 |         expect(@dataset.distributions.first.issued).to eql(Date.parse("2013-05-12T08:42:48.397216"))
247 |         expect(@dataset.distributions.first.modified).to eql(Date.parse("2013-12-10T00:35:29.489574"))
248 |         expect(@dataset.distributions.first.access_url).to eql("http://www.toiletmap.gov.au/default.aspx")
249 |         expect(@dataset.distributions.first.download_url).to eql("http://data.gov.au/storage/f/2013-11-14T05%3A41%3A12.200Z/toiletmapexport-131112-042111.zip")
250 |         expect(@dataset.distributions.first.byte_size).to eql(1112225)
251 |         expect(@dataset.distributions.first.media_type).to eql("application/zip")
252 |       end
253 | 
254 |       it "should get the issued date" do
255 |         expect(@dataset.issued).to eql(Date.parse("2013-05-12T08:42:38.802401"))
256 |       end
257 | 
258 |       it "should get the modified date" do
259 |         expect(@dataset.modified).to eql(Date.parse("2014-03-02T05:44:59.497920"))
260 |       end
261 | 
262 |       it "should get the theme" do
263 |         expect(@dataset.theme).to eql("community")
264 |       end
265 |     end
266 |   end
267 | 
268 |   context "with cadastral dataset" do
269 |     before { CKANFakeweb.register_cadastral_dataset }
270 | 
271 |     before(:each) do
272 |       @dataset = DataKitten::Dataset.new("http://example.org/api/rest/package/65493c4b-46d5-4125-b7d4-fc1df2b33349")
273 |     end
274 | 
275 |     it "should get the title" do
276 |       expect(@dataset.data_title).to eql("LPS Cadastral Parcels NI (Metadata)")
277 |     end
278 | 
279 |     it "should get the description" do
280 |       expect(@dataset.description).to eql("The dataset contains the boundaries of each individual freehold title to land.")
281 |     end
282 | 
283 |     it "should get the identifier" do
284 |       expect(@dataset.identifier).to eql("lps-cadastral-parcels-ni-metadata")
285 |     end
286 | 
287 |     it "should get the landing page" do
288 |       expect(@dataset.landing_page).to eql("http://data.gov.uk/dataset/lps-cadastral-parcels-ni-metadata")
289 |     end
290 | 
291 |     it "should get no licence" do
292 |       expect(@dataset.licenses.length).to eql(0)
293 |     end
294 | 
295 |     it "should get the keywords" do
296 |       expect(@dataset.keywords.length).to eql(18)
297 |       expect(@dataset.keywords[0]).to eql("Cadastral")
298 |     end
299 | 
300 |     it "should get the publisher" do
301 |       expect(@dataset.publishers.length).to eql(1)
302 |       publisher = @dataset.publishers.first
303 |       expect(publisher.name).to eql("Northern Ireland Spatial Data Infrastructure")
304 |     end
305 | 
306 |     it "should list no distributions" do
307 |       expect(@dataset.distributions.length).to eql(0)
308 |     end
309 | 
310 |     it "should get the update frequency" do
311 |       expect(@dataset.update_frequency).to eql("biannually")
312 |     end
313 | 
314 |     it "should get the issued date" do
315 |       expect(@dataset.issued).to eql(Date.parse("2012-12-21T11:41:36.523040"))
316 |     end
317 | 
318 |     it "should get the modified date" do
319 |       expect(@dataset.modified).to eql(Date.parse("2014-02-18T16:38:37.394178"))
320 |     end
321 | 
322 |     it "should get the language" do
323 |       expect(@dataset.language).to eql("eng")
324 |     end
325 | 
326 |     it "should get the theme" do
327 |       expect(@dataset.theme).to eql("Mapping")
328 |     end
329 | 
330 |     it "should get the spatial coverage" do
331 |       spatial = @dataset.spatial
332 |       expect(spatial["type"]).to eql("Polygon")
333 |       expect(spatial["coordinates"][0]).to include(
334 |         [-5.2563, 53.8869],
335 |         [-5.2563, 55.5369],
336 |         [-8.1906, 55.5369],
337 |         [-8.1906, 53.8869],
338 |         [-5.2563, 53.8869]
339 |       )
340 |     end
341 |   end
342 | 
343 |   context "with pollinator dataset" do
344 |     before(:each) do
345 |       CKANFakeweb.register_pollinator_dataset
346 |       @dataset = DataKitten::Dataset.new("http://example.org/api/rest/package/10d394fd-88b9-489f-9552-b7b567f927e2")
347 |     end
348 | 
349 |     it "should get the title" do
350 |       expect(@dataset.data_title).to eql("Pollinator visitation data on oilseed rape varieties")
351 |     end
352 | 
353 |     it "should get the description" do
354 |       expect(@dataset.description).to start_with("This dataset contains counts of pollinators visiting different varieties of oilseed rape (OSR).")
355 |     end
356 | 
357 |     it "should get the identifier" do
358 |       expect(@dataset.identifier).to eql("pollinator-visitation-data-on-oilseed-rape-varieties")
359 |     end
360 | 
361 |     it "should get the landing page" do
362 |       expect(@dataset.landing_page).to eql("http://data.gov.uk/dataset/pollinator-visitation-data-on-oilseed-rape-varieties")
363 |     end
364 | 
365 |     it "should get the licence" do
366 |       expect(@dataset.licenses.length).to eql(1)
367 |       licence = @dataset.licenses.first
368 |       expect(licence.uri).to eql("http://eidc.ceh.ac.uk/administration-folder/tools/ceh-standard-licence-texts/ceh-open-government-licence/plain")
369 |       expect(licence.name).to eql("This resource is made available under the terms of the Open Government Licence")
370 |       expect(licence.id).to be_nil
371 |     end
372 | 
373 |     it "should get the keywords" do
374 |       expect(@dataset.keywords.length).to eql(12)
375 |       expect(@dataset.keywords).to include("bibionidae", "bumblebees")
376 |     end
377 | 
378 |     it "should get the publisher" do
379 |       expect(@dataset.publishers.length).to eql(1)
380 |       publisher = @dataset.publishers.first
381 |       expect(publisher.name).to eql("Centre for Ecology & Hydrology")
382 |     end
383 | 
384 |     it "should list the distributions" do
385 |       expect(@dataset.distributions.length).to eql(2)
386 | 
387 |       expect(@dataset.distributions.first.description).to start_with("Supporting information")
388 |       expect(@dataset.distributions.first.issued).to eql(Date.parse("2015-08-17T16:29:04.843110"))
389 |       expect(@dataset.distributions.first.modified).to be_nil
390 |       expect(@dataset.distributions.first.access_url).to eql("http://data.gov.uk/dataset/pollinator-visitation-data-on-oilseed-rape-varieties")
391 |       expect(@dataset.distributions.first.download_url).to eql("http://eidc.ceh.ac.uk/metadata/d7b25308-3ec7-4cff-8eed-fe20b815f964/zip_export")
392 |       expect(@dataset.distributions.first.byte_size).to be_nil
393 |       expect(@dataset.distributions.first.media_type).to be_nil
394 |     end
395 | 
396 |     it "should get the update frequency" do
397 |       expect(@dataset.update_frequency).to eql("notPlanned")
398 |     end
399 | 
400 |     it "should get the issued date" do
401 |       expect(@dataset.issued).to eql(Date.parse("2014-08-11T08:29:37.215826"))
402 |     end
403 | 
404 |     it "should get the modified date" do
405 |       expect(@dataset.modified).to eql(Date.parse("2015-08-17T15:29:04.733151"))
406 |     end
407 | 
408 |     it "should get the language" do
409 |       expect(@dataset.language).to eql("eng")
410 |     end
411 | 
412 |     it "should get the theme" do
413 |       expect(@dataset.theme).to eql("Environment")
414 |     end
415 | 
416 |     it "should get the temporal coverage" do
417 |       temporal = @dataset.temporal
418 |       expect(temporal.start).to eql(Date.parse("2012-05-01"))
419 |       expect(temporal.end).to eql(Date.parse("2012-05-31"))
420 |     end
421 | 
422 |     it "should get the spatial coverage" do
423 |       spatial = @dataset.spatial
424 |       expect(spatial["type"]).to eql("Polygon")
425 |       east = 1.5329
426 |       north = 53.206
427 |       south = 51.616
428 |       west = -1.095
429 |       expect(spatial["coordinates"][0]).to eql([
430 |         [west, north],
431 |         [east, north],
432 |         [east, south],
433 |         [west, south],
434 |         [west, north]
435 |       ])
436 |     end
437 |   end
438 | 
439 |   context "when a v3 api url is provided" do
440 |     let(:dataset) do
441 |       CKANFakeweb.register_frozen_animals_dataset
442 |       DataKitten::Dataset.new("http://example.org/api/3/action/package_show?id=frozen-animals")
443 |     end
444 | 
445 |     it "loads the dataset" do
446 |       expect(dataset.publishing_format).to eql(:ckan)
447 |       expect(dataset.supported?).to eql(true)
448 |     end
449 | 
450 |     it "converts extras to a hash" do
451 |       expect(dataset.metadata["extras"].keys).to include("geographic_coverage", "temporal_coverage-from", "theme-primary")
452 |     end
453 | 
454 |     it "converts tags to a list" do
455 |       expect(dataset.metadata["tags"]).to include("Environment")
456 |     end
457 | 
458 |     it "defauls publisher url to base url if missing from organization" do
459 |       expect(dataset.publishers[0].homepage).to eq("http://example.org/")
460 |     end
461 |   end
462 | 
463 |   context "when a 'rest' api url is provided" do
464 |     it "loads the dataset" do
465 |       CKANFakeweb.register_defence_dataset
466 |       d = DataKitten::Dataset.new("http://example.org/api/2/rest/dataset/defence")
467 |       expect(d.publishing_format).to eql(:ckan)
468 |       expect(d.supported?).to eql(true)
469 |     end
470 |   end
471 | 
472 |   context "when not on the root of a domain" do
473 |     it "accepts a specified base_uri for v3" do
474 |       CKANFakeweb.register_frozen_animals_dataset("http://example.net/hidden_ckan/")
475 |       d = DataKitten::Dataset.new("http://example.net/hidden_ckan/api/3/action/package_show?id=frozen-animals", "http://example.net/hidden_ckan/")
476 |       expect(d.publishing_format).to eql(:ckan)
477 |       expect(d.supported?).to eql(true)
478 |     end
479 | 
480 |     it "accepts a specified base_uri for 'rest'" do
481 |       CKANFakeweb.register_defence_dataset("http://example.net/hidden_ckan/")
482 |       d = DataKitten::Dataset.new("http://example.net/hidden_ckan/api/2/rest/dataset/defence", "http://example.net/hidden_ckan/")
483 |       expect(d.publishing_format).to eql(:ckan)
484 |       expect(d.supported?).to eql(true)
485 |     end
486 |   end
487 | end
488 | 


--------------------------------------------------------------------------------
/spec/publishing_format/datapackage_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::PublishingFormats::Datapackage do
 4 |   context "when detecting format" do
 5 |     it "should detect datapackage.json" do
 6 |       FakeWeb.register_uri(:get, "http://example.org/dataset/datapackage.json", body: load_fixture("datapackage.json"))
 7 |       d = DataKitten::Dataset.new(access_url: "http://example.org/dataset/datapackage.json")
 8 |       expect(d.publishing_format).to eql(:datapackage)
 9 |     end
10 | 
11 |     it "should not be a data package if there is no datapackage.json" do
12 |       FakeWeb.register_uri(:get, "http://example.org/not-a-dataset/datapackage.json", body: "", status: ["404", "Not Found"])
13 |       d = DataKitten::Dataset.new(access_url: "http://example.org/not-a-dataset/datapackage.json")
14 |       expect(d.publishing_format).to eql(nil)
15 |     end
16 |   end
17 | 
18 |   context "when reading a basic datapackage.json file" do
19 |     before(:each) do
20 |       FakeWeb.register_uri(:get, "http://example.org/dataset/datapackage.json", body: load_fixture("datapackage.json"))
21 |       @dataset = DataKitten::Dataset.new(access_url: "http://example.org/dataset/datapackage.json")
22 |     end
23 | 
24 |     it "should parse basic metadata" do
25 |       expect(@dataset.data_title).to eql("Test Dataset")
26 |       expect(@dataset.description).to eql("This is a test dataset")
27 |     end
28 | 
29 |     it "should extract sources" do
30 |       expect(@dataset.sources.length).to eql(1)
31 |       source = @dataset.sources.first
32 |       expect(source.name).to eql("Somewhere Else")
33 |       expect(source.web).to eql("http://data.example.org/123")
34 |     end
35 | 
36 |     it "should extract licenses" do
37 |       expect(@dataset.licenses.length).to eql(1)
38 |       license = @dataset.licenses.first
39 |       expect(license.id).to eql("odc-pddl")
40 |       expect(license.uri).to eql("http://opendatacommons.org/licenses/pddl/")
41 |       expect(@dataset.rights).to eql(nil)
42 |     end
43 | 
44 |     it "should extract keywords" do
45 |       expect(@dataset.keywords.length).to eql(3)
46 |       expect(@dataset.keywords).to eql(["data", "finances", "spending"])
47 |     end
48 | 
49 |     it "should extract modification date" do
50 |       expect(@dataset.modified).to_not eql(nil)
51 |     end
52 |   end
53 | 
54 |   context "when reading rights information" do
55 |     before(:each) do
56 |       FakeWeb.register_uri(:get, "http://example.org/dataset/datapackage.json", body: load_fixture("odrs-datapackage.json"))
57 |       @dataset = DataKitten::Dataset.new(access_url: "http://example.org/dataset/datapackage.json")
58 |       @rights = @dataset.rights
59 |     end
60 | 
61 |     it "should extract licenses" do
62 |       expect(@rights.contentLicense).to eql("http://reference.data.gov.uk/id/open-government-licence")
63 |       expect(@rights.dataLicense).to eql("http://reference.data.gov.uk/id/open-government-licence")
64 |     end
65 | 
66 |     it "should extract attribution details" do
67 |       expect(@rights.attributionURL).to eql("http://gov.example.org/dataset/example")
68 |       expect(@rights.attributionText).to eql("Example Department")
69 |     end
70 |   end
71 | end
72 | 


--------------------------------------------------------------------------------
/spec/publishing_format/linked_data_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | describe DataKitten::PublishingFormats::LinkedData do
 4 |   before(:each) do
 5 |     FakeWeb.clean_registry
 6 |   end
 7 | 
 8 |   context "when detecting format" do
 9 |     it "should ignore errors" do
10 |       FakeWeb.register_uri(:get, "http://example.org/not-found", status: ["404", "Not Found"])
11 |       d = DataKitten::Dataset.new("http://example.org/not-found")
12 |       expect(d.supported?).to eql(false)
13 |     end
14 | 
15 |     it "should support dataset autodiscovery" do
16 |       rdf_body = <<-EOL
17 |                 <rdf:Description 
18 |                     rdf:about="http://example.org/doc/dataset" 
19 |                     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
20 |                     rdf:type="http://www.w3.org/ns/dcat#Dataset">
21 |                 </rdf:Description>       
22 |       EOL
23 | 
24 |       html_body = <<-EOL
25 |                 <html>
26 |                     <head>
27 |                         <link rel="alternate" type="application/rdf+xml" 
28 |                             href="http://example.org/doc/dataset.rdf"
29 |                     </head>
30 |                 </html>        
31 |       EOL
32 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset", body: html_body, content_type: "text/html")
33 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset.rdf", body: rdf_body, content_type: "application/rdf+xml")
34 | 
35 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset")
36 |       expect(d.publishing_format).to eql(:rdf)
37 |     end
38 | 
39 |     it "should support turtle" do
40 |       body = <<-EOL
41 |               <http://example.org/doc/dataset> a <http://www.w3.org/ns/dcat#Dataset>.
42 |       EOL
43 | 
44 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset", body: body, content_type: "text/turtle")
45 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset")
46 |       expect(d.publishing_format).to eql(:rdf)
47 |     end
48 | 
49 |     it "should fallback to using suffix of URI" do
50 |       body = <<-EOL
51 |               <http://example.org/doc/dataset> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/dcat#Dataset>.
52 |       EOL
53 | 
54 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset.ttl", body: body, content_type: "text/plain")
55 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset.ttl")
56 |       expect(d.publishing_format).to eql(:rdf)
57 |     end
58 | 
59 |     it "should support VoiD datasets" do
60 |       body = <<-EOL
61 |               <http://example.org/doc/dataset> a <http://rdfs.org/ns/void#Dataset>.
62 |       EOL
63 | 
64 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset", body: body, content_type: "text/turtle")
65 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset")
66 |       expect(d.publishing_format).to eql(:rdf)
67 |     end
68 | 
69 |     it "should ignore unknown types" do
70 |       body = <<-EOL
71 |               <http://example.org/doc/dataset> a <http://example.org/doc/Dataset>.
72 |       EOL
73 | 
74 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset", body: body, content_type: "text/turtle")
75 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset")
76 |       expect(d.publishing_format).to eql(nil)
77 |     end
78 |   end
79 | 
80 |   context "when interpreting RDF" do
81 |     it "should find the title" do
82 |       body = <<-EOL
83 |               @prefix dct: <http://purl.org/dc/terms/> .
84 |               <http://example.org/doc/dataset> a <http://www.w3.org/ns/dcat#Dataset>.
85 |               <http://example.org/doc/dataset> dct:title "Dataset Title".
86 |       EOL
87 | 
88 |       FakeWeb.register_uri(:get, "http://example.org/doc/dataset", body: body, content_type: "text/turtle")
89 |       d = DataKitten::Dataset.new("http://example.org/doc/dataset")
90 |       expect(d.data_title).to eql("Dataset Title")
91 |     end
92 |   end
93 | end
94 | 


--------------------------------------------------------------------------------
/spec/publishing_format/rdfa_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | 
  3 | describe DataKitten::PublishingFormats::RDFa do
  4 |   before(:each) do
  5 |     FakeWeb.clean_registry
  6 |   end
  7 | 
  8 |   context "when detecting RDFa" do
  9 |     it "should ignore errors" do
 10 |       FakeWeb.register_uri(:get, "http://example.org/not-found", status: ["404", "Not Found"])
 11 |       d = DataKitten::Dataset.new("http://example.org/not-found")
 12 |       expect(d.supported?).to eql(false)
 13 |     end
 14 | 
 15 |     it "should detect DCAT Datasets" do
 16 |       dcat_rdfa = load_fixture("basic-dcat-rdfa.html")
 17 |       FakeWeb.register_uri(:get, "http://example.org/rdfa", body: dcat_rdfa, content_type: "text/html")
 18 |       d = DataKitten::Dataset.new("http://example.org/rdfa")
 19 |       expect(d.publishing_format).to eql(:rdfa)
 20 |       expect(d.supported?).to eql(true)
 21 |     end
 22 |   end
 23 | 
 24 |   context "when parsing RDFa" do
 25 |     before(:each) do
 26 |       dcat_rdfa = load_fixture("basic-dcat-rdfa.html")
 27 |       FakeWeb.register_uri(:get, "http://example.org/rdfa", body: dcat_rdfa, content_type: "text/html")
 28 |       @dataset = DataKitten::Dataset.new("http://example.org/rdfa")
 29 |     end
 30 | 
 31 |     it "should extract the title" do
 32 |       expect(@dataset.data_title).to eql("Example DCAT Dataset")
 33 |     end
 34 | 
 35 |     it "should extract the description" do
 36 |       expect(@dataset.description).to eql("This is the description.")
 37 |     end
 38 | 
 39 |     it "should extract licenses" do
 40 |       expect(@dataset.licenses.length).to eql(1)
 41 |       licence = @dataset.licenses.first
 42 |       expect(licence.uri).to eql("http://reference.data.gov.uk/id/open-government-licence")
 43 |       expect(licence.name).to eql("UK Open Government Licence (OGL)")
 44 |     end
 45 | 
 46 |     it "should extract publisher details" do
 47 |       expect(@dataset.publishers.length).to eql(1)
 48 |       publisher = @dataset.publishers.first
 49 |       expect(publisher.name).to eql("Example Publisher")
 50 |       expect(publisher.uri).to eql("http://example.org/publisher")
 51 |     end
 52 | 
 53 |     it "should extract keywords" do
 54 |       expect(@dataset.keywords.length).to eql(2)
 55 |       expect(@dataset.keywords).to include("Examples")
 56 |       expect(@dataset.keywords).to include("DCAT")
 57 |     end
 58 | 
 59 |     it "should extract update frequency" do
 60 |       expect(@dataset.update_frequency).to eql("http://purl.org/linked-data/sdmx/2009/code#freq-W")
 61 |     end
 62 | 
 63 |     it "should extract distributions" do
 64 |       expect(@dataset.distributions.length).to eql(1)
 65 | 
 66 |       distribution = @dataset.distributions.first
 67 | 
 68 |       expect(distribution.title).to eql("CSV download")
 69 |       expect(distribution.access_url).to eql("http://example.org/distribution.csv.zip")
 70 |     end
 71 | 
 72 |     it "should extract dates" do
 73 |       expect(@dataset.issued.to_s).to eql("2010-10-25")
 74 |       expect(@dataset.modified.to_s).to eql("2013-05-10")
 75 |     end
 76 |   end
 77 | 
 78 |   context "when parsing rights statements" do
 79 |     before(:each) do
 80 |       dcat_rdfa = load_fixture("dcat-odrs-rdfa.html")
 81 |       FakeWeb.register_uri(:get, "http://example.org/rights", body: dcat_rdfa, content_type: "text/html")
 82 |       @dataset = DataKitten::Dataset.new("http://example.org/rights")
 83 |     end
 84 | 
 85 |     it "should extract licence URIs" do
 86 |       expect(@dataset.rights.dataLicense).to eql "http://reference.data.gov.uk/id/open-government-licence"
 87 |       expect(@dataset.rights.contentLicense).to eql "http://reference.data.gov.uk/id/open-government-licence"
 88 |     end
 89 | 
 90 |     it "should extract copyright information" do
 91 |       expect(@dataset.rights.copyrightYear).to eql "2013"
 92 |       expect(@dataset.rights.databaseRightYear).to eql "2013"
 93 |       expect(@dataset.rights.copyrightHolder).to eql "http://example.org"
 94 |       expect(@dataset.rights.databaseRightHolder).to eql "http://example.org"
 95 |       expect(@dataset.rights.copyrightNotice).to eql "Contains public sector information licensed under the Open Government Licence v1.0"
 96 |       expect(@dataset.rights.copyrightStatement).to eql "http://example.org/statement"
 97 |       expect(@dataset.rights.databaseRightStatement).to eql "http://example.org/statement"
 98 |       expect(@dataset.rights.attributionText).to eql "Example Department"
 99 |       expect(@dataset.rights.attributionURL).to eql "http://gov.example.org/dataset/finances"
100 |     end
101 |   end
102 | end
103 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | require "data_kitten"
 2 | require "fakeweb"
 3 | require "linkeddata"
 4 | require "pry"
 5 | 
 6 | FakeWeb.allow_net_connect = false
 7 | 
 8 | if ENV["COVERAGE"]
 9 |   require "coveralls"
10 |   Coveralls.wear!
11 |   FakeWeb.allow_net_connect = %r{^https://coveralls.io}
12 | end
13 | 
14 | RSpec.configure do |config|
15 |   config.order = "random"
16 | end
17 | 
18 | def load_fixture(file)
19 |   File.read(File.join(__dir__, "fixtures", file))
20 | end
21 | 


--------------------------------------------------------------------------------