├── .flake8
├── .github
    ├── issue_template.md
    └── pull_request_template.md
├── .gitignore
├── .travis.yml
├── LICENSE.md
├── MANIFEST.in
├── Makefile
├── README.md
├── frictionless_ckan_mapper
    ├── VERSION
    ├── __init__.py
    ├── ckan_to_frictionless.py
    └── frictionless_to_ckan.py
├── pylama.ini
├── pytest.ini
├── setup.cfg
├── setup.py
├── tests
    ├── __init__.py
    ├── fixtures
    │   ├── ckan_package.json
    │   ├── ckan_resource.json
    │   ├── frictionless_package.json
    │   ├── frictionless_resource.json
    │   ├── full_ckan_package.json
    │   └── full_ckan_package_first_round_trip.json
    ├── test_ckan_to_frictionless.py
    ├── test_frictionless_to_ckan.py
    └── test_roundtrip.py
└── tox.ini


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | max-complexity = 10
4 | exclude = .venv*,venv*,.git,__pycache__,.tox,.eggs,*.egg


--------------------------------------------------------------------------------
/.github/issue_template.md:
--------------------------------------------------------------------------------
1 | # Overview
2 | 
3 | Please replace this line with full information about your idea or problem. If it's a bug share as much as possible to reproduce it
4 | 
5 | ---
6 | 
7 | Please preserve this line to notify @amercader (lead of this repository)
8 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Overview
2 | 
3 | Please replace this line with full information about your pull request. Make sure that tests pass before publishing it
4 | 
5 | ---
6 | 
7 | Please preserve this line to notify @amercader (lead of this repository)
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | 
 27 | # PyInstaller
 28 | #  Usually these files are written by a python script from a template
 29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 30 | *.manifest
 31 | *.spec
 32 | 
 33 | # Installer logs
 34 | pip-log.txt
 35 | pip-delete-this-directory.txt
 36 | 
 37 | # Unit test / coverage reports
 38 | htmlcov/
 39 | .tox/
 40 | .coverage
 41 | .coverage.*
 42 | .cache
 43 | nosetests.xml
 44 | coverage.xml
 45 | *,cover
 46 | .hypothesis/
 47 | 
 48 | # Translations
 49 | *.mo
 50 | *.pot
 51 | 
 52 | # Django stuff:
 53 | *.log
 54 | local_settings.py
 55 | 
 56 | # Flask stuff:
 57 | instance/
 58 | .webassets-cache
 59 | 
 60 | # Scrapy stuff:
 61 | .scrapy
 62 | 
 63 | # Sphinx documentation
 64 | docs/_build/
 65 | 
 66 | # PyBuilder
 67 | target/
 68 | 
 69 | # IPython Notebook
 70 | .ipynb_checkpoints
 71 | 
 72 | # pyenv
 73 | .python-version
 74 | 
 75 | # celery beat schedule file
 76 | celerybeat-schedule
 77 | 
 78 | # dotenv
 79 | .env
 80 | 
 81 | # virtualenv
 82 | venv/
 83 | ENV/
 84 | .venv/
 85 | 
 86 | # Spyder project settings
 87 | .spyderproject
 88 | 
 89 | # Rope project settings
 90 | .ropeproject
 91 | 
 92 | # Extra
 93 | .idea/
 94 | *iml
 95 | 
 96 | # VS Code
 97 | .vscode/
 98 | 
 99 | # Makefile
100 | .make-cache
101 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | dist: trusty
 3 | language: python
 4 | python:
 5 | - 2.7
 6 | - 3.6
 7 | env:
 8 |   global:
 9 |   - TOXENV="py${PYTHON_VERSION//./}"
10 | install:
11 | - make install
12 | - pip install coveralls
13 | script:
14 | - make test
15 | after_success:
16 | - coveralls
17 | deploy:
18 |   provider: pypi
19 |   user: "__token__"
20 |   distributions: sdist bdist_wheel
21 |   skip_cleanup: true
22 |   on:
23 |     tags: true
24 |   password:
25 |     secure: i5+61ZmR8Fv3a31IwnhgM3hEOCcyWCHezR+vVt2J1iEZnyCP1ldh2rpRtk4SOW8tgR9VJ/amhrYnUoDP2WP7KAfA6nX8Y5I8apVL/iNAJssqPiSlPLNeOgzjgqEEE1tSXFmriyieVrDsf+z98XX6LjHDUoQFdop9R1WgXlnXrn967/rBl4a0BA30xi5gLyab6KWkNNAtMriZEHP058rrkWxGMLn/0P+9eu2GHZTkUdmotPMHPAvEB+Ts6BOs4i74A4MnTyxDG8nlEqBP9CjkqiUyX5rGmSggW5wOJltrWjZEZymB0Q3b8uffrfzdwhpVkNlqXEBhnSnJKpRhmKmIWp8Ip7rEfeQQz/VQpSKLLdFF5fhiXUCsLpn1BJ02Y0h9VZuZig2Y2I7Rbo0w1udcGrnPZ5hstjbTQglVTcqMq4BmAJYHIiF5qRe+EF4D5oY42FI2YoinMuC5kljGl0iLbYYRo06Ei8ZSSj3cf+A8m+LePJCuDFaKaoafCUO0WFFcNTaUfwuHBF0Nx2TEuF1v7lVbcTqGMTYH+k89rlRRKOLyn9sYlaloKdW/4bYE+7aI7vCSxw5zl1qVgg+CXoqOEMq/fpkEAjheFngij+eCGcvkHjeEntIOKsAkLm/u8Gs/sd9bGyj7/JWF92JGJ6K4LKm3Mu/JmKiRqbUhc0jEbDw=
26 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2017 Open Knowledge International
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | global-include *.json
 2 | global-include *.yml
 3 | global-include *.txt
 4 | global-include VERSION
 5 | include LICENSE.md
 6 | include Makefile
 7 | include pylama.ini
 8 | include pytest.ini
 9 | include README.md
10 | include tox.ini
11 | prune .tox
12 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON := python
 2 | PIP := pip
 3 | GIT := git
 4 | 
 5 | BUILD_DIR := build
 6 | DIST_DIR := dist
 7 | SENTINELS := .make-cache
 8 | 
 9 | SOURCE_FILES := $(shell find ./frictionless_ckan_mapper -type f -name "*.py")
10 | 
11 | PACKAGE := $(shell grep '^PACKAGE =' setup.py | cut -d "'" -f2)
12 | VERSION := $(shell head -n 1 $(PACKAGE)/VERSION)
13 | 
14 | .PHONY: all dist distclean install list release test version
15 | 
16 | ## Clean all generated files
17 | distclean:
18 | 	rm -rf $(BUILD_DIR) $(DIST_DIR)
19 | 	rm -rf $(SENTINELS)/dist
20 | 
21 | ## Create distribution files to upload to pypi
22 | dist: $(SENTINELS)/dist
23 | 
24 | 
25 | 
26 | all: list
27 | 
28 | install:
29 | 	pip install --upgrade -e .[develop]
30 | 
31 | list:
32 | 	@grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n'
33 | 
34 | # Upload a release of the package to PyPi and create a Git tag
35 | # Note: Travis CI will upload on tag push.
36 | release: $(SENTINELS)/dist
37 | 	@echo
38 | 	@echo "You are about to release authoritative version $(VERSION)"
39 | 	@echo "This will:"
40 | 	@echo " - Create a git tag release-$(VERSION)"
41 | 	@echo " - Create a release package and upload it to PyPi via Travis CI"
42 | 	$(GIT) tag release-$(VERSION)
43 | 	$(GIT) push --tags
44 | # $(PYTHON) -m twine upload dist/*
45 | 
46 | $(SENTINELS):
47 | 	mkdir $@
48 | 
49 | $(SENTINELS)/dist-setup: | $(SENTINELS)
50 | 	$(PIP) install -U pip wheel twine
51 | 	@touch $@
52 | 
53 | $(SENTINELS)/dist: $(SENTINELS)/dist-setup $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION).tar.gz $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION)-py2.py3-none-any.whl | $(SENTINELS)
54 | 	@touch $@
55 | 
56 | $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION).tar.gz $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION)-py2.py3-none-any.whl: $(SOURCE_FILES) setup.py | $(SENTINELS)/dist-setup
57 | 	$(PYTHON) setup.py sdist bdist_wheel --universal
58 | 
59 | test:
60 | 	pylama $(PACKAGE)
61 | 	tox
62 | 
63 | version:
64 | 	@echo $(VERSION)
65 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Frictionless CKAN Mapper
  2 | 
  3 | A library for mapping CKAN metadata <=> Frictionless metadata.
  4 | 
  5 | The library has zero dependencies (not even on Data Package libs). You can use it directly or use it for inspiration. Detailed outline of the algorithm is in the docs or you can read the code.
  6 | 
  7 | [![Travis](https://img.shields.io/travis/frictionlessdata/frictionless-ckan-mapper/master.svg)](https://travis-ci.org/frictionlessdata/frictionless-ckan-mapper)
  8 | [![Coveralls](http://img.shields.io/coveralls/frictionlessdata/frictionless-ckan-mapper/master.svg)](https://coveralls.io/r/frictionlessdata/frictionless-ckan-mapper?branch=master)
  9 | [![PyPi](https://img.shields.io/pypi/v/frictionless-ckan-mapper.svg)](https://pypi.python.org/pypi/frictionless-ckan-mapper)
 10 | [![SemVer](https://img.shields.io/badge/versions-SemVer-brightgreen.svg)](http://semver.org/)
 11 | [![Chat on Discord](https://img.shields.io/discord/695635777199145130)](https://discord.gg/2UgfM2k)
 12 | 
 13 | <!-- toc -->
 14 | 
 15 | - [Frictionless CKAN Mapper](#frictionless-ckan-mapper)
 16 |   - [Installation](#installation)
 17 |   - [Getting started](#getting-started)
 18 |     - [CKAN => Frictionless](#ckan--frictionless)
 19 |     - [Frictionless => CKAN](#frictionless--ckan)
 20 |   - [Reference](#reference)
 21 |     - [`ckan_to_frictionless`](#ckan_to_frictionless)
 22 |       - [`resource(ckandict)`](#resourceckandict)
 23 |       - [`dataset(ckandict)`](#datasetckandict)
 24 |     - [`frictionless_to_ckan`](#frictionless_to_ckan)
 25 |       - [`resource(fddict)`](#resourcefddict)
 26 |       - [`package(fddict)`](#packagefddict)
 27 |   - [Design](#design)
 28 |     - [CKAN reference](#ckan-reference)
 29 |     - [Algorithm: CKAN => Frictionless](#algorithm-ckan--frictionless)
 30 |     - [Algorithm: Frictionless => CKAN](#algorithm-frictionless--ckan)
 31 |   - [Developers](#developers)
 32 |     - [Install the source](#install-the-source)
 33 |     - [Run the tests](#run-the-tests)
 34 |     - [Building and publishing the package](#building-and-publishing-the-package)
 35 |       - [Build the distribution package locally for testing purposes](#build-the-distribution-package-locally-for-testing-purposes)
 36 |       - [Test the package at test.pypi.org](#test-the-package-at-testpypiorg)
 37 |       - [Tag a new Git release and publish to the official PyPi](#tag-a-new-git-release-and-publish-to-the-official-pypi)
 38 |     <!-- tocstop -->
 39 | 
 40 | ## Installation
 41 | 
 42 | - Python: install Python. The library is compatible with both Python 2.7+ and Python 3.3+.
 43 | 
 44 | ```bash
 45 | pip install frictionless-ckan-mapper
 46 | ```
 47 | 
 48 | **Note:** The package is installed as `frictionless-ckan-mapper` and then imported as `frictionless_ckan_mapper`.
 49 | 
 50 | ## Getting started
 51 | 
 52 | ### CKAN => Frictionless
 53 | 
 54 | ```python
 55 | # get a CKAN metadata item
 56 | ckan_dataset = {
 57 |   "name": "my-dataset",
 58 |   "title": "My awesome dataset",
 59 |   "url": "http://www.example.com/data.csv"
 60 | }
 61 | 
 62 | # or load from an API e.g.
 63 | # ckan_dataset = json.load(urllib.urlopen(
 64 | #     https://demo.ckan.org/api/3/package_show?id=my_dataset
 65 | # ))
 66 | 
 67 | from frictionless_ckan_mapper import ckan_to_frictionless as converter
 68 | 
 69 | # convert to frictionless
 70 | frictionless_package = converter.dataset(ckan_dict)
 71 | 
 72 | print(frictionless_package)
 73 | ```
 74 | 
 75 | ### Frictionless => CKAN
 76 | 
 77 | ```python
 78 | frictionless = {
 79 |   'name': "f11s-dataset",
 80 |   'path': "https://datahub.io/data.csv"
 81 | }
 82 | 
 83 | from frictionless_ckan_mapper import frictionless_to_ckan as f2c
 84 | 
 85 | ckanout = f2c.dataset(frictionless)
 86 | 
 87 | print(ckanout)
 88 | ```
 89 | 
 90 | ## Reference
 91 | 
 92 | This package contains two modules:
 93 | 
 94 | - `frictionless_to_ckan`
 95 | - `ckan_to_frictionless`
 96 | 
 97 | You can import them directly like so:
 98 | 
 99 | ```python
100 | from frictionless_ckan_mapper import ckan_to_frictionless
101 | from frictionless_ckan_mapper import frictionless_to_ckan
102 | ```
103 | 
104 | ### `ckan_to_frictionless`
105 | 
106 | #### `resource(ckandict)`
107 | 
108 | ```python
109 | from frictionless_ckan_mapper import ckan_to_frictionless as converter
110 | 
111 | # ... Some code with a CKAN dictionary ...
112 | 
113 | output_frictionless_dict = converter.resource(ckan_dictionary)
114 | ```
115 | 
116 | #### `dataset(ckandict)`
117 | 
118 | ```python
119 | from frictionless_ckan_mapper import ckan_to_frictionless as converter
120 | 
121 | # ... Some code with a CKAN dictionary ...
122 | 
123 | output_frictionless_dict = converter.dataset(ckan_dictionary)
124 | ```
125 | 
126 | ### `frictionless_to_ckan`
127 | 
128 | #### `resource(fddict)`
129 | 
130 | ```python
131 | from frictionless_ckan_mapper import frictionless_to_ckan as converter
132 | 
133 | # ... Some code with a Frictionless dictionary ...
134 | 
135 | output_ckan_dict = converter.resource(frictionless_dictionary)
136 | ```
137 | 
138 | #### `package(fddict)`
139 | 
140 | ```python
141 | from frictionless_ckan_mapper import frictionless_to_ckan as converter
142 | 
143 | # ... Some code with a Frictionless dictionary ...
144 | 
145 | output_ckan_dict = converter.package(frictionless_dictionary)
146 | ```
147 | 
148 | ## Design
149 | 
150 | ```text
151 | Frictionless   <=>        CKAN
152 | --------------------------------------
153 | Data Package   <=>   Package (Dataset)
154 | Data Resource  <=>   Resource
155 | Table Schema   <=>   Data Dictionary?? (datastore resources can have schemas)
156 | ```
157 | 
158 | ### CKAN reference
159 | 
160 | **Summary:**
161 | 
162 | - Class diagram below of key objects (without attributes)
163 | - Objects with their attributes in this spreadsheet: https://docs.google.com/spreadsheets/d/1XdqGTFni5Jfs8AMbcbfsP7m11h9mOHS0eDtUZtqGVSg/edit#gid=1925460244
164 | 
165 | ```mermaid
166 | classDiagram
167 | 
168 | class Package
169 | class Resource
170 | class DataDictionary
171 | 
172 | Package *-- Resource
173 | Resource o-- DataDictionary
174 | ```
175 | 
176 | ![mermaid-diagram-20200703112520](https://user-images.githubusercontent.com/32682903/86486065-f9c08100-bd1f-11ea-8a1a-8f3befca0e6e.png)
177 | 
178 | Source for CKAN metadata structure:
179 | 
180 | - Dataset (Package): https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.package_create
181 |   - `Package.as_dict` method: https://github.com/ckan/ckan/blob/2.8/ckan/model/package.py#L195-L223
182 |   - `package_show` ...
183 | 
184 | * Resource: https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.resource_create
185 | 
186 | ### Algorithm: CKAN => Frictionless
187 | 
188 | See the code in [`frictionless_ckan_mapper/ckan_to_frictionless.py`](./frictionless_ckan_mapper/ckan_to_frictionless.py)
189 | 
190 | ### Algorithm: Frictionless => CKAN
191 | 
192 | See the code in [`frictionless_ckan_mapper/frictionless_to_ckan.py`](./frictionless_ckan_mapper/frictionless_to_ckan.py)
193 | 
194 | ## Developers
195 | 
196 | ### Install the source
197 | 
198 | - Clone the repo:
199 | 
200 |   ```bash
201 |   git clone https://github.com/frictionlessdata/frictionless-ckan-mapper.git
202 |   ```
203 | 
204 | - And install it with pip:
205 | 
206 |   ```bash
207 |   pip install -e .
208 |   ```
209 | 
210 | ### Run the tests
211 | 
212 | Use the excellent `pytest` suite as follows:
213 | 
214 | ```bash
215 | pytest tests
216 | ```
217 | 
218 | To test under both Python 2 and Python 3 environments, we use `tox`. You can run the following command:
219 | 
220 | ```bash
221 | make test
222 | ```
223 | 
224 | **Note:** Make sure that the necessary Python versions are in your environment `PATH` (Python 2.7 and Python 3.6).
225 | 
226 | ### Building and publishing the package
227 | 
228 | To see a list of available commands from the `Makefile`, execute:
229 | 
230 | ```bash
231 | make list
232 | ```
233 | 
234 | #### Build the distribution package locally for testing purposes
235 | 
236 | If a previous build exists, make sure to also remove it before building again:
237 | 
238 | ```bash
239 | make distclean
240 | ```
241 | 
242 | Then:
243 | 
244 | ```bash
245 | make dist
246 | ```
247 | 
248 | Alternatively, this command will accomplish the same to build packages for both Python 2 and Python 3:
249 | 
250 | ```bash
251 | python setup.py sdist bdist_wheel --universal
252 | ```
253 | 
254 | #### Test the package at test.pypi.org
255 | 
256 | ```bash
257 | python -m twine upload --repository testpypi dist/*
258 | ```
259 | 
260 | The package will be publicly available at https://test.pypi.org/project/frictionless-ckan-mapper/ and you will be able to `pip install` it as usual.
261 | 
262 | #### Tag a new Git release and publish to the official PyPi
263 | 
264 | Make sure to update the version of the package in the file `frictionless_ckan_mapper/VERSION`. Then:
265 | 
266 | ```bash
267 | make release
268 | ```
269 | 
270 | You can quickly review the version to release with `make version`, which will print the current version stored in `VERSION`.
271 | 


--------------------------------------------------------------------------------
/frictionless_ckan_mapper/VERSION:
--------------------------------------------------------------------------------
1 | 1.0.9
2 | 


--------------------------------------------------------------------------------
/frictionless_ckan_mapper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frictionlessdata/frictionless-ckan-mapper/794eaa33a01a2bb88d0d25279a94aeee92fde395/frictionless_ckan_mapper/__init__.py


--------------------------------------------------------------------------------
/frictionless_ckan_mapper/ckan_to_frictionless.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import six
  3 | import json
  4 | import re
  5 | import unidecode
  6 | from collections import defaultdict
  7 | 
  8 | try:
  9 |     json_parse_exception = json.decoder.JSONDecodeError
 10 | except AttributeError:  # Testing against Python 2
 11 |     json_parse_exception = ValueError
 12 | 
 13 | 
 14 | resource_mapping = {
 15 |     'size': 'bytes',
 16 |     'mimetype': 'mediatype',
 17 |     'url': 'path'
 18 | }
 19 | 
 20 | resource_keys_to_remove = [
 21 |     'position',
 22 |     'datastore_active',
 23 |     'state'
 24 | ]
 25 | 
 26 | 
 27 | def resource(ckandict):
 28 |     '''Convert a CKAN resource to Frictionless Resource.
 29 | 
 30 |     1. Remove unneeded keys
 31 |     2. Expand extras.
 32 |         * Extras are already expanded to key / values by CKAN (unlike on
 33 |             package)
 34 |         * ~~Apply heuristic to unjsonify (if starts with [ or { unjsonify~~
 35 |         * JSON loads everything that starts with [ or {
 36 |     3. Map keys from CKAN to Frictionless (and reformat if needed)
 37 |     4. Remove keys with null values (CKAN has a lot of null valued keys)
 38 |     5. Apply special formatting (if any) for key fields e.g. slugiify
 39 |     '''
 40 |     # TODO: delete keys last as may be needed for something in processing
 41 |     resource = dict(ckandict)
 42 |     for key in resource_keys_to_remove:
 43 |         if key in resource:
 44 |             del resource[key]
 45 | 
 46 |     # unjsonify values
 47 |     # * check if string
 48 |     # * if starts with [ or { => json.loads it ...
 49 |     # HACK: bit of a hacky way to check if value is a jsonified array or
 50 |     # dict
 51 |     # * else do nothing
 52 |     for key, value in resource.items():
 53 |         if isinstance(value, six.text_type) or isinstance(value, six.string_types):
 54 |             value = value.strip()
 55 |             if value.startswith('{') or value.startswith('['):
 56 |                 try:
 57 |                     value = json.loads(value)
 58 |                     resource[key] = value
 59 |                 except (json_parse_exception, TypeError):
 60 |                     pass
 61 | 
 62 |             if key == 'name':
 63 |                 if isinstance(value, six.text_type):
 64 |                     value = unidecode.unidecode(value)
 65 |                 value = value.lower()
 66 |                 value = value.strip()
 67 |                 value = re.sub('(\||[^\w|.|\|])+', '-', value)
 68 |                 if value == '':
 69 |                     value = 'unnamed-resource'
 70 |                 resource[key] = value
 71 | 
 72 |             if key == 'size':
 73 |                 if resource[key]:    
 74 |                     resource[key] = int(resource[key])
 75 | 
 76 |             # 'type' must be lower case 
 77 |             if key == 'type':
 78 |                 resource[key] = value.lower()
 79 | 
 80 |     # Remap differences from CKAN to Frictionless resource
 81 |     for key, value in resource_mapping.items():
 82 |         if key in resource:
 83 |             resource[value] = resource[key] 
 84 |             del resource[key]
 85 | 
 86 |     for key in list(resource.keys()):
 87 |         if resource[key] is None:
 88 |             del resource[key]
 89 | 
 90 |     return resource
 91 | 
 92 | 
 93 | dataset_keys_to_remove = [
 94 |     'state',          # b/c this is state info not metadata about dataset
 95 |     'isopen',         # computed info from license (render info not metadata)
 96 |     'num_resources',  # render info not metadata
 97 |     'num_tags',       # ditto
 98 |     'organization',   # already have owner_org id + this inlines related object
 99 | ]
100 | dataset_mapping = {
101 |     'notes': 'description',
102 |     'url': 'homepage'
103 | }
104 | 
105 | 
106 | def dataset(ckandict):
107 |     '''Convert a CKAN Package (Dataset) to Frictionless Package.
108 | 
109 |     1. Expand extras.
110 |         * JSON loads everything and on error have a string
111 |     2. Map keys from CKAN to Frictionless (and reformat if needed)
112 |     3. Remove keys with null values (CKAN has a lot of null valued keys)
113 |     4. Remove unneeded keys
114 |     5. Apply special formatting for key fields
115 |     '''
116 |     outdict = dict(ckandict)
117 |     # Convert the structure of extras
118 |     # structure of extra item is {key: xxx, value: xxx}
119 |     if 'extras' in ckandict:
120 |         for extra in ckandict['extras']:
121 |             key = extra['key']
122 |             value = extra['value']
123 |             try:
124 |                 value = json.loads(value)
125 |             except (json_parse_exception, TypeError):
126 |                 pass
127 |             outdict[key] = value
128 |         del outdict['extras']
129 | 
130 |     # Map dataset keys
131 |     for key, value in dataset_mapping.items():
132 |         if key in ckandict:
133 |             outdict[value] = ckandict[key]
134 |             del outdict[key]
135 | 
136 |     # map resources inside dataset
137 |     if 'resources' in ckandict:
138 |         outdict['resources'] = [resource(res) for res in
139 |                                 ckandict['resources']]
140 |     else:
141 |         outdict['resources'] = []
142 | 
143 |     for res in outdict['resources']:
144 |         if 'name' not in res:
145 |             res['name'] = 'unnamed-resource'
146 | 
147 |     # prevent having multiple unanmed resources with the same name
148 |     # to follow the specs https://specs.frictionlessdata.io/data-resource/#name
149 |     unnamed_num = 1
150 |     for res in outdict['resources']:
151 |         if res['name'] == 'unnamed-resource':
152 |             res['name'] += '-{}'.format(unnamed_num)
153 |             unnamed_num += 1
154 | 
155 |     # Deal with resources having the same name
156 |     name_count = defaultdict(int)
157 |     resources_names = [r['name'] for r in outdict['resources']]
158 | 
159 |     for name in resources_names:
160 |         name_count[name] += 1
161 | 
162 |     name_index = {n:1 for n in name_count.keys()}
163 | 
164 |     # If a group of resources have the same name
165 |     # add a count to the name and save the original name in the metadata
166 |     for res in outdict['resources']:
167 |         if name_count[res['name']] > 1:
168 |             res_name = res['name']
169 |             res['original_name'] = res_name
170 |             res['name'] = f'{res_name}-{name_index[res_name]}'
171 |             name_index[res_name] += 1
172 | 
173 |     # tags
174 |     if ckandict.get('tags'):
175 |         outdict['keywords'] = [tag['name'] for tag in ckandict['tags']]
176 |     outdict.pop('tags', None)
177 | 
178 |     # author, maintainer => contributors
179 |     # what to do if contributors already there? Options:
180 |     # 1. Just use that and ignore author/maintainer
181 |     # 2. replace with author/maintainer
182 |     # 3. merge i.e. use contributors and merge in (this is sort of complex)
183 |     # e.g. how to i avoid duplicating the same person
184 |     # ANS: for now, is 1 ...
185 |     if (not ('contributors' in outdict and outdict['contributors']) and
186 |             ('author' in outdict or 'maintainer' in outdict)):
187 |         outdict['contributors'] = []
188 |         if 'author' in outdict and outdict['author']:
189 |             contrib = {
190 |                 'title': outdict['author'],
191 |                 'role': 'author'
192 |             }
193 |             if 'author_email' in outdict:
194 |                 contrib['email'] = outdict.get('author_email') or ''
195 |             outdict['contributors'].append(contrib)
196 |         if 'maintainer' in outdict and outdict['maintainer']:
197 |             contrib = {
198 |                 'title': outdict['maintainer'],
199 |                 'role': 'maintainer'
200 |             }
201 |             if 'maintainer_email' in outdict:
202 |                 contrib['email'] = outdict.get('maintainer_email') or ''
203 |             outdict['contributors'].append(contrib)
204 | 
205 |     for key in ['author', 'author_email', 'maintainer', 'maintainer_email']:
206 |         outdict.pop(key, None)
207 | 
208 |     # Algorithm for licenses
209 |     # 1. Use extras first
210 |     # 2. Updating first item in licenses array (if already there -
211 |     # or create it as empty) with stuff at root of ckan dict i.e.
212 |     # values from license_id, license_title etc.
213 | 
214 |     # Looping like this because all those keys are optional according to the
215 |     # docs (though usually license_id will be there if others are there).
216 |     for key in ['license_id', 'license_title', 'license_url']:
217 |         if key in outdict and 'licenses' not in outdict:
218 |             outdict['licenses'] = [{}]
219 |             break  # check to create list of dicts only once
220 |     if 'license_id' in outdict:
221 |         outdict['licenses'][0]['name'] = outdict.get('license_id') or 'no_licerse_name'
222 |         outdict.pop('license_id', None)
223 |     else:
224 |         outdict['licenses'][0]['name'] = 'no_license_name'
225 | 
226 |     if 'license_title' in outdict:
227 |         outdict['licenses'][0]['title'] = outdict.get('license_title') or 'no_license_title'
228 |         outdict.pop('license_title', None)
229 |     else:
230 |         outdict['licenses'][0]['title'] = 'no_license_title'
231 | 
232 |     if 'license_url' in outdict:
233 |         outdict['licenses'][0]['path'] = outdict.get('license_url') or 'no_path'
234 |         outdict.pop('license_url', None)
235 |     else:
236 |         outdict['licenses'][0]['path'] = 'no_license_path'
237 | 
238 |     for key in dataset_keys_to_remove:
239 |         outdict.pop(key, None)
240 | 
241 |     for key in list(outdict.keys()):
242 |         if outdict[key] is None:
243 |             del outdict[key]
244 | 
245 |     return outdict
246 | 


--------------------------------------------------------------------------------
/frictionless_ckan_mapper/frictionless_to_ckan.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import json
  3 | 
  4 | try:
  5 |     json_parse_exception = json.decoder.JSONDecodeError
  6 | except AttributeError:  # Testing against Python 2
  7 |     json_parse_exception = ValueError
  8 | 
  9 | 
 10 | resource_mapping = {
 11 |     'bytes': 'size',
 12 |     'mediatype': 'mimetype',
 13 |     'path': 'url'
 14 | }
 15 | 
 16 | package_mapping = {
 17 |     'description': 'notes',
 18 |     'homepage': 'url',
 19 | }
 20 | 
 21 | # Any key not in this list is passed as is inside "extras".
 22 | # Further processing will happen for possible matchings, e.g.
 23 | # contributor <=> author
 24 | ckan_package_keys = [
 25 |     'author',
 26 |     'author_email',
 27 |     'creator_user_id',
 28 |     'groups',
 29 |     'id',
 30 |     'license_id',
 31 |     'license_title',
 32 |     'license_url',
 33 |     'maintainer',
 34 |     'maintainer_email',
 35 |     'metadata_created',
 36 |     'metadata_modified',
 37 |     'name',
 38 |     'notes',
 39 |     'owner_org',
 40 |     'private',
 41 |     'relationships_as_object',
 42 |     'relationships_as_subject',
 43 |     'revision_id',
 44 |     'resources',
 45 |     'state',
 46 |     'tags',
 47 |     'tracking_summary',
 48 |     'title',
 49 |     'type',
 50 |     'url',
 51 |     'version'
 52 | ]
 53 | 
 54 | frictionless_package_keys_to_exclude = [
 55 |     'extras'
 56 | ]
 57 | 
 58 | 
 59 | def resource(fddict):
 60 |     '''Convert a Frictionless resource to a CKAN resource.
 61 | 
 62 |     # TODO: (the following is inaccurate)
 63 | 
 64 |     1. Map keys from Frictionless to CKAN (and reformat if needed).
 65 |     2. Apply special formatting (if any) for key fields e.g. slugify.
 66 |     '''
 67 |     resource = dict(fddict)
 68 | 
 69 |     # Remap differences from Frictionless to CKAN resource
 70 |     for key, value in resource_mapping.items():
 71 |         if key in resource:
 72 |             resource[value] = resource[key]
 73 |             del resource[key]
 74 | 
 75 |     return resource
 76 | 
 77 | 
 78 | def package(fddict):
 79 |     '''Convert a Frictionless package to a CKAN package (dataset).
 80 | 
 81 |     # TODO: (the following is inaccurate)
 82 | 
 83 |     1. Map keys from Frictionless to CKAN (and reformat if needed).
 84 |     2. Apply special formatting (if any) for key fields.
 85 |     3. Copy extras across inside the "extras" key.
 86 |     '''
 87 |     outdict = dict(fddict)
 88 | 
 89 |     # Map data package keys
 90 |     for key, value in package_mapping.items():
 91 |         if key in fddict:
 92 |             outdict[value] = fddict[key]
 93 |             del outdict[key]
 94 | 
 95 |     # map resources inside dataset
 96 |     if 'resources' in fddict:
 97 |         outdict['resources'] = [resource(res) for res in fddict['resources']]
 98 | 
 99 |     if 'licenses' in outdict and outdict['licenses']:
100 |         outdict['license_id'] = outdict['licenses'][0].get('name')
101 |         outdict['license_title'] = outdict['licenses'][0].get('title')
102 |         outdict['license_url'] = outdict['licenses'][0].get('path')
103 |         # remove it so it won't get put in extras
104 |         if len(outdict['licenses']) == 1:
105 |             outdict.pop('licenses', None)
106 | 
107 |     if outdict.get('contributors'):
108 |         for c in outdict['contributors']:
109 |             if c.get('role') in [None, 'author']:
110 |                 outdict['author'] = c.get('title')
111 |                 outdict['author_email'] = c.get('email')
112 |                 break
113 | 
114 |         for c in outdict['contributors']:
115 |             if c.get('role') == 'maintainer':
116 |                 outdict['maintainer'] = c.get('title')
117 |                 outdict['maintainer_email'] = c.get('email')
118 |                 break
119 | 
120 |         # we remove contributors where we have extracted everything into
121 |         # ckan core that way it won't end up in extras
122 |         # this helps ensure that round tripping with ckan is good
123 |         # when have we extracted everything?
124 |         # if contributors has length 1 and role in author or maintainer
125 |         # or contributors == 2 and no of authors and maintainer types <= 1
126 |         if (
127 |             (len(outdict.get('contributors')) == 1 and
128 |                 outdict['contributors'][0].get('role') in [None, 'author',
129 |                     'maintainer'])
130 |             or
131 |             (len(outdict.get('contributors')) == 2 and
132 |                 [c.get('role') for c in outdict['contributors']]
133 |                 not in (
134 |                     [None, None],
135 |                     ['maintainer', 'maintainer'],
136 |                     ['author', 'author']))
137 |                     ):
138 |             outdict.pop('contributors', None)
139 | 
140 |     if outdict.get('keywords'):
141 |         outdict['tags'] = [
142 |             {'name': keyword} for keyword in outdict['keywords']
143 |         ]
144 |         del outdict['keywords']
145 | 
146 |     final_dict = dict(outdict)
147 |     for key, value in outdict.items():
148 |         if (
149 |             key not in ckan_package_keys and
150 |             key not in frictionless_package_keys_to_exclude
151 |         ):
152 |             if isinstance(value, (dict, list)):
153 |                 value = json.dumps(value)
154 |             if not final_dict.get('extras'):
155 |                 final_dict['extras'] = []
156 |             final_dict['extras'].append(
157 |                 {'key': key, 'value': value}
158 |             )
159 |             del final_dict[key]
160 |     outdict = dict(final_dict)
161 | 
162 |     return outdict
163 | 


--------------------------------------------------------------------------------
/pylama.ini:
--------------------------------------------------------------------------------
1 | [pylama]
2 | linters = pyflakes,pep8
3 | 
4 | [pylama:*/__init__.py]
5 | ignore = W0611
6 | 
7 | [pylama:*]
8 | ignore = E128,E201,E202
9 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | testpaths = tests
3 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os
 4 | import io
 5 | from setuptools import setup, find_packages
 6 | 
 7 | 
 8 | # Helpers
 9 | def read(*paths):
10 |     """Read a text file."""
11 |     basedir = os.path.dirname(__file__)
12 |     fullpath = os.path.join(basedir, *paths)
13 |     contents = io.open(fullpath, encoding='utf-8').read().strip()
14 |     return contents
15 | 
16 | 
17 | # Prepare
18 | PACKAGE = 'frictionless_ckan_mapper'
19 | NAME = PACKAGE.replace('_', '-')
20 | INSTALL_REQUIRES = [
21 |     'six>=1.9,<2.0',
22 |     'unidecode'
23 | ]
24 | TESTS_REQUIRE = [
25 |     'pylama',
26 |     'tox'
27 | ]
28 | README = read('README.md')
29 | VERSION = read(PACKAGE, 'VERSION')
30 | PACKAGES = find_packages(exclude=['examples', 'tests'])
31 | 
32 | 
33 | # Run
34 | setup(
35 |     name=NAME,
36 |     version=VERSION,
37 |     packages=PACKAGES,
38 |     include_package_data=True,
39 |     install_requires=INSTALL_REQUIRES,
40 |     tests_require=TESTS_REQUIRE,
41 |     extras_require={'develop': TESTS_REQUIRE},
42 |     zip_safe=False,
43 |     long_description=README,
44 |     long_description_content_type='text/markdown',
45 |     description='A library for mapping CKAN metadata <=> Frictionless metadata.',
46 |     author='Open Knowledge International',
47 |     url='https://github.com/frictionlessdata/frictionless-ckan-mapper',
48 |     copyright='Copyright 2020 (c) Viderum Inc. / Datopian',
49 |     license='MIT',
50 |     keywords=[
51 |         'data',
52 |         'ckan',
53 |         'frictionless',
54 |         'conversion',
55 |         'package',
56 |         'dataset',
57 |         'resource'
58 |     ],
59 |     classifiers=[
60 |         'Environment :: Web Environment',
61 |         'Intended Audience :: Developers',
62 |         'License :: OSI Approved :: MIT License',
63 |         'Operating System :: OS Independent',
64 |         'Programming Language :: Python :: 3.6',
65 |         'Programming Language :: Python :: 2.7',
66 |         'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
67 |         'Topic :: Software Development :: Libraries :: Python Modules',
68 |     ],
69 | )
70 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frictionlessdata/frictionless-ckan-mapper/794eaa33a01a2bb88d0d25279a94aeee92fde395/tests/__init__.py


--------------------------------------------------------------------------------
/tests/fixtures/ckan_package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c",
 3 |   "name": "ckan-to-frictionless-conversion",
 4 |   "title": "Title with Spaces",
 5 |   "url": "https://unicode.org/Public/emoji/latest/emoji-test.txt",
 6 |   "resources": [
 7 |     {
 8 |       "cache_last_updated": null,
 9 |       "cache_url": null,
10 |       "created": "2020-06-14T16:46:36.105271",
11 |       "datastore_active": false,
12 |       "description": "csv",
13 |       "format": "CSV",
14 |       "hash": "",
15 |       "id": "a6d54af7-3119-4691-917b-8655784c5438",
16 |       "last_modified": null,
17 |       "mimetype_inner": null,
18 |       "mimetype": null,
19 |       "name": "Emojis csv",
20 |       "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c",
21 |       "position": 0,
22 |       "resource_type": null,
23 |       "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c",
24 |       "size": null,
25 |       "state": "active",
26 |       "url_type": null,
27 |       "url": "https://datahub.io/core/unicode-emojis/r/emojis.csv"
28 |     },
29 |     {
30 |       "cache_last_updated": null,
31 |       "cache_url": null,
32 |       "created": "2020-06-14T16:47:13.972133",
33 |       "datastore_active": false,
34 |       "description": "Sequences",
35 |       "format": "TXT",
36 |       "hash": "",
37 |       "id": "ad968429-651f-4aa2-add8-e45c155eef6e",
38 |       "last_modified": null,
39 |       "mimetype_inner": null,
40 |       "mimetype": null,
41 |       "name": "emoji-sequences.txt",
42 |       "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c",
43 |       "position": 1,
44 |       "resource_type": null,
45 |       "revision_id": "faaea9d4-3e7f-40ee-82fd-7d28d9034ffa",
46 |       "size": 123123,
47 |       "state": "active",
48 |       "url_type": null,
49 |       "url": "https://unicode.org/Public/emoji/13.0/emoji-sequences.txt"
50 |     }
51 |   ],
52 |   "tags": [
53 |     {
54 |       "vocabulary_id": null,
55 |       "state": "active",
56 |       "display_name": "tag1",
57 |       "id": "6c943152-f6a4-4f84-8451-04740fb32dd9",
58 |       "name": "tag1"
59 |     },
60 |     {
61 |       "vocabulary_id": null,
62 |       "state": "active",
63 |       "display_name": "tag2",
64 |       "id": "a9114a47-9deb-42ce-be80-2b8000397cbd",
65 |       "name": "tag2"
66 |     },
67 |     {
68 |       "vocabulary_id": null,
69 |       "state": "active",
70 |       "display_name": "tag3",
71 |       "id": "19760b34-eaa8-472d-bffd-864a54e69abb",
72 |       "name": "tag3"
73 |     }
74 |   ],
75 |   "extras": [
76 |     { "key": "boolvalue", "value": "false" },
77 |     { "key": "custom2", "value": "12312386" },
78 |     { "key": "customkey1", "value": "value1" },
79 |     { "key": "nullvalue", "value": "null" }
80 |   ]
81 | }
82 | 


--------------------------------------------------------------------------------
/tests/fixtures/ckan_resource.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "mimetype": null,
 3 |   "cache_url": null,
 4 |   "hash": "",
 5 |   "description": "csv",
 6 |   "extras": {
 7 |     "key1": 123,
 8 |     "key2": false,
 9 |     "key3": null,
10 |     "key4": "value4"
11 |   },
12 |   "name": "Emojis csv",
13 |   "format": "CSV",
14 |   "url": "https://datahub.io/core/unicode-emojis/r/emojis.csv",
15 |   "datastore_active": false,
16 |   "cache_last_updated": null,
17 |   "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c",
18 |   "created": "2020-06-14T16:46:36.105271",
19 |   "state": "active",
20 |   "mimetype_inner": null,
21 |   "last_modified": null,
22 |   "position": 0,
23 |   "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c",
24 |   "url_type": null,
25 |   "id": "a6d54af7-3119-4691-917b-8655784c5438",
26 |   "resource_type": null,
27 |   "size": null
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/fixtures/frictionless_package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c",
 3 |   "name": "ckan_to_frictionless_conversion",
 4 |   "title": "Title with Spaces",
 5 |   "homepage": "https://unicode.org/Public/emoji/latest/emoji-test.txt",
 6 |   "resources": [
 7 |     {
 8 |       "bytes": 0,
 9 |       "cache_last_updated": null,
10 |       "cache_url": null,
11 |       "created": "2020-06-14T16:46:36.105271",
12 |       "datastore_active": false,
13 |       "description": "csv",
14 |       "format": "csv",
15 |       "hash": "",
16 |       "id": "a6d54af7-3119-4691-917b-8655784c5438",
17 |       "last_modified": null,
18 |       "mediatype": null,
19 |       "mimetype_inner": null,
20 |       "name": "emojis-csv",
21 |       "path": "https://datahub.io/core/unicode-emojis/r/emojis.csv",
22 |       "resource_type": null,
23 |       "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c",
24 |       "state": "active",
25 |       "url_type": null
26 |     },
27 |     {
28 |       "bytes": 123123,
29 |       "cache_last_updated": null,
30 |       "cache_url": null,
31 |       "created": "2020-06-14T16:47:13.972133",
32 |       "datastore_active": false,
33 |       "description": "Sequences",
34 |       "format": "txt",
35 |       "hash": "",
36 |       "id": "ad968429-651f-4aa2-add8-e45c155eef6e",
37 |       "last_modified": null,
38 |       "mediatype": null,
39 |       "mimetype_inner": null,
40 |       "name": "emoji-sequences.txt",
41 |       "path": "https://unicode.org/Public/emoji/13.0/emoji-sequences.txt",
42 |       "resource_type": null,
43 |       "revision_id": "faaea9d4-3e7f-40ee-82fd-7d28d9034ffa",
44 |       "state": "active",
45 |       "url_type": null
46 |     }
47 |   ],
48 |   "keywords": ["tag1", "tag2", "tag3"],
49 |   "extras": [
50 |     { "key": "boolvalue", "value": "false" },
51 |     { "key": "custom2", "value": "12312386" },
52 |     { "key": "customkey1", "value": "value1" },
53 |     { "key": "nullvalue", "value": "null" }
54 |   ]
55 | }
56 | 


--------------------------------------------------------------------------------
/tests/fixtures/frictionless_resource.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "created": "2020-06-14T16:46:36.105271",
 3 |   "description": "csv",
 4 |   "extras": {
 5 |     "key1": 123,
 6 |     "key2": false,
 7 |     "key3": null,
 8 |     "key4": "value4"
 9 |   },
10 |   "format": "CSV",
11 |   "hash": "",
12 |   "id": "a6d54af7-3119-4691-917b-8655784c5438",
13 |   "name": "emojis-csv",
14 |   "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c",
15 |   "path": "https://datahub.io/core/unicode-emojis/r/emojis.csv",
16 |   "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c"
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_ckan_package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author": "Author Name",
 3 |   "author_email": "",
 4 |   "creator_user_id": "c6bd4f0b-f550-4f5d-9537-782a2d1d4c7a",
 5 |   "extras": [],
 6 |   "groups": [],
 7 |   "id": "1f7db1f1-1400-4572-a860-0977326a5521",
 8 |   "isopen": true,
 9 |   "license_id": "cc-by",
10 |   "license_title": "Creative Commons Attribution",
11 |   "license_url": "http://www.opendefinition.org/licenses/cc-by",
12 |   "maintainer": "",
13 |   "maintainer_email": "",
14 |   "metadata_created": "2020-06-25T14:33:18.301040",
15 |   "metadata_modified": "2020-06-25T14:50:34.860070",
16 |   "name": "testing",
17 |   "notes": "Test new description for version 1.2",
18 |   "num_resources": 1,
19 |   "num_tags": 0,
20 |   "organization": {
21 |     "approval_status": "approved",
22 |     "created": "2020-06-25T11:31:03.384316",
23 |     "description": "",
24 |     "id": "5df7636c-a47f-4ab8-b6f0-3212280da926",
25 |     "image_url": "",
26 |     "is_organization": true,
27 |     "name": "odc",
28 |     "revision_id": "f0b87003-e385-48c6-b4b6-5fe725cbee8e",
29 |     "state": "active",
30 |     "title": "ODC",
31 |     "type": "organization"
32 |   },
33 |   "owner_org": "5df7636c-a47f-4ab8-b6f0-3212280da926",
34 |   "private": false,
35 |   "relationships_as_object": [],
36 |   "relationships_as_subject": [],
37 |   "resources": [
38 |     {
39 |       "cache_last_updated": null,
40 |       "cache_url": null,
41 |       "created": "2020-06-25T14:33:49.587300",
42 |       "datastore_active": null,
43 |       "description": "",
44 |       "format": "CSV",
45 |       "hash": "",
46 |       "id": "e778ca29-5fb7-4063-ad9b-68040f88ab8a",
47 |       "last_modified": "2020-06-25T14:33:49.567891",
48 |       "mimetype": "text/csv",
49 |       "mimetype_inner": null,
50 |       "name": "mini-csv.csv",
51 |       "package_id": "1f7db1f1-1400-4572-a860-0977326a5521",
52 |       "position": 0,
53 |       "resource_type": null,
54 |       "revision_id": "175298da-691c-42c8-9a4a-b01350fad6e9",
55 |       "size": 40,
56 |       "state": "active",
57 |       "tracking_summary": {
58 |         "recent": 0,
59 |         "total": 0
60 |       },
61 |       "url": "http://localhost:5000/dataset/1f7db1f1-1400-4572-a860-0977326a5521/resource/e778ca29-5fb7-4063-ad9b-68040f88ab8a/download/mini-csv.csv",
62 |       "url_type": "upload",
63 |       "versions_upload_timestamp": "2020-06-25T11:33:49.567522"
64 |     }
65 |   ],
66 |   "revision_id": "41109a92-138b-4a90-be7b-7f202ad83492",
67 |   "state": "active",
68 |   "title": "Testing",
69 |   "tracking_summary": {
70 |     "recent": 0,
71 |     "total": 0
72 |   },
73 |   "type": "dataset",
74 |   "url": "",
75 |   "version": ""
76 | }
77 | 


--------------------------------------------------------------------------------
/tests/fixtures/full_ckan_package_first_round_trip.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author": "Author Name",
 3 |   "author_email": "",
 4 |   "creator_user_id": "c6bd4f0b-f550-4f5d-9537-782a2d1d4c7a",
 5 |   "id": "1f7db1f1-1400-4572-a860-0977326a5521",
 6 |   "license_id": "cc-by",
 7 |   "license_title": "Creative Commons Attribution",
 8 |   "license_url": "http://www.opendefinition.org/licenses/cc-by",
 9 |   "groups": [],
10 |   "metadata_created": "2020-06-25T14:33:18.301040",
11 |   "metadata_modified": "2020-06-25T14:50:34.860070",
12 |   "name": "testing",
13 |   "notes": "Test new description for version 1.2",
14 |   "owner_org": "5df7636c-a47f-4ab8-b6f0-3212280da926",
15 |   "private": false,
16 |   "relationships_as_object": [],
17 |   "relationships_as_subject": [],
18 |   "resources": [
19 |     {
20 |       "created": "2020-06-25T14:33:49.587300",
21 |       "description": "",
22 |       "format": "CSV",
23 |       "hash": "",
24 |       "id": "e778ca29-5fb7-4063-ad9b-68040f88ab8a",
25 |       "last_modified": "2020-06-25T14:33:49.567891",
26 |       "name": "mini-csv.csv",
27 |       "package_id": "1f7db1f1-1400-4572-a860-0977326a5521",
28 |       "revision_id": "175298da-691c-42c8-9a4a-b01350fad6e9",
29 |       "tracking_summary": { "recent": 0, "total": 0 },
30 |       "url_type": "upload",
31 |       "versions_upload_timestamp": "2020-06-25T11:33:49.567522",
32 |       "size": 40,
33 |       "mimetype": "text/csv",
34 |       "url": "http://localhost:5000/dataset/1f7db1f1-1400-4572-a860-0977326a5521/resource/e778ca29-5fb7-4063-ad9b-68040f88ab8a/download/mini-csv.csv"
35 |     }
36 |   ],
37 |   "revision_id": "41109a92-138b-4a90-be7b-7f202ad83492",
38 |   "title": "Testing",
39 |   "tracking_summary": {"recent": 0, "total": 0},
40 |   "type": "dataset",
41 |   "version": "",
42 |   "url": ""
43 | }
44 | 


--------------------------------------------------------------------------------
/tests/test_ckan_to_frictionless.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | import json
  4 | 
  5 | import frictionless_ckan_mapper.ckan_to_frictionless as converter
  6 | 
  7 | 
  8 | class TestResourceConversion:
  9 |     '''Notes:
 10 | 
 11 |     * extras do not any special testing since CKAN already just has them as key
 12 |       values.
 13 |     * we do want to test unjsonifying values since that will cover e.g. a Table
 14 |       Schema set in schema field
 15 |     '''
 16 | 
 17 |     def test_fixtures(self):
 18 |         inpath = 'tests/fixtures/ckan_resource.json'
 19 |         exppath = 'tests/fixtures/frictionless_resource.json'
 20 |         indict = json.load(open(inpath))
 21 |         exp = json.load(open(exppath))
 22 |         out = converter.resource(indict)
 23 |         assert out == exp
 24 | 
 25 |     def test_values_are_unjsonified(self):
 26 |         '''Test values which are jsonified dict or arrays are unjsonified'''
 27 |         schema = {
 28 |             "fields": [
 29 |                 {"name": "abc", "type": "string"}
 30 |             ]
 31 |         }
 32 |         indict = {
 33 |             "schema": json.dumps(schema),
 34 |             "otherval": json.dumps(schema),
 35 |             "x": "{'abc': 1"
 36 |         }
 37 |         exp = {
 38 |             "schema": schema,
 39 |             "otherval": schema,
 40 |             # fake json object - not really ... but looks like it ...
 41 |             "x": "{'abc': 1"
 42 |         }
 43 |         out = converter.resource(indict)
 44 |         assert out == exp
 45 | 
 46 |         indict = {
 47 |             "x": "hello world",
 48 |             "y": "1.3"
 49 |         }
 50 |         exp = {
 51 |             "x": "hello world",
 52 |             "y": "1.3"
 53 |         }
 54 |         out = converter.resource(indict)
 55 |         assert out == exp
 56 | 
 57 |     def test_keys_are_removed_that_should_be(self):
 58 |         indict = {
 59 |             "position": 2,
 60 |             "datastore_active": True,
 61 |             "state": "active"
 62 |         }
 63 |         exp = {}
 64 |         out = converter.resource(indict)
 65 |         assert out == exp
 66 | 
 67 |     def test_resource_mapping(self):
 68 |         indict = {
 69 |             "url": "http://www.somewhere.com/data.csv",
 70 |             "size": 110,
 71 |             "mimetype": "text/csv"
 72 |         }
 73 |         exp = {
 74 |             "path": "http://www.somewhere.com/data.csv",
 75 |             "bytes": 110,
 76 |             "mediatype": "text/csv"
 77 |         }
 78 |         out = converter.resource(indict)
 79 |         assert out == exp
 80 | 
 81 |     def test_resource_path_is_set_even_for_uploaded_resources(self):
 82 |         indict = {
 83 |             "url": "http://www.somewhere.com/data.csv",
 84 |             "url_type": "upload"
 85 |         }
 86 |         exp = {
 87 |             'path': 'http://www.somewhere.com/data.csv',
 88 |             'url_type': "upload"
 89 |         }
 90 |         out = converter.resource(indict)
 91 |         assert out == exp
 92 | 
 93 |     def test_resource_keys_pass_through(self):
 94 |         indict = {
 95 |             'id': 'xxx',
 96 |             'name': 'abc',
 97 |             'description': 'GDPs list',
 98 |             'format': 'CSV',
 99 |             'hash': 'e785c0883d7a104330e69aee73d4f235',
100 |             'schema': {
101 |                 'fields': [
102 |                     {'name': 'id', 'type': 'integer'},
103 |                     {'name': 'title', 'type': 'string'},
104 |                 ]
105 |             },
106 |             # random
107 |             'adfajka': 'aaaa',
108 |             '1dafak': 'abbbb'
109 |         }
110 |         exp = indict
111 |         out = converter.resource(indict)
112 |         assert out == exp
113 | 
114 |     def test_nulls_are_stripped(self):
115 |         indict = {
116 |             'abc': 'xxx',
117 |             'size': None,
118 |             'xyz': None
119 |         }
120 |         exp = {
121 |             'abc': 'xxx'
122 |         }
123 |         out = converter.resource(indict)
124 |         assert out == exp
125 | 
126 | 
127 | class TestPackageConversion:
128 |     def test_dataset_extras(self):
129 |         indict = {
130 |             'extras': [
131 |                 {'key': 'title_cn', 'value': u'國內生產總值'},
132 |                 {'key': 'years', 'value': '[2015, 2016]'},
133 |                 {'key': 'last_year', 'value': 2016},
134 |                 {'key': 'location', 'value': '{"country": "China"}'}
135 |             ]
136 |         }
137 |         exp = {
138 |             'title_cn': u'國內生產總值',
139 |             'years': [2015, 2016],
140 |             'last_year': 2016,
141 |             'location': {'country': 'China'}
142 |         }
143 |         out = converter.dataset(indict)
144 |         assert out == exp
145 | 
146 |     def test_unjsonify_all_extra_values(self):
147 |         indict = {
148 |             'extras': [
149 |                 {
150 |                     'key': 'location',
151 |                     'value': '{"country": {"China": {"population": '
152 |                              '"1233214331", "capital": "Beijing"}}}'
153 |                 },
154 |                 {
155 |                     'key': 'numbers',
156 |                     'value': '[[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]]'
157 |                 }
158 |             ]
159 |         }
160 |         out = converter.dataset(indict)
161 |         exp = {
162 |             "location": {
163 |                 "country":
164 |                 {"China":
165 |                  {"population": "1233214331",
166 |                   "capital": "Beijing"}}
167 |             },
168 |             "numbers": [[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]]
169 |         }
170 |         assert out == exp
171 | 
172 |     def test_dataset_license(self):
173 |         # No license_title nor license_url
174 |         indict = {
175 |             'license_id': 'odc-odbl'
176 |         }
177 |         exp = {
178 |             'licenses': [{
179 |                 'name': 'odc-odbl',
180 |             }]
181 |         }
182 |         out = converter.dataset(indict)
183 |         assert out == exp
184 | 
185 |         # Remap everything in licenses
186 |         indict = {
187 |             'license_id': 'cc-by',
188 |             'license_title': 'Creative Commons Attribution',
189 |             'license_url': 'http://www.opendefinition.org/licenses/cc-by'
190 |         }
191 |         exp = {
192 |             'licenses': [{
193 |                 'name': 'cc-by',
194 |                 'title': 'Creative Commons Attribution',
195 |                 'path': 'http://www.opendefinition.org/licenses/cc-by'
196 |             }]
197 |         }
198 |         out = converter.dataset(indict)
199 |         assert out == exp
200 | 
201 |     def test_dataset_license_with_licenses_in_extras(self):
202 |         indict = {
203 |             'license_id': 'odc-odbl',
204 |             'license_title': 'Open Data Commons Open Database License',
205 |             'license_url': 'https://opendatacommons.org/licenses/odbl/1-0/index.html',
206 |             'extras': [
207 |                 {
208 |                     'key': 'licenses',
209 |                     'value': json.dumps(
210 |                         [
211 |                             {
212 |                                 'name': 'cc-by',
213 |                                 'title': 'Creative Commons Attribution',
214 |                                 'path': 'http://www.opendefinition.org/licenses/cc-by'
215 |                             },
216 |                             {
217 |                                 'name': 'odc-by',
218 |                                 'title': 'Open Data Commons Attribution License',
219 |                                 'path': 'https://opendatacommons.org/licenses/by/1-0/index.html'
220 |                             }
221 |                         ]
222 |                     )
223 |                 }
224 |             ]
225 |         }
226 |         exp = {
227 |             'licenses': [
228 |                 {
229 |                     'name': 'odc-odbl',
230 |                     'title': 'Open Data Commons Open Database License',
231 |                     'path': 'https://opendatacommons.org/licenses/odbl/1-0/index.html'
232 |                 },
233 |                 {
234 |                     'name': 'odc-by',
235 |                     'title': 'Open Data Commons Attribution License',
236 |                     'path': 'https://opendatacommons.org/licenses/by/1-0/index.html'
237 |                 }
238 |             ]
239 |         }
240 |         out = converter.dataset(indict)
241 |         assert out == exp
242 | 
243 |     def test_keys_are_passed_through(self):
244 |         indict = {
245 |             'name': 'gdp',
246 |             'id': 'xxxx',
247 |             'title': 'Countries GDP',
248 |             'version': '1.0',
249 |             # random
250 |             'xxx': 'aldka'
251 |         }
252 |         out = converter.dataset(indict)
253 |         exp = {
254 |             'name': 'gdp',
255 |             'id': 'xxxx',
256 |             'title': 'Countries GDP',
257 |             'version': '1.0',
258 |             'xxx': 'aldka'
259 |         }
260 |         assert out == exp
261 | 
262 |     def test_key_mappings(self):
263 |         # notes
264 |         indict = {
265 |             'notes': 'Country, regional and world GDP',
266 |             'url': 'https://datopian.com'
267 |         }
268 |         exp = {
269 |             'description': 'Country, regional and world GDP',
270 |             'homepage': 'https://datopian.com'
271 |         }
272 |         out = converter.dataset(indict)
273 |         assert out == exp
274 | 
275 |     def test_dataset_author_and_maintainer(self):
276 |         indict = {
277 |             'author': 'World Bank and OECD',
278 |             'author_email': 'someone@worldbank.org'
279 |         }
280 |         exp = {
281 |             'contributors': [
282 |                 {
283 |                     'title': 'World Bank and OECD',
284 |                     'email': 'someone@worldbank.org',
285 |                     'role': 'author'
286 |                 }
287 |             ]
288 |         }
289 |         out = converter.dataset(indict)
290 |         assert out == exp
291 | 
292 |         indict = {
293 |             'author': 'World Bank and OECD',
294 |             'author_email': 'someone@worldbank.org',
295 |             'maintainer': 'Datopian',
296 |             'maintainer_email': 'helloxxx@datopian.com'
297 |         }
298 |         exp = {
299 |             'contributors': [
300 |                 {
301 |                     'title': 'World Bank and OECD',
302 |                     'email': 'someone@worldbank.org',
303 |                     'role': 'author'
304 |                 },
305 |                 {
306 |                     'title': 'Datopian',
307 |                     'email': 'helloxxx@datopian.com',
308 |                     'role': 'maintainer'
309 |                 },
310 | 
311 |             ]
312 |         }
313 |         out = converter.dataset(indict)
314 |         assert out == exp
315 | 
316 |         # if we already have contributors use that ...
317 |         indict = {
318 |             'contributors': [{
319 |                 'title': 'Datopians'
320 |             }],
321 |             'author': 'World Bank and OECD',
322 |         }
323 |         exp = {
324 |             'contributors': [{
325 |                 'title': 'Datopians'
326 |             }]
327 |         }
328 |         out = converter.dataset(indict)
329 |         assert out == exp
330 | 
331 |     def test_dataset_tags(self):
332 |         indict = {
333 |             'tags': [
334 |                 {
335 |                     'display_name': 'economy',
336 |                     'id': '9d602a79-7742-44a7-9029-50b9eca38c90',
337 |                     'name': 'economy',
338 |                     'state': 'active'
339 |                 },
340 |                 {
341 |                     'display_name': 'worldbank',
342 |                     'id': '3ccc2e3b-f875-49ef-a39d-6601d6c0ef76',
343 |                     'name': 'worldbank',
344 |                     'state': 'active'
345 |                 }
346 |             ]
347 |         }
348 |         exp = {
349 |             'keywords': ['economy', 'worldbank']
350 |         }
351 |         out = converter.dataset(indict)
352 |         assert out == exp
353 | 
354 |     def test_resources_are_converted(self):
355 |         indict = {
356 |             'name': 'gdp',
357 |             'resources': [{
358 |                 'name': 'data.csv',
359 |                 'url': 'http://someplace.com/data.csv',
360 |                 'size': 100
361 |             }]
362 |         }
363 |         exp = {
364 |             'name': 'gdp',
365 |             'resources': [{
366 |                 'name': 'data.csv',
367 |                 'path': 'http://someplace.com/data.csv',
368 |                 'bytes': 100
369 |             }]
370 |         }
371 |         out = converter.dataset(indict)
372 |         assert out == exp
373 | 
374 |     def test_all_keys_are_passed_through(self):
375 |         indict = {
376 |             'description': 'GDPs list',
377 |             'schema': {
378 |                 'fields': [
379 |                     {'name': 'id', 'type': 'integer'},
380 |                     {'name': 'title', 'type': 'string'},
381 |                 ]
382 |             },
383 |             # random
384 |             'adfajka': 'aaaa',
385 |             '1dafak': 'abbbb'
386 |         }
387 |         exp = indict
388 |         out = converter.resource(indict)
389 |         assert out == exp
390 | 
391 |     def test_keys_are_removed_that_should_be(self):
392 |         indict = {
393 |             'isopen': True,
394 |             'num_tags': 1,
395 |             'num_resources': 10,
396 |             'state': 'active',
397 |             "organization": {
398 |                 "description": "",
399 |                 "title": "primary_care_prescribing_dispensing",
400 |                 "created": "2020-03-31T21:51:41.334189",
401 |                 "approval_status": "approved",
402 |                 "is_organization": True,
403 |                 "state": "active",
404 |                 "image_url": "",
405 |                 "revision_id": "7c86fde3-9899-41d6-b0bb-6c72dd4b6b94",
406 |                 "type": "organization",
407 |                 "id": "a275814e-6c15-40a8-99fd-af911f1568ef",
408 |                 "name": "primary_care_prescribing_dispensing"
409 |             }
410 |         }
411 |         exp = {}
412 |         out = converter.dataset(indict)
413 |         assert out == exp
414 | 
415 |     def test_null_values_are_stripped(self):
416 |         indict = {
417 |             'id': '12312',
418 |             'title': 'title here',
419 |             'format': None
420 |         }
421 |         exp = {
422 |             'id': '12312',
423 |             'title': 'title here'
424 |         }
425 |         out = converter.dataset(indict)
426 |         assert out == exp
427 | 
428 |     def test_empty_tags_ignored(self):
429 |         indict = {
430 |             "tags": []
431 |             }
432 |         exp = {}
433 |         out = converter.dataset(indict)
434 |         assert out == exp
435 | 


--------------------------------------------------------------------------------
/tests/test_frictionless_to_ckan.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import json
  3 | 
  4 | import frictionless_ckan_mapper.frictionless_to_ckan as converter
  5 | 
  6 | 
  7 | class TestResourceConversion:
  8 |     def test_non_ckan_keys_passthrough(self):
  9 |         indict = {
 10 |             'title_cn': u'國內生產總值',
 11 |             'years': [2015, 2016],
 12 |             'last_year': 2016,
 13 |             'location': {'country': 'China'}
 14 |         }
 15 |         out = converter.resource(indict)
 16 |         exp = {
 17 |             'title_cn': u'國內生產總值',
 18 |             'years': [2015, 2016],
 19 |             'last_year': 2016,
 20 |             'location': {'country': 'China'}
 21 |         }
 22 |         assert out == exp
 23 | 
 24 |     def test_path_to_url(self):
 25 |         # Test remote path
 26 |         indict = {'path': 'http://www.somewhere.com/data.csv'}
 27 |         out = converter.resource(indict)
 28 |         assert out['url'] == indict['path']
 29 | 
 30 |         # Test local path
 31 |         indict = {'path': './data.csv'}
 32 |         out = converter.resource(indict)
 33 |         assert out['url'] == indict['path']
 34 | 
 35 |         # Test POSIX path
 36 |         indict = {'path': '/home/user/data.csv'}
 37 |         out = converter.resource(indict)
 38 |         assert out['url'] == indict['path']
 39 | 
 40 |     def test_other_remapping(self):
 41 |         indict = {
 42 |             'bytes': 10,
 43 |             'mediatype': 'text/csv'
 44 |         }
 45 |         exp = {
 46 |             'size': 10,
 47 |             'mimetype': 'text/csv'
 48 |         }
 49 |         out = converter.resource(indict)
 50 |         assert out == exp
 51 | 
 52 |     def test_passthrough(self):
 53 |         indict = {
 54 |             'description': 'GDPs list',
 55 |             'format': 'CSV',
 56 |             'hash': 'e785c0883d7a104330e69aee73d4f235'
 57 |         }
 58 |         out = converter.resource(indict)
 59 |         assert out == indict
 60 | 
 61 | 
 62 | class TestPackageConversion:
 63 |     def test_passthrough(self):
 64 |         indict = {
 65 |             'name': 'gdp',
 66 |             'id': 'xxxx',
 67 |             'title': 'Countries GDP',
 68 |             'version': '1.0',
 69 |             "owner_org": "a275814e-6c15-40a8-99fd-af911f1568ef",
 70 |             "metadata_created": "2020-03-31T21:57:48.676558",
 71 |             "metadata_modified": "2020-03-31T21:57:50.215642",
 72 |             "creator_user_id": "b5ab876c-0d04-479a-92de-f66db5dd6fb3",
 73 |             "private": False,
 74 |             "revision_id": "xxx",
 75 |             # TODO: test groups
 76 |         }
 77 |         out = converter.package(indict)
 78 |         assert out == indict
 79 | 
 80 |     def test_basic_mappings(self):
 81 |         indict = {
 82 |             'description': 'Country, regional and world GDP in current USD.',
 83 |             'homepage': 'https://datopian.com'
 84 |         }
 85 |         exp = {
 86 |             'notes': 'Country, regional and world GDP in current USD.',
 87 |             'url': 'https://datopian.com'
 88 |         }
 89 |         out = converter.package(indict)
 90 |         assert out == exp
 91 | 
 92 |     def test_dataset_license(self):
 93 |         indict = {
 94 |             'licenses': [{
 95 |                 'name': 'odc-odbl',
 96 |                 'path': 'http://example.com/file.csv',
 97 |             }]
 98 |         }
 99 |         exp = {
100 |             'license_id': 'odc-odbl',
101 |             'license_title': None,
102 |             'license_url': 'http://example.com/file.csv'
103 |         }
104 |         out = converter.package(indict)
105 |         assert out == exp
106 | 
107 |         indict = {
108 |             'licenses': [{
109 |                 'title': 'Open Data Commons Open Database License',
110 |                 'name': 'odc-odbl'
111 |             }]
112 |         }
113 |         exp = {
114 |             'license_id': 'odc-odbl',
115 |             'license_title': 'Open Data Commons Open Database License',
116 |             'license_url': None
117 |         }
118 |         out = converter.package(indict)
119 |         assert out == exp
120 | 
121 |         # Finally, what if more than one license
122 |         indict = {
123 |             'licenses': [
124 |                 {
125 |                     'title': 'Open Data Commons Open Database License',
126 |                     'name': 'odc-pddl'
127 |                 },
128 |                 {
129 |                     'title': 'Creative Commons CC Zero License (cc-zero)',
130 |                     'name': 'cc-zero'
131 |                 }
132 |             ]
133 |         }
134 |         exp = {
135 |             'license_id': 'odc-pddl',
136 |             'license_title': 'Open Data Commons Open Database License',
137 |             'license_url': None,
138 |             'extras': [
139 |                 {
140 |                     'key': 'licenses',
141 |                     'value': json.dumps(indict['licenses'])
142 |                 }
143 |             ]
144 |         }
145 |         out = converter.package(indict)
146 |         assert out == exp
147 | 
148 |     # TODO: get clear on the spelling of the key "organization".
149 |     # It's "organisation" in the JSON schema at
150 |     # https://specs.frictionlessdata.io/schemas/data-package.json
151 |     # while it's "organization" in the page of the specs at
152 |     # https://specs.frictionlessdata.io/data-package/#metadata
153 |     def test_contributors(self):
154 |         # author conversion
155 |         indict = {
156 |             'contributors': [
157 |                 {
158 |                     'title': 'John Smith'
159 |                 }
160 |             ]
161 |         }
162 |         exp = {
163 |             'author': 'John Smith',
164 |             'author_email': None
165 |         }
166 |         out = converter.package(indict)
167 |         assert out == exp
168 | 
169 |         # check maintainer conversion
170 |         indict = {
171 |             'contributors': [
172 |                 {
173 |                     'title': 'xyz',
174 |                     'email': 'xyz@abc.com',
175 |                     'organisation': 'xxxxx',
176 |                     'role': 'maintainer'
177 |                 }
178 |             ]
179 |         }
180 |         exp = {
181 |             'maintainer': 'xyz',
182 |             'maintainer_email': 'xyz@abc.com'
183 |         }
184 |         out = converter.package(indict)
185 |         assert out == exp
186 | 
187 |         # Make sure that we also get the correct data when there are multiple
188 |         # contributors
189 |         indict = {
190 |             'contributors': [
191 |                 {
192 |                     'title': 'abc',
193 |                     'email': 'abc@abc.com'
194 |                 },
195 |                 {
196 |                     'title': 'xyz',
197 |                     'email': 'xyz@xyz.com',
198 |                     'role': 'maintainer'
199 |                 }
200 |             ]
201 |         }
202 |         exp = {
203 |             'author': 'abc',
204 |             'author_email': 'abc@abc.com',
205 |             'maintainer': 'xyz',
206 |             'maintainer_email': 'xyz@xyz.com'
207 |         }
208 |         out = converter.package(indict)
209 |         assert out == exp
210 | 
211 |         # finally if we have contributors beyond that expected for ckan we keep
212 |         # that in extras (raw)
213 |         indict = {
214 |             'contributors': [
215 |                 {"role": "author", "email": "", "title": "Patricio"},
216 |                 {"role": "maintainer", "email": "", "title": "Rufus"},
217 |                 {"role": "author", "email": "", "title": "Paul"}
218 |             ]
219 |         }
220 |         exp = {
221 |             'author': 'Patricio',
222 |             'author_email': '',
223 |             'maintainer': 'Rufus',
224 |             'maintainer_email': '',
225 |             'extras': [{
226 |                 'key': u'contributors',
227 |                 'value': json.dumps(indict['contributors'])
228 |              }]
229 |         }
230 |         out = converter.package(indict)
231 |         assert out == exp
232 | 
233 |     def test_keywords_converted_to_tags(self):
234 |         keywords = ['economy!!!', 'World Bank']
235 |         indict = {'keywords': keywords}
236 |         out = converter.package(indict)
237 |         assert out.get('tags') == [
238 |             {'name': 'economy!!!'},
239 |             {'name': 'World Bank'},
240 |         ]
241 | 
242 |     def test_extras_is_converted(self):
243 |         indict = {
244 |             'homepage': 'www.example.com',
245 |             'newdict': {'key1': 'dict_to_jsonify'},
246 |             'newint': 123,
247 |             'newkey': 'new value',
248 |             'newlist': [1, 2, 3, 'string'],
249 |             'title': 'Title here'
250 |         }
251 |         exp = {
252 |             'title': 'Title here',
253 |             'url': 'www.example.com',
254 |             'extras': [
255 |                 {
256 |                     'key': 'newdict', 'value': '{"key1": "dict_to_jsonify"}'
257 |                 },
258 |                 {'key': 'newint', 'value': 123},
259 |                 {'key': 'newkey', 'value': 'new value'},
260 |                 {'key': 'newlist', 'value': '[1, 2, 3, "string"]'},
261 |             ]
262 |         }
263 |         out = converter.package(indict)
264 |         out['extras'] = sorted(out['extras'], key=lambda i: i['key'])
265 |         assert out == exp
266 | 
267 |     def test_resources_are_converted(self):
268 |         indict = {
269 |             'name': 'gdp',
270 |             'resources': [{
271 |                 'name': 'data.csv',
272 |                 'path': 'http://someplace.com/data.csv',
273 |                 'bytes': 100
274 |             }]
275 |         }
276 |         exp = {
277 |             'name': 'gdp',
278 |             'resources': [{
279 |                 'name': 'data.csv',
280 |                 'url': 'http://someplace.com/data.csv',
281 |                 'size': 100
282 |             }]
283 |         }
284 |         out = converter.package(indict)
285 |         assert out == exp
286 | 
287 | 


--------------------------------------------------------------------------------
/tests/test_roundtrip.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import frictionless_ckan_mapper.ckan_to_frictionless as ckan_to_frictionless
 4 | import frictionless_ckan_mapper.frictionless_to_ckan as frictionless_to_ckan
 5 | 
 6 | import six
 7 | 
 8 | 
 9 | class TestPackageConversion:
10 |     def test_round_trip_ckan(self):
11 |         # `ckan1` != `ckan2` but `ckan2` == `ckan3`
12 |         inpath = 'tests/fixtures/full_ckan_package.json'
13 |         ckan1 = json.load(open(inpath))
14 |         fd1 = ckan_to_frictionless.dataset(ckan1)
15 |         ckan2 = frictionless_to_ckan.package(fd1)
16 |         fd2 = ckan_to_frictionless.dataset(ckan2)
17 |         ckan3 = frictionless_to_ckan.package(fd2)
18 | 
19 |         # FIXME: this currently doesn't work for Python 2 due to the way
20 |         # Unicode is handled and because the dictionary keys do not keep
21 |         # the same order.
22 |         # Solution 1: Skip for Python 2 (it's clearly the same dictionary
23 |         # if the build passes on Python 3)
24 |         # Solution 2: Hard code the dicts as in `test_extras_is_converted`
25 |         # in test_frictionless_to_ckan.py instead of loading JSON and
26 |         # sort the keys.
27 |         if not six.PY2:
28 |             assert ckan2 == ckan3
29 | 
30 |     def test_differences_ckan_round_trip(self):
31 |         # When converting ckan1 to fd1 then fd1 to ckan2,
32 |         # ckan1 is bound to differ from ckan2.
33 |         # Those fixtures illustrate the expected differences.
34 |         inpath = 'tests/fixtures/full_ckan_package.json'
35 |         ckan1 = json.load(open(inpath))
36 |         fd1 = ckan_to_frictionless.dataset(ckan1)
37 |         ckan2 = frictionless_to_ckan.package(fd1)
38 |         inpath_round_trip = ('tests/fixtures/'
39 |                              'full_ckan_package_first_round_trip.json')
40 |         exp = json.load(open(inpath_round_trip))
41 | 
42 |         # FIXME: this currently doesn't work for Python 2 due to the way
43 |         # Unicode is handled and because the dictionary keys do not keep
44 |         # the same order.
45 |         # Solution 1: Skip for Python 2 (it's clearly the same dictionary
46 |         # if the build passes on Python 3)
47 |         # Solution 2: Hard code the dicts as in `test_extras_is_converted`
48 |         # in test_frictionless_to_ckan.py instead of loading JSON and
49 |         # sort the keys.
50 |         if not six.PY2:
51 |             assert ckan2 == exp
52 | 
53 |         # Notable differences in `exp` from ckan1 are:
54 |         # - Keys not defined in a standard CKAN package such as
55 |         #  `creator_user_id` will go to `extras`.
56 |         # - In our `full_ckan_package.json` fixture, 'extras' is empty but
57 |         #   Frictionless fills it and it will exist in the CKAN package after
58 |         #   the first round trip.
59 |         # - Keys defined in CKAN but ignored in Frictionless, such as `id`
60 |         #   (because a Frictionless package doesn't have an id property) will
61 |         #   also go to 'extras'.
62 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | package=frictionless_ckan_mapper
 3 | skip_missing_interpreters=true
 4 | envlist=
 5 |   py36
 6 |   py27
 7 | 
 8 | [testenv]
 9 | deps=
10 |   pytest
11 |   pytest-cov
12 |   coverage
13 | passenv=
14 |   CI
15 |   TRAVIS
16 |   TRAVIS_JOB_ID
17 |   TRAVIS_BRANCH
18 |   DYLD_LIBRARY_PATH
19 |   LC_ALL
20 | commands=
21 |   py.test \
22 |     --cov {[tox]package} \
23 |     --cov-config tox.ini \
24 |     --cov-report term-missing \
25 |     {posargs}
26 | 


--------------------------------------------------------------------------------