├── .flake8 ├── .github ├── issue_template.md └── pull_request_template.md ├── .gitignore ├── .travis.yml ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── frictionless_ckan_mapper ├── VERSION ├── __init__.py ├── ckan_to_frictionless.py └── frictionless_to_ckan.py ├── pylama.ini ├── pytest.ini ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── fixtures │ ├── ckan_package.json │ ├── ckan_resource.json │ ├── frictionless_package.json │ ├── frictionless_resource.json │ ├── full_ckan_package.json │ └── full_ckan_package_first_round_trip.json ├── test_ckan_to_frictionless.py ├── test_frictionless_to_ckan.py └── test_roundtrip.py └── tox.ini /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | max-complexity = 10 4 | exclude = .venv*,venv*,.git,__pycache__,.tox,.eggs,*.egg -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your idea or problem. If it's a bug share as much as possible to reproduce it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @amercader (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your pull request. Make sure that tests pass before publishing it 4 | 5 | --- 6 | 7 | Please preserve this line to notify @amercader (lead of this repository) 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | .venv/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | # Extra 93 | .idea/ 94 | *iml 95 | 96 | # VS Code 97 | .vscode/ 98 | 99 | # Makefile 100 | .make-cache 101 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: python 4 | python: 5 | - 2.7 6 | - 3.6 7 | env: 8 | global: 9 | - TOXENV="py${PYTHON_VERSION//./}" 10 | install: 11 | - make install 12 | - pip install coveralls 13 | script: 14 | - make test 15 | after_success: 16 | - coveralls 17 | deploy: 18 | provider: pypi 19 | user: "__token__" 20 | distributions: sdist bdist_wheel 21 | skip_cleanup: true 22 | on: 23 | tags: true 24 | password: 25 | secure: i5+61ZmR8Fv3a31IwnhgM3hEOCcyWCHezR+vVt2J1iEZnyCP1ldh2rpRtk4SOW8tgR9VJ/amhrYnUoDP2WP7KAfA6nX8Y5I8apVL/iNAJssqPiSlPLNeOgzjgqEEE1tSXFmriyieVrDsf+z98XX6LjHDUoQFdop9R1WgXlnXrn967/rBl4a0BA30xi5gLyab6KWkNNAtMriZEHP058rrkWxGMLn/0P+9eu2GHZTkUdmotPMHPAvEB+Ts6BOs4i74A4MnTyxDG8nlEqBP9CjkqiUyX5rGmSggW5wOJltrWjZEZymB0Q3b8uffrfzdwhpVkNlqXEBhnSnJKpRhmKmIWp8Ip7rEfeQQz/VQpSKLLdFF5fhiXUCsLpn1BJ02Y0h9VZuZig2Y2I7Rbo0w1udcGrnPZ5hstjbTQglVTcqMq4BmAJYHIiF5qRe+EF4D5oY42FI2YoinMuC5kljGl0iLbYYRo06Ei8ZSSj3cf+A8m+LePJCuDFaKaoafCUO0WFFcNTaUfwuHBF0Nx2TEuF1v7lVbcTqGMTYH+k89rlRRKOLyn9sYlaloKdW/4bYE+7aI7vCSxw5zl1qVgg+CXoqOEMq/fpkEAjheFngij+eCGcvkHjeEntIOKsAkLm/u8Gs/sd9bGyj7/JWF92JGJ6K4LKm3Mu/JmKiRqbUhc0jEbDw= 26 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Open Knowledge International 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.json 2 | global-include *.yml 3 | global-include *.txt 4 | global-include VERSION 5 | include LICENSE.md 6 | include Makefile 7 | include pylama.ini 8 | include pytest.ini 9 | include README.md 10 | include tox.ini 11 | prune .tox 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON := python 2 | PIP := pip 3 | GIT := git 4 | 5 | BUILD_DIR := build 6 | DIST_DIR := dist 7 | SENTINELS := .make-cache 8 | 9 | SOURCE_FILES := $(shell find ./frictionless_ckan_mapper -type f -name "*.py") 10 | 11 | PACKAGE := $(shell grep '^PACKAGE =' setup.py | cut -d "'" -f2) 12 | VERSION := $(shell head -n 1 $(PACKAGE)/VERSION) 13 | 14 | .PHONY: all dist distclean install list release test version 15 | 16 | ## Clean all generated files 17 | distclean: 18 | rm -rf $(BUILD_DIR) $(DIST_DIR) 19 | rm -rf $(SENTINELS)/dist 20 | 21 | ## Create distribution files to upload to pypi 22 | dist: $(SENTINELS)/dist 23 | 24 | 25 | 26 | all: list 27 | 28 | install: 29 | pip install --upgrade -e .[develop] 30 | 31 | list: 32 | @grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n' 33 | 34 | # Upload a release of the package to PyPi and create a Git tag 35 | # Note: Travis CI will upload on tag push. 36 | release: $(SENTINELS)/dist 37 | @echo 38 | @echo "You are about to release authoritative version $(VERSION)" 39 | @echo "This will:" 40 | @echo " - Create a git tag release-$(VERSION)" 41 | @echo " - Create a release package and upload it to PyPi via Travis CI" 42 | $(GIT) tag release-$(VERSION) 43 | $(GIT) push --tags 44 | # $(PYTHON) -m twine upload dist/* 45 | 46 | $(SENTINELS): 47 | mkdir $@ 48 | 49 | $(SENTINELS)/dist-setup: | $(SENTINELS) 50 | $(PIP) install -U pip wheel twine 51 | @touch $@ 52 | 53 | $(SENTINELS)/dist: $(SENTINELS)/dist-setup $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION).tar.gz $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION)-py2.py3-none-any.whl | $(SENTINELS) 54 | @touch $@ 55 | 56 | $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION).tar.gz $(DIST_DIR)/frictionless-ckan-mapper-$(VERSION)-py2.py3-none-any.whl: $(SOURCE_FILES) setup.py | $(SENTINELS)/dist-setup 57 | $(PYTHON) setup.py sdist bdist_wheel --universal 58 | 59 | test: 60 | pylama $(PACKAGE) 61 | tox 62 | 63 | version: 64 | @echo $(VERSION) 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Frictionless CKAN Mapper 2 | 3 | A library for mapping CKAN metadata <=> Frictionless metadata. 4 | 5 | The library has zero dependencies (not even on Data Package libs). You can use it directly or use it for inspiration. Detailed outline of the algorithm is in the docs or you can read the code. 6 | 7 | [![Travis](https://img.shields.io/travis/frictionlessdata/frictionless-ckan-mapper/master.svg)](https://travis-ci.org/frictionlessdata/frictionless-ckan-mapper) 8 | [![Coveralls](http://img.shields.io/coveralls/frictionlessdata/frictionless-ckan-mapper/master.svg)](https://coveralls.io/r/frictionlessdata/frictionless-ckan-mapper?branch=master) 9 | [![PyPi](https://img.shields.io/pypi/v/frictionless-ckan-mapper.svg)](https://pypi.python.org/pypi/frictionless-ckan-mapper) 10 | [![SemVer](https://img.shields.io/badge/versions-SemVer-brightgreen.svg)](http://semver.org/) 11 | [![Chat on Discord](https://img.shields.io/discord/695635777199145130)](https://discord.gg/2UgfM2k) 12 | 13 | 14 | 15 | - [Frictionless CKAN Mapper](#frictionless-ckan-mapper) 16 | - [Installation](#installation) 17 | - [Getting started](#getting-started) 18 | - [CKAN => Frictionless](#ckan--frictionless) 19 | - [Frictionless => CKAN](#frictionless--ckan) 20 | - [Reference](#reference) 21 | - [`ckan_to_frictionless`](#ckan_to_frictionless) 22 | - [`resource(ckandict)`](#resourceckandict) 23 | - [`dataset(ckandict)`](#datasetckandict) 24 | - [`frictionless_to_ckan`](#frictionless_to_ckan) 25 | - [`resource(fddict)`](#resourcefddict) 26 | - [`package(fddict)`](#packagefddict) 27 | - [Design](#design) 28 | - [CKAN reference](#ckan-reference) 29 | - [Algorithm: CKAN => Frictionless](#algorithm-ckan--frictionless) 30 | - [Algorithm: Frictionless => CKAN](#algorithm-frictionless--ckan) 31 | - [Developers](#developers) 32 | - [Install the source](#install-the-source) 33 | - [Run the tests](#run-the-tests) 34 | - [Building and publishing the package](#building-and-publishing-the-package) 35 | - [Build the distribution package locally for testing purposes](#build-the-distribution-package-locally-for-testing-purposes) 36 | - [Test the package at test.pypi.org](#test-the-package-at-testpypiorg) 37 | - [Tag a new Git release and publish to the official PyPi](#tag-a-new-git-release-and-publish-to-the-official-pypi) 38 | 39 | 40 | ## Installation 41 | 42 | - Python: install Python. The library is compatible with both Python 2.7+ and Python 3.3+. 43 | 44 | ```bash 45 | pip install frictionless-ckan-mapper 46 | ``` 47 | 48 | **Note:** The package is installed as `frictionless-ckan-mapper` and then imported as `frictionless_ckan_mapper`. 49 | 50 | ## Getting started 51 | 52 | ### CKAN => Frictionless 53 | 54 | ```python 55 | # get a CKAN metadata item 56 | ckan_dataset = { 57 | "name": "my-dataset", 58 | "title": "My awesome dataset", 59 | "url": "http://www.example.com/data.csv" 60 | } 61 | 62 | # or load from an API e.g. 63 | # ckan_dataset = json.load(urllib.urlopen( 64 | # https://demo.ckan.org/api/3/package_show?id=my_dataset 65 | # )) 66 | 67 | from frictionless_ckan_mapper import ckan_to_frictionless as converter 68 | 69 | # convert to frictionless 70 | frictionless_package = converter.dataset(ckan_dict) 71 | 72 | print(frictionless_package) 73 | ``` 74 | 75 | ### Frictionless => CKAN 76 | 77 | ```python 78 | frictionless = { 79 | 'name': "f11s-dataset", 80 | 'path': "https://datahub.io/data.csv" 81 | } 82 | 83 | from frictionless_ckan_mapper import frictionless_to_ckan as f2c 84 | 85 | ckanout = f2c.dataset(frictionless) 86 | 87 | print(ckanout) 88 | ``` 89 | 90 | ## Reference 91 | 92 | This package contains two modules: 93 | 94 | - `frictionless_to_ckan` 95 | - `ckan_to_frictionless` 96 | 97 | You can import them directly like so: 98 | 99 | ```python 100 | from frictionless_ckan_mapper import ckan_to_frictionless 101 | from frictionless_ckan_mapper import frictionless_to_ckan 102 | ``` 103 | 104 | ### `ckan_to_frictionless` 105 | 106 | #### `resource(ckandict)` 107 | 108 | ```python 109 | from frictionless_ckan_mapper import ckan_to_frictionless as converter 110 | 111 | # ... Some code with a CKAN dictionary ... 112 | 113 | output_frictionless_dict = converter.resource(ckan_dictionary) 114 | ``` 115 | 116 | #### `dataset(ckandict)` 117 | 118 | ```python 119 | from frictionless_ckan_mapper import ckan_to_frictionless as converter 120 | 121 | # ... Some code with a CKAN dictionary ... 122 | 123 | output_frictionless_dict = converter.dataset(ckan_dictionary) 124 | ``` 125 | 126 | ### `frictionless_to_ckan` 127 | 128 | #### `resource(fddict)` 129 | 130 | ```python 131 | from frictionless_ckan_mapper import frictionless_to_ckan as converter 132 | 133 | # ... Some code with a Frictionless dictionary ... 134 | 135 | output_ckan_dict = converter.resource(frictionless_dictionary) 136 | ``` 137 | 138 | #### `package(fddict)` 139 | 140 | ```python 141 | from frictionless_ckan_mapper import frictionless_to_ckan as converter 142 | 143 | # ... Some code with a Frictionless dictionary ... 144 | 145 | output_ckan_dict = converter.package(frictionless_dictionary) 146 | ``` 147 | 148 | ## Design 149 | 150 | ```text 151 | Frictionless <=> CKAN 152 | -------------------------------------- 153 | Data Package <=> Package (Dataset) 154 | Data Resource <=> Resource 155 | Table Schema <=> Data Dictionary?? (datastore resources can have schemas) 156 | ``` 157 | 158 | ### CKAN reference 159 | 160 | **Summary:** 161 | 162 | - Class diagram below of key objects (without attributes) 163 | - Objects with their attributes in this spreadsheet: https://docs.google.com/spreadsheets/d/1XdqGTFni5Jfs8AMbcbfsP7m11h9mOHS0eDtUZtqGVSg/edit#gid=1925460244 164 | 165 | ```mermaid 166 | classDiagram 167 | 168 | class Package 169 | class Resource 170 | class DataDictionary 171 | 172 | Package *-- Resource 173 | Resource o-- DataDictionary 174 | ``` 175 | 176 | ![mermaid-diagram-20200703112520](https://user-images.githubusercontent.com/32682903/86486065-f9c08100-bd1f-11ea-8a1a-8f3befca0e6e.png) 177 | 178 | Source for CKAN metadata structure: 179 | 180 | - Dataset (Package): https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.package_create 181 | - `Package.as_dict` method: https://github.com/ckan/ckan/blob/2.8/ckan/model/package.py#L195-L223 182 | - `package_show` ... 183 | 184 | * Resource: https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.resource_create 185 | 186 | ### Algorithm: CKAN => Frictionless 187 | 188 | See the code in [`frictionless_ckan_mapper/ckan_to_frictionless.py`](./frictionless_ckan_mapper/ckan_to_frictionless.py) 189 | 190 | ### Algorithm: Frictionless => CKAN 191 | 192 | See the code in [`frictionless_ckan_mapper/frictionless_to_ckan.py`](./frictionless_ckan_mapper/frictionless_to_ckan.py) 193 | 194 | ## Developers 195 | 196 | ### Install the source 197 | 198 | - Clone the repo: 199 | 200 | ```bash 201 | git clone https://github.com/frictionlessdata/frictionless-ckan-mapper.git 202 | ``` 203 | 204 | - And install it with pip: 205 | 206 | ```bash 207 | pip install -e . 208 | ``` 209 | 210 | ### Run the tests 211 | 212 | Use the excellent `pytest` suite as follows: 213 | 214 | ```bash 215 | pytest tests 216 | ``` 217 | 218 | To test under both Python 2 and Python 3 environments, we use `tox`. You can run the following command: 219 | 220 | ```bash 221 | make test 222 | ``` 223 | 224 | **Note:** Make sure that the necessary Python versions are in your environment `PATH` (Python 2.7 and Python 3.6). 225 | 226 | ### Building and publishing the package 227 | 228 | To see a list of available commands from the `Makefile`, execute: 229 | 230 | ```bash 231 | make list 232 | ``` 233 | 234 | #### Build the distribution package locally for testing purposes 235 | 236 | If a previous build exists, make sure to also remove it before building again: 237 | 238 | ```bash 239 | make distclean 240 | ``` 241 | 242 | Then: 243 | 244 | ```bash 245 | make dist 246 | ``` 247 | 248 | Alternatively, this command will accomplish the same to build packages for both Python 2 and Python 3: 249 | 250 | ```bash 251 | python setup.py sdist bdist_wheel --universal 252 | ``` 253 | 254 | #### Test the package at test.pypi.org 255 | 256 | ```bash 257 | python -m twine upload --repository testpypi dist/* 258 | ``` 259 | 260 | The package will be publicly available at https://test.pypi.org/project/frictionless-ckan-mapper/ and you will be able to `pip install` it as usual. 261 | 262 | #### Tag a new Git release and publish to the official PyPi 263 | 264 | Make sure to update the version of the package in the file `frictionless_ckan_mapper/VERSION`. Then: 265 | 266 | ```bash 267 | make release 268 | ``` 269 | 270 | You can quickly review the version to release with `make version`, which will print the current version stored in `VERSION`. 271 | -------------------------------------------------------------------------------- /frictionless_ckan_mapper/VERSION: -------------------------------------------------------------------------------- 1 | 1.0.9 2 | -------------------------------------------------------------------------------- /frictionless_ckan_mapper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/frictionless-ckan-mapper/794eaa33a01a2bb88d0d25279a94aeee92fde395/frictionless_ckan_mapper/__init__.py -------------------------------------------------------------------------------- /frictionless_ckan_mapper/ckan_to_frictionless.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import six 3 | import json 4 | import re 5 | import unidecode 6 | from collections import defaultdict 7 | 8 | try: 9 | json_parse_exception = json.decoder.JSONDecodeError 10 | except AttributeError: # Testing against Python 2 11 | json_parse_exception = ValueError 12 | 13 | 14 | resource_mapping = { 15 | 'size': 'bytes', 16 | 'mimetype': 'mediatype', 17 | 'url': 'path' 18 | } 19 | 20 | resource_keys_to_remove = [ 21 | 'position', 22 | 'datastore_active', 23 | 'state' 24 | ] 25 | 26 | 27 | def resource(ckandict): 28 | '''Convert a CKAN resource to Frictionless Resource. 29 | 30 | 1. Remove unneeded keys 31 | 2. Expand extras. 32 | * Extras are already expanded to key / values by CKAN (unlike on 33 | package) 34 | * ~~Apply heuristic to unjsonify (if starts with [ or { unjsonify~~ 35 | * JSON loads everything that starts with [ or { 36 | 3. Map keys from CKAN to Frictionless (and reformat if needed) 37 | 4. Remove keys with null values (CKAN has a lot of null valued keys) 38 | 5. Apply special formatting (if any) for key fields e.g. slugiify 39 | ''' 40 | # TODO: delete keys last as may be needed for something in processing 41 | resource = dict(ckandict) 42 | for key in resource_keys_to_remove: 43 | if key in resource: 44 | del resource[key] 45 | 46 | # unjsonify values 47 | # * check if string 48 | # * if starts with [ or { => json.loads it ... 49 | # HACK: bit of a hacky way to check if value is a jsonified array or 50 | # dict 51 | # * else do nothing 52 | for key, value in resource.items(): 53 | if isinstance(value, six.text_type) or isinstance(value, six.string_types): 54 | value = value.strip() 55 | if value.startswith('{') or value.startswith('['): 56 | try: 57 | value = json.loads(value) 58 | resource[key] = value 59 | except (json_parse_exception, TypeError): 60 | pass 61 | 62 | if key == 'name': 63 | if isinstance(value, six.text_type): 64 | value = unidecode.unidecode(value) 65 | value = value.lower() 66 | value = value.strip() 67 | value = re.sub('(\||[^\w|.|\|])+', '-', value) 68 | if value == '': 69 | value = 'unnamed-resource' 70 | resource[key] = value 71 | 72 | if key == 'size': 73 | if resource[key]: 74 | resource[key] = int(resource[key]) 75 | 76 | # 'type' must be lower case 77 | if key == 'type': 78 | resource[key] = value.lower() 79 | 80 | # Remap differences from CKAN to Frictionless resource 81 | for key, value in resource_mapping.items(): 82 | if key in resource: 83 | resource[value] = resource[key] 84 | del resource[key] 85 | 86 | for key in list(resource.keys()): 87 | if resource[key] is None: 88 | del resource[key] 89 | 90 | return resource 91 | 92 | 93 | dataset_keys_to_remove = [ 94 | 'state', # b/c this is state info not metadata about dataset 95 | 'isopen', # computed info from license (render info not metadata) 96 | 'num_resources', # render info not metadata 97 | 'num_tags', # ditto 98 | 'organization', # already have owner_org id + this inlines related object 99 | ] 100 | dataset_mapping = { 101 | 'notes': 'description', 102 | 'url': 'homepage' 103 | } 104 | 105 | 106 | def dataset(ckandict): 107 | '''Convert a CKAN Package (Dataset) to Frictionless Package. 108 | 109 | 1. Expand extras. 110 | * JSON loads everything and on error have a string 111 | 2. Map keys from CKAN to Frictionless (and reformat if needed) 112 | 3. Remove keys with null values (CKAN has a lot of null valued keys) 113 | 4. Remove unneeded keys 114 | 5. Apply special formatting for key fields 115 | ''' 116 | outdict = dict(ckandict) 117 | # Convert the structure of extras 118 | # structure of extra item is {key: xxx, value: xxx} 119 | if 'extras' in ckandict: 120 | for extra in ckandict['extras']: 121 | key = extra['key'] 122 | value = extra['value'] 123 | try: 124 | value = json.loads(value) 125 | except (json_parse_exception, TypeError): 126 | pass 127 | outdict[key] = value 128 | del outdict['extras'] 129 | 130 | # Map dataset keys 131 | for key, value in dataset_mapping.items(): 132 | if key in ckandict: 133 | outdict[value] = ckandict[key] 134 | del outdict[key] 135 | 136 | # map resources inside dataset 137 | if 'resources' in ckandict: 138 | outdict['resources'] = [resource(res) for res in 139 | ckandict['resources']] 140 | else: 141 | outdict['resources'] = [] 142 | 143 | for res in outdict['resources']: 144 | if 'name' not in res: 145 | res['name'] = 'unnamed-resource' 146 | 147 | # prevent having multiple unanmed resources with the same name 148 | # to follow the specs https://specs.frictionlessdata.io/data-resource/#name 149 | unnamed_num = 1 150 | for res in outdict['resources']: 151 | if res['name'] == 'unnamed-resource': 152 | res['name'] += '-{}'.format(unnamed_num) 153 | unnamed_num += 1 154 | 155 | # Deal with resources having the same name 156 | name_count = defaultdict(int) 157 | resources_names = [r['name'] for r in outdict['resources']] 158 | 159 | for name in resources_names: 160 | name_count[name] += 1 161 | 162 | name_index = {n:1 for n in name_count.keys()} 163 | 164 | # If a group of resources have the same name 165 | # add a count to the name and save the original name in the metadata 166 | for res in outdict['resources']: 167 | if name_count[res['name']] > 1: 168 | res_name = res['name'] 169 | res['original_name'] = res_name 170 | res['name'] = f'{res_name}-{name_index[res_name]}' 171 | name_index[res_name] += 1 172 | 173 | # tags 174 | if ckandict.get('tags'): 175 | outdict['keywords'] = [tag['name'] for tag in ckandict['tags']] 176 | outdict.pop('tags', None) 177 | 178 | # author, maintainer => contributors 179 | # what to do if contributors already there? Options: 180 | # 1. Just use that and ignore author/maintainer 181 | # 2. replace with author/maintainer 182 | # 3. merge i.e. use contributors and merge in (this is sort of complex) 183 | # e.g. how to i avoid duplicating the same person 184 | # ANS: for now, is 1 ... 185 | if (not ('contributors' in outdict and outdict['contributors']) and 186 | ('author' in outdict or 'maintainer' in outdict)): 187 | outdict['contributors'] = [] 188 | if 'author' in outdict and outdict['author']: 189 | contrib = { 190 | 'title': outdict['author'], 191 | 'role': 'author' 192 | } 193 | if 'author_email' in outdict: 194 | contrib['email'] = outdict.get('author_email') or '' 195 | outdict['contributors'].append(contrib) 196 | if 'maintainer' in outdict and outdict['maintainer']: 197 | contrib = { 198 | 'title': outdict['maintainer'], 199 | 'role': 'maintainer' 200 | } 201 | if 'maintainer_email' in outdict: 202 | contrib['email'] = outdict.get('maintainer_email') or '' 203 | outdict['contributors'].append(contrib) 204 | 205 | for key in ['author', 'author_email', 'maintainer', 'maintainer_email']: 206 | outdict.pop(key, None) 207 | 208 | # Algorithm for licenses 209 | # 1. Use extras first 210 | # 2. Updating first item in licenses array (if already there - 211 | # or create it as empty) with stuff at root of ckan dict i.e. 212 | # values from license_id, license_title etc. 213 | 214 | # Looping like this because all those keys are optional according to the 215 | # docs (though usually license_id will be there if others are there). 216 | for key in ['license_id', 'license_title', 'license_url']: 217 | if key in outdict and 'licenses' not in outdict: 218 | outdict['licenses'] = [{}] 219 | break # check to create list of dicts only once 220 | if 'license_id' in outdict: 221 | outdict['licenses'][0]['name'] = outdict.get('license_id') or 'no_licerse_name' 222 | outdict.pop('license_id', None) 223 | else: 224 | outdict['licenses'][0]['name'] = 'no_license_name' 225 | 226 | if 'license_title' in outdict: 227 | outdict['licenses'][0]['title'] = outdict.get('license_title') or 'no_license_title' 228 | outdict.pop('license_title', None) 229 | else: 230 | outdict['licenses'][0]['title'] = 'no_license_title' 231 | 232 | if 'license_url' in outdict: 233 | outdict['licenses'][0]['path'] = outdict.get('license_url') or 'no_path' 234 | outdict.pop('license_url', None) 235 | else: 236 | outdict['licenses'][0]['path'] = 'no_license_path' 237 | 238 | for key in dataset_keys_to_remove: 239 | outdict.pop(key, None) 240 | 241 | for key in list(outdict.keys()): 242 | if outdict[key] is None: 243 | del outdict[key] 244 | 245 | return outdict 246 | -------------------------------------------------------------------------------- /frictionless_ckan_mapper/frictionless_to_ckan.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import json 3 | 4 | try: 5 | json_parse_exception = json.decoder.JSONDecodeError 6 | except AttributeError: # Testing against Python 2 7 | json_parse_exception = ValueError 8 | 9 | 10 | resource_mapping = { 11 | 'bytes': 'size', 12 | 'mediatype': 'mimetype', 13 | 'path': 'url' 14 | } 15 | 16 | package_mapping = { 17 | 'description': 'notes', 18 | 'homepage': 'url', 19 | } 20 | 21 | # Any key not in this list is passed as is inside "extras". 22 | # Further processing will happen for possible matchings, e.g. 23 | # contributor <=> author 24 | ckan_package_keys = [ 25 | 'author', 26 | 'author_email', 27 | 'creator_user_id', 28 | 'groups', 29 | 'id', 30 | 'license_id', 31 | 'license_title', 32 | 'license_url', 33 | 'maintainer', 34 | 'maintainer_email', 35 | 'metadata_created', 36 | 'metadata_modified', 37 | 'name', 38 | 'notes', 39 | 'owner_org', 40 | 'private', 41 | 'relationships_as_object', 42 | 'relationships_as_subject', 43 | 'revision_id', 44 | 'resources', 45 | 'state', 46 | 'tags', 47 | 'tracking_summary', 48 | 'title', 49 | 'type', 50 | 'url', 51 | 'version' 52 | ] 53 | 54 | frictionless_package_keys_to_exclude = [ 55 | 'extras' 56 | ] 57 | 58 | 59 | def resource(fddict): 60 | '''Convert a Frictionless resource to a CKAN resource. 61 | 62 | # TODO: (the following is inaccurate) 63 | 64 | 1. Map keys from Frictionless to CKAN (and reformat if needed). 65 | 2. Apply special formatting (if any) for key fields e.g. slugify. 66 | ''' 67 | resource = dict(fddict) 68 | 69 | # Remap differences from Frictionless to CKAN resource 70 | for key, value in resource_mapping.items(): 71 | if key in resource: 72 | resource[value] = resource[key] 73 | del resource[key] 74 | 75 | return resource 76 | 77 | 78 | def package(fddict): 79 | '''Convert a Frictionless package to a CKAN package (dataset). 80 | 81 | # TODO: (the following is inaccurate) 82 | 83 | 1. Map keys from Frictionless to CKAN (and reformat if needed). 84 | 2. Apply special formatting (if any) for key fields. 85 | 3. Copy extras across inside the "extras" key. 86 | ''' 87 | outdict = dict(fddict) 88 | 89 | # Map data package keys 90 | for key, value in package_mapping.items(): 91 | if key in fddict: 92 | outdict[value] = fddict[key] 93 | del outdict[key] 94 | 95 | # map resources inside dataset 96 | if 'resources' in fddict: 97 | outdict['resources'] = [resource(res) for res in fddict['resources']] 98 | 99 | if 'licenses' in outdict and outdict['licenses']: 100 | outdict['license_id'] = outdict['licenses'][0].get('name') 101 | outdict['license_title'] = outdict['licenses'][0].get('title') 102 | outdict['license_url'] = outdict['licenses'][0].get('path') 103 | # remove it so it won't get put in extras 104 | if len(outdict['licenses']) == 1: 105 | outdict.pop('licenses', None) 106 | 107 | if outdict.get('contributors'): 108 | for c in outdict['contributors']: 109 | if c.get('role') in [None, 'author']: 110 | outdict['author'] = c.get('title') 111 | outdict['author_email'] = c.get('email') 112 | break 113 | 114 | for c in outdict['contributors']: 115 | if c.get('role') == 'maintainer': 116 | outdict['maintainer'] = c.get('title') 117 | outdict['maintainer_email'] = c.get('email') 118 | break 119 | 120 | # we remove contributors where we have extracted everything into 121 | # ckan core that way it won't end up in extras 122 | # this helps ensure that round tripping with ckan is good 123 | # when have we extracted everything? 124 | # if contributors has length 1 and role in author or maintainer 125 | # or contributors == 2 and no of authors and maintainer types <= 1 126 | if ( 127 | (len(outdict.get('contributors')) == 1 and 128 | outdict['contributors'][0].get('role') in [None, 'author', 129 | 'maintainer']) 130 | or 131 | (len(outdict.get('contributors')) == 2 and 132 | [c.get('role') for c in outdict['contributors']] 133 | not in ( 134 | [None, None], 135 | ['maintainer', 'maintainer'], 136 | ['author', 'author'])) 137 | ): 138 | outdict.pop('contributors', None) 139 | 140 | if outdict.get('keywords'): 141 | outdict['tags'] = [ 142 | {'name': keyword} for keyword in outdict['keywords'] 143 | ] 144 | del outdict['keywords'] 145 | 146 | final_dict = dict(outdict) 147 | for key, value in outdict.items(): 148 | if ( 149 | key not in ckan_package_keys and 150 | key not in frictionless_package_keys_to_exclude 151 | ): 152 | if isinstance(value, (dict, list)): 153 | value = json.dumps(value) 154 | if not final_dict.get('extras'): 155 | final_dict['extras'] = [] 156 | final_dict['extras'].append( 157 | {'key': key, 'value': value} 158 | ) 159 | del final_dict[key] 160 | outdict = dict(final_dict) 161 | 162 | return outdict 163 | -------------------------------------------------------------------------------- /pylama.ini: -------------------------------------------------------------------------------- 1 | [pylama] 2 | linters = pyflakes,pep8 3 | 4 | [pylama:*/__init__.py] 5 | ignore = W0611 6 | 7 | [pylama:*] 8 | ignore = E128,E201,E202 9 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import io 5 | from setuptools import setup, find_packages 6 | 7 | 8 | # Helpers 9 | def read(*paths): 10 | """Read a text file.""" 11 | basedir = os.path.dirname(__file__) 12 | fullpath = os.path.join(basedir, *paths) 13 | contents = io.open(fullpath, encoding='utf-8').read().strip() 14 | return contents 15 | 16 | 17 | # Prepare 18 | PACKAGE = 'frictionless_ckan_mapper' 19 | NAME = PACKAGE.replace('_', '-') 20 | INSTALL_REQUIRES = [ 21 | 'six>=1.9,<2.0', 22 | 'unidecode' 23 | ] 24 | TESTS_REQUIRE = [ 25 | 'pylama', 26 | 'tox' 27 | ] 28 | README = read('README.md') 29 | VERSION = read(PACKAGE, 'VERSION') 30 | PACKAGES = find_packages(exclude=['examples', 'tests']) 31 | 32 | 33 | # Run 34 | setup( 35 | name=NAME, 36 | version=VERSION, 37 | packages=PACKAGES, 38 | include_package_data=True, 39 | install_requires=INSTALL_REQUIRES, 40 | tests_require=TESTS_REQUIRE, 41 | extras_require={'develop': TESTS_REQUIRE}, 42 | zip_safe=False, 43 | long_description=README, 44 | long_description_content_type='text/markdown', 45 | description='A library for mapping CKAN metadata <=> Frictionless metadata.', 46 | author='Open Knowledge International', 47 | url='https://github.com/frictionlessdata/frictionless-ckan-mapper', 48 | copyright='Copyright 2020 (c) Viderum Inc. / Datopian', 49 | license='MIT', 50 | keywords=[ 51 | 'data', 52 | 'ckan', 53 | 'frictionless', 54 | 'conversion', 55 | 'package', 56 | 'dataset', 57 | 'resource' 58 | ], 59 | classifiers=[ 60 | 'Environment :: Web Environment', 61 | 'Intended Audience :: Developers', 62 | 'License :: OSI Approved :: MIT License', 63 | 'Operating System :: OS Independent', 64 | 'Programming Language :: Python :: 3.6', 65 | 'Programming Language :: Python :: 2.7', 66 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 67 | 'Topic :: Software Development :: Libraries :: Python Modules', 68 | ], 69 | ) 70 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/frictionless-ckan-mapper/794eaa33a01a2bb88d0d25279a94aeee92fde395/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/ckan_package.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c", 3 | "name": "ckan-to-frictionless-conversion", 4 | "title": "Title with Spaces", 5 | "url": "https://unicode.org/Public/emoji/latest/emoji-test.txt", 6 | "resources": [ 7 | { 8 | "cache_last_updated": null, 9 | "cache_url": null, 10 | "created": "2020-06-14T16:46:36.105271", 11 | "datastore_active": false, 12 | "description": "csv", 13 | "format": "CSV", 14 | "hash": "", 15 | "id": "a6d54af7-3119-4691-917b-8655784c5438", 16 | "last_modified": null, 17 | "mimetype_inner": null, 18 | "mimetype": null, 19 | "name": "Emojis csv", 20 | "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c", 21 | "position": 0, 22 | "resource_type": null, 23 | "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c", 24 | "size": null, 25 | "state": "active", 26 | "url_type": null, 27 | "url": "https://datahub.io/core/unicode-emojis/r/emojis.csv" 28 | }, 29 | { 30 | "cache_last_updated": null, 31 | "cache_url": null, 32 | "created": "2020-06-14T16:47:13.972133", 33 | "datastore_active": false, 34 | "description": "Sequences", 35 | "format": "TXT", 36 | "hash": "", 37 | "id": "ad968429-651f-4aa2-add8-e45c155eef6e", 38 | "last_modified": null, 39 | "mimetype_inner": null, 40 | "mimetype": null, 41 | "name": "emoji-sequences.txt", 42 | "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c", 43 | "position": 1, 44 | "resource_type": null, 45 | "revision_id": "faaea9d4-3e7f-40ee-82fd-7d28d9034ffa", 46 | "size": 123123, 47 | "state": "active", 48 | "url_type": null, 49 | "url": "https://unicode.org/Public/emoji/13.0/emoji-sequences.txt" 50 | } 51 | ], 52 | "tags": [ 53 | { 54 | "vocabulary_id": null, 55 | "state": "active", 56 | "display_name": "tag1", 57 | "id": "6c943152-f6a4-4f84-8451-04740fb32dd9", 58 | "name": "tag1" 59 | }, 60 | { 61 | "vocabulary_id": null, 62 | "state": "active", 63 | "display_name": "tag2", 64 | "id": "a9114a47-9deb-42ce-be80-2b8000397cbd", 65 | "name": "tag2" 66 | }, 67 | { 68 | "vocabulary_id": null, 69 | "state": "active", 70 | "display_name": "tag3", 71 | "id": "19760b34-eaa8-472d-bffd-864a54e69abb", 72 | "name": "tag3" 73 | } 74 | ], 75 | "extras": [ 76 | { "key": "boolvalue", "value": "false" }, 77 | { "key": "custom2", "value": "12312386" }, 78 | { "key": "customkey1", "value": "value1" }, 79 | { "key": "nullvalue", "value": "null" } 80 | ] 81 | } 82 | -------------------------------------------------------------------------------- /tests/fixtures/ckan_resource.json: -------------------------------------------------------------------------------- 1 | { 2 | "mimetype": null, 3 | "cache_url": null, 4 | "hash": "", 5 | "description": "csv", 6 | "extras": { 7 | "key1": 123, 8 | "key2": false, 9 | "key3": null, 10 | "key4": "value4" 11 | }, 12 | "name": "Emojis csv", 13 | "format": "CSV", 14 | "url": "https://datahub.io/core/unicode-emojis/r/emojis.csv", 15 | "datastore_active": false, 16 | "cache_last_updated": null, 17 | "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c", 18 | "created": "2020-06-14T16:46:36.105271", 19 | "state": "active", 20 | "mimetype_inner": null, 21 | "last_modified": null, 22 | "position": 0, 23 | "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c", 24 | "url_type": null, 25 | "id": "a6d54af7-3119-4691-917b-8655784c5438", 26 | "resource_type": null, 27 | "size": null 28 | } 29 | -------------------------------------------------------------------------------- /tests/fixtures/frictionless_package.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c", 3 | "name": "ckan_to_frictionless_conversion", 4 | "title": "Title with Spaces", 5 | "homepage": "https://unicode.org/Public/emoji/latest/emoji-test.txt", 6 | "resources": [ 7 | { 8 | "bytes": 0, 9 | "cache_last_updated": null, 10 | "cache_url": null, 11 | "created": "2020-06-14T16:46:36.105271", 12 | "datastore_active": false, 13 | "description": "csv", 14 | "format": "csv", 15 | "hash": "", 16 | "id": "a6d54af7-3119-4691-917b-8655784c5438", 17 | "last_modified": null, 18 | "mediatype": null, 19 | "mimetype_inner": null, 20 | "name": "emojis-csv", 21 | "path": "https://datahub.io/core/unicode-emojis/r/emojis.csv", 22 | "resource_type": null, 23 | "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c", 24 | "state": "active", 25 | "url_type": null 26 | }, 27 | { 28 | "bytes": 123123, 29 | "cache_last_updated": null, 30 | "cache_url": null, 31 | "created": "2020-06-14T16:47:13.972133", 32 | "datastore_active": false, 33 | "description": "Sequences", 34 | "format": "txt", 35 | "hash": "", 36 | "id": "ad968429-651f-4aa2-add8-e45c155eef6e", 37 | "last_modified": null, 38 | "mediatype": null, 39 | "mimetype_inner": null, 40 | "name": "emoji-sequences.txt", 41 | "path": "https://unicode.org/Public/emoji/13.0/emoji-sequences.txt", 42 | "resource_type": null, 43 | "revision_id": "faaea9d4-3e7f-40ee-82fd-7d28d9034ffa", 44 | "state": "active", 45 | "url_type": null 46 | } 47 | ], 48 | "keywords": ["tag1", "tag2", "tag3"], 49 | "extras": [ 50 | { "key": "boolvalue", "value": "false" }, 51 | { "key": "custom2", "value": "12312386" }, 52 | { "key": "customkey1", "value": "value1" }, 53 | { "key": "nullvalue", "value": "null" } 54 | ] 55 | } 56 | -------------------------------------------------------------------------------- /tests/fixtures/frictionless_resource.json: -------------------------------------------------------------------------------- 1 | { 2 | "created": "2020-06-14T16:46:36.105271", 3 | "description": "csv", 4 | "extras": { 5 | "key1": 123, 6 | "key2": false, 7 | "key3": null, 8 | "key4": "value4" 9 | }, 10 | "format": "CSV", 11 | "hash": "", 12 | "id": "a6d54af7-3119-4691-917b-8655784c5438", 13 | "name": "emojis-csv", 14 | "package_id": "99575b35-8a88-4fd9-b0dc-b9d0479c9b2c", 15 | "path": "https://datahub.io/core/unicode-emojis/r/emojis.csv", 16 | "revision_id": "72208aa7-6554-4538-80b1-60c8cad05d9c" 17 | } 18 | -------------------------------------------------------------------------------- /tests/fixtures/full_ckan_package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Author Name", 3 | "author_email": "", 4 | "creator_user_id": "c6bd4f0b-f550-4f5d-9537-782a2d1d4c7a", 5 | "extras": [], 6 | "groups": [], 7 | "id": "1f7db1f1-1400-4572-a860-0977326a5521", 8 | "isopen": true, 9 | "license_id": "cc-by", 10 | "license_title": "Creative Commons Attribution", 11 | "license_url": "http://www.opendefinition.org/licenses/cc-by", 12 | "maintainer": "", 13 | "maintainer_email": "", 14 | "metadata_created": "2020-06-25T14:33:18.301040", 15 | "metadata_modified": "2020-06-25T14:50:34.860070", 16 | "name": "testing", 17 | "notes": "Test new description for version 1.2", 18 | "num_resources": 1, 19 | "num_tags": 0, 20 | "organization": { 21 | "approval_status": "approved", 22 | "created": "2020-06-25T11:31:03.384316", 23 | "description": "", 24 | "id": "5df7636c-a47f-4ab8-b6f0-3212280da926", 25 | "image_url": "", 26 | "is_organization": true, 27 | "name": "odc", 28 | "revision_id": "f0b87003-e385-48c6-b4b6-5fe725cbee8e", 29 | "state": "active", 30 | "title": "ODC", 31 | "type": "organization" 32 | }, 33 | "owner_org": "5df7636c-a47f-4ab8-b6f0-3212280da926", 34 | "private": false, 35 | "relationships_as_object": [], 36 | "relationships_as_subject": [], 37 | "resources": [ 38 | { 39 | "cache_last_updated": null, 40 | "cache_url": null, 41 | "created": "2020-06-25T14:33:49.587300", 42 | "datastore_active": null, 43 | "description": "", 44 | "format": "CSV", 45 | "hash": "", 46 | "id": "e778ca29-5fb7-4063-ad9b-68040f88ab8a", 47 | "last_modified": "2020-06-25T14:33:49.567891", 48 | "mimetype": "text/csv", 49 | "mimetype_inner": null, 50 | "name": "mini-csv.csv", 51 | "package_id": "1f7db1f1-1400-4572-a860-0977326a5521", 52 | "position": 0, 53 | "resource_type": null, 54 | "revision_id": "175298da-691c-42c8-9a4a-b01350fad6e9", 55 | "size": 40, 56 | "state": "active", 57 | "tracking_summary": { 58 | "recent": 0, 59 | "total": 0 60 | }, 61 | "url": "http://localhost:5000/dataset/1f7db1f1-1400-4572-a860-0977326a5521/resource/e778ca29-5fb7-4063-ad9b-68040f88ab8a/download/mini-csv.csv", 62 | "url_type": "upload", 63 | "versions_upload_timestamp": "2020-06-25T11:33:49.567522" 64 | } 65 | ], 66 | "revision_id": "41109a92-138b-4a90-be7b-7f202ad83492", 67 | "state": "active", 68 | "title": "Testing", 69 | "tracking_summary": { 70 | "recent": 0, 71 | "total": 0 72 | }, 73 | "type": "dataset", 74 | "url": "", 75 | "version": "" 76 | } 77 | -------------------------------------------------------------------------------- /tests/fixtures/full_ckan_package_first_round_trip.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Author Name", 3 | "author_email": "", 4 | "creator_user_id": "c6bd4f0b-f550-4f5d-9537-782a2d1d4c7a", 5 | "id": "1f7db1f1-1400-4572-a860-0977326a5521", 6 | "license_id": "cc-by", 7 | "license_title": "Creative Commons Attribution", 8 | "license_url": "http://www.opendefinition.org/licenses/cc-by", 9 | "groups": [], 10 | "metadata_created": "2020-06-25T14:33:18.301040", 11 | "metadata_modified": "2020-06-25T14:50:34.860070", 12 | "name": "testing", 13 | "notes": "Test new description for version 1.2", 14 | "owner_org": "5df7636c-a47f-4ab8-b6f0-3212280da926", 15 | "private": false, 16 | "relationships_as_object": [], 17 | "relationships_as_subject": [], 18 | "resources": [ 19 | { 20 | "created": "2020-06-25T14:33:49.587300", 21 | "description": "", 22 | "format": "CSV", 23 | "hash": "", 24 | "id": "e778ca29-5fb7-4063-ad9b-68040f88ab8a", 25 | "last_modified": "2020-06-25T14:33:49.567891", 26 | "name": "mini-csv.csv", 27 | "package_id": "1f7db1f1-1400-4572-a860-0977326a5521", 28 | "revision_id": "175298da-691c-42c8-9a4a-b01350fad6e9", 29 | "tracking_summary": { "recent": 0, "total": 0 }, 30 | "url_type": "upload", 31 | "versions_upload_timestamp": "2020-06-25T11:33:49.567522", 32 | "size": 40, 33 | "mimetype": "text/csv", 34 | "url": "http://localhost:5000/dataset/1f7db1f1-1400-4572-a860-0977326a5521/resource/e778ca29-5fb7-4063-ad9b-68040f88ab8a/download/mini-csv.csv" 35 | } 36 | ], 37 | "revision_id": "41109a92-138b-4a90-be7b-7f202ad83492", 38 | "title": "Testing", 39 | "tracking_summary": {"recent": 0, "total": 0}, 40 | "type": "dataset", 41 | "version": "", 42 | "url": "" 43 | } 44 | -------------------------------------------------------------------------------- /tests/test_ckan_to_frictionless.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import json 4 | 5 | import frictionless_ckan_mapper.ckan_to_frictionless as converter 6 | 7 | 8 | class TestResourceConversion: 9 | '''Notes: 10 | 11 | * extras do not any special testing since CKAN already just has them as key 12 | values. 13 | * we do want to test unjsonifying values since that will cover e.g. a Table 14 | Schema set in schema field 15 | ''' 16 | 17 | def test_fixtures(self): 18 | inpath = 'tests/fixtures/ckan_resource.json' 19 | exppath = 'tests/fixtures/frictionless_resource.json' 20 | indict = json.load(open(inpath)) 21 | exp = json.load(open(exppath)) 22 | out = converter.resource(indict) 23 | assert out == exp 24 | 25 | def test_values_are_unjsonified(self): 26 | '''Test values which are jsonified dict or arrays are unjsonified''' 27 | schema = { 28 | "fields": [ 29 | {"name": "abc", "type": "string"} 30 | ] 31 | } 32 | indict = { 33 | "schema": json.dumps(schema), 34 | "otherval": json.dumps(schema), 35 | "x": "{'abc': 1" 36 | } 37 | exp = { 38 | "schema": schema, 39 | "otherval": schema, 40 | # fake json object - not really ... but looks like it ... 41 | "x": "{'abc': 1" 42 | } 43 | out = converter.resource(indict) 44 | assert out == exp 45 | 46 | indict = { 47 | "x": "hello world", 48 | "y": "1.3" 49 | } 50 | exp = { 51 | "x": "hello world", 52 | "y": "1.3" 53 | } 54 | out = converter.resource(indict) 55 | assert out == exp 56 | 57 | def test_keys_are_removed_that_should_be(self): 58 | indict = { 59 | "position": 2, 60 | "datastore_active": True, 61 | "state": "active" 62 | } 63 | exp = {} 64 | out = converter.resource(indict) 65 | assert out == exp 66 | 67 | def test_resource_mapping(self): 68 | indict = { 69 | "url": "http://www.somewhere.com/data.csv", 70 | "size": 110, 71 | "mimetype": "text/csv" 72 | } 73 | exp = { 74 | "path": "http://www.somewhere.com/data.csv", 75 | "bytes": 110, 76 | "mediatype": "text/csv" 77 | } 78 | out = converter.resource(indict) 79 | assert out == exp 80 | 81 | def test_resource_path_is_set_even_for_uploaded_resources(self): 82 | indict = { 83 | "url": "http://www.somewhere.com/data.csv", 84 | "url_type": "upload" 85 | } 86 | exp = { 87 | 'path': 'http://www.somewhere.com/data.csv', 88 | 'url_type': "upload" 89 | } 90 | out = converter.resource(indict) 91 | assert out == exp 92 | 93 | def test_resource_keys_pass_through(self): 94 | indict = { 95 | 'id': 'xxx', 96 | 'name': 'abc', 97 | 'description': 'GDPs list', 98 | 'format': 'CSV', 99 | 'hash': 'e785c0883d7a104330e69aee73d4f235', 100 | 'schema': { 101 | 'fields': [ 102 | {'name': 'id', 'type': 'integer'}, 103 | {'name': 'title', 'type': 'string'}, 104 | ] 105 | }, 106 | # random 107 | 'adfajka': 'aaaa', 108 | '1dafak': 'abbbb' 109 | } 110 | exp = indict 111 | out = converter.resource(indict) 112 | assert out == exp 113 | 114 | def test_nulls_are_stripped(self): 115 | indict = { 116 | 'abc': 'xxx', 117 | 'size': None, 118 | 'xyz': None 119 | } 120 | exp = { 121 | 'abc': 'xxx' 122 | } 123 | out = converter.resource(indict) 124 | assert out == exp 125 | 126 | 127 | class TestPackageConversion: 128 | def test_dataset_extras(self): 129 | indict = { 130 | 'extras': [ 131 | {'key': 'title_cn', 'value': u'國內生產總值'}, 132 | {'key': 'years', 'value': '[2015, 2016]'}, 133 | {'key': 'last_year', 'value': 2016}, 134 | {'key': 'location', 'value': '{"country": "China"}'} 135 | ] 136 | } 137 | exp = { 138 | 'title_cn': u'國內生產總值', 139 | 'years': [2015, 2016], 140 | 'last_year': 2016, 141 | 'location': {'country': 'China'} 142 | } 143 | out = converter.dataset(indict) 144 | assert out == exp 145 | 146 | def test_unjsonify_all_extra_values(self): 147 | indict = { 148 | 'extras': [ 149 | { 150 | 'key': 'location', 151 | 'value': '{"country": {"China": {"population": ' 152 | '"1233214331", "capital": "Beijing"}}}' 153 | }, 154 | { 155 | 'key': 'numbers', 156 | 'value': '[[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]]' 157 | } 158 | ] 159 | } 160 | out = converter.dataset(indict) 161 | exp = { 162 | "location": { 163 | "country": 164 | {"China": 165 | {"population": "1233214331", 166 | "capital": "Beijing"}} 167 | }, 168 | "numbers": [[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]] 169 | } 170 | assert out == exp 171 | 172 | def test_dataset_license(self): 173 | # No license_title nor license_url 174 | indict = { 175 | 'license_id': 'odc-odbl' 176 | } 177 | exp = { 178 | 'licenses': [{ 179 | 'name': 'odc-odbl', 180 | }] 181 | } 182 | out = converter.dataset(indict) 183 | assert out == exp 184 | 185 | # Remap everything in licenses 186 | indict = { 187 | 'license_id': 'cc-by', 188 | 'license_title': 'Creative Commons Attribution', 189 | 'license_url': 'http://www.opendefinition.org/licenses/cc-by' 190 | } 191 | exp = { 192 | 'licenses': [{ 193 | 'name': 'cc-by', 194 | 'title': 'Creative Commons Attribution', 195 | 'path': 'http://www.opendefinition.org/licenses/cc-by' 196 | }] 197 | } 198 | out = converter.dataset(indict) 199 | assert out == exp 200 | 201 | def test_dataset_license_with_licenses_in_extras(self): 202 | indict = { 203 | 'license_id': 'odc-odbl', 204 | 'license_title': 'Open Data Commons Open Database License', 205 | 'license_url': 'https://opendatacommons.org/licenses/odbl/1-0/index.html', 206 | 'extras': [ 207 | { 208 | 'key': 'licenses', 209 | 'value': json.dumps( 210 | [ 211 | { 212 | 'name': 'cc-by', 213 | 'title': 'Creative Commons Attribution', 214 | 'path': 'http://www.opendefinition.org/licenses/cc-by' 215 | }, 216 | { 217 | 'name': 'odc-by', 218 | 'title': 'Open Data Commons Attribution License', 219 | 'path': 'https://opendatacommons.org/licenses/by/1-0/index.html' 220 | } 221 | ] 222 | ) 223 | } 224 | ] 225 | } 226 | exp = { 227 | 'licenses': [ 228 | { 229 | 'name': 'odc-odbl', 230 | 'title': 'Open Data Commons Open Database License', 231 | 'path': 'https://opendatacommons.org/licenses/odbl/1-0/index.html' 232 | }, 233 | { 234 | 'name': 'odc-by', 235 | 'title': 'Open Data Commons Attribution License', 236 | 'path': 'https://opendatacommons.org/licenses/by/1-0/index.html' 237 | } 238 | ] 239 | } 240 | out = converter.dataset(indict) 241 | assert out == exp 242 | 243 | def test_keys_are_passed_through(self): 244 | indict = { 245 | 'name': 'gdp', 246 | 'id': 'xxxx', 247 | 'title': 'Countries GDP', 248 | 'version': '1.0', 249 | # random 250 | 'xxx': 'aldka' 251 | } 252 | out = converter.dataset(indict) 253 | exp = { 254 | 'name': 'gdp', 255 | 'id': 'xxxx', 256 | 'title': 'Countries GDP', 257 | 'version': '1.0', 258 | 'xxx': 'aldka' 259 | } 260 | assert out == exp 261 | 262 | def test_key_mappings(self): 263 | # notes 264 | indict = { 265 | 'notes': 'Country, regional and world GDP', 266 | 'url': 'https://datopian.com' 267 | } 268 | exp = { 269 | 'description': 'Country, regional and world GDP', 270 | 'homepage': 'https://datopian.com' 271 | } 272 | out = converter.dataset(indict) 273 | assert out == exp 274 | 275 | def test_dataset_author_and_maintainer(self): 276 | indict = { 277 | 'author': 'World Bank and OECD', 278 | 'author_email': 'someone@worldbank.org' 279 | } 280 | exp = { 281 | 'contributors': [ 282 | { 283 | 'title': 'World Bank and OECD', 284 | 'email': 'someone@worldbank.org', 285 | 'role': 'author' 286 | } 287 | ] 288 | } 289 | out = converter.dataset(indict) 290 | assert out == exp 291 | 292 | indict = { 293 | 'author': 'World Bank and OECD', 294 | 'author_email': 'someone@worldbank.org', 295 | 'maintainer': 'Datopian', 296 | 'maintainer_email': 'helloxxx@datopian.com' 297 | } 298 | exp = { 299 | 'contributors': [ 300 | { 301 | 'title': 'World Bank and OECD', 302 | 'email': 'someone@worldbank.org', 303 | 'role': 'author' 304 | }, 305 | { 306 | 'title': 'Datopian', 307 | 'email': 'helloxxx@datopian.com', 308 | 'role': 'maintainer' 309 | }, 310 | 311 | ] 312 | } 313 | out = converter.dataset(indict) 314 | assert out == exp 315 | 316 | # if we already have contributors use that ... 317 | indict = { 318 | 'contributors': [{ 319 | 'title': 'Datopians' 320 | }], 321 | 'author': 'World Bank and OECD', 322 | } 323 | exp = { 324 | 'contributors': [{ 325 | 'title': 'Datopians' 326 | }] 327 | } 328 | out = converter.dataset(indict) 329 | assert out == exp 330 | 331 | def test_dataset_tags(self): 332 | indict = { 333 | 'tags': [ 334 | { 335 | 'display_name': 'economy', 336 | 'id': '9d602a79-7742-44a7-9029-50b9eca38c90', 337 | 'name': 'economy', 338 | 'state': 'active' 339 | }, 340 | { 341 | 'display_name': 'worldbank', 342 | 'id': '3ccc2e3b-f875-49ef-a39d-6601d6c0ef76', 343 | 'name': 'worldbank', 344 | 'state': 'active' 345 | } 346 | ] 347 | } 348 | exp = { 349 | 'keywords': ['economy', 'worldbank'] 350 | } 351 | out = converter.dataset(indict) 352 | assert out == exp 353 | 354 | def test_resources_are_converted(self): 355 | indict = { 356 | 'name': 'gdp', 357 | 'resources': [{ 358 | 'name': 'data.csv', 359 | 'url': 'http://someplace.com/data.csv', 360 | 'size': 100 361 | }] 362 | } 363 | exp = { 364 | 'name': 'gdp', 365 | 'resources': [{ 366 | 'name': 'data.csv', 367 | 'path': 'http://someplace.com/data.csv', 368 | 'bytes': 100 369 | }] 370 | } 371 | out = converter.dataset(indict) 372 | assert out == exp 373 | 374 | def test_all_keys_are_passed_through(self): 375 | indict = { 376 | 'description': 'GDPs list', 377 | 'schema': { 378 | 'fields': [ 379 | {'name': 'id', 'type': 'integer'}, 380 | {'name': 'title', 'type': 'string'}, 381 | ] 382 | }, 383 | # random 384 | 'adfajka': 'aaaa', 385 | '1dafak': 'abbbb' 386 | } 387 | exp = indict 388 | out = converter.resource(indict) 389 | assert out == exp 390 | 391 | def test_keys_are_removed_that_should_be(self): 392 | indict = { 393 | 'isopen': True, 394 | 'num_tags': 1, 395 | 'num_resources': 10, 396 | 'state': 'active', 397 | "organization": { 398 | "description": "", 399 | "title": "primary_care_prescribing_dispensing", 400 | "created": "2020-03-31T21:51:41.334189", 401 | "approval_status": "approved", 402 | "is_organization": True, 403 | "state": "active", 404 | "image_url": "", 405 | "revision_id": "7c86fde3-9899-41d6-b0bb-6c72dd4b6b94", 406 | "type": "organization", 407 | "id": "a275814e-6c15-40a8-99fd-af911f1568ef", 408 | "name": "primary_care_prescribing_dispensing" 409 | } 410 | } 411 | exp = {} 412 | out = converter.dataset(indict) 413 | assert out == exp 414 | 415 | def test_null_values_are_stripped(self): 416 | indict = { 417 | 'id': '12312', 418 | 'title': 'title here', 419 | 'format': None 420 | } 421 | exp = { 422 | 'id': '12312', 423 | 'title': 'title here' 424 | } 425 | out = converter.dataset(indict) 426 | assert out == exp 427 | 428 | def test_empty_tags_ignored(self): 429 | indict = { 430 | "tags": [] 431 | } 432 | exp = {} 433 | out = converter.dataset(indict) 434 | assert out == exp 435 | -------------------------------------------------------------------------------- /tests/test_frictionless_to_ckan.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import json 3 | 4 | import frictionless_ckan_mapper.frictionless_to_ckan as converter 5 | 6 | 7 | class TestResourceConversion: 8 | def test_non_ckan_keys_passthrough(self): 9 | indict = { 10 | 'title_cn': u'國內生產總值', 11 | 'years': [2015, 2016], 12 | 'last_year': 2016, 13 | 'location': {'country': 'China'} 14 | } 15 | out = converter.resource(indict) 16 | exp = { 17 | 'title_cn': u'國內生產總值', 18 | 'years': [2015, 2016], 19 | 'last_year': 2016, 20 | 'location': {'country': 'China'} 21 | } 22 | assert out == exp 23 | 24 | def test_path_to_url(self): 25 | # Test remote path 26 | indict = {'path': 'http://www.somewhere.com/data.csv'} 27 | out = converter.resource(indict) 28 | assert out['url'] == indict['path'] 29 | 30 | # Test local path 31 | indict = {'path': './data.csv'} 32 | out = converter.resource(indict) 33 | assert out['url'] == indict['path'] 34 | 35 | # Test POSIX path 36 | indict = {'path': '/home/user/data.csv'} 37 | out = converter.resource(indict) 38 | assert out['url'] == indict['path'] 39 | 40 | def test_other_remapping(self): 41 | indict = { 42 | 'bytes': 10, 43 | 'mediatype': 'text/csv' 44 | } 45 | exp = { 46 | 'size': 10, 47 | 'mimetype': 'text/csv' 48 | } 49 | out = converter.resource(indict) 50 | assert out == exp 51 | 52 | def test_passthrough(self): 53 | indict = { 54 | 'description': 'GDPs list', 55 | 'format': 'CSV', 56 | 'hash': 'e785c0883d7a104330e69aee73d4f235' 57 | } 58 | out = converter.resource(indict) 59 | assert out == indict 60 | 61 | 62 | class TestPackageConversion: 63 | def test_passthrough(self): 64 | indict = { 65 | 'name': 'gdp', 66 | 'id': 'xxxx', 67 | 'title': 'Countries GDP', 68 | 'version': '1.0', 69 | "owner_org": "a275814e-6c15-40a8-99fd-af911f1568ef", 70 | "metadata_created": "2020-03-31T21:57:48.676558", 71 | "metadata_modified": "2020-03-31T21:57:50.215642", 72 | "creator_user_id": "b5ab876c-0d04-479a-92de-f66db5dd6fb3", 73 | "private": False, 74 | "revision_id": "xxx", 75 | # TODO: test groups 76 | } 77 | out = converter.package(indict) 78 | assert out == indict 79 | 80 | def test_basic_mappings(self): 81 | indict = { 82 | 'description': 'Country, regional and world GDP in current USD.', 83 | 'homepage': 'https://datopian.com' 84 | } 85 | exp = { 86 | 'notes': 'Country, regional and world GDP in current USD.', 87 | 'url': 'https://datopian.com' 88 | } 89 | out = converter.package(indict) 90 | assert out == exp 91 | 92 | def test_dataset_license(self): 93 | indict = { 94 | 'licenses': [{ 95 | 'name': 'odc-odbl', 96 | 'path': 'http://example.com/file.csv', 97 | }] 98 | } 99 | exp = { 100 | 'license_id': 'odc-odbl', 101 | 'license_title': None, 102 | 'license_url': 'http://example.com/file.csv' 103 | } 104 | out = converter.package(indict) 105 | assert out == exp 106 | 107 | indict = { 108 | 'licenses': [{ 109 | 'title': 'Open Data Commons Open Database License', 110 | 'name': 'odc-odbl' 111 | }] 112 | } 113 | exp = { 114 | 'license_id': 'odc-odbl', 115 | 'license_title': 'Open Data Commons Open Database License', 116 | 'license_url': None 117 | } 118 | out = converter.package(indict) 119 | assert out == exp 120 | 121 | # Finally, what if more than one license 122 | indict = { 123 | 'licenses': [ 124 | { 125 | 'title': 'Open Data Commons Open Database License', 126 | 'name': 'odc-pddl' 127 | }, 128 | { 129 | 'title': 'Creative Commons CC Zero License (cc-zero)', 130 | 'name': 'cc-zero' 131 | } 132 | ] 133 | } 134 | exp = { 135 | 'license_id': 'odc-pddl', 136 | 'license_title': 'Open Data Commons Open Database License', 137 | 'license_url': None, 138 | 'extras': [ 139 | { 140 | 'key': 'licenses', 141 | 'value': json.dumps(indict['licenses']) 142 | } 143 | ] 144 | } 145 | out = converter.package(indict) 146 | assert out == exp 147 | 148 | # TODO: get clear on the spelling of the key "organization". 149 | # It's "organisation" in the JSON schema at 150 | # https://specs.frictionlessdata.io/schemas/data-package.json 151 | # while it's "organization" in the page of the specs at 152 | # https://specs.frictionlessdata.io/data-package/#metadata 153 | def test_contributors(self): 154 | # author conversion 155 | indict = { 156 | 'contributors': [ 157 | { 158 | 'title': 'John Smith' 159 | } 160 | ] 161 | } 162 | exp = { 163 | 'author': 'John Smith', 164 | 'author_email': None 165 | } 166 | out = converter.package(indict) 167 | assert out == exp 168 | 169 | # check maintainer conversion 170 | indict = { 171 | 'contributors': [ 172 | { 173 | 'title': 'xyz', 174 | 'email': 'xyz@abc.com', 175 | 'organisation': 'xxxxx', 176 | 'role': 'maintainer' 177 | } 178 | ] 179 | } 180 | exp = { 181 | 'maintainer': 'xyz', 182 | 'maintainer_email': 'xyz@abc.com' 183 | } 184 | out = converter.package(indict) 185 | assert out == exp 186 | 187 | # Make sure that we also get the correct data when there are multiple 188 | # contributors 189 | indict = { 190 | 'contributors': [ 191 | { 192 | 'title': 'abc', 193 | 'email': 'abc@abc.com' 194 | }, 195 | { 196 | 'title': 'xyz', 197 | 'email': 'xyz@xyz.com', 198 | 'role': 'maintainer' 199 | } 200 | ] 201 | } 202 | exp = { 203 | 'author': 'abc', 204 | 'author_email': 'abc@abc.com', 205 | 'maintainer': 'xyz', 206 | 'maintainer_email': 'xyz@xyz.com' 207 | } 208 | out = converter.package(indict) 209 | assert out == exp 210 | 211 | # finally if we have contributors beyond that expected for ckan we keep 212 | # that in extras (raw) 213 | indict = { 214 | 'contributors': [ 215 | {"role": "author", "email": "", "title": "Patricio"}, 216 | {"role": "maintainer", "email": "", "title": "Rufus"}, 217 | {"role": "author", "email": "", "title": "Paul"} 218 | ] 219 | } 220 | exp = { 221 | 'author': 'Patricio', 222 | 'author_email': '', 223 | 'maintainer': 'Rufus', 224 | 'maintainer_email': '', 225 | 'extras': [{ 226 | 'key': u'contributors', 227 | 'value': json.dumps(indict['contributors']) 228 | }] 229 | } 230 | out = converter.package(indict) 231 | assert out == exp 232 | 233 | def test_keywords_converted_to_tags(self): 234 | keywords = ['economy!!!', 'World Bank'] 235 | indict = {'keywords': keywords} 236 | out = converter.package(indict) 237 | assert out.get('tags') == [ 238 | {'name': 'economy!!!'}, 239 | {'name': 'World Bank'}, 240 | ] 241 | 242 | def test_extras_is_converted(self): 243 | indict = { 244 | 'homepage': 'www.example.com', 245 | 'newdict': {'key1': 'dict_to_jsonify'}, 246 | 'newint': 123, 247 | 'newkey': 'new value', 248 | 'newlist': [1, 2, 3, 'string'], 249 | 'title': 'Title here' 250 | } 251 | exp = { 252 | 'title': 'Title here', 253 | 'url': 'www.example.com', 254 | 'extras': [ 255 | { 256 | 'key': 'newdict', 'value': '{"key1": "dict_to_jsonify"}' 257 | }, 258 | {'key': 'newint', 'value': 123}, 259 | {'key': 'newkey', 'value': 'new value'}, 260 | {'key': 'newlist', 'value': '[1, 2, 3, "string"]'}, 261 | ] 262 | } 263 | out = converter.package(indict) 264 | out['extras'] = sorted(out['extras'], key=lambda i: i['key']) 265 | assert out == exp 266 | 267 | def test_resources_are_converted(self): 268 | indict = { 269 | 'name': 'gdp', 270 | 'resources': [{ 271 | 'name': 'data.csv', 272 | 'path': 'http://someplace.com/data.csv', 273 | 'bytes': 100 274 | }] 275 | } 276 | exp = { 277 | 'name': 'gdp', 278 | 'resources': [{ 279 | 'name': 'data.csv', 280 | 'url': 'http://someplace.com/data.csv', 281 | 'size': 100 282 | }] 283 | } 284 | out = converter.package(indict) 285 | assert out == exp 286 | 287 | -------------------------------------------------------------------------------- /tests/test_roundtrip.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import frictionless_ckan_mapper.ckan_to_frictionless as ckan_to_frictionless 4 | import frictionless_ckan_mapper.frictionless_to_ckan as frictionless_to_ckan 5 | 6 | import six 7 | 8 | 9 | class TestPackageConversion: 10 | def test_round_trip_ckan(self): 11 | # `ckan1` != `ckan2` but `ckan2` == `ckan3` 12 | inpath = 'tests/fixtures/full_ckan_package.json' 13 | ckan1 = json.load(open(inpath)) 14 | fd1 = ckan_to_frictionless.dataset(ckan1) 15 | ckan2 = frictionless_to_ckan.package(fd1) 16 | fd2 = ckan_to_frictionless.dataset(ckan2) 17 | ckan3 = frictionless_to_ckan.package(fd2) 18 | 19 | # FIXME: this currently doesn't work for Python 2 due to the way 20 | # Unicode is handled and because the dictionary keys do not keep 21 | # the same order. 22 | # Solution 1: Skip for Python 2 (it's clearly the same dictionary 23 | # if the build passes on Python 3) 24 | # Solution 2: Hard code the dicts as in `test_extras_is_converted` 25 | # in test_frictionless_to_ckan.py instead of loading JSON and 26 | # sort the keys. 27 | if not six.PY2: 28 | assert ckan2 == ckan3 29 | 30 | def test_differences_ckan_round_trip(self): 31 | # When converting ckan1 to fd1 then fd1 to ckan2, 32 | # ckan1 is bound to differ from ckan2. 33 | # Those fixtures illustrate the expected differences. 34 | inpath = 'tests/fixtures/full_ckan_package.json' 35 | ckan1 = json.load(open(inpath)) 36 | fd1 = ckan_to_frictionless.dataset(ckan1) 37 | ckan2 = frictionless_to_ckan.package(fd1) 38 | inpath_round_trip = ('tests/fixtures/' 39 | 'full_ckan_package_first_round_trip.json') 40 | exp = json.load(open(inpath_round_trip)) 41 | 42 | # FIXME: this currently doesn't work for Python 2 due to the way 43 | # Unicode is handled and because the dictionary keys do not keep 44 | # the same order. 45 | # Solution 1: Skip for Python 2 (it's clearly the same dictionary 46 | # if the build passes on Python 3) 47 | # Solution 2: Hard code the dicts as in `test_extras_is_converted` 48 | # in test_frictionless_to_ckan.py instead of loading JSON and 49 | # sort the keys. 50 | if not six.PY2: 51 | assert ckan2 == exp 52 | 53 | # Notable differences in `exp` from ckan1 are: 54 | # - Keys not defined in a standard CKAN package such as 55 | # `creator_user_id` will go to `extras`. 56 | # - In our `full_ckan_package.json` fixture, 'extras' is empty but 57 | # Frictionless fills it and it will exist in the CKAN package after 58 | # the first round trip. 59 | # - Keys defined in CKAN but ignored in Frictionless, such as `id` 60 | # (because a Frictionless package doesn't have an id property) will 61 | # also go to 'extras'. 62 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | package=frictionless_ckan_mapper 3 | skip_missing_interpreters=true 4 | envlist= 5 | py36 6 | py27 7 | 8 | [testenv] 9 | deps= 10 | pytest 11 | pytest-cov 12 | coverage 13 | passenv= 14 | CI 15 | TRAVIS 16 | TRAVIS_JOB_ID 17 | TRAVIS_BRANCH 18 | DYLD_LIBRARY_PATH 19 | LC_ALL 20 | commands= 21 | py.test \ 22 | --cov {[tox]package} \ 23 | --cov-config tox.ini \ 24 | --cov-report term-missing \ 25 | {posargs} 26 | --------------------------------------------------------------------------------