├── .coveragerc ├── .dockerignore ├── .editorconfig ├── .github └── workflows │ ├── pypi-release.yml │ └── tests.yml ├── .gitignore ├── AUTHORS.rst ├── CHANGES.rst ├── CONTRIBUTING.rst ├── Dockerfile ├── LICENSE ├── MAINTAINERS ├── MANIFEST.in ├── README.rst ├── docker-compose.yml ├── docs ├── Makefile ├── api.rst ├── authors.rst ├── changes.rst ├── conf.py ├── contributing.rst ├── index.rst ├── license.rst ├── requirements.txt └── usage.rst ├── dojson ├── __init__.py ├── __main__.py ├── _compat.py ├── cli │ ├── __init__.py │ ├── command.py │ └── utils.py ├── contrib │ ├── __init__.py │ ├── marc21 │ │ ├── __init__.py │ │ ├── fields │ │ │ ├── __init__.py │ │ │ ├── ad00x.py │ │ │ ├── ad01x09x.py │ │ │ ├── ad1xx3xx.py │ │ │ ├── ad260360.py │ │ │ ├── ad4xx.py │ │ │ ├── ad5xx.py │ │ │ ├── ad64x.py │ │ │ ├── ad663666.py │ │ │ ├── ad66768x.py │ │ │ ├── ad7xx.py │ │ │ ├── ad8xx.py │ │ │ ├── adleader.py │ │ │ ├── bd00x.py │ │ │ ├── bd01x09x.py │ │ │ ├── bd1xx.py │ │ │ ├── bd20x24x.py │ │ │ ├── bd25x28x.py │ │ │ ├── bd3xx.py │ │ │ ├── bd4xx.py │ │ │ ├── bd5xx.py │ │ │ ├── bd6xx.py │ │ │ ├── bd70x75x.py │ │ │ ├── bd76x78x.py │ │ │ ├── bd80x83x.py │ │ │ ├── bd84188x.py │ │ │ ├── bdleader.py │ │ │ ├── hd00x.py │ │ │ ├── hd0xx.py │ │ │ ├── hd3xx5xx84x.py │ │ │ └── hd85xhd88x.py │ │ ├── model.py │ │ ├── schemas │ │ │ ├── __init__.py │ │ │ └── marc21 │ │ │ │ ├── authority │ │ │ │ ├── ad-v1.0.0.json │ │ │ │ ├── ad-v1.0.1.json │ │ │ │ ├── ad-v1.0.2.json │ │ │ │ ├── ad00x.json │ │ │ │ ├── ad01x09x-v1.0.1.json │ │ │ │ ├── ad01x09x.json │ │ │ │ ├── ad1xx3xx.json │ │ │ │ ├── ad260360-v1.0.1.json │ │ │ │ ├── ad260360.json │ │ │ │ ├── ad4xx-v1.0.1.json │ │ │ │ ├── ad4xx.json │ │ │ │ ├── ad5xx-v1.0.1.json │ │ │ │ ├── ad5xx.json │ │ │ │ ├── ad64x.json │ │ │ │ ├── ad663666.json │ │ │ │ ├── ad66768x-v1.0.1.json │ │ │ │ ├── ad66768x.json │ │ │ │ ├── ad7xx.json │ │ │ │ ├── ad8xx.json │ │ │ │ └── adleader.json │ │ │ │ ├── bibliographic │ │ │ │ ├── bd-v1.0.0.json │ │ │ │ ├── bd-v1.0.1.json │ │ │ 
│ ├── bd-v1.0.2.json │ │ │ │ ├── bd00x-v1.0.1.json │ │ │ │ ├── bd00x.json │ │ │ │ ├── bd01x09x-v1.0.1.json │ │ │ │ ├── bd01x09x.json │ │ │ │ ├── bd1xx-v1.0.1.json │ │ │ │ ├── bd1xx.json │ │ │ │ ├── bd20x24x-v1.0.1.json │ │ │ │ ├── bd20x24x.json │ │ │ │ ├── bd25x28x.json │ │ │ │ ├── bd3xx-v1.0.1.json │ │ │ │ ├── bd3xx.json │ │ │ │ ├── bd4xx-v1.0.1.json │ │ │ │ ├── bd4xx.json │ │ │ │ ├── bd5xx-v1.0.1.json │ │ │ │ ├── bd5xx.json │ │ │ │ ├── bd6xx-v1.0.1.json │ │ │ │ ├── bd6xx.json │ │ │ │ ├── bd70x75x-v1.0.1.json │ │ │ │ ├── bd70x75x.json │ │ │ │ ├── bd76x78x.json │ │ │ │ ├── bd80x83x-v1.0.1.json │ │ │ │ ├── bd80x83x.json │ │ │ │ ├── bd84188x-v1.0.1.json │ │ │ │ ├── bd84188x.json │ │ │ │ └── bdleader.json │ │ │ │ └── holdings │ │ │ │ ├── hd-v1.0.0.json │ │ │ │ ├── hd00x.json │ │ │ │ ├── hd0xx.json │ │ │ │ ├── hd3xx5xx84x.json │ │ │ │ └── hd85xhd88x.json │ │ └── utils.py │ └── to_marc21 │ │ ├── __init__.py │ │ ├── fields │ │ ├── __init__.py │ │ ├── ad00x.py │ │ ├── ad01x09x.py │ │ ├── ad1xx.py │ │ ├── ad25x28x.py │ │ ├── ad3xx.py │ │ ├── ad4xx.py │ │ ├── ad5xx.py │ │ ├── ad6xx.py │ │ ├── ad70x75x.py │ │ ├── ad76x78x.py │ │ ├── ad84188x.py │ │ ├── bd00x.py │ │ ├── bd01x09x.py │ │ ├── bd1xx.py │ │ ├── bd20x24x.py │ │ ├── bd25x28x.py │ │ ├── bd3xx.py │ │ ├── bd4xx.py │ │ ├── bd5xx.py │ │ ├── bd6xx.py │ │ ├── bd70x75x.py │ │ ├── bd76x78x.py │ │ ├── bd80x83x.py │ │ ├── bd84188x.py │ │ └── bdleader.py │ │ ├── model.py │ │ └── utils.py ├── errors.py ├── overdo.py ├── utils.py └── version.py ├── pytest.ini ├── run-tests.sh ├── setup.cfg ├── setup.py ├── tests ├── MARC21slimUtils.xsl ├── data │ ├── authority │ │ ├── ad01x09x.xml │ │ ├── ad1xx.xml │ │ ├── ad25x28x.xml │ │ ├── ad3xx.xml │ │ ├── ad4xx.xml │ │ ├── ad5xx.xml │ │ ├── ad6xx.xml │ │ ├── ad70x75x.xml │ │ ├── ad76x78x.xml │ │ └── ad84188x.xml │ ├── handcrafted │ │ ├── bd01x09x.xml │ │ ├── bd3xx.xml │ │ ├── bd6xx.xml │ │ └── bdleader.xml │ ├── library_of_congress │ │ ├── bd01x09x.xml │ │ ├── bd1xx.xml │ │ ├── bd20x24x.xml 
│ │ ├── bd25x28x.xml │ │ ├── bd3xx.xml │ │ ├── bd4xx.xml │ │ ├── bd5xx.xml │ │ ├── bd6xx.xml │ │ ├── bd70x75x.xml │ │ ├── bd76x78x.xml │ │ ├── bd80x83x.xml │ │ └── bd84188x.xml │ ├── test_1.xml │ ├── test_10.xml │ ├── test_11.xml │ ├── test_12.xml │ ├── test_13.xml │ ├── test_14.xml │ ├── test_15.xml │ ├── test_16.xml │ ├── test_2.xml │ ├── test_3.xml │ ├── test_4.xml │ ├── test_5.xml │ ├── test_6.xml │ ├── test_7.xml │ ├── test_8.xml │ ├── test_9.xml │ └── test_cds_marc21.xml ├── demo_marc21_to_dc.converted.xml ├── demo_marc21_to_dc.xml ├── demo_marc21_to_dc.xslt ├── test_cli.py ├── test_contrib_to_marc21_utils.py ├── test_core.py └── test_utils.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = dojson 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | *.pyc 3 | __pycache__/ 4 | .tox 5 | .cache 6 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or modify 7 | # it under the terms of the Revised BSD License; see LICENSE file for 8 | # more details. 
9 | 10 | root = true 11 | 12 | [*] 13 | indent_style = space 14 | end_of_line = lf 15 | insert_final_newline = true 16 | trim_trailing_whitespace = true 17 | charset = utf-8 18 | 19 | # Python files 20 | [*.py] 21 | indent_size = 4 22 | # isort plugin configuration 23 | known_first_party = dojson,test_core 24 | multi_line_output = 2 25 | default_section = THIRDPARTY 26 | 27 | # RST files (used by sphinx) 28 | [*.rst] 29 | indent_size = 4 30 | 31 | # CSS, HTML, JS, JSON, YML 32 | [*.{css,html,js,json,yml}] 33 | indent_size = 2 34 | 35 | # Matches the exact files either package.json or .github/workflows/*.yml 36 | [{package.json, .github/workflows/*.yml}] 37 | indent_size = 2 38 | 39 | # Dockerfile 40 | [Dockerfile] 41 | indent_size = 4 42 | -------------------------------------------------------------------------------- /.github/workflows/pypi-release.yml: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of Invenio. 4 | # Copyright (C) 2020 CERN. 5 | # Copyright (C) 2023 Graz University of Technology. 
6 | # 7 | # Invenio is free software; you can redistribute it and/or modify it 8 | # under the terms of the MIT License; see LICENSE file for more details 9 | 10 | name: Publish 11 | 12 | on: 13 | push: 14 | tags: 15 | - v* 16 | 17 | jobs: 18 | Publish: 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | 25 | - name: Set up Python 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: 3.9 29 | 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install setuptools wheel babel 34 | 35 | - name: Build package 36 | run: python setup.py sdist bdist_wheel 37 | 38 | - name: Publish on PyPI 39 | uses: pypa/gh-action-pypi-publish@v1.3.1 40 | with: 41 | user: __token__ 42 | # The token is provided by the inveniosoftware organization 43 | password: ${{ secrets.pypi_token }} 44 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of Invenio. 4 | # Copyright (C) 2020 CERN. 5 | # 6 | # Invenio is free software; you can redistribute it and/or modify it 7 | # under the terms of the MIT License; see LICENSE file for more details. 
8 | 9 | name: CI 10 | 11 | on: 12 | push: 13 | branches: master 14 | pull_request: 15 | branches: master 16 | schedule: 17 | # * is a special character in YAML so you have to quote this string 18 | - cron: '0 3 * * 6' 19 | workflow_dispatch: 20 | inputs: 21 | reason: 22 | description: 'Reason' 23 | required: false 24 | default: 'Manual trigger' 25 | 26 | jobs: 27 | Tests: 28 | runs-on: ubuntu-20.04 29 | strategy: 30 | matrix: 31 | python-version: [3.8, 3.9] 32 | requirements-level: [pypi] 33 | 34 | steps: 35 | - name: Checkout 36 | uses: actions/checkout@v2 37 | 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@v2 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | 43 | - name: Generate dependencies 44 | run: | 45 | sudo apt-get install libxml2-dev libxslt1-dev 46 | python -m pip install --upgrade pip setuptools py wheel requirements-builder 47 | requirements-builder -e all --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt 48 | 49 | - name: Cache pip 50 | uses: actions/cache@v2 51 | with: 52 | path: ~/.cache/pip 53 | key: ${{ runner.os }}-pip-${{ hashFiles('.${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt') }} 54 | 55 | - name: Install dependencies 56 | run: | 57 | pip install -r .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt 58 | pip install .[all] 59 | pip freeze 60 | 61 | - name: Run tests 62 | run: | 63 | ./run-tests.sh 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | .eggs 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | 
*.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Rope 43 | .ropeproject 44 | 45 | # Django stuff: 46 | *.log 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | # Backup files 53 | *~ 54 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | Authors 2 | ======= 3 | 4 | DoJSON is developed for the `Invenio 5 | `_ digital library software. 6 | 7 | Contact us at `info@inveniosoftware.org 8 | `_. 9 | 10 | Active contributors: 11 | 12 | * David Caro 13 | * Dinos Kousidis 14 | * Esteban J. G. Gabancho 15 | * Jacopo Notarstefano 16 | * Jiri Kuncar 17 | * Sami Hiltunen 18 | * Samuele Kaplun 19 | * Tibor Simko 20 | * zazasa 21 | * Øystein Blixhavn 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Bug reports, feature requests, and other contributions are welcome. 5 | If you find a demonstrable problem that is caused by the code of this 6 | library, please: 7 | 8 | 1. Search for `already reported problems 9 | `_. 10 | 2. Check if the issue has been fixed or is still reproducible on the 11 | latest `master` branch. 12 | 3. Create an issue with **a test case**. 13 | 14 | If you create a feature branch, you can run the tests to ensure everything is 15 | operating correctly: 16 | 17 | .. code-block:: console 18 | 19 | $ python setup.py test 20 | 21 | ... 
22 | 23 | ====== 31 passed, 23 skipped in 1.37 seconds ====== 24 | 25 | You can also test your feature branch using Docker:: 26 | 27 | $ docker-compose build 28 | $ docker-compose run web python setup.py test 29 | $ docker-compose run web python setup.py build_sphinx 30 | $ docker-compose run web pydocstyle --match-dir='dojson' 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This file is part of DoJSON 2 | # Copyright (C) 2015 CERN. 3 | # 4 | # DoJSON is free software; you can redistribute it and/or 5 | # modify it under the terms of the Revised BSD License; see LICENSE 6 | # file for more details. 7 | 8 | # Use Python-2.7: 9 | FROM python:2.7 10 | 11 | # Install some prerequisites ahead of `setup.py` in order to profit 12 | # from the docker build cache: 13 | RUN pip install coveralls \ 14 | esmre \ 15 | ipython \ 16 | lxml \ 17 | mock \ 18 | pydocstyle \ 19 | pytest \ 20 | pytest-cache \ 21 | pytest-cov \ 22 | pytest-pep8 \ 23 | sphinx_rtd_theme 24 | 25 | # Add sources to `code` and work there: 26 | WORKDIR /code 27 | ADD . /code 28 | 29 | # Install dojson: 30 | RUN pip install -e .[docs] 31 | 32 | # Run container as user `dojson` with UID `1000`, which should match 33 | # current host user in most situations: 34 | RUN adduser --uid 1000 --disabled-password --gecos '' dojson && \ 35 | chown -R dojson:dojson /code 36 | 37 | # Run test suite instead of starting the application: 38 | USER dojson 39 | CMD ["python", "setup.py", "test"] 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | DoJSON is free software; you can redistribute it and/or modify it 2 | under the terms of the Revised BSD License quoted below. 3 | 4 | Copyright (C) 2015, 2016 CERN. 5 | 6 | All rights reserved. 
7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are 10 | met: 11 | 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 15 | * Redistributions in binary form must reproduce the above copyright 16 | notice, this list of conditions and the following disclaimer in the 17 | documentation and/or other materials provided with the distribution. 18 | 19 | * Neither the name of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 | HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 30 | OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 31 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 32 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 33 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 34 | DAMAGE. 35 | 36 | In applying this license, CERN does not waive the privileges and 37 | immunities granted to it by virtue of its status as an 38 | Intergovernmental Organization or submit itself to any jurisdiction. 
39 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Jiri Kuncar (@jirikuncar) 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # This file is part of DoJSON 2 | # Copyright (C) 2015, 2016 CERN. 3 | # 4 | # DoJSON is free software; you can redistribute it and/or 5 | # modify it under the terms of the Revised BSD License; see LICENSE 6 | # file for more details. 7 | 8 | include *.rst 9 | include *.sh 10 | include *.yml 11 | include .coveragerc 12 | include .editorconfig 13 | include .lgtm 14 | include .dockerignore 15 | include docs/requirements.txt 16 | include Dockerfile 17 | include LICENSE 18 | include MAINTAINERS 19 | include pytest.ini 20 | include tox.ini 21 | recursive-include docs *.py 22 | recursive-include docs *.rst 23 | recursive-include docs Makefile 24 | recursive-include dojson *.py 25 | recursive-include dojson *.json 26 | recursive-include tests *.py 27 | recursive-include tests *.xml 28 | recursive-include tests *.xsl 29 | recursive-include tests *.xslt 30 | recursive-include .github/workflows *.yml 31 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | DoJSON 3 | ======== 4 | 5 | .. image:: https://github.com/inveniosoftware/dojson/workflows/CI/badge.svg 6 | :target: https://github.com/inveniosoftware/dojson/actions 7 | 8 | .. image:: https://img.shields.io/coveralls/inveniosoftware/dojson.svg 9 | :target: https://coveralls.io/r/inveniosoftware/dojson 10 | 11 | .. image:: https://img.shields.io/github/tag/inveniosoftware/dojson.svg 12 | :target: https://github.com/inveniosoftware/dojson/releases 13 | 14 | .. 
image:: https://img.shields.io/pypi/dm/dojson.svg 15 | :target: https://pypi.python.org/pypi/dojson 16 | 17 | .. image:: https://img.shields.io/github/license/inveniosoftware/dojson.svg 18 | :target: https://github.com/inveniosoftware/dojson/blob/master/LICENSE 19 | 20 | 21 | About 22 | ===== 23 | 24 | DoJSON is a simple Pythonic JSON to JSON converter. 25 | 26 | Installation 27 | ============ 28 | 29 | DoJSON is on PyPI so all you need is: 30 | 31 | .. code-block:: console 32 | 33 | $ pip install dojson 34 | 35 | Documentation 36 | ============= 37 | 38 | Documentation is readable at https://dojson.readthedocs.io/ or 39 | it can be built using Sphinx: 40 | 41 | .. code-block:: console 42 | 43 | $ pip install dojson[docs] 44 | $ python setup.py build_sphinx 45 | 46 | Testing 47 | ======= 48 | 49 | Running the test suite is as simple as: 50 | 51 | .. code-block:: console 52 | 53 | $ python setup.py test 54 | 55 | Example 56 | ======= 57 | 58 | A simple example on how to convert MARCXML to JSON: 59 | 60 | .. code:: python 61 | 62 | from dojson.contrib.marc21.utils import create_record, split_stream 63 | from dojson.contrib.marc21 import marc21 64 | [marc21.do(create_record(data)) for data in split_stream(open('/tmp/data.xml', 'r'))] 65 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # This file is part of DoJSON 2 | # Copyright (C) 2015 CERN. 3 | # 4 | # DoJSON is free software; you can redistribute it and/or 5 | # modify it under the terms of the Revised BSD License; see LICENSE 6 | # file for more details. 7 | 8 | web: 9 | build: . 
10 | command: python setup.py test 11 | volumes: 12 | - .:/code 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/DoJSON.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/DoJSON.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/DoJSON" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/DoJSON" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 
112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2016 CERN. 4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | API 10 | === 11 | 12 | .. automodule:: dojson.overdo 13 | :members: 14 | 15 | Errors 16 | ~~~~~~ 17 | 18 | .. automodule:: dojson.errors 19 | :members: 20 | 21 | CLI 22 | --- 23 | 24 | .. automodule:: dojson.cli.command 25 | :members: 26 | 27 | .. autodata:: dojson.cli.command.process_do 28 | .. autodata:: dojson.cli.command.process_missing 29 | .. autodata:: dojson.cli.command.process_schema 30 | 31 | .. automodule:: dojson.cli.utils 32 | :members: 33 | 34 | Contrib 35 | ------- 36 | 37 | There is a set of rules to manage translation from other formats. 38 | 39 | MARC21 40 | ~~~~~~ 41 | 42 | .. automodule:: dojson.contrib.marc21 43 | :members: 44 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2016 CERN. 
4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | .. include:: ../AUTHORS.rst 10 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2016 CERN. 4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | .. include:: ../CHANGES.rst 10 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2016 CERN. 4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | .. include:: ../CONTRIBUTING.rst 10 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2015, 2016 CERN. 4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | .. include:: ../README.rst 10 | 11 | User's Guide 12 | ------------ 13 | 14 | This part of the documentation will show you how to get started in using 15 | DoJSON. 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | 20 | usage 21 | 22 | API Reference 23 | ------------- 24 | 25 | If you are looking for information on a specific function, class or method, 26 | this part of the documentation is for you. 27 | 28 | .. 
toctree:: 29 | :maxdepth: 2 30 | 31 | api 32 | 33 | Additional Notes 34 | ---------------- 35 | 36 | Notes on how to contribute, legal information and changes are here for the 37 | interested. 38 | 39 | .. toctree:: 40 | :maxdepth: 1 41 | 42 | contributing 43 | changes 44 | license 45 | authors 46 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2016 CERN. 4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | License 10 | ======= 11 | 12 | .. include:: ../LICENSE 13 | :literal: 14 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | -e .[docs,tests] 2 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | .. 2 | This file is part of DoJSON 3 | Copyright (C) 2016 CERN. 4 | 5 | DoJSON is free software; you can redistribute it and/or 6 | modify it under the terms of the Revised BSD License; see LICENSE 7 | file for more details. 8 | 9 | 10 | ======= 11 | Usage 12 | ======= 13 | 14 | .. automodule:: dojson 15 | 16 | Command line interface 17 | ---------------------- 18 | 19 | .. automodule:: dojson.cli 20 | -------------------------------------------------------------------------------- /dojson/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015, 2016 CERN. 
5 | # 6 | # DoJSON is free software; you can redistribute it and/or 7 | # modify it under the terms of the Revised BSD License; see LICENSE 8 | # file for more details. 9 | 10 | """DoJSON is a simple Pythonic JSON to JSON converter. 11 | 12 | The main goal of this package is to help with managing a set of rules 13 | for manipulation of Python dictionaries with focus on JSON serialization. 14 | Each rule is associated with a regular expression and a key. The regular expression 15 | has to match a key in the source mapping and produces a new value that is added 16 | to the output mapping under the new key. 17 | 18 | Initialization 19 | -------------- 20 | First create an `Overdo` object that is holding the index with rules. 21 | 22 | >>> import dojson 23 | >>> simple = dojson.Overdo() 24 | 25 | The next step is to create rules that will manipulate a source object. 26 | 27 | >>> @simple.over('first', '^.*st$') 28 | ... def first(self, key, value): 29 | ... return value + 1 30 | >>> @simple.over('second', '^.*nd$') 31 | ... def second(self, key, value): 32 | ... return value + 2 33 | 34 | And now we can try to match the source object and produce new data. 35 | 36 | >>> data = simple.do({'1st': 1, '2nd': 2}) 37 | >>> assert 2 == data['first'] 38 | >>> assert 4 == data['second'] 39 | """ 40 | 41 | from .overdo import Overdo 42 | from .version import __version__ 43 | 44 | __all__ = ( 45 | 'Overdo', 46 | '__version__', 47 | ) 48 | -------------------------------------------------------------------------------- /dojson/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2016 CERN. 
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

if not PY3:
    # Legacy interpreter: one ``StringIO`` class backs both buffer aliases.
    import StringIO
    StringIO = BytesIO = StringIO.StringIO
    stdin = sys.stdin

    binary_type = str
    string_types = (basestring,)
    text_type = unicode

    def iteritems(d, **kw):
        """Return an iterator over the ``(key, value)`` pairs of ``d``."""
        return iter(d.iteritems(**kw))

    from itertools import izip_longest as zip_longest
else:
    import io
    BytesIO = io.BytesIO
    StringIO = io.StringIO
    # Use the stream's ``buffer`` attribute when it has one, otherwise the
    # stream object itself.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)

    binary_type = bytes
    string_types = (str,)
    text_type = str

    def iteritems(d, **kw):
        """Return an iterator over the ``(key, value)`` pairs of ``d``."""
        pairs = d.items(**kw)
        return iter(pairs)

    from itertools import zip_longest
5 | # 6 | # DoJSON is free software; you can redistribute it and/or 7 | # modify it under the terms of the Revised BSD License; see LICENSE 8 | # file for more details. 9 | 10 | r"""Command line interface script is installed as ``dojson``. 11 | 12 | The easiest way to get started is by applying an already registered rule to JSON 13 | data. 14 | 15 | .. code-block:: text 16 | 17 | {"245__": {"a": "Test title"}} 18 | 19 | DoJSON comes with a set of rules for processing MARC21 fields. 20 | 21 | .. code-block:: console 22 | 23 | $ echo '{"245__": {"a": "Test title"}}' | dojson do marc21 24 | {"title_statement": {"title": "Test title"}} 25 | 26 | Sometimes one can get input with fields that do not match any rule. 27 | To get a list of such fields one can use the ``missing`` command. 28 | 29 | .. code-block:: console 30 | 31 | $ echo '{"999__": {"a": "Test title"}}' | dojson missing marc21 32 | 999__ 33 | 34 | The usual problem comes with reading different file formats such as XML. 35 | 36 | .. code-block:: xml 37 | 38 | 39 | 40 | 41 | 42 | Test title 43 | 44 | 45 | 46 | 47 | You can specify a registered loader using the ``-l `` argument. Save the above 48 | example as ``example.xml`` and check the following command. 49 | 50 | .. code-block:: console 51 | 52 | $ dojson -i example.xml -l marcxml do marc21 53 | {"title_statement": {"title": "Test title"}} 54 | 55 | In a similar way it is possible to specify a different output serializer (``-d``). 56 | 57 | .. code-block:: console 58 | 59 | $ echo '{"title_statement": {"title": "Test title"}}' | \ 60 | dojson -d marcxml do marc21 61 | 62 | 63 | 64 | 65 | Test title 66 | 67 | 68 | 69 | 70 | Command chaining 71 | ---------------- 72 | 73 | This makes JSON manipulation even easier. For a first example, see the ``schema`` 74 | command, which accepts a string argument containing the URL of a JSON-Schema that 75 | should be added to the ``$schema`` field. 76 | 77 | .. 
@cli.result_callback()
def process_pipeline(processors, source, load, dump):
    """Run the loaded input through every chained processor and print it.

    :param processors: Callables returned by the chained commands; each one
        takes an iterator and returns the next stage.
    :param source: Input stream handed to ``load``.
    :param load: Loader turning the stream into a dict or an iterable.
    :param dump: Serializer applied to the final stage before echoing.
    """
    def records(stream):
        # Generator: ``load`` is only invoked once iteration starts.
        loaded = load(stream)
        if not isinstance(loaded, dict):
            for record in loaded:
                yield record
            return
        # A single dict is treated as a one-record stream.
        yield loaded

    stage = records(source)
    for step in processors:
        stage = step(stage)

    click.echo(dump(stage))
@click.command('do')
@click.argument('rule', callback=open_entry_point('dojson.cli.rule'))
@click.option('--strict', is_flag=True, default=False,
              help='Raise when there is not matching rule for a key.')
def process_do(rule, strict):
    """Process data using given rule."""
    # The docstring above is also the help text click shows for ``do``.
    def processor(iterator):
        """Lazily yield ``rule.do(item)`` for every item of ``iterator``."""
        for item in iterator:
            # ``--strict`` turns off ``ignore_missing``.
            yield rule.do(item, ignore_missing=not strict)
    return processor


@click.command('missing')
@click.argument('rule', callback=open_entry_point('dojson.cli.rule'))
def process_missing(rule):
    """List fields with missing rules."""
    # The docstring above is also the help text click shows for ``missing``.
    def processor(iterator):
        """Report keys without a rule, then terminate the process.

        Consumes the whole iterator, echoes the collected keys and exits
        with status 1 when any key is missing, otherwise with status 0.
        """
        missing = set()
        for item in iterator:
            missing.update(rule.missing(item))
        # ``__order__`` is bookkeeping, not a real field.
        missing.discard('__order__')

        if missing:
            # Sets have no stable iteration order; sort so the same input
            # always yields the same report.
            click.echo(', '.join(sorted(missing)), nl=False)
            sys.exit(1)

        click.echo('', nl=False)
        sys.exit(0)

    return processor


@click.command('schema')
@click.argument('schema')
def process_schema(schema):
    """Add $schema to an item."""
    # The docstring above is also the help text click shows for ``schema``.
    def processor(iterator):
        """Yield each item after setting its ``$schema`` key to ``schema``."""
        for item in iterator:
            # An item that already carries ``$schema`` is rejected.
            assert '$schema' not in item
            item['$schema'] = schema
            yield item

    return processor
@click.command('validate')
@click.argument('schema')
def process_validate(schema):
    """Validate data using given JSON schema."""
    # The docstring above is also the help text click shows for ``validate``.
    import jsonschema

    def accept_list_or_tuple(checker, instance):
        # Tuples count as JSON arrays here, not only lists.
        return isinstance(instance, (list, tuple))

    def build_validator_class():
        # jsonschema 3.x reworked how to customize types,
        # the old way was removed in 4.x
        draft4 = jsonschema.Draft4Validator
        return jsonschema.validators.extend(
            draft4,
            type_checker=draft4.TYPE_CHECKER.redefine(
                'array', accept_list_or_tuple
            )
        )

    base_dir = os.path.dirname(os.path.abspath(schema))
    file_name = os.path.basename(schema)

    with open(schema) as stream:
        document = json.load(stream)

    # Base URI is built from the schema's directory so that relative
    # ``$ref`` values are resolved against it.
    base_uri = 'file://' + '/'.join(os.path.split(base_dir)) + '/'
    resolver = jsonschema.RefResolver(base_uri, file_name)
    validator = build_validator_class()(document, resolver=resolver)

    def processor(iterator):
        """Yield every item that passes validation; invalid items raise."""
        for record in iterator:
            validator.validate(record)
            yield record

    return processor
def open_entry_point(group_name):
    """Return a click callback that loads an entry point of ``group_name``.

    The returned callback looks up the entry point named by the option
    value and returns the loaded object.
    """
    def loader(dummy_ctx, param, value):
        """Load entry point from group name based on given value."""
        matches = list(pkg_resources.iter_entry_points(group_name, value))
        # Exactly one entry point may be registered under the given name.
        assert len(matches) == 1
        entry_point = matches[0]
        return entry_point.load()
    return loader


def with_plugins(group_name):
    """Return a decorator that adds entry-point commands to a click group."""
    def decorator(group):
        """Add every loadable command of ``group_name`` to ``group``."""
        if isinstance(group, click.Group):
            for plugin in pkg_resources.iter_entry_points(group_name):
                try:
                    group.add_command(plugin.load())
                except Exception:
                    # Best effort: report the broken plugin and keep going.
                    click.echo('Command {0} could not be loaded. \n\n{1}'.format(
                        plugin.name, traceback.format_exc()
                    ))
            return group
        raise TypeError(
            'Plugins can only be attached to an instance of click.Group.'
        )
    return decorator
9 | 10 | """MARC standards based on `www.loc.gov/marc/ `_.""" 11 | 12 | from __future__ import absolute_import 13 | 14 | from .model import marc21, marc21_authority, marc21_holdings 15 | 16 | __all__ = ('marc21', 'marc21_authority', 'marc21_holdings') 17 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/fields/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or 7 | # modify it under the terms of the Revised BSD License; see LICENSE 8 | # file for more details. 9 | 10 | """MARC 21 model definition.""" 11 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/fields/ad00x.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015, 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or 7 | # modify it under the terms of the Revised BSD License; see LICENSE 8 | # file for more details. 
@marc21_authority.over('control_number', '^001')
def control_number(self, key, value):
    """Control Number.

    Rule for source keys matching ``^001``; the value is returned unchanged
    and registered under the ``control_number`` output key.
    """
    return value


@marc21_authority.over('control_number_identifier', '^003')
def control_number_identifier(self, key, value):
    """Control Number Identifier.

    Rule for source keys matching ``^003``; the value is returned unchanged
    and registered under the ``control_number_identifier`` output key.
    """
    return value


@marc21_authority.over('date_and_time_of_latest_transaction', '^005')
def date_and_time_of_latest_transaction(self, key, value):
    """Date and Time of Latest Transaction.

    Rule for source keys matching ``^005``; the value is returned unchanged
    and registered under the ``date_and_time_of_latest_transaction`` key.
    """
    return value


@marc21_authority.over('fixed_length_data_elements', '^008')
def fixed_length_data_elements(self, key, value):
    """Fixed-Length Data Elements.

    Rule for source keys matching ``^008``; the value is returned unchanged
    and registered under the ``fixed_length_data_elements`` output key.
    """
    return value
@marc21_authority.over('complex_see_reference_subject', '^260..')
@utils.for_each_value
@utils.filter_values
def complex_see_reference_subject(self, key, value):
    """Complex See Reference-Subject.

    Map the subfields of a ``260`` field to named keys. Subfields ``a``,
    ``0``, ``i`` and ``8`` go through ``utils.force_list``; ``6`` is copied
    as is.
    """
    subfield_names = {
        'a': 'heading_referred_to',
        'i': 'explanatory_text',
        '0': 'authority_record_control_number',
        '6': 'linkage',
        '8': 'field_link_and_sequence_number',
    }
    ordering = utils.map_order(subfield_names, value)

    record = {'__order__': tuple(ordering) if len(ordering) else None}
    # Insertion order of the output keys is kept as in the original literal.
    for code, name in (
        ('a', 'heading_referred_to'),
        ('0', 'authority_record_control_number'),
        ('i', 'explanatory_text'),
        ('8', 'field_link_and_sequence_number'),
    ):
        record[name] = utils.force_list(value.get(code))
    record['linkage'] = value.get('6')
    return record


@marc21_authority.over('complex_see_also_reference_subject', '^360..')
@utils.for_each_value
@utils.filter_values
def complex_see_also_reference_subject(self, key, value):
    """Complex See Also Reference-Subject.

    Map the subfields of a ``360`` field to named keys. Subfields ``a``,
    ``0``, ``i`` and ``8`` go through ``utils.force_list``; ``6`` is copied
    as is.
    """
    subfield_names = {
        'a': 'heading_referred_to',
        'i': 'explanatory_text',
        '0': 'authority_record_control_number',
        '6': 'linkage',
        '8': 'field_link_and_sequence_number',
    }
    ordering = utils.map_order(subfield_names, value)

    record = {'__order__': tuple(ordering) if len(ordering) else None}
    # Insertion order of the output keys is kept as in the original literal.
    for code, name in (
        ('a', 'heading_referred_to'),
        ('0', 'authority_record_control_number'),
        ('i', 'explanatory_text'),
        ('8', 'field_link_and_sequence_number'),
    ):
        record[name] = utils.force_list(value.get(code))
    record['linkage'] = value.get('6')
    return record
@marc21_authority.over('complex_see_also_reference_name', '^663..')
@utils.filter_values
def complex_see_also_reference_name(self, key, value):
    """Complex See Also Reference-Name.

    Map the subfields of a ``663`` field to named keys. Subfields ``a``,
    ``8``, ``b`` and ``t`` go through ``utils.force_list``; ``6`` is copied
    as is.
    """
    subfield_names = {
        'a': 'explanatory_text',
        'b': 'heading_referred_to',
        't': 'title_referred_to',
        '6': 'linkage',
        '8': 'field_link_and_sequence_number',
    }
    ordering = utils.map_order(subfield_names, value)

    record = {'__order__': tuple(ordering) if len(ordering) else None}
    # Insertion order of the output keys is kept as in the original literal.
    for code, name in (
        ('a', 'explanatory_text'),
        ('8', 'field_link_and_sequence_number'),
        ('b', 'heading_referred_to'),
        ('t', 'title_referred_to'),
    ):
        record[name] = utils.force_list(value.get(code))
    record['linkage'] = value.get('6')
    return record


@marc21_authority.over('complex_see_reference_name', '^664..')
@utils.filter_values
def complex_see_reference_name(self, key, value):
    """Complex See Reference-Name.

    Map the subfields of a ``664`` field to named keys. Subfields ``a``,
    ``8``, ``b`` and ``t`` go through ``utils.force_list``; ``6`` is copied
    as is.
    """
    subfield_names = {
        'a': 'explanatory_text',
        'b': 'heading_referred_to',
        't': 'title_referred_to',
        '6': 'linkage',
        '8': 'field_link_and_sequence_number',
    }
    ordering = utils.map_order(subfield_names, value)

    record = {'__order__': tuple(ordering) if len(ordering) else None}
    # Insertion order of the output keys is kept as in the original literal.
    for code, name in (
        ('a', 'explanatory_text'),
        ('8', 'field_link_and_sequence_number'),
        ('b', 'heading_referred_to'),
        ('t', 'title_referred_to'),
    ):
        record[name] = utils.force_list(value.get(code))
    record['linkage'] = value.get('6')
    return record


@marc21_authority.over('history_reference', '^665..')
@utils.filter_values
def history_reference(self, key, value):
    """History Reference.

    Map the subfields of a ``665`` field to named keys. Subfields ``a`` and
    ``8`` go through ``utils.force_list``; ``6`` is copied as is.
    """
    subfield_names = {
        'a': 'history_reference',
        '6': 'linkage',
        '8': 'field_link_and_sequence_number',
    }
    ordering = utils.map_order(subfield_names, value)

    record = {'__order__': tuple(ordering) if len(ordering) else None}
    record['history_reference'] = utils.force_list(value.get('a'))
    record['field_link_and_sequence_number'] = utils.force_list(
        value.get('8')
    )
    record['linkage'] = value.get('6')
    return record


@marc21_authority.over('general_explanatory_reference_name', '^666..')
@utils.filter_values
def general_explanatory_reference_name(self, key, value):
    """General Explanatory Reference-Name.

    Map the subfields of a ``666`` field to named keys. Subfields ``a`` and
    ``8`` go through ``utils.force_list``; ``6`` is copied as is.
    """
    subfield_names = {
        'a': 'general_explanatory_reference',
        '6': 'linkage',
        '8': 'field_link_and_sequence_number',
    }
    ordering = utils.map_order(subfield_names, value)

    record = {'__order__': tuple(ordering) if len(ordering) else None}
    record['general_explanatory_reference'] = utils.force_list(
        value.get('a')
    )
    record['field_link_and_sequence_number'] = utils.force_list(
        value.get('8')
    )
    record['linkage'] = value.get('6')
    return record
@marc21_authority.over('leader', '^leader')
@utils.filter_values
def leader(self, key, value):
    """Leader.

    Decode the fixed-position leader string ``value`` into a mapping.
    Coded positions are translated through the lookup tables below and
    yield ``None`` for codes that are not listed; numeric positions are
    converted with ``int``.

    :raises ValueError: if a numeric position does not contain digits.
    :raises IndexError: if ``value`` is shorter than 24 characters.
    """
    record_status = {
        'a': 'increase_in_encoding_level',
        'c': 'corrected_or_revised',
        'd': 'deleted',
        'n': 'new',
        'o': 'obsolete',
        's': 'deleted_heading_split_into_two_or_more_headings',
        'x': 'deleted_heading_replaced_by_another_heading',
    }
    type_of_record = {
        'z': 'authority_data',
    }
    character_coding_scheme = {
        '#': 'marc-8',
        'a': 'ucs_unicode'
    }
    # Positions 10 and 11 are emitted as plain integers below, so they need
    # no lookup table (the previous, never-read tables for them have been
    # dropped).
    encoding_level = {
        'n': 'complete_authority_record',
        'o': 'incomplete_authority_record',
    }

    length_of_the_length_of_field_portion = {
        '4': 4,
    }
    length_of_the_starting_character_position_portion = {
        '5': 5,
    }
    length_of_the_implementation_defined_portion = {
        '0': 0,
    }
    undefined = {
        '0': 0,
    }

    return {
        'record_length': int(value[:5]),
        'record_status': record_status.get(value[5]),
        'type_of_record': type_of_record.get(value[6]),
        'character_coding_scheme': character_coding_scheme.get(value[9]),
        'indicator_count': int(value[10]),
        'subfield_code_length': int(value[11]),
        'base_address_of_data': int(value[12:17]),
        'encoding_level': encoding_level.get(value[17]),
        'length_of_the_length_of_field_portion':
        length_of_the_length_of_field_portion.get(value[20]),
        'length_of_the_starting_character_position_portion':
        length_of_the_starting_character_position_portion.get(value[21]),
        'length_of_the_implementation_defined_portion':
        length_of_the_implementation_defined_portion.get(value[22]),
        'undefined': undefined.get(value[23])
    }
@marc21.over('leader', '^leader')
@utils.filter_values
def leader(self, key, value):
    """Leader.

    Decode the fixed-position leader string ``value`` into a mapping.
    Coded positions are translated through per-position tables and yield
    ``None`` for codes that are not listed; numeric positions are converted
    with ``int``.
    """
    status_codes = {
        'a': 'increase_in_encoding_level',
        'c': 'corrected_or_revised',
        'd': 'deleted',
        'n': 'new',
        'p': 'increase_in_encoding_level_from_prepublication'
    }
    record_type_codes = {
        'a': 'language_material',
        'c': 'notated_music',
        'd': 'manuscript_notated_music',
        'e': 'cartographic_material',
        'f': 'manuscript_cartographic_material',
        'g': 'projected_medium',
        'i': 'nonmusical_sound_recording',
        'j': 'musical_sound_recording',
        'k': 'two-dimensional_nonprojectable_graphic',
        'm': 'computer_file',
        'o': 'kit',
        'p': 'mixed_materials',
        'r': 'three-dimensional_artifact_or_naturally_occuring_object',
        't': 'manuscript_language_material',
    }
    level_codes = {
        'a': 'monographic_component_part',
        'b': 'serial_component_part',
        'c': 'collection',
        'd': 'subunit',
        'i': 'integrating_resource',
        'm': 'monograph_item',
        's': 'serial',
    }
    control_codes = {
        '#': 'no_specified_type',
        'a': 'archival',
    }
    coding_scheme_codes = {
        '#': 'marc-8',
        'a': 'ucs_unicode'
    }
    encoding_codes = {
        '#': 'full_level',
        '1': 'full_level_material_not_examined',
        '2': 'less-than-full_level_material_not_examined',
        '3': 'abbreviated_level',
        '4': 'core_level',
        '5': 'partial_preliminary_level',
        '7': 'minimal_level',
        '8': 'prepublication_level',
        'u': 'unknown',
        'z': 'not_applicable',
    }
    cataloging_form_codes = {
        '#': 'non-isbd',
        'a': 'aacr_2',
        'c': 'isbd_punctuation_omitteed',
        'i': 'isbd_punctuation_included',
        'u': 'unknown',
    }
    multipart_codes = {
        '#': 'not_specified_or_not_applicable',
        'a': 'set',
        'b': 'part_with_independent_title',
        'c': 'part_with_dependent_title',
    }

    # Entry-map positions 20-23: only one code each is recognised.
    field_length_codes = {'4': 4}
    start_position_codes = {'5': 5}
    implementation_codes = {'0': 0}
    undefined_codes = {'0': 0}

    def decode(table, position):
        # ``None`` when the character at ``position`` is not in ``table``.
        return table.get(value[position])

    return {
        'record_length': int(value[:5]),
        'record_status': decode(status_codes, 5),
        'type_of_record': decode(record_type_codes, 6),
        'bibliographic_level': decode(level_codes, 7),
        'type_of_control': decode(control_codes, 8),
        'character_coding_scheme': decode(coding_scheme_codes, 9),
        'indicator_count': int(value[10]),
        'subfield_code_count': int(value[11]),
        'base_address_of_data': int(value[12:17]),
        'encoding_level': decode(encoding_codes, 17),
        'descriptive_cataloging_form': decode(cataloging_form_codes, 18),
        'multipart_resource_record_level': decode(multipart_codes, 19),
        'length_of_the_length_of_field_portion':
        decode(field_length_codes, 20),
        'length_of_the_starting_character_position_portion':
        decode(start_position_codes, 21),
        'length_of_the_implementation_defined_portion':
        decode(implementation_codes, 22),
        'undefined': decode(undefined_codes, 23)
    }
marc21 = Overdo(entry_point_group='dojson.contrib.marc21')
"""MARC 21 Format for Bibliographic Data."""

marc21_authority = Overdo(entry_point_group='dojson.contrib.marc21_authority')
"""MARC 21 Format for Authority Data."""


@marc21.over('__order__', '__order__')
def order(self, key, value):
    """Translate the ``__order__`` field names into output key names.

    Each entry is replaced by the first result of ``marc21.index.query``;
    entries without a result are kept unchanged.
    """
    translated = []
    for field in value:
        matches = marc21.index.query(field)
        translated.append(matches[0] if matches else field)
    return translated


@marc21_authority.over('__order__', '__order__')
def order_ad(self, key, value):
    """Translate the ``__order__`` field names into output key names.

    Each entry is replaced by the first result of
    ``marc21_authority.index.query``; entries without a result are kept
    unchanged.
    """
    translated = []
    for field in value:
        matches = marc21_authority.index.query(field)
        translated.append(matches[0] if matches else field)
    return translated

marc21_holdings = Overdo(entry_point_group='dojson.contrib.marc21_holdings')
"""MARC 21 Format for Holdings Data."""
{"$ref": "ad663666.json"}, 12 | {"$ref": "ad66768x.json"}, 13 | {"$ref": "ad7xx.json"}, 14 | {"$ref": "ad8xx.json"} 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad-v1.0.1.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "allOf": [ 4 | {"$ref": "adleader.json"}, 5 | {"$ref": "ad00x.json"}, 6 | {"$ref": "ad01x09x.json"}, 7 | {"$ref": "ad1xx3xx.json"}, 8 | {"$ref": "ad260360.json"}, 9 | {"$ref": "ad4xx.json"}, 10 | {"$ref": "ad5xx.json"}, 11 | {"$ref": "ad64x.json"}, 12 | {"$ref": "ad663666.json"}, 13 | {"$ref": "ad66768x.json"}, 14 | {"$ref": "ad7xx.json"}, 15 | {"$ref": "ad8xx.json"} 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad-v1.0.2.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "allOf": [ 4 | {"$ref": "adleader.json"}, 5 | {"$ref": "ad00x.json"}, 6 | {"$ref": "ad01x09x-v1.0.1.json"}, 7 | {"$ref": "ad1xx3xx.json"}, 8 | {"$ref": "ad260360-v1.0.1.json"}, 9 | {"$ref": "ad4xx-v1.0.1.json"}, 10 | {"$ref": "ad5xx-v1.0.1.json"}, 11 | {"$ref": "ad64x.json"}, 12 | {"$ref": "ad663666.json"}, 13 | {"$ref": "ad66768x.json"}, 14 | {"$ref": "ad7xx.json"}, 15 | {"$ref": "ad8xx.json"} 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad00x.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "control_number": { 5 | "description": "Control Number", 6 | "type": "string" 7 | }, 8 | "control_number_identifier": { 9 | "description": "Control Number Identifier", 10 | "type": "string" 11 | 
}, 12 | "date_and_time_of_latest_transaction": { 13 | "description": "Date and Time of Latest Transaction", 14 | "type": "string" 15 | }, 16 | "fixed_length_data_elements": { 17 | "description": "Fixed-Length Data Elements", 18 | "type": "string" 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad260360-v1.0.1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "complex_see_reference_subject": { 5 | "description": "Complex See Reference-Subject", 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "heading_referred_to": { 11 | "type": "array", 12 | "items": { 13 | "type": "string" 14 | } 15 | }, 16 | "authority_record_control_number": { 17 | "type": "array", 18 | "items": { 19 | "type": "string" 20 | } 21 | }, 22 | "explanatory_text": { 23 | "type": "array", 24 | "items": { 25 | "type": "string" 26 | } 27 | }, 28 | "field_link_and_sequence_number": { 29 | "type": "array", 30 | "items": { 31 | "type": "string" 32 | } 33 | }, 34 | "linkage": { 35 | "type": "string" 36 | } 37 | } 38 | } 39 | }, 40 | "complex_see_also_reference_subject": { 41 | "description": "Complex See Also Reference-Subject", 42 | "type": "array", 43 | "items": { 44 | "type": "object", 45 | "properties": { 46 | "heading_referred_to": { 47 | "type": "array", 48 | "items": { 49 | "type": "string" 50 | } 51 | }, 52 | "authority_record_control_number": { 53 | "type": "array", 54 | "items": { 55 | "type": "string" 56 | } 57 | }, 58 | "explanatory_text": { 59 | "type": "array", 60 | "items": { 61 | "type": "string" 62 | } 63 | }, 64 | "field_link_and_sequence_number": { 65 | "type": "array", 66 | "items": { 67 | "type": "string" 68 | } 69 | }, 70 | "linkage": { 71 | "type": "string" 72 | } 73 | } 74 | } 75 | } 76 | } 77 | } 78 | 
-------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad260360.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "complex_see_reference_subject": { 5 | "description": "Complex See Reference-Subject", 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "heading_referred_to": { 11 | "type": "array", 12 | "items": { 13 | "type": "string" 14 | } 15 | }, 16 | "authority_record_control_number_or_standard_number": { 17 | "type": "array", 18 | "items": { 19 | "type": "string" 20 | } 21 | }, 22 | "explanatory_text": { 23 | "type": "array", 24 | "items": { 25 | "type": "string" 26 | } 27 | }, 28 | "field_link_and_sequence_number": { 29 | "type": "array", 30 | "items": { 31 | "type": "string" 32 | } 33 | }, 34 | "linkage": { 35 | "type": "string" 36 | } 37 | } 38 | } 39 | }, 40 | "complex_see_also_reference_subject": { 41 | "description": "Complex See Also Reference-Subject", 42 | "type": "array", 43 | "items": { 44 | "type": "object", 45 | "properties": { 46 | "heading_referred_to": { 47 | "type": "array", 48 | "items": { 49 | "type": "string" 50 | } 51 | }, 52 | "authority_record_control_number_or_standard_number": { 53 | "type": "array", 54 | "items": { 55 | "type": "string" 56 | } 57 | }, 58 | "explanatory_text": { 59 | "type": "array", 60 | "items": { 61 | "type": "string" 62 | } 63 | }, 64 | "field_link_and_sequence_number": { 65 | "type": "array", 66 | "items": { 67 | "type": "string" 68 | } 69 | }, 70 | "linkage": { 71 | "type": "string" 72 | } 73 | } 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad663666.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | 
"complex_see_also_reference_name": { 5 | "description": "Complex See Also Reference-Name", 6 | "properties": { 7 | "explanatory_text": { 8 | "type": "array", 9 | "items": { 10 | "type": "string" 11 | } 12 | }, 13 | "field_link_and_sequence_number": { 14 | "type": "array", 15 | "items": { 16 | "type": "string" 17 | } 18 | }, 19 | "heading_referred_to": { 20 | "type": "array", 21 | "items": { 22 | "type": "string" 23 | } 24 | }, 25 | "title_referred_to": { 26 | "type": "array", 27 | "items": { 28 | "type": "string" 29 | } 30 | }, 31 | "linkage": { 32 | "type": "string" 33 | } 34 | } 35 | }, 36 | "complex_see_reference_name": { 37 | "description": "Complex See Reference-Name", 38 | "properties": { 39 | "explanatory_text": { 40 | "type": "array", 41 | "items": { 42 | "type": "string" 43 | } 44 | }, 45 | "field_link_and_sequence_number": { 46 | "type": "array", 47 | "items": { 48 | "type": "string" 49 | } 50 | }, 51 | "heading_referred_to": { 52 | "type": "array", 53 | "items": { 54 | "type": "string" 55 | } 56 | }, 57 | "title_referred_to": { 58 | "type": "array", 59 | "items": { 60 | "type": "string" 61 | } 62 | }, 63 | "linkage": { 64 | "type": "string" 65 | } 66 | } 67 | }, 68 | "history_reference": { 69 | "description": "History Reference", 70 | "properties": { 71 | "history_reference": { 72 | "type": "array", 73 | "items": { 74 | "type": "string" 75 | } 76 | }, 77 | "field_link_and_sequence_number": { 78 | "type": "array", 79 | "items": { 80 | "type": "string" 81 | } 82 | }, 83 | "linkage": { 84 | "type": "string" 85 | } 86 | } 87 | }, 88 | "general_explanatory_reference_name": { 89 | "description": "General Explanatory Reference-Name", 90 | "properties": { 91 | "general_explanatory_reference": { 92 | "type": "array", 93 | "items": { 94 | "type": "string" 95 | } 96 | }, 97 | "field_link_and_sequence_number": { 98 | "type": "array", 99 | "items": { 100 | "type": "string" 101 | } 102 | }, 103 | "linkage": { 104 | "type": "string" 105 | } 106 | } 107 | } 108 | } 109 
| } 110 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/ad66768x.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "nonpublic_general_note": { 5 | "description": "Nonpublic General Note", 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "nonpublic_general_note": { 11 | "type": "string" 12 | }, 13 | "field_link_and_sequence_number": { 14 | "type": "array", 15 | "items": { 16 | "type": "string" 17 | } 18 | }, 19 | "institution_to_which_field_applies": { 20 | "type": "array", 21 | "items": { 22 | "type": "string" 23 | } 24 | }, 25 | "linkage": { 26 | "type": "string" 27 | } 28 | } 29 | } 30 | }, 31 | "public_general_note": { 32 | "description": "Public General Note", 33 | "type": "array", 34 | "items": { 35 | "type": "object", 36 | "properties": { 37 | "heading_or_subdivision_term": { 38 | "type": "array", 39 | "items": { 40 | "type": "string" 41 | } 42 | }, 43 | "field_link_and_sequence_number": { 44 | "type": "array", 45 | "items": { 46 | "type": "string" 47 | } 48 | }, 49 | "institution_to_which_field_applies": { 50 | "type": "array", 51 | "items": { 52 | "type": "string" 53 | } 54 | }, 55 | "explanatory_text": { 56 | "type": "array", 57 | "items": { 58 | "type": "string" 59 | } 60 | }, 61 | "linkage": { 62 | "type": "string" 63 | } 64 | } 65 | } 66 | }, 67 | "subject_example_tracing_note": { 68 | "description": "Subject Example Tracing Note", 69 | "type": "array", 70 | "items": { 71 | "type": "object", 72 | "properties": { 73 | "subject_heading_or_subdivision_term": { 74 | "type": "array", 75 | "items": { 76 | "type": "string" 77 | } 78 | }, 79 | "field_link_and_sequence_number": { 80 | "type": "array", 81 | "items": { 82 | "type": "string" 83 | } 84 | }, 85 | "explanatory_text": { 86 | "type": "array", 87 | "items": { 88 | "type": "string" 89 | } 90 | }, 91 | 
"linkage": { 92 | "type": "string" 93 | } 94 | } 95 | } 96 | }, 97 | "deleted_heading_information": { 98 | "description": "Deleted Heading Information", 99 | "properties": { 100 | "replacement_heading": { 101 | "type": "array", 102 | "items": { 103 | "type": "string" 104 | } 105 | }, 106 | "replacement_authority_record_control_number": { 107 | "type": "array", 108 | "items": { 109 | "type": "string" 110 | } 111 | }, 112 | "explanatory_text": { 113 | "type": "array", 114 | "items": { 115 | "type": "string" 116 | } 117 | }, 118 | "field_link_and_sequence_number": { 119 | "type": "array", 120 | "items": { 121 | "type": "string" 122 | } 123 | }, 124 | "linkage": { 125 | "type": "string" 126 | } 127 | } 128 | }, 129 | "application_history_note": { 130 | "description": "Application History Note", 131 | "type": "array", 132 | "items": { 133 | "type": "object", 134 | "properties": { 135 | "application_history_note": { 136 | "type": "string" 137 | }, 138 | "field_link_and_sequence_number": { 139 | "type": "array", 140 | "items": { 141 | "type": "string" 142 | } 143 | }, 144 | "institution_to_which_field_applies": { 145 | "type": "array", 146 | "items": { 147 | "type": "string" 148 | } 149 | }, 150 | "linkage": { 151 | "type": "string" 152 | } 153 | } 154 | } 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/authority/adleader.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "leader": { 5 | "description": "Leader", 6 | "type": "array", 7 | "items": { 8 | "type": "object", 9 | "properties": { 10 | "record_length": { 11 | "type": "integer" 12 | }, 13 | "record_status": { 14 | "type": "string" 15 | }, 16 | "type_of_record": { 17 | "type": "string" 18 | }, 19 | "character_coding_scheme": { 20 | "type": "string" 21 | }, 22 | "indicator_count": { 23 | "type": "integer" 24 | }, 25 | 
"subfield_code_length": { 26 | "type": "integer" 27 | }, 28 | "base_address_of_data": { 29 | "type": "integer" 30 | }, 31 | "encoding_level": { 32 | "type": "string" 33 | }, 34 | "length_of_the_length_of_field_portion": { 35 | "type": "integer" 36 | }, 37 | "length_of_the_starting_character_position_portion": { 38 | "type": "integer" 39 | }, 40 | "length_of_the_implementation_defined_portion": { 41 | "type": "integer" 42 | }, 43 | "undefined": { 44 | "type": "integer" 45 | } 46 | } 47 | } 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/bibliographic/bd-v1.0.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "allOf": [ 4 | {"$ref": "bd00x.json"}, 5 | {"$ref": "bd01x09x.json"}, 6 | {"$ref": "bd1xx.json"}, 7 | {"$ref": "bd20x24x.json"}, 8 | {"$ref": "bd25x28x.json"}, 9 | {"$ref": "bd3xx.json"}, 10 | {"$ref": "bd4xx.json"}, 11 | {"$ref": "bd5xx.json"}, 12 | {"$ref": "bd6xx.json"}, 13 | {"$ref": "bd70x75x.json"}, 14 | {"$ref": "bd76x78x.json"}, 15 | {"$ref": "bd80x83x.json"}, 16 | {"$ref": "bd84188x.json"} 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/bibliographic/bd-v1.0.1.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "allOf": [ 4 | {"$ref": "bdleader.json"}, 5 | {"$ref": "bd00x.json"}, 6 | {"$ref": "bd01x09x.json"}, 7 | {"$ref": "bd1xx.json"}, 8 | {"$ref": "bd20x24x.json"}, 9 | {"$ref": "bd25x28x.json"}, 10 | {"$ref": "bd3xx.json"}, 11 | {"$ref": "bd4xx.json"}, 12 | {"$ref": "bd5xx.json"}, 13 | {"$ref": "bd6xx.json"}, 14 | {"$ref": "bd70x75x.json"}, 15 | {"$ref": "bd76x78x.json"}, 16 | {"$ref": "bd80x83x.json"}, 17 | {"$ref": "bd84188x.json"} 18 | ] 19 | } 20 | 
-------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/bibliographic/bd-v1.0.2.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "allOf": [ 4 | {"$ref": "bdleader.json"}, 5 | {"$ref": "bd00x-v1.0.1.json"}, 6 | {"$ref": "bd01x09x-v1.0.1.json"}, 7 | {"$ref": "bd1xx-v1.0.1.json"}, 8 | {"$ref": "bd20x24x-v1.0.1.json"}, 9 | {"$ref": "bd25x28x.json"}, 10 | {"$ref": "bd3xx-v1.0.1.json"}, 11 | {"$ref": "bd4xx-v1.0.1.json"}, 12 | {"$ref": "bd5xx-v1.0.1.json"}, 13 | {"$ref": "bd6xx-v1.0.1.json"}, 14 | {"$ref": "bd70x75x-v1.0.1.json"}, 15 | {"$ref": "bd76x78x.json"}, 16 | {"$ref": "bd80x83x-v1.0.1.json"}, 17 | {"$ref": "bd84188x-v1.0.1.json"} 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/bibliographic/bd00x-v1.0.1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "control_number": { 5 | "description": "Control Number", 6 | "type": "string" 7 | }, 8 | "control_number_identifier": { 9 | "description": "Control Number Identifier", 10 | "type": "string" 11 | }, 12 | "date_and_time_of_latest_transaction": { 13 | "description": "Date and Time of Latest Transaction", 14 | "type": "string" 15 | }, 16 | "fixed_length_data_elements_additional_material_characteristics": { 17 | "description": "Fixed-Length Data Elements-Additional Material Characteristics", 18 | "type": "string" 19 | }, 20 | "physical_description_fixed_field_general_information": { 21 | "description": "Physical Description Fixed Field-General Information", 22 | "type": "string" 23 | }, 24 | "fixed_length_data_elements": { 25 | "description": "Fixed-Length Data Elements", 26 | "type": "string" 27 | } 28 | } 29 | } 30 | 
-------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/bibliographic/bd00x.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "control_number": { 5 | "description": "Control Number", 6 | "type": "string" 7 | }, 8 | "control_number_identifier": { 9 | "description": "Control Number Identifier", 10 | "type": "string" 11 | }, 12 | "date_and_time_of_latest_transaction": { 13 | "description": "Date and Time of Latest Transaction", 14 | "type": "string" 15 | }, 16 | "fixed_length_data_elements_additional_material_characteristics": { 17 | "description": "Fixed-Length Data Elements-Additional Material Characteristics", 18 | "type": "string" 19 | }, 20 | "physical_description_fixed_field": { 21 | "description": "Physical Description Fixed Field", 22 | "type": "string" 23 | }, 24 | "fixed_length_data_elements": { 25 | "description": "Fixed-Length Data Elements", 26 | "type": "string" 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/bibliographic/bdleader.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "leader": { 5 | "description": "Leader", 6 | "type": "object", 7 | "properties": { 8 | "record_length": { 9 | "type": "integer" 10 | }, 11 | "record_status": { 12 | "type": "string" 13 | }, 14 | "type_of_record": { 15 | "type": "string" 16 | }, 17 | "bibliographic_level": { 18 | "type": "string" 19 | }, 20 | "type_of_control": { 21 | "type": "string" 22 | }, 23 | "character_coding_scheme": { 24 | "type": "string" 25 | }, 26 | "indicator_count": { 27 | "type": "integer" 28 | }, 29 | "subfield_code_count": { 30 | "type": "integer" 31 | }, 32 | "base_address_of_data": { 33 | "type": "integer" 34 | }, 35 | "encoding_level": { 36 | "type": "string" 37 
| }, 38 | "descriptive_cataloging_form": { 39 | "type": "string" 40 | }, 41 | "multipart_resource_record_level": { 42 | "type": "string" 43 | }, 44 | "length_of_the_length_of_field_portion": { 45 | "type": "integer" 46 | }, 47 | "length_of_the_starting_character_position_portion": { 48 | "type": "integer" 49 | }, 50 | "length_of_the_implementation_defined_portion": { 51 | "type": "integer" 52 | }, 53 | "undefined": { 54 | "type": "integer" 55 | } 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/holdings/hd-v1.0.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "allOf": [ 4 | {"$ref": "hd00x.json"}, 5 | {"$ref": "hd0xx.json"}, 6 | {"$ref": "hd3xx5xx84x.json"}, 7 | {"$ref": "hd85xhd88x.json"} 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/schemas/marc21/holdings/hd00x.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "properties": { 4 | "control_number": { 5 | "description": "Control Number", 6 | "type": "string" 7 | }, 8 | "control_number_identifier": { 9 | "description": "Control Number Identifier", 10 | "type": "string" 11 | }, 12 | "date_and_time_of_latest_transaction": { 13 | "description": "Date and Time of Latest Transaction", 14 | "type": "string" 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /dojson/contrib/marc21/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015, 2016 CERN. 
# Matches one complete ``<record ...>...</record>`` element; DOTALL lets
# ``.`` span the newlines inside a record.
# NOTE(review): the angle-bracket tags of this pattern were missing from the
# text under review (leaving only '.*?', which matches nothing but empty
# strings); reconstructed here -- confirm against upstream.
split_marc = re.compile('<record.*?>.*?</record>', re.DOTALL)


def create_record(marcxml, correct=False, keep_singletons=True):
    """Create a record object using the LXML parser.

    :param marcxml: MARCXML as ``bytes`` (decoded as UTF-8), as text, or any
        already-parsed object offering ``iter(tag=...)`` (used as-is).
    :param correct: If true, text input is wrapped in a ``collection``
        document whose DOCTYPE points at ``MARC21_DTD`` and the parser is
        created with DTD validation switched on. If false, no validation.
    :param keep_singletons: If false, control fields and subfields with
        empty text are dropped, and so are data fields left without any
        subfield.
    :returns: ``GroupableOrderedDict`` built from ``(key, value)`` pairs:
        ``('leader', text)``, ``(tag, text)`` for control fields and
        ``(tag + ind1 + ind2, GroupableOrderedDict(subfields))`` for data
        fields.
    """
    if isinstance(marcxml, binary_type):
        marcxml = marcxml.decode('utf-8')

    if isinstance(marcxml, text_type):
        parser = etree.XMLParser(dtd_validation=correct, recover=True)

        if correct:
            # NOTE(review): the markup inside these literals was missing from
            # the text under review (the template never used ``{0}`` although
            # MARC21_DTD is passed in); reconstructed -- confirm upstream.
            marcxml = (u'<?xml version="1.0" encoding="UTF-8"?>\n'
                       u'<!DOCTYPE collection SYSTEM "file://{0}">\n'
                       u'<collection>\n{1}\n</collection>'.format(
                           MARC21_DTD, marcxml))

        tree = etree.parse(StringIO(marcxml), parser)
    else:
        tree = marcxml
    record = []

    # The leader is always kept, even when empty.
    for leader in tree.iter(tag='{*}leader'):
        record.append(('leader', leader.text or ''))

    for controlfield in tree.iter(tag='{*}controlfield'):
        tag = controlfield.attrib.get('tag', '!')
        text = controlfield.text or ''
        if text or keep_singletons:
            record.append((tag, text))

    for datafield in tree.iter(tag='{*}datafield'):
        tag = datafield.attrib.get('tag', '!')
        ind1 = datafield.attrib.get('ind1', '!')
        ind2 = datafield.attrib.get('ind2', '!')
        # Blank indicators ('', '#' or spaces) are all spelled '_' in keys.
        if ind1 in ('', '#'):
            ind1 = '_'
        if ind2 in ('', '#'):
            ind2 = '_'
        ind1 = ind1.replace(' ', '_')
        ind2 = ind2.replace(' ', '_')

        fields = []
        for subfield in datafield.iter(tag='{*}subfield'):
            code = subfield.attrib.get('code', '!').lower()
            text = subfield.text or ''
            if text or keep_singletons:
                fields.append((code, text))

        if fields or keep_singletons:
            key = '{0}{1}{2}'.format(tag, ind1, ind2)
            record.append((key, GroupableOrderedDict(fields)))

    return GroupableOrderedDict(record)


def split_blob(blob):
    """Split the blob using <record.*?>.*?</record> as pattern.

    :param blob: Text containing zero or more MARCXML records.
    :returns: Generator over the text of each matched record, in order.
    """
    # The generator simply ends when the matches run out. Raising
    # StopIteration by hand here is turned into RuntimeError by PEP 479.
    for match in split_marc.finditer(blob):
        yield match.group()


def split_stream(stream):
    """Yield record elements from given stream.

    :param stream: Source handed to ``etree.iterparse``.
    :returns: Generator over the ``record`` elements reported by the parser.
    """
    for _, element in etree.iterparse(stream, tag='{*}record'):
        yield element


def load(source):
    """Load MARC XML and yield one converted record per ``record`` element.

    :param source: Source handed to :func:`split_stream`.
    :returns: Generator over the results of :func:`create_record`.
    """
    for data in split_stream(source):
        yield create_record(data)
@to_marc21_authority.over('001', '^control_number$')
def reverse_control_number(self, key, value):
    """Reverse - Control Number.

    Emits the value for field ``001`` wrapped in a one-element list.
    """
    return [value]


@to_marc21_authority.over('003', '^control_number_identifier$')
def reverse_control_number_identifier(self, key, value):
    """Reverse - Control Number Identifier.

    Emits the value for field ``003`` wrapped in a one-element list.
    """
    return [value]


@to_marc21_authority.over('005', '^date_and_time_of_latest_transaction$')
def reverse_date_and_time_of_latest_transaction(self, key, value):
    """Reverse - Date and Time of Latest Transaction.

    Emits the value for field ``005`` wrapped in a one-element list.
    """
    return [value]


@to_marc21_authority.over('008', '^fixed_length_data_elements$')
def reverse_fixed_length_data_elements(self, key, value):
    """Reverse - Fixed-Length Data Elements.

    Emits the value for field ``008`` wrapped in a one-element list.
    """
    return [value]
@to_marc21_authority.over('260', '^complex_see_reference_subject$')
@utils.reverse_for_each_value
@utils.filter_values
def reverse_complex_see_reference_subject(self, key, value):
    """Reverse - Complex See Reference-Subject.

    Builds the subfield mapping for field ``260`` from one JSON entry:
    repeatable elements go through ``utils.reverse_force_list``, ``linkage``
    is copied directly and both indicators are blank (``_``).
    """
    subfield_codes = {
        'heading_referred_to': 'a',
        'explanatory_text': 'i',
        'authority_record_control_number': '0',
        'linkage': '6',
        'field_link_and_sequence_number': '8',
    }
    ordering = utils.map_order(subfield_codes, value)

    # Keys are inserted in the same sequence as the original literal.
    marc = {'__order__': tuple(ordering) if len(ordering) else None}
    repeatable = (
        ('a', 'heading_referred_to'),
        ('0', 'authority_record_control_number'),
        ('i', 'explanatory_text'),
        ('8', 'field_link_and_sequence_number'),
    )
    for code, json_name in repeatable:
        marc[code] = utils.reverse_force_list(value.get(json_name))
    marc['6'] = value.get('linkage')
    marc['$ind1'] = '_'
    marc['$ind2'] = '_'
    return marc
@to_marc21.over('001', '^control_number$')
def reverse_control_number(self, key, value):
    """Reverse - Control Number.

    Emits the value for field ``001`` wrapped in a one-element list.
    """
    return [value]


@to_marc21.over('003', '^control_number_identifier$')
def reverse_control_number_identifier(self, key, value):
    """Reverse - Control Number Identifier.

    Emits the value for field ``003`` wrapped in a one-element list.
    """
    return [value]


@to_marc21.over('005', '^date_and_time_of_latest_transaction$')
def reverse_date_and_time_of_latest_transaction(self, key, value):
    """Reverse - Date and Time of Latest Transaction.

    Emits the value for field ``005`` wrapped in a one-element list.
    """
    return [value]


@to_marc21.over(
    '006', '^fixed_length_data_elements_additional_material_characteristics$')
def reverse_fixed_length_data_elements_additional_material_characteristics(
        self, key, value):
    """Reverse - Fixed-Length Data Elements-Additional Material Characteristics.

    Emits the value for field ``006`` wrapped in a one-element list.
    """
    return [value]


@to_marc21.over(
    '007', '^physical_description_fixed_field_general_information$')
def reverse_physical_description_fixed_field_general_information(
        self, key, value):
    """Reverse - Physical Description Fixed Field-General Information.

    Emits the value for field ``007`` wrapped in a one-element list.
    """
    return [value]


@to_marc21.over('008', '^fixed_length_data_elements$')
def reverse_fixed_length_data_elements(self, key, value):
    """Reverse - Fixed-Length Data Elements.

    Emits the value for field ``008`` wrapped in a one-element list.
    """
    return [value]
@to_marc21.over('leader', '^leader')
def to_leader(self, key, value):
    """To Leader.

    Serialise the ``leader`` mapping into the fixed-width leader string.

    :param value: Mapping with the leader elements. Named elements
        (``record_status``, ``type_of_record``, ...) are translated to their
        one-character codes through the tables below; numeric elements
        (``record_length``, ``indicator_count``, ``subfield_code_count``,
        ``base_address_of_data``) are written as given.
    :returns: The 24-character leader string.
    :raises AssertionError: If the assembled string is not exactly 24
        characters long (for example a numeric element with too many digits).
    """
    record_status = {
        'increase_in_encoding_level': 'a',
        'corrected_or_revised': 'c',
        'deleted': 'd',
        'new': 'n',
        'increase_in_encoding_level_from_prepublication': 'p'
    }
    type_of_record = {
        'language_material': 'a',
        'notated_music': 'c',
        'manuscript_notated_music': 'd',
        'cartographic_material': 'e',
        'manuscript_cartographic_material': 'f',
        'projected_medium': 'g',
        'nonmusical_sound_recording': 'i',
        'musical_sound_recording': 'j',
        'two-dimensional_nonprojectable_graphic': 'k',
        'computer_file': 'm',
        'kit': 'o',
        'mixed_materials': 'p',
        'three-dimensional_artifact_or_naturally_occuring_object': 'r',
        'manuscript_language_material': 't',
    }
    bibliographic_level = {
        'monographic_component_part': 'a',
        'serial_component_part': 'b',
        'collection': 'c',
        'subunit': 'd',
        'integrating_resource': 'i',
        'monograph_item': 'm',
        'serial': 's',
    }
    type_of_control = {
        'no_specified_type': '#',
        'archival': 'a',
    }
    character_coding_scheme = {
        'marc-8': '#',
        'ucs_unicode': 'a'
    }
    encoding_level = {
        'full_level': '#',
        'full_level_material_not_examined': '1',
        'less-than-full_level_material_not_examined': '2',
        'abbreviated_level': '3',
        'core_level': '4',
        'partial_preliminary_level': '5',
        'minimal_level': '7',
        'prepublication_level': '8',
        'unknown': 'u',
        'not_applicable': 'z',
    }
    descriptive_cataloging_form = {
        'non-isbd': '#',
        'aacr_2': 'a',
        'isbd_punctuation_omitteed': 'c',
        'isbd_punctuation_included': 'i',
        'unknown': 'u',
    }
    multipart_resource_record_level = {
        'not_specified_or_not_applicable': '#',
        'set': 'a',
        'part_with_independent_title': 'b',
        'part_with_dependent_title': 'c',
    }

    length_of_field_portion = {
        4: '4',
    }
    starting_character_position_portion = {
        5: '5',
    }
    implementation_defined_portion = {
        0: '0',
    }
    undefined = {
        0: '0',
    }

    def code_for(table, field):
        """Return the code ``table`` holds for ``value[field]`` or a blank."""
        return table.get(value.get(field), ' ')

    # Missing numeric elements fall back to a single blank; the ``0>5``
    # specs then left-pad that blank with zeros.
    leader_string = (
        '{record_length!s:0>5}{record_status}{type_of_record}'
        '{bibliographic_level}{type_of_control}{character_coding_scheme}'
        '{indicator_count!s}{subfield_code_count!s}{base_address_of_data!s:0>5}'
        '{encoding_level}{descriptive_cataloging_form}'
        '{multipart_resource_record_level}'
        '{length_of_the_length_of_field_portion}'
        '{length_of_the_starting_character_position_portion}'
        '{length_of_the_implementation_defined_portion}'
        '{undefined}'
    ).format(
        record_length=value.get('record_length', ' '),
        record_status=code_for(record_status, 'record_status'),
        type_of_record=code_for(type_of_record, 'type_of_record'),
        bibliographic_level=code_for(
            bibliographic_level, 'bibliographic_level'),
        type_of_control=code_for(type_of_control, 'type_of_control'),
        character_coding_scheme=code_for(
            character_coding_scheme, 'character_coding_scheme'),
        indicator_count=value.get('indicator_count', ' '),
        subfield_code_count=value.get('subfield_code_count', ' '),
        base_address_of_data=value.get('base_address_of_data', ' '),
        encoding_level=code_for(encoding_level, 'encoding_level'),
        descriptive_cataloging_form=code_for(
            descriptive_cataloging_form, 'descriptive_cataloging_form'),
        multipart_resource_record_level=code_for(
            multipart_resource_record_level,
            'multipart_resource_record_level'),
        length_of_the_length_of_field_portion=code_for(
            length_of_field_portion,
            'length_of_the_length_of_field_portion'),
        length_of_the_starting_character_position_portion=code_for(
            starting_character_position_portion,
            'length_of_the_starting_character_position_portion'),
        length_of_the_implementation_defined_portion=code_for(
            implementation_defined_portion,
            'length_of_the_implementation_defined_portion'),
        undefined=code_for(undefined, 'undefined'),
    )

    # Explicit raise instead of ``assert`` so the check survives ``python -O``;
    # the exception type is kept so existing handlers still apply.
    if len(leader_string) != 24:
        raise AssertionError(
            'leader must be 24 characters long, got {0}: {1!r}'.format(
                len(leader_string), leader_string))

    return leader_string
29 | 30 | :param blob: ``dict``-like object on which the matching rules are 31 | going to be applied. 32 | :param ignore_missing: Set to ``False`` if you prefer to raise 33 | an exception ``MissingRule`` for the first 34 | key that it is not matching any rule. 35 | :param exception_handlers: Give custom exception handlers to take care 36 | of non-standard names that are installation 37 | specific. 38 | 39 | .. versionadded:: 1.0.0 40 | 41 | ``ignore_missing`` allows to specify if the function should raise 42 | an exception. 43 | 44 | .. versionadded:: 1.1.0 45 | 46 | ``exception_handlers`` allows unknown keys to treated in a custom 47 | fashion. 48 | """ 49 | handlers = {IgnoreKey: None} 50 | handlers.update(exception_handlers or {}) 51 | 52 | if ignore_missing: 53 | handlers.setdefault(MissingRule, None) 54 | 55 | output = [] 56 | 57 | if self.index is None: 58 | self.build() 59 | 60 | if '__order__' in blob and not isinstance(blob, GroupableOrderedDict): 61 | blob = GroupableOrderedDict(blob) 62 | 63 | if '__order__' in blob: 64 | items = blob.iteritems(repeated=True) 65 | else: 66 | items = iteritems(blob) 67 | 68 | for key, value in items: 69 | try: 70 | result = self.index.query(key) 71 | if not result: 72 | raise MissingRule(key) 73 | 74 | name, creator = result 75 | item = creator(output, key, value) 76 | if isinstance(item, MutableMapping): 77 | field = '{0}{1}{2}'.format( 78 | name, item.pop('$ind1', '_'), 79 | item.pop('$ind2', '_')) 80 | if '__order__' in item: 81 | item = GroupableOrderedDict(item) 82 | output.append((field, item)) 83 | elif isinstance(item, MutableSequence): 84 | for v in item: 85 | try: 86 | field = '{0}{1}{2}'.format( 87 | name, v.pop('$ind1', '_'), 88 | v.pop('$ind2', '_')) 89 | except AttributeError: 90 | field = name 91 | output.append((field, v)) 92 | else: 93 | output.append((name, item)) 94 | except Exception as exc: 95 | if exc.__class__ in handlers: 96 | handler = handlers[exc.__class__] 97 | if handler is not None: 98 | 
handler(exc, output, key, value) 99 | else: 100 | raise 101 | 102 | return GroupableOrderedDict(output) 103 | 104 | 105 | to_marc21 = Underdo(entry_point_group='dojson.contrib.to_marc21') 106 | 107 | to_marc21_authority = Underdo( 108 | entry_point_group='dojson.contrib.to_marc21_authority') 109 | """MARC 21 Format for Authority Data.""" 110 | -------------------------------------------------------------------------------- /dojson/contrib/to_marc21/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015, 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or 7 | # modify it under the terms of the Revised BSD License; see LICENSE 8 | # file for more details. 9 | 10 | """Utilities for converting to MARC21.""" 11 | 12 | import pkg_resources 13 | from lxml import etree 14 | from lxml.builder import ElementMaker 15 | 16 | from dojson._compat import iteritems, string_types 17 | from dojson.utils import GroupableOrderedDict 18 | 19 | MARC21_DTD = pkg_resources.resource_filename( 20 | 'dojson.contrib.marc21', 'MARC21slim.dtd') 21 | """Location of the MARC21 DTD file""" 22 | 23 | MARC21_NS = "http://www.loc.gov/MARC21/slim" 24 | """MARCXML XML Schema""" 25 | 26 | 27 | def dumps_etree(records, xslt_filename=None, prefix=None): 28 | """Dump records into a etree.""" 29 | E = ElementMaker(namespace=MARC21_NS, nsmap={prefix: MARC21_NS}) 30 | 31 | def dump_record(record): 32 | """Dump a single record.""" 33 | rec = E.record() 34 | 35 | leader = record.get('leader') 36 | if leader: 37 | rec.append(E.leader(leader)) 38 | 39 | if isinstance(record, GroupableOrderedDict): 40 | items = record.iteritems(with_order=False, repeated=True) 41 | else: 42 | items = iteritems(record) 43 | 44 | for df, subfields in items: 45 | # Control fields 46 | if len(df) == 3: 47 | if isinstance(subfields, string_types): 48 | controlfield = 
E.controlfield(subfields) 49 | controlfield.attrib['tag'] = df[0:3] 50 | rec.append(controlfield) 51 | elif isinstance(subfields, (list, tuple, set)): 52 | for subfield in subfields: 53 | controlfield = E.controlfield(subfield) 54 | controlfield.attrib['tag'] = df[0:3] 55 | rec.append(controlfield) 56 | else: 57 | # Skip leader. 58 | if df == 'leader': 59 | continue 60 | 61 | if not isinstance(subfields, (list, tuple, set)): 62 | subfields = (subfields,) 63 | 64 | df = df.replace('_', ' ') 65 | for subfield in subfields: 66 | if not isinstance(subfield, (list, tuple, set)): 67 | subfield = [subfield] 68 | 69 | for s in subfield: 70 | datafield = E.datafield() 71 | datafield.attrib['tag'] = df[0:3] 72 | datafield.attrib['ind1'] = df[3] 73 | datafield.attrib['ind2'] = df[4] 74 | 75 | if isinstance(s, GroupableOrderedDict): 76 | items = s.iteritems(with_order=False, repeated=True) 77 | elif isinstance(s, dict): 78 | items = iteritems(s) 79 | else: 80 | datafield.append(E.subfield(s)) 81 | 82 | items = tuple() 83 | 84 | for code, value in items: 85 | if not isinstance(value, string_types): 86 | for v in value: 87 | datafield.append(E.subfield(v, code=code)) 88 | else: 89 | datafield.append(E.subfield(value, code=code)) 90 | 91 | rec.append(datafield) 92 | return rec 93 | 94 | if isinstance(records, dict): 95 | root = dump_record(records) 96 | else: 97 | root = E.collection() 98 | for record in records: 99 | root.append(dump_record(record)) 100 | 101 | if xslt_filename is not None: 102 | xslt_root = etree.parse(open(xslt_filename)) 103 | transform = etree.XSLT(xslt_root) 104 | root = transform(root).getroot() 105 | 106 | return root 107 | 108 | 109 | def dumps(records, xslt_filename=None, **kwargs): 110 | """Dump records into a MarcXML file.""" 111 | root = dumps_etree(records=records, xslt_filename=xslt_filename) 112 | return etree.tostring( 113 | root, 114 | pretty_print=True, 115 | xml_declaration=True, 116 | encoding='UTF-8', 117 | **kwargs 118 | ) 119 | 
class DoJSONException(Exception):
    """Common base class of every exception defined by DoJSON.

    .. versionadded:: 1.0.0
    """


class IgnoreKey(DoJSONException):
    """Signal that the corresponding key has been ignored.

    .. versionadded:: 0.2.0
    """


class IgnoreItem(DoJSONException):
    """Signal that an item of the current iterable has been ignored.

    .. versionadded:: 1.3.0
    """


class MissingRule(DoJSONException):
    """Raised when no matching rule was found.

    .. versionadded:: 1.0.0
    """
try:
    from _sre import MAXGROUPS
except ImportError:
    MAXGROUPS = 100


class Index(object):
    """Index implementation based on the built-in Python SRE module.

    Rules are compiled into alternations of named groups, ``branch_size``
    rules per compiled pattern, so that one ``match`` call tests a whole
    branch at once.
    """

    def __init__(self, rules=None, flags=0, branch_size=MAXGROUPS - 1):
        """Initialize index structures.

        :param rules: list of tuples (regular expression, data)
        :param flags: additional flags passed to SRE parser
        :param branch_size: number of groups in a branch; it is capped at
                            the number of rules
        """
        self._patterns = []
        self.flags = flags
        self.rules = rules or []
        self.branch_size = min(branch_size, len(self.rules))

        def make_pattern(rules, flags=0):
            """Compile rules to a single branch with one named group each."""
            # Group names are ``T<position in branch>``; ``query`` strips
            # the one-letter prefix to recover the position.
            return re.compile('|'.join('(?P<T{name}>{regex})'.format(
                name=name, regex=regex
            ) for name, (regex, _) in enumerate(rules)), flags=flags)

        # Chunk the rules into branches; ``zip_longest`` pads the last
        # branch with ``None``, which is filtered out again.
        for rules in zip_longest(*[iter(self.rules)] * self.branch_size):
            self._patterns.append(make_pattern([
                rule for rule in rules if rule is not None
            ], flags=self.flags))

    def query(self, key):
        """Return data matching the key, or ``None`` if no rule matches."""
        for section, pattern in enumerate(self._patterns):
            match = pattern.match(key)
            if match:
                return self.rules[section * self.branch_size + int(
                    match.lastgroup[1:]
                )][1]


class Overdo(object):
    """Translation index."""

    def __init__(self, bases=None, entry_point_group=None):
        """Initialize.

        :param bases: optional iterable of other translation indexes whose
                      rules are copied into this one.
        :param entry_point_group: optional entry point group whose entry
                                  points are loaded before the index is
                                  built.
        """
        self.rules = []
        if bases:
            for base in bases:
                base._collect_entry_points()
                self.rules.extend(base.rules)
        self.entry_point_group = entry_point_group
        self.index = None

    def _collect_entry_points(self):
        """Load every entry point of ``entry_point_group``, if one is set."""
        if self.entry_point_group is not None:
            for entry_point in iter_entry_points(
                    group=self.entry_point_group, name=None):
                entry_point.load()

    def build(self):
        """Load the entry points and (re)build the rule index."""
        self._collect_entry_points()
        self.index = Index(self.rules)

    def over(self, name, *source_tags):
        """Register creator rule.

        :param name: output name stored together with the creator.
        :param source_tags: regular expressions; one rule is registered
                            per expression.
        :returns: a decorator that registers the creator and returns it
                  unchanged.
        """
        def decorator(creator):
            # Invalidate the index so it is rebuilt with the new rules.
            self.index = None
            for field in source_tags:
                self.rules.append((field, (name, creator)))
            return creator
        return decorator

    def do(self, blob, ignore_missing=True, exception_handlers=None):
        """Translate blob values and instantiate new model instance.

        Raises ``MissingRule`` when no rule matched and ``ignore_missing``
        is ``False``.

        :param blob: ``dict``-like object on which the matching rules are
                     going to be applied.
        :param ignore_missing: Set to ``False`` if you prefer to raise
                               an exception ``MissingRule`` for the first
                               key that it is not matching any rule.
        :param exception_handlers: Give custom exception handlers to take care
                                   of non-standard codes that are installation
                                   specific.
        :returns: ``dict`` mapping rule names to the values produced by the
                  creators.

        .. versionchanged:: 1.0.0

           ``ignore_missing`` allows to specify if the function should raise
           an exception.

        .. versionchanged:: 1.1.0

           ``exception_handlers`` allows to set custom handlers for
           non-standard MARC codes.
        """
        # A ``None`` handler means "swallow this exception type silently".
        handlers = {IgnoreKey: None}
        handlers.update(exception_handlers or {})

        def clean_missing(exc, output, key, value):
            """Drop the unmatched key from the output's ``__order__``."""
            order = output.get('__order__')
            if order:
                order.remove(key)

        if ignore_missing:
            handlers.setdefault(MissingRule, clean_missing)

        output = {}

        if self.index is None:
            self.build()

        if isinstance(blob, GroupableOrderedDict):
            items = blob.iteritems(repeated=True)
        else:
            items = iteritems(blob)

        for key, value in items:
            try:
                result = self.index.query(key)
                if not result:
                    raise MissingRule(key)

                name, creator = result
                data = creator(output, key, value)
                if getattr(creator, '__extend__', False):
                    # Creators flagged with ``__extend__`` accumulate
                    # their results instead of overwriting them.
                    existing = output.get(name, [])
                    existing.extend(data)
                    output[name] = existing
                else:
                    output[name] = data
            except Exception as exc:
                # Only the exact exception class is looked up; anything
                # without a registered handler propagates unchanged.
                if exc.__class__ in handlers:
                    handler = handlers[exc.__class__]
                    if handler is not None:
                        handler(exc, output, key, value)
                else:
                    raise

        return output

    def missing(self, blob):
        """Return keys with missing rules."""
        if self.index is None:
            self.build()
        return [key for key in blob.keys() if self.index.query(key) is None]
14 | """ 15 | 16 | # Do not change the format of this next line. Doing so risks breaking 17 | # setup.py and docs/conf.py 18 | 19 | __version__ = "1.5.0" 20 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2014, 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or modify 7 | # it under the terms of the Revised BSD License; see LICENSE file for 8 | # more details. 9 | 10 | 11 | [pytest] 12 | addopts = --isort --pydocstyle --pycodestyle --doctest-glob="*.rst" --doctest-modules --cov=dojson --cov-report=term-missing 13 | testpaths = tests dojson 14 | filterwarnings = ignore::pytest.PytestDeprecationWarning 15 | -------------------------------------------------------------------------------- /run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | # 4 | # This file is part of Invenio. 5 | # Copyright (C) 2015-2020 CERN. 6 | # 7 | # Invenio is free software; you can redistribute it and/or modify it 8 | # under the terms of the MIT License; see LICENSE file for more details. 9 | 10 | # Quit on errors 11 | set -o errexit 12 | 13 | # Quit on unbound symbols 14 | set -o nounset 15 | 16 | python -m check_manifest --ignore ".*-requirements.txt" 17 | python -m sphinx.cmd.build -qnNW docs docs/_build/html 18 | python -m pytest 19 | python -m sphinx.cmd.build -qnNW -b doctest docs docs/_build/doctest 20 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # This file is part of DoJSON 2 | # Copyright (C) 2015, 2016 CERN. 
"""DoJSON is a simple Pythonic JSON to JSON converter."""

import os
import re

from setuptools import setup

# Get the version string. Cannot be done with import!
with open(os.path.join('dojson', 'version.py'), 'rt') as f:
    # Raw string: ``\s`` must reach the regex engine untouched. The named
    # group is what ``.group('version')`` reads back.
    version = re.search(
        r'__version__\s*=\s*"(?P<version>.*)"\n',
        f.read()
    ).group('version')

# Read the long description up front so the file handle is closed.
with open('README.rst') as readme_file:
    long_description = readme_file.read()

tests_require = [
    'jsonschema>=3.0',
    'mock>=1.3.0',
    'pytest-invenio>=1.4.0',
]

extras_require = {
    'docs': [
        'Sphinx>=3',
    ],
    'jsonschema': [
        'jsonschema>=2.5.1',
    ],
    'tests': tests_require,
}

# Build 'all' from the other extras *before* adding the key, so the list
# is never extended with itself (which listed every requirement twice).
extras_require['all'] = [
    req for reqs in extras_require.values() for req in reqs
]

install_requires = [
    'click>=8.1',
    'lxml>=4.1.1',
    'simplejson>=3.8.1',
]

setup(
    name='dojson',
    version=version,
    url='http://github.com/inveniosoftware/dojson/',
    license='BSD',
    author='Invenio collaboration',
    author_email='info@inveniosoftware.org',
    description=__doc__,
    long_description=long_description,
    packages=['dojson'],
    zip_safe=False,
    include_package_data=True,
    platforms='any',
    setup_requires=[
        'pytest-runner>=2.6.2',
        'setuptools>=17.1',
    ],
    extras_require=extras_require,
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: Implementation :: PyPy',
        'Development Status :: 4 - Beta',
    ],
    install_requires=install_requires,
    tests_require=tests_require,
    entry_points={
        'console_scripts': [
            'dojson = dojson.cli:cli',
        ],
        'dojson.contrib.marc21': [
            'bdleader = dojson.contrib.marc21.fields.bdleader',
            'bd00x = dojson.contrib.marc21.fields.bd00x',
            'bd01x09x = dojson.contrib.marc21.fields.bd01x09x',
            'bd1xx = dojson.contrib.marc21.fields.bd1xx',
            'bd20x24x = dojson.contrib.marc21.fields.bd20x24x',
            'bd25x28x = dojson.contrib.marc21.fields.bd25x28x',
            'bd3xx = dojson.contrib.marc21.fields.bd3xx',
            'bd4xx = dojson.contrib.marc21.fields.bd4xx',
            'bd5xx = dojson.contrib.marc21.fields.bd5xx',
            'bd6xx = dojson.contrib.marc21.fields.bd6xx',
            'bd70x75x = dojson.contrib.marc21.fields.bd70x75x',
            'bd76x78x = dojson.contrib.marc21.fields.bd76x78x',
            'bd80x83x = dojson.contrib.marc21.fields.bd80x83x',
            'bd84188x = dojson.contrib.marc21.fields.bd84188x',
        ],
        'dojson.contrib.to_marc21': [
            'bdleader = dojson.contrib.to_marc21.fields.bdleader',
            'bd00x = dojson.contrib.to_marc21.fields.bd00x',
            'bd01x09x = dojson.contrib.to_marc21.fields.bd01x09x',
            'bd1xx = dojson.contrib.to_marc21.fields.bd1xx',
            'bd20x24x = dojson.contrib.to_marc21.fields.bd20x24x',
            'bd25x28x = dojson.contrib.to_marc21.fields.bd25x28x',
            'bd3xx = dojson.contrib.to_marc21.fields.bd3xx',
            'bd4xx = dojson.contrib.to_marc21.fields.bd4xx',
            'bd5xx = dojson.contrib.to_marc21.fields.bd5xx',
            'bd6xx = dojson.contrib.to_marc21.fields.bd6xx',
            'bd70x75x = dojson.contrib.to_marc21.fields.bd70x75x',
            'bd76x78x = dojson.contrib.to_marc21.fields.bd76x78x',
            'bd80x83x = dojson.contrib.to_marc21.fields.bd80x83x',
            'bd84188x = dojson.contrib.to_marc21.fields.bd84188x',
        ],
        'dojson.contrib.marc21_authority': [
            'adleader = dojson.contrib.marc21.fields.adleader',
            'ad00x = dojson.contrib.marc21.fields.ad00x',
            'ad01x09x = dojson.contrib.marc21.fields.ad01x09x',
            'ad1xx3xx = dojson.contrib.marc21.fields.ad1xx3xx',
            'ad260360 = dojson.contrib.marc21.fields.ad260360',
            'ad4xx = dojson.contrib.marc21.fields.ad4xx',
            'ad5xx = dojson.contrib.marc21.fields.ad5xx',
            'ad64x = dojson.contrib.marc21.fields.ad64x',
            'ad663666 = dojson.contrib.marc21.fields.ad663666',
            'ad66768x = dojson.contrib.marc21.fields.ad66768x',
            'ad7xx = dojson.contrib.marc21.fields.ad7xx',
            'ad8xx = dojson.contrib.marc21.fields.ad8xx',
        ],
        'dojson.contrib.to_marc21_authority': [
            'ad00x = dojson.contrib.to_marc21.fields.ad00x',
            'ad01x09x = dojson.contrib.to_marc21.fields.ad01x09x',
            'ad1xx = dojson.contrib.to_marc21.fields.ad1xx',
            'ad25x28x = dojson.contrib.to_marc21.fields.ad25x28x',
            'ad3xx = dojson.contrib.to_marc21.fields.ad3xx',
            'ad4xx = dojson.contrib.to_marc21.fields.ad4xx',
            'ad5xx = dojson.contrib.to_marc21.fields.ad5xx',
            'ad64x = dojson.contrib.to_marc21.fields.ad6xx',
            'ad70x75x = dojson.contrib.to_marc21.fields.ad70x75x',
            'ad76x78x = dojson.contrib.to_marc21.fields.ad76x78x',
            'ad84188x = dojson.contrib.to_marc21.fields.ad84188x',
        ],
        'dojson.contrib.marc21_holdings': [
            'hd00x = dojson.contrib.marc21.fields.hd00x',
            'hd0xx = dojson.contrib.marc21.fields.hd0xx',
            'hd3xx5xx84x = dojson.contrib.marc21.fields.hd3xx5xx84x',
            'hd85xhd88x = dojson.contrib.marc21.fields.hd85xhd88x',
        ],
        'dojson.cli': [
            'do = dojson.cli.command:process_do',
            'missing = dojson.cli.command:process_missing',
            'schema = dojson.cli.command:process_schema',
            'validate = dojson.cli.command:process_validate',
        ],
        'dojson.cli.rule': [
            'marc21 = dojson.contrib.marc21:marc21',
            'marc21_authority = dojson.contrib.marc21:marc21_authority',
            'marc21_holdings = dojson.contrib.marc21:marc21_holdings',
            'to_marc21 = dojson.contrib.to_marc21:to_marc21',
            'to_marc21_authority = dojson.contrib.to_marc21:to_marc21_authority',
        ],
        'dojson.cli.load': [
            'json = dojson.utils:load',
            'marcxml = dojson.contrib.marc21.utils:load',
        ],
        'dojson.cli.dump': [
            'json = dojson.utils:dump',
            'marcxml = dojson.contrib.to_marc21.utils:dumps',
        ],
    },
)
165 | 63 166 | 167 | 168 | 169 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 189 | -------------------------------------------------------------------------------- /tests/data/authority/ad25x28x.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | heading_referred_to 6 | explanatory_text 7 | authority_record_control_number 8 | linkage 9 | field_link_and_sequence_number 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/data/authority/ad6xx.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dates_of_publication_and_or_sequential_designation 6 | source_of_information 7 | linkage 8 | field_link_and_sequence_number 9 | 10 | 11 | numbering_peculiarities_note 12 | source_of_information 13 | linkage 14 | field_link_and_sequence_number 15 | 16 | 17 | series_numbering_example 18 | volumes_dates_to_which_series_numbering_example_applies 19 | institution_copy_to_which_field_applies 20 | linkage 21 | field_link_and_sequence_number 22 | 23 | 24 | place 25 | publisher_issuing_body 26 | volumes_dates_to_which_place_and_publisher_issuing_body_apply 27 | linkage 28 | field_link_and_sequence_number 29 | 30 | 31 | series_analysis_practice 32 | exceptions_to_analysis_practice 33 | volumes_dates_to_which_analysis_practice_applies 34 | institution_copy_to_which_field_applies 35 | linkage 36 | field_link_and_sequence_number 37 | 38 | 39 | series_tracing_practice 40 | volumes_dates_to_which_tracing_practice_applies 41 | institution_copy_to_which_field_applies 42 | linkage 43 | field_link_and_sequence_number 44 | 45 | 46 | series_classification_practice 47 | volumes_dates_to_which_classification_practice_applies 48 | institution_to_which_field_applies 49 | linkage 50 | field_link_and_sequence_number 51 | 52 | 53 | explanatory_text 54 | heading_referred_to 55 | title_referred_to 56 | 
linkage 57 | field_link_and_sequence_number 58 | 59 | 60 | explanatory_text 61 | heading_referred_to 62 | title_referred_to 63 | linkage 64 | field_link_and_sequence_number 65 | 66 | 67 | history_reference 68 | linkage 69 | field_link_and_sequence_number 70 | 71 | 72 | general_explanatory_reference 73 | linkage 74 | field_link_and_sequence_number 75 | 76 | 77 | nonpublic_general_note 78 | institution_to_which_field_applies 79 | linkage 80 | field_link_and_sequence_number 81 | 82 | 83 | source_citation 84 | information_found 85 | uniform_resource_identifier 86 | bibliographic_record_control_number 87 | linkage 88 | field_link_and_sequence_number 89 | 90 | 91 | title 92 | remainder_of_title 93 | date 94 | bibliographic_record_control_number 95 | authority_record_control_number_or_standard_number 96 | linkage 97 | field_link_and_sequence_number 98 | 99 | 100 | title 101 | remainder_of_title 102 | date 103 | bibliographic_record_control_number 104 | authority_record_control_number_or_standard_number 105 | linkage 106 | field_link_and_sequence_number 107 | 108 | 109 | source_citation 110 | linkage 111 | field_link_and_sequence_number 112 | 113 | 114 | biographical_or_historical_data 115 | expansion 116 | uniform_resource_identifier 117 | linkage 118 | field_link_and_sequence_number 119 | 120 | 121 | heading_or_subdivision_term 122 | explanatory_text 123 | institution_to_which_field_applies 124 | linkage 125 | field_link_and_sequence_number 126 | 127 | 128 | subject_heading_or_subdivision_term 129 | explanatory_text 130 | linkage 131 | field_link_and_sequence_number 132 | 133 | 134 | replacement_heading 135 | explanatory_text 136 | replacement_authority_record_control_number 137 | linkage 138 | field_link_and_sequence_number 139 | 140 | 141 | application_history_note 142 | institution_to_which_field_applies 143 | linkage 144 | field_link_and_sequence_number 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- 
/tests/data/authority/ad76x78x.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | medium_of_performance_term_as_entry_element 6 | relationship_information 7 | control_subfield 8 | authority_record_control_number_or_standard_number 9 | source_of_heading_or_term 10 | relationship_code 11 | institution_to_which_field_applies 12 | linkage 13 | field_link_and_sequence_number 14 | 15 | 16 | relationship_information 17 | form_subdivision 18 | control_subfield 19 | general_subdivision 20 | chronological_subdivision 21 | geographic_subdivision 22 | authority_record_control_number_or_standard_number 23 | source_of_heading_or_term 24 | relationship_code 25 | institution_to_which_field_applies 26 | linkage 27 | field_link_and_sequence_number 28 | 29 | 30 | relationship_information 31 | form_subdivision 32 | control_subfield 33 | general_subdivision 34 | chronological_subdivision 35 | geographic_subdivision 36 | authority_record_control_number_or_standard_number 37 | source_of_heading_or_term 38 | relationship_code 39 | institution_to_which_field_applies 40 | linkage 41 | field_link_and_sequence_number 42 | 43 | 44 | relationship_information 45 | form_subdivision 46 | control_subfield 47 | general_subdivision 48 | chronological_subdivision 49 | geographic_subdivision 50 | authority_record_control_number_or_standard_number 51 | source_of_heading_or_term 52 | relationship_code 53 | institution_to_which_field_applies 54 | linkage 55 | field_link_and_sequence_number 56 | 57 | 58 | relationship_information 59 | form_subdivision 60 | control_subfield 61 | general_subdivision 62 | chronological_subdivision 63 | geographic_subdivision 64 | authority_record_control_number_or_standard_number 65 | source_of_heading_or_term 66 | relationship_code 67 | institution_to_which_field_applies 68 | linkage 69 | field_link_and_sequence_number 70 | 71 | 72 | heading_referred_to 73 | explanatory_text 74 | source_of_heading_or_term 75 | 
relationship_code 76 | institution_to_which_field_applies 77 | linkage 78 | field_link_and_sequence_number 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /tests/data/authority/ad84188x.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | host_name 6 | access_number 7 | compression_information 8 | path 9 | electronic_name 10 | processor_of_request 11 | instruction 12 | bits_per_second 13 | password 14 | logon 15 | contact_for_access_assistance 16 | name_of_location_of_host 17 | operating_system 18 | port 19 | electronic_format_type 20 | settings 21 | file_size 22 | terminal_emulation 23 | uniform_resource_identifier 24 | hours_access_method_available 25 | record_control_number 26 | nonpublic_note 27 | link_text 28 | public_note 29 | Snail mail 30 | materials_specified 31 | linkage 32 | field_link_and_sequence_number 33 | 34 | 35 | same_as_associated_field_a 36 | same_as_associated_field_b 37 | same_as_associated_field_c 38 | same_as_associated_field_d 39 | same_as_associated_field_e 40 | same_as_associated_field_f 41 | same_as_associated_field_g 42 | same_as_associated_field_h 43 | same_as_associated_field_i 44 | same_as_associated_field_j 45 | same_as_associated_field_k 46 | same_as_associated_field_l 47 | same_as_associated_field_m 48 | same_as_associated_field_n 49 | same_as_associated_field_o 50 | same_as_associated_field_p 51 | same_as_associated_field_q 52 | same_as_associated_field_r 53 | same_as_associated_field_s 54 | same_as_associated_field_t 55 | same_as_associated_field_u 56 | same_as_associated_field_v 57 | same_as_associated_field_w 58 | same_as_associated_field_x 59 | same_as_associated_field_y 60 | same_as_associated_field_z 61 | same_as_associated_field_0 62 | same_as_associated_field_1 63 | same_as_associated_field_2 64 | same_as_associated_field_3 65 | same_as_associated_field_4 66 | same_as_associated_field_5 67 | linkage 68 | 
same_as_associated_field_7 69 | same_as_associated_field_8 70 | same_as_associated_field_9 71 | 72 | 73 | generation_process 74 | confidence_value 75 | generation_date 76 | generation_agency 77 | uniform_resource_identifier 78 | bibliographic_record_control_number 79 | validity_end_date 80 | authority_record_control_number_or_standard_number 81 | field_link_and_sequence_number 82 | 83 | 84 | conversion_process 85 | conversion_date 86 | identifier_of_source_metadata 87 | conversion_agency 88 | uniform_resource_identifier 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /tests/data/handcrafted/bd01x09x.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | class_num#1 6 | class_num#2 7 | class_num#3 8 | 5158208052 9 | copy info blah 10 | 55521.5151\616 11 | 55521.5151\617 12 | 55521.5151\618 13 | 14 | 15 | 16 | 17 | class_num#1 18 | class_num#2 19 | class_num#3 20 | 5158208052 21 | copy info blah#1 22 | copy info blah#2 23 | copy info blah#3 24 | 55521.5151\616 25 | 55521.5151\617 26 | 55521.5151\618 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /tests/data/handcrafted/bd3xx.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Master key 42 6 | 880-01$a 7 | 12421.45552\52144 8 | 22421.45552\52145 9 | 32421.45552\52146 10 | 11 | 12 | 13 | 14 | en 15 | de 16 | fi 17 | it 18 | en 19 | de 20 | fi 21 | it 22 | 880-01$a 23 | 5161254241.2421414124\5251 24 | 25 | 26 | 27 | 28 | en 29 | de 30 | fi 31 | it 32 | en 33 | de 34 | fi 35 | it 36 | library of lala 37 | 880-01$a 38 | 5161254241.2421414124\5251 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /tests/data/handcrafted/bd6xx.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 1800-1899 6 | 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /tests/data/handcrafted/bdleader.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 01230nama#2200289ucb4500 5 | 6 | Author 7 | 8 | 9 | 10 | 01230nam 2200289# 4500 11 | 12 | Author2 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/data/test_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 136 5 | 20160203181958.0 6 | 030529s1993\\\\ne\a\\\\\b\\\\101\0\eng\d 7 | 8 | 0444899251 9 | 10 | 11 | en 12 | fr 13 | it 14 | iso639-1 15 | 16 | 17 | IFIP International Workshop on Protocols for High Speed Networks 18 | (3rd : 19 | 1992 : 20 | Stockholm, Sweden) 21 | 22 | 23 | Protocols for high-speed networks III.: 24 | proceedings of the IFIP WG 6.1 / WG 6.4 Third International Workshop on Protocols for High Speed Networks, Stockholm, Sweden, 13-15 May, 1992 / 25 | edited by B. Pehrson, P. Gunningberg and S. Pink. 26 | 27 | 28 | Amsterdam : 29 | North-Holland, 30 | c.1993. 31 | 32 | 33 | 267 p. 34 | 35 | 36 | Telecommunications - Networks 37 | 38 | 39 | Transmission quality 40 | 41 | 42 | Transmission protocols 43 | 44 | 45 | Communications protocols 46 | 47 | 48 | Pehrson, Björn 49 | 50 | 51 | Gunningberg, Per 52 | 53 | 54 | Pink, Stephen 55 | 56 | 57 | IFIP 58 | 59 | 60 | Library Reading Room 61 | 621.39.B6210L 62 | I23 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/data/test_10.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Actes du colloque international informatique et société (24-28 septembre 1979; Paris) 7 | 8 | 9 | 10 | Toimituksia. 11 | Annales. 12 | Sarja/Series A. 
13 | III: Geologica-geographica 14 | 1-60 15 | 16 | 17 | Panoplist 18 | (varies slightly) 19 | June 1805-May 1808 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/data/test_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Monthly, 6 | 7 | 8 | Bimonthly, 9 | 2003- 10 | 11 | 12 | book 13 | unmediated 14 | n 15 | rdamedia 16 | 17 | 18 | volume 19 | nc 20 | rdacarrier 21 | book 22 | 23 | 24 | digital 25 | optical 26 | Dolby digital 5.1 27 | rda 28 | 29 | 30 | digital 31 | stereo 32 | rda 33 | 34 | 35 | abstracts ; 36 | organized by topic on program ; 37 | arranged alphabetically by author 38 | 39 | 40 | Vol. 17, no. 1(1968)- . 41 | 42 | 43 | Textbooks 44 | lcsh 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /tests/data/test_12.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Chemistry of functional groups. 6 | Supplement ; 7 | A 8 | 9 | 10 | Electrophoresis ; 11 | 12 | 13 | Neues Jahrbuch für Mineralogie. 14 | v. 166, no. 1 15 | 16 | 17 | Journal de physique. 18 | Colloque ; 19 | C3 20 | 21 | 22 | IEEE transactions on circuits and systems. 23 | I, 24 | Regular papers ; 25 | v. 55, special issue 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/data/test_15.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Convention for the Protection of Human Rights and Fundamental Freedoms and Protocol 7 | 8 | (1950) 9 | 10 | 11 | American Mathematical Society translations. 12 | (Indexes) 13 | 14 | 15 | Bible. 16 | O.T. 17 | Nehemiah. 18 | English. 19 | 1965. 20 | Myers 21 | 22 | 23 | Blaue Engel 24 | (motion picture) 25 | 26 | 27 | Vedas. 28 | Yajurveda. 29 | Vājasaneyisaṃhitā. 30 | Selections. 
31 | French 32 | 33 | 34 | Erlanger Forschungen. 35 | Reihe B. 36 | Naturwissenschaften 37 | 38 | 39 | I/EC. 40 | Industrial and engineering chemistry 41 | 42 | 43 | God save the King; 44 | arr 45 | 46 | 47 | BioOne (Organization) 48 | 49 | 50 | SpringerLink online journal archive. 51 | Mathematics and statistics 52 | CIT 53 | 54 | 55 | 880-07 56 | Wen hai. 57 | Tangut 58 | 59 | 60 | translation of (work) 61 | Modellbildung und Simulation. 62 | English 63 | 64 | 65 | Carleton College (Northfield, Minn.). 66 | Publications ; 67 | 68 | 69 | Atmospheric chemistry and physics 70 | 1680-7316 71 | 72 | 73 | 880-05 74 | 75 | Erh pai chung Chung-kuo tʻung su hsiao shuo shu yao 76 | 77 | 78 | 79 | Martini Buceri Opera omnia. 80 | Series III, 81 | Correspondance 82 | 83 | 84 | Boris Godunov [original version] 85 | Sound recording 86 | 87 | 88 | United States 89 | New York 90 | New York 91 | New York 92 | 93 | 94 | CD-ROM drive 95 | ISO 9660-formatted standard CD-ROM 96 | 97 | 98 | PC 99 | MS-DOS with Microsoft extensions 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /tests/data/test_16.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Gilbert, 6 | of Hoyland. 7 | Works. 8 | English ; 9 | v. 1-3 10 | 11 | 12 | Ṭabarī, 13 | 838?-923. 14 | Taʼrīkh al-rusul wa-al-mulūk. 15 | English ; 16 | v. 35 17 | 18 | 19 | Aristophanes. 20 | Works. 21 | English & Greek. 22 | 1980 ; 23 | v. 4 24 | 25 | 26 | Liszt, Franz, 27 | 1811-1886. 28 | Piano music. 29 | Selections (Bolet) ; 30 | v. 5 31 | 32 | 33 | Wagner, Richard, 34 | 1813-1883. 35 | Ring des Nibelungen. 36 | Walküre 37 | 38 | 39 | Clark, C. H. Douglas 40 | (Cecil Henry Douglas), 41 | 1890- 42 | 43 | Comprehensive treatise of atomic and molecular structure. 44 | 45 | v. 2 46 | 47 | 48 | Shakespeare, William, 49 | 1564-1616. 50 | Works. 51 | 1984. 52 | Cambridge University Press 53 | 54 | 55 | University of California. 
56 | University of California publications. 57 | English studies ; 58 | 8 59 | 60 | 61 | United States. 62 | Congress 63 | (82d, 1st session : 64 | 1951). 65 | Senate. 66 | Report 67 | 82d Congress ; no. 1039 68 | 69 | 70 | International Primatological Society. 71 | Congress 72 | (10th : 73 | 1984 : 74 | Nairobi, Kenya) 75 | 76 | Selected proceedings of the Tenth Congress of the International Primatological Society ; 77 | 78 | v. 3 79 | 80 | 81 | Eastern Analytical Symposium 82 | (17th : 83 | 1977 : 84 | New York). 85 | 1977 Eastern Analytical Symposium series 86 | 87 | 88 | International Geological Congress. 89 | 90 | International Subcommission on Stratigraphic Classification. 91 | 92 | Report ; 93 | no. 5 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /tests/data/test_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 413092 5 | 20150923221003.0 6 | 750804s1931\\\\nyu\\\\\\\\\\\00010\eng\\ 7 | 8 | 31034955 9 | 10 | 11 | .b10472952 12 | 13 | 14 | (OCoLC)1507739 15 | 16 | 17 | DLC 18 | FQG 19 | FDA 20 | OCL 21 | CIT 22 | 23 | 24 | Winchell, Alexander Newton, 25 | 1874-1958 26 | 27 | 28 | The microscopic characters of artificial inorganic solid substances or artificial minerals, 29 | by Alexander Newton Winchell, with a chapter on the universal stage by Richard Conrad Emmons. 30 | 31 | 32 | 2d ed 33 | 34 | 35 | New York 36 | J. Wiley & sons, inc. 37 | London 38 | Chapman & Hall, ltd. 39 | [c1931] 40 | 41 | 42 | xvii, 403 p. 43 | incl. illus., tables. diagrs. 44 | 24 cm 45 | 46 | 47 | "First edition...published in 1927 as Bo. 4 of the University of Wisconsin studies in science [under title "The optic and microscopic characters of artifical minerals"]. 
Part I on ʻPrinciples and methods, ̓which did not appear in the first edition of this work, consists in large part of revised selections from the author's ʻElements of optical mineralogy, ̓part I, 3d edition."--Pref 48 | 49 | 50 | Mineralogy, Determinative 51 | 52 | 53 | Emmons, Richard Conrad, 54 | 1893- 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /tests/data/test_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 404855 5 | 20151017082215.0 6 | 750228c19509999dcuar1\\\\\\\f0\\\\0eng\\ 7 | 8 | 47032975 //r512 9 | 10 | 11 | 0193-1180 12 | 13 | 14 | .b10375697 15 | 16 | 17 | (OCoLC)1193149 18 | 19 | 20 | DLC 21 | UDI 22 | COO 23 | DLC 24 | NSD 25 | GPO 26 | m.c. 27 | GPO 28 | NST 29 | GPO 30 | OCL 31 | GPO 32 | AIP 33 | HUL 34 | NSD 35 | GPO 36 | m/c 37 | OCL 38 | GPO 39 | NST 40 | GPO 41 | AGL 42 | GPO 43 | NST 44 | 45 | 46 | n-us--- 47 | 48 | 49 | HC106.5 50 | .A272 51 | 52 | 53 | Pr 40.9: 54 | 55 | 56 | Pr 41.9: 57 | 58 | 59 | United States. 60 | President 61 | 62 | 63 | Econ. rep. Pres. transm. Congr 64 | 65 | 66 | Economic report of the President transmitted to the Congress 67 | 68 | 69 | Economic report of the President transmitted to the Congress 70 | 71 | 72 | [Dept. ed.] 73 | 74 | 75 | Washington : 76 | G.P.O. : 77 | For sale by Supt. of Docs., U.S. G.P.O., 78 | 1950- 79 | 80 | 81 | v. : 82 | ill ; 83 | 24 cm 84 | 85 | 86 | Annual 87 | 88 | 89 | Jan. 6, 1950- 90 | 91 | 92 | Reports for 1950-1953 include: The annual economic review by the Council of Economic Advisers (July issue has title: The economic situation at midyear); 1954- include: The annual report of the Council of Economic Advisers (title varies slightly) 93 | 94 | 95 | Predicasts 96 | 97 | 98 | Vols. 
for 1950-<1952> accompanied by a supplementary report, dated July, with title: The midyear economic report 99 | 100 | 101 | United States 102 | Economic policy 103 | Periodicals 104 | 105 | 106 | United States 107 | Economic conditions 108 | 1945- 109 | Periodicals 110 | 111 | 112 | Council of Economic Advisers (U.S.). 113 | Annual economic review 114 | 115 | 116 | Council of Economic Advisers (U.S.). 117 | Midyear economic report 118 | 119 | 120 | Council of Economic Advisers (U.S.). 121 | Annual report of the Council of Economic Advisers 122 | 123 | 124 | United States. President. 125 | Economic report of the President transmitted to the Congress (Doc. ed.) 126 | (DLC)sn 87042042 127 | (OCoLC)8198980 128 | 129 | 130 | United States. President. 131 | Economic report of the President to the Congress 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /tests/data/test_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 748306 5 | 20150923234828.0 6 | m\\\\\o\\d\\\\\\\\ 7 | cr\cn| 8 | 110809s2011\\\\enk\\\\\ob\\\\001\0\eng\d 9 | 10 | 015799976 11 | Uk 12 | 13 | 14 | 9781849733472 (electronic bk.) 15 | 16 | 17 | 1849733473 (electronic bk.) 18 | 19 | 20 | (OCoLC)746035477 21 | 22 | 23 | T3517 24 | Royal Society of Chemistry 25 | http://www.rsc.org/spr 26 | 27 | 28 | UKRSC 29 | eng 30 | UKRSC 31 | UKMGB 32 | OCLCQ 33 | N$T 34 | OCLCF 35 | 36 | 37 | TP248.25.M46 38 | M46 2011 39 | 40 | 41 | Membrane engineering for the treatment of gases. 42 | Volume 1, 43 | Gas-separation problems with membranes 44 | [electronic resource] / 45 | editors: Enrico Drioli, Giuseppe Barbieri 46 | 47 | 48 | Gas-separation problems with membranes 49 | 50 | 51 | Cambridge : 52 | Royal Society of Chemistry, 53 | 2011 54 | 55 | 56 | 1 online resource (xix, 297 p.) 
57 | 58 | 59 | Includes bibliographical references and index 60 | 61 | 62 | Membranes already have important applications in artificial organs, the processing of biotechnological products, food manufacture, waste water treatment, and seawater desalination. Their uses in gaseous mixture separations are, however, far from achieving their full potential. Separation of air components, natural gas dehumidification and sweeting, separation and recovery of CO2 from biogas, and H2 from refinery gases are all examples of current industrial applications. The use of membranes for reducing the greenhouse effect and improving energy efficiency has also been suggested. New process intensification strategies in the petrochemical industry have opened up another growth area for gas separation membrane systems and membrane reactors. This two volume set presents the state-of-the-art in membrane engineering for the separation of gases. It addresses future developments in carbon capture and utilization, H2 production and purification, and O2/N2 separation. 
Topics covered include the: applications of membrane gas separation in the petrochemical industry; implementation of membrane processes for post-combustion capture; commercial applications of membranes in gas separations; simulation of membrane systems for CO2 capture; design and development of membrane reactors for industrial applications; Pd-based membranes in hydrogen production; modelling and simulation of membrane reactors for hydrogen production and purification; novel hybrid membrane/pressure swing adsorption process for gas separation; molecular dynamics as a new tool for membrane design, and physical aging of membranes for gas separations.Volume 1 focuses predominantly on problems relating to membranes 63 | 64 | 65 | Gas separation membranes 66 | 67 | 68 | Electronic books 69 | 70 | 71 | Drioli, E 72 | 73 | 74 | Barbieri, Giuseppe 75 | 76 | 77 | Royal Society of Chemistry eBooks 78 | 79 | 80 | https://clsproxy.library.caltech.edu/login?url=http://dx.doi.org/10.1039/9781849733472 81 | <a href="https://clsproxy.library.caltech.edu/login?url=http://dx.doi.org/10.1039/9781849733472" TARGET="_blank"><img src="http://sfx.caltech.edu:8088/images/sfx.gif" alt="Caltech Connect"></a> 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /tests/data/test_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 404855 5 | 20151017082215.0 6 | 750228c19509999dcuar1\\\\\\\f0\\\\0eng\\ 7 | 8 | 47032975 //r512 9 | 10 | 11 | 0193-1180 12 | 13 | 14 | .b10375697 15 | 16 | 17 | (OCoLC)1193149 18 | 19 | 20 | DLC 21 | UDI 22 | COO 23 | DLC 24 | NSD 25 | GPO 26 | m.c. 27 | GPO 28 | NST 29 | GPO 30 | OCL 31 | GPO 32 | AIP 33 | HUL 34 | NSD 35 | GPO 36 | m/c 37 | OCL 38 | GPO 39 | NST 40 | GPO 41 | AGL 42 | GPO 43 | NST 44 | 45 | 46 | n-us--- 47 | 48 | 49 | HC106.5 50 | .A272 51 | 52 | 53 | Pr 40.9: 54 | 55 | 56 | Pr 41.9: 57 | 58 | 59 | United States. 60 | President 61 | 62 | 63 | Econ. 
rep. Pres. transm. Congr 64 | 65 | 66 | Economic report of the President transmitted to the Congress 67 | 68 | 69 | Economic report of the President transmitted to the Congress 70 | 71 | 72 | [Dept. ed.] 73 | 74 | 75 | Washington : 76 | G.P.O. : 77 | For sale by Supt. of Docs., U.S. G.P.O., 78 | 1950- 79 | 80 | 81 | v. : 82 | ill ; 83 | 24 cm 84 | 85 | 86 | Annual 87 | 88 | 89 | Jan. 6, 1950- 90 | 91 | 92 | Reports for 1950-1953 include: The annual economic review by the Council of Economic Advisers (July issue has title: The economic situation at midyear); 1954- include: The annual report of the Council of Economic Advisers (title varies slightly) 93 | 94 | 95 | Predicasts 96 | 97 | 98 | Vols. for 1950-<1952> accompanied by a supplementary report, dated July, with title: The midyear economic report 99 | 100 | 101 | United States 102 | Economic policy 103 | Periodicals 104 | 105 | 106 | United States 107 | Economic conditions 108 | 1945- 109 | Periodicals 110 | 111 | 112 | Council of Economic Advisers (U.S.). 113 | Annual economic review 114 | 115 | 116 | Council of Economic Advisers (U.S.). 117 | Midyear economic report 118 | 119 | 120 | Council of Economic Advisers (U.S.). 121 | Annual report of the Council of Economic Advisers 122 | 123 | 124 | United States. President. 125 | Economic report of the President transmitted to the Congress (Doc. ed.) 126 | (DLC)sn 87042042 127 | (OCoLC)8198980 128 | 129 | 130 | United States. President. 
131 | Economic report of the President to the Congress 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /tests/data/test_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 6 | Test Description 7 | 8 | 9 | lsid 10 | urn:lsid:ubio.org:namebank:11815 11 | alternateIdentifier 12 | 13 | 14 | ads 15 | 2011ApJS..192...18K 16 | alternateIdentifier 17 | 18 | 19 | doi 20 | 10.1234/alternate.doi 21 | alternateIdentifier 22 | 23 | 24 | Harvard-Smithsonian Center for Astrophysics 25 | The 13th Biennial HITRAN Conference 26 | HITRAN13 27 | 23-25 June, 2014 28 | VI 29 | 1 30 | 31 | 32 | 4 33 | 34 | 35 | 1234 36 | Grant Title 37 | 38 | 39 | 4321 40 | Title Grant 41 | 42 | 43 | CERN 44 | ths 45 | Smith, Jane 46 | 47 | 48 | CERN 49 | ths 50 | Kowalski, Jane 51 | 52 | 53 | kw1 54 | 55 | 56 | kw2 57 | 58 | 59 | kw3 60 | 61 | 62 | 2014-02-27 63 | 64 | 65 | CERN 66 | Doe, Jane 67 | 68 | 69 | CERN 70 | Smith, John 71 | 72 | 73 | CERN 74 | Nowak, Jack 75 | 76 | 77 | 9876 78 | 79 | 80 | DOI 81 | 10.1234/foo.bar 82 | 83 | 84 | CERN 85 | oth 86 | Smith, Other 87 | 88 | 89 | 90 | oth 91 | Hansen, Viggo 92 | 93 | 94 | CERN 95 | dtm 96 | Kowalski, Manager 97 | 98 | 99 | http://zenodo.org 100 | Creative Commons 101 | 102 | 103 | cc-by 104 | opendefinition.org 105 | 106 | 107 | Test title 108 | 109 | 110 | test_term 111 | 112 | 113 | notes 114 | 115 | 116 | CERN 117 | Doe, John 118 | 119 | 120 | 10.1234/foo.bar 121 | cites 122 | doi 123 | 124 | 125 | 1234.4321 126 | cites 127 | arxiv 128 | 129 | 130 | open 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /tests/data/test_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 011220910 6 | dnb 7 | 8 | 9 | CAT10911698 10 | DNAL 11 | 12 | 13 | DLE-20120307-13801 14 | FR 15 | 16 | 17 | 44601 18 | G. 
Schirmer 19 | 20 | 21 | 19911119 22 | 19920423 23 | 4034 24 | D2 25 | 26 | 27 | a 28 | 12000 29 | 3200000 30 | W1484000 31 | W1482000 32 | N0702000 33 | N0701000 34 | 35 | 36 | pcc 37 | nsdp 38 | 39 | 40 | cau 41 | gw 42 | fr 43 | au 44 | xxu 45 | usa 46 | 47 | 48 | 2009 49 | 50 | 51 | Tue Aug 02 08:13:10 EDT 2011 52 | 53 | 54 | CIT6 55 | 56 | 57 | 00281313 58 | 59 | 60 | 3182 61 | J8 62 | T5 63 | 64 | 65 | QV 350 66 | S224 2015 67 | 68 | 69 | (S 70 | 71 | 72 | QC981.8.G56 73 | H69 2014 74 | 75 | 76 | 856-B-9 (MF) 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /tests/data/test_9.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Francis, 6 | of Assisi, Saint, 7 | 1182-1226. 8 | Legend. 9 | Fioretti. 10 | English 11 | 12 | 13 | 14 | 15 | 16 | Scientific reports (Australasian Antarctic Expedition (1911-1914)). 17 | 18 | Series C, 19 | Zoology and botany 20 | 21 | 22 | 23 | 24 | Bible. 25 | N.T. 26 | John III, 16. 27 | Polyglot. 28 | 1965 29 | 30 | 31 | 32 | 33 | Bible. 34 | O.T. 35 | Song of Solomon. 36 | English. 37 | Pope. 38 | 1977 39 | 40 | 41 | 42 | 43 | Bible. 44 | O.T. 45 | Historical books. 46 | English. 47 | Selections. 48 | 1957. 49 | Pfeiffer 50 | 51 | 52 | 53 | 54 | Bulletin 55 | [1894-1908] (South Dakota Geological Survey) 56 | 57 | 58 | 59 | 60 | Talmud. 61 | Minor tractates. 
62 | English 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/demo_marc21_to_dc.converted.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Donges, Jonathan F 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/demo_marc21_to_dc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Donges, Jonathan F 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015, 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or modify 7 | # it under the terms of the Revised BSD License; see LICENSE file for 8 | # more details. 9 | 10 | """Test suite for DoJSON.""" 11 | 12 | from __future__ import absolute_import 13 | 14 | import codecs 15 | import os 16 | 17 | import pkg_resources 18 | import pytest 19 | import simplejson as json 20 | from click.testing import CliRunner 21 | 22 | from dojson import cli 23 | from dojson.contrib.marc21.utils import create_record 24 | from test_core import RECORD_999_FIELD, RECORD_SIMPLE 25 | 26 | 27 | @pytest.mark.parametrize('file_name', [ 28 | 'test_1.xml', 29 | 'test_2.xml', 30 | 'test_3.xml', 31 | 'test_4.xml', 32 | 'test_5.xml', 33 | 'test_6.xml', 34 | 'test_7.xml', 35 | 'test_8.xml', 36 | 'test_9.xml', 37 | 'test_11.xml', 38 | 'test_12.xml', 39 | 'test_13.xml', 40 | 'test_14.xml', 41 | 'test_15.xml', 42 | 'test_16.xml', 43 | 'test_cds_marc21.xml', 44 | 'handcrafted/bd01x09x.xml', 45 | 'handcrafted/bd3xx.xml', 46 | 'handcrafted/bd6xx.xml', 47 | 'handcrafted/bdleader.xml', 48 | 'library_of_congress/bd01x09x.xml', 49 | 'library_of_congress/bd1xx.xml', 50 | 
'library_of_congress/bd20x24x.xml', 51 | 'library_of_congress/bd25x28x.xml', 52 | 'library_of_congress/bd3xx.xml', 53 | 'library_of_congress/bd4xx.xml', 54 | 'library_of_congress/bd5xx.xml', 55 | 'library_of_congress/bd6xx.xml', 56 | 'library_of_congress/bd70x75x.xml', 57 | 'library_of_congress/bd76x78x.xml', 58 | 'library_of_congress/bd80x83x.xml', 59 | 'library_of_congress/bd84188x.xml', 60 | ]) 61 | def test_xml_to_marc21_to_xml(file_name): 62 | """Test xslt dump.""" 63 | path = os.path.dirname(__file__) 64 | # Open explicitly as UTF-8 for Python 2.7 compatibility 65 | with codecs.open( 66 | os.path.join(path, 'data', file_name), 67 | 'r', 68 | 'utf-8') as myfile: 69 | expect = myfile.read() 70 | 71 | schema = pkg_resources.resource_filename( 72 | 'dojson.contrib.marc21.schemas', 73 | 'marc21/bibliographic/bd-v1.0.2.json' 74 | ) 75 | 76 | runner = CliRunner() 77 | result = runner.invoke( 78 | cli.cli, [ 79 | '-i', os.path.join(path, 'data', file_name), 80 | '-l', 'marcxml', 81 | '-d', 'marcxml', 82 | 'do', 'marc21', 83 | 'validate', schema, 84 | 'do', 'to_marc21', 85 | ] 86 | ) 87 | 88 | assert expect.strip('\n') == result.output.strip('\n') 89 | assert result.exit_code == 0 90 | 91 | 92 | @pytest.mark.parametrize('file_name', [ 93 | 'authority/ad01x09x.xml', 94 | 'authority/ad1xx.xml', 95 | 'authority/ad3xx.xml', 96 | 'authority/ad4xx.xml', 97 | 'authority/ad5xx.xml', 98 | 'authority/ad6xx.xml', 99 | 'authority/ad25x28x.xml', 100 | 'authority/ad70x75x.xml', 101 | 'authority/ad76x78x.xml', 102 | 'authority/ad84188x.xml', 103 | ]) 104 | def test_xml_to_marc21_authority_to_xml(file_name): 105 | """Test xslt dump.""" 106 | path = os.path.dirname(__file__) 107 | # Open explicitly as UTF-8 for Python 2.7 compatibility 108 | with codecs.open( 109 | os.path.join(path, 'data', file_name), 110 | 'r', 111 | 'utf-8') as myfile: 112 | expect = myfile.read() 113 | 114 | schema = pkg_resources.resource_filename( 115 | 'dojson.contrib.marc21.schemas', 116 | 
'marc21/authority/ad-v1.0.2.json' 117 | ) 118 | 119 | runner = CliRunner() 120 | result = runner.invoke( 121 | cli.cli, [ 122 | '-i', os.path.join(path, 'data', file_name), 123 | '-l', 'marcxml', 124 | '-d', 'marcxml', 125 | 'do', 'marc21_authority', 126 | 'validate', schema, 127 | 'do', 'to_marc21_authority', 128 | ] 129 | ) 130 | 131 | assert expect.strip('\n') == result.output.strip('\n') 132 | assert result.exit_code == 0 133 | 134 | 135 | def test_cli_do_marc21_from_xml(): 136 | """Test MARC21 loading from XML.""" 137 | expected = [{ 138 | '__order__': ['main_entry_personal_name'], 139 | 'main_entry_personal_name': { 140 | '__order__': ['personal_name'], 141 | 'personal_name': 'Donges, Jonathan F', 142 | } 143 | }] 144 | 145 | runner = CliRunner() 146 | with runner.isolated_filesystem(): 147 | with open('record.xml', 'wb') as f: 148 | f.write(RECORD_SIMPLE.encode('utf-8')) 149 | 150 | result = runner.invoke( 151 | cli.cli, 152 | ['-i', 'record.xml', '-l', 'marcxml', 'missing', 'marc21'] 153 | ) 154 | assert '' == result.output 155 | assert 0 == result.exit_code 156 | 157 | result = runner.invoke( 158 | cli.cli, 159 | ['-i', 'record.xml', '-l', 'marcxml', 'do', 'marc21'] 160 | ) 161 | 162 | try: 163 | data = json.loads(result.output) 164 | assert expected == data 165 | except ValueError: 166 | assert False, result.output 167 | 168 | result = runner.invoke( 169 | cli.cli, 170 | ['-i', 'record.xml', '-l', 'marcxml', 'do', '--strict', 'marc21'] 171 | ) 172 | assert 0 == result.exit_code 173 | 174 | 175 | def test_cli_do_marc21_from_xml_unknown_fields(): 176 | """Test MARC21 loading from XML containing unknown fields.""" 177 | runner = CliRunner() 178 | with runner.isolated_filesystem(): 179 | with open('record_999.xml', 'wb') as f: 180 | f.write(RECORD_999_FIELD.encode('utf-8')) 181 | 182 | result = runner.invoke( 183 | cli.cli, 184 | ['-i', 'record_999.xml', '-l', 'marcxml', 'missing', 'marc21'] 185 | ) 186 | assert "999__" == result.output.strip() 187 | assert 1 
== result.exit_code 188 | result = runner.invoke( 189 | cli.cli, 190 | ['-i', 'record_999.xml', '-l', 'marcxml', 'do', 'marc21'] 191 | ) 192 | 193 | data = json.loads(result.output) 194 | assert {'__order__': []} == data[0] 195 | assert 0 == result.exit_code 196 | 197 | 198 | def test_cli_do_marc21_from_json(): 199 | """Test MARC21 loading from XML.""" 200 | expected = [{ 201 | '$schema': '/schema.json', 202 | '__order__': ['main_entry_personal_name'], 203 | 'main_entry_personal_name': { 204 | '__order__': ['personal_name'], 205 | 'personal_name': 'Donges, Jonathan F', 206 | } 207 | }] 208 | 209 | runner = CliRunner() 210 | with runner.isolated_filesystem(): 211 | with open('record.json', 'wb') as fp: 212 | record = create_record(RECORD_SIMPLE) 213 | fp.write(json.dumps(record).encode('utf-8')) 214 | 215 | result = runner.invoke( 216 | cli.cli, 217 | ['-i', 'record.json', 'missing', 'marc21'] 218 | ) 219 | assert '' == result.output, result.exception 220 | assert 0 == result.exit_code 221 | 222 | result = runner.invoke( 223 | cli.cli, 224 | ['-i', 'record.json', 'do', 'marc21', 'schema', '/schema.json'] 225 | ) 226 | 227 | assert 0 == result.exit_code, result.exception 228 | 229 | try: 230 | data = json.loads(result.output) 231 | assert expected == data 232 | except ValueError: 233 | assert False, result.output 234 | -------------------------------------------------------------------------------- /tests/test_contrib_to_marc21_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or modify 7 | # it under the terms of the Revised BSD License; see LICENSE file for 8 | # more details. 
9 | 10 | """Test suite for DoJSON to_marc21.""" 11 | 12 | import os 13 | 14 | import pkg_resources 15 | import pytest 16 | from click.testing import CliRunner 17 | from lxml import etree 18 | from lxml.etree import _Element 19 | 20 | from dojson.contrib.marc21.utils import load 21 | from dojson.contrib.to_marc21.utils import dumps, dumps_etree 22 | from test_core import RECORD_SIMPLE 23 | 24 | 25 | def test_xslt_not_found(): 26 | """Test xslt not found.""" 27 | runner = CliRunner() 28 | with runner.isolated_filesystem(): 29 | with open('record.xml', 'wb') as f: 30 | f.write(RECORD_SIMPLE.encode('utf-8')) 31 | data = list(load('record.xml')) 32 | pytest.raises(IOError, dumps, data, xslt_filename='file_not_exist') 33 | 34 | 35 | def test_xslt_dump(): 36 | """Test xslt dump.""" 37 | path = os.path.dirname(__file__) 38 | with open('{0}/demo_marc21_to_dc.converted.xml'.format(path)) as myfile: 39 | expect = myfile.read() 40 | data = list(load('{0}/demo_marc21_to_dc.xml'.format(path))) 41 | output = dumps( 42 | data, 43 | xslt_filename='{0}/demo_marc21_to_dc.xslt'.format(path) 44 | ) 45 | assert output.decode('utf-8') == expect 46 | 47 | 48 | def test_entry_points(): 49 | """Test entry points.""" 50 | dump = list(pkg_resources.iter_entry_points( 51 | 'dojson.cli.dump', 'marcxml' 52 | ))[0].load() 53 | path = os.path.dirname(__file__) 54 | with open("{0}/demo_marc21_to_dc.converted.xml".format(path)) as myfile: 55 | expect = myfile.read() 56 | data = list(load('{0}/demo_marc21_to_dc.xml'.format(path))) 57 | output = dump(data, 58 | xslt_filename='{0}/demo_marc21_to_dc.xslt'.format(path)) 59 | assert output.decode('utf-8') == expect 60 | 61 | 62 | def test_output_type_from_dumps_etree(): 63 | """Test output type from dumps_etree.""" 64 | path = os.path.dirname(__file__) 65 | data = list(load('{0}/demo_marc21_to_dc.xml'.format(path))) 66 | # test without arguments 67 | output1 = dumps_etree(data) 68 | # test with xslt_filename argument 69 | output2 = dumps_etree( 70 | data, 
71 | xslt_filename='{0}/demo_marc21_to_dc.xslt'.format(path) 72 | ) 73 | # it should not generate a TypeError exception 74 | assert isinstance(output1, _Element) 75 | assert isinstance(output2, _Element) 76 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DoJSON 4 | # Copyright (C) 2015, 2016 CERN. 5 | # 6 | # DoJSON is free software; you can redistribute it and/or modify 7 | # it under the terms of the Revised BSD License; see LICENSE file for 8 | # more details. 9 | 10 | """Test suite for DoJSON contrib MARC21 module.""" 11 | 12 | import copy 13 | 14 | import pytest 15 | import simplejson as json 16 | 17 | from dojson.utils import GroupableOrderedDict, force_list, reverse_force_list 18 | 19 | 20 | @pytest.fixture 21 | def god(): 22 | """Create a GroupableOrderedDict for testing.""" 23 | return GroupableOrderedDict([('a', 'dojson'), ('b', 2), ('c', 'invenio'), 24 | ('a', 4), ('b', 5)]) 25 | 26 | 27 | def test_single_element_not_dumped_as_list(): 28 | """Ensure list with a single element is not dumped as a list.""" 29 | god = GroupableOrderedDict([('c', 'invenio')]) 30 | god_dump = json.dumps(god) 31 | fields = [ 32 | '"c": "invenio"', 33 | '"__order__": ["c"]', 34 | '{', 35 | '}', 36 | ] 37 | for field in fields: 38 | assert field in god_dump 39 | 40 | 41 | def test_groupable_ordered_dict_is_immutable(god): 42 | """Test that a GroupableOrderedDict is immutable indeed.""" 43 | with pytest.raises(TypeError): 44 | god['a'] = [1, 2] 45 | 46 | with pytest.raises(AttributeError): 47 | god['a'].append(1) 48 | 49 | god.values().append(('spam', 'ham')) 50 | 51 | with pytest.raises(KeyError): 52 | god['spam'] 53 | 54 | 55 | def test_groupable_ordered_dict_keys(god): 56 | """Test that a GroupableOrderedDict has keys like a dict, but more.""" 57 | keys = god.keys() 58 | keys.sort() 
59 | assert ['a', 'b', 'c'] == keys 60 | assert ['a', 'b', 'c', 'a', 'b'] == god.keys(repeated=True) 61 | 62 | 63 | def test_groupable_ordered_dict_items(god): 64 | """Test that a GroupableOrderedDict has items like a dict, but more.""" 65 | assert (('a', ('dojson', 4)), 66 | ('b', (2, 5)), 67 | ('c', 'invenio')) == god.items(with_order=False) 68 | 69 | assert (('__order__', ('a', 'b', 'c', 'a', 'b')), 70 | ('a', ('dojson', 4)), 71 | ('b', (2, 5)), 72 | ('c', 'invenio')) == god.items() 73 | 74 | assert (('__order__', ('a', 'b', 'c', 'a', 'b')), 75 | ('a', 'dojson'), 76 | ('b', 2), 77 | ('c', 'invenio'), 78 | ('a', 4), 79 | ('b', 5)) == god.items(repeated=True) 80 | 81 | 82 | def test_groupable_ordered_dict_get(god): 83 | """Test that a GroupableOrderedDict has get like a dict.""" 84 | assert ('dojson', 4) == god.get('a') 85 | assert 'spam' == god.get('d', 'spam') 86 | 87 | 88 | def test_groupable_ordered_dict_values(god): 89 | """Test that a GroupableOrderedDict has values like a dict, but more.""" 90 | assert ['dojson', 2, 'invenio', 4, 5] == god.values(expand=True) 91 | 92 | assert [('dojson', 4), (2, 5), 'invenio'] == god.values() 93 | 94 | 95 | def test_groupable_ordered_dict_eq(god): 96 | """Test that a GroupableOrderedDict can be compared with ==.""" 97 | expected = {'a': ('dojson', 4), 'b': (2, 5), 'c': 'invenio'} 98 | 99 | # switching the comparisons to use __eq__. 
100 | assert god == expected 101 | assert not (god != expected) 102 | 103 | 104 | def test_groupable_ordered_dict_copy(god): 105 | """Test that a GroupableOrderedDict can be copied.""" 106 | god2 = copy.copy(god) 107 | 108 | assert god == god2 109 | 110 | 111 | def test_groupable_ordered_dict_deepcopy(god): 112 | """Test that a GroupableOrderedDict can be copied deeply.""" 113 | god2 = copy.deepcopy(god) 114 | 115 | assert id(god) != id(god2) 116 | assert god == god2 117 | 118 | 119 | def test_groupable_ordered_dict_new(god): 120 | """Test that a GroupableOrderedDict can be created from a same element.""" 121 | god2 = GroupableOrderedDict(god) 122 | 123 | assert god == god2 124 | 125 | 126 | def test_groupable_ordered_dict_to_json(god): 127 | """Test that a GroupableOrderedDict can be serialized to JSON.""" 128 | # JSON output order not deterministic, compare piece by piece 129 | fields = [ 130 | '"a": ["dojson", 4]', 131 | '"c": "invenio"', 132 | '"b": [2, 5]', 133 | '"__order__": ["a", "b", "c", "a", "b"]', 134 | '{', 135 | '}', 136 | ] 137 | 138 | god_dump = json.dumps(god) 139 | for field in fields: 140 | assert field in god_dump 141 | 142 | expected = json.loads(json.dumps(god)) 143 | assert expected == json.loads(json.dumps(god, indent=4)) 144 | 145 | 146 | def test_groupable_ordered_dict_iterable(god): 147 | """Test that a GroupableOrderedDict is iterable like a dict.""" 148 | iterator = iter(god) 149 | 150 | assert '__order__' == next(iterator) 151 | assert 'a' == next(iterator) 152 | assert 'b' == next(iterator) 153 | assert 'c' == next(iterator) 154 | with pytest.raises(StopIteration): 155 | next(iterator) 156 | 157 | 158 | def test_groupable_ordered_dict_recreate(god): 159 | """Test that a GroupableOrderedDict can be recreated from a dict.""" 160 | god2 = GroupableOrderedDict({'__order__': ('a', 'b', 'c', 'a', 'b'), 161 | 'a': ('dojson', 4), 162 | 'b': (2, 5), 163 | 'c': 'invenio'}) 164 | 165 | assert god2 == god 166 | 167 | 168 | def 
test_groupable_ordered_dict_repr(god): 169 | """Test that a eval(repr(god)) == god.""" 170 | assert eval(repr(god)) == god 171 | 172 | 173 | def test_empty_elements(): 174 | """Test empty elements.""" 175 | from dojson.contrib.marc21.utils import create_record 176 | xml = ( 177 | '' 178 | ) 179 | data = create_record(xml) 180 | assert '037__' in data.keys() 181 | assert data['037__'] == {} 182 | assert (('__order__', ('037__', )), ('037__', {})) == data.items() 183 | 184 | 185 | def test_force_list_roundtrips(): 186 | assert reverse_force_list(force_list('foo')) == 'foo' 187 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # This file is part of DoJSON 2 | # Copyright (C) 2014 CERN. 3 | # 4 | # DoJSON is free software; you can redistribute it and/or modify 5 | # it under the terms of the Revised BSD License; see LICENSE file for 6 | # more details. 7 | 8 | [tox] 9 | envlist = py26, py27, py33, py34 10 | 11 | [testenv] 12 | deps = pytest 13 | pytest-cov 14 | pytest-pep8 15 | commands = {envpython} setup.py test 16 | --------------------------------------------------------------------------------