├── tests
├── __init__.py
├── data
│ ├── image
│ │ └── dog2.jpeg
│ ├── fail_file.dat
│ ├── test.csv
│ ├── pwscf
│ │ └── NaF.scf.tar.gz
│ ├── vasp
│ │ └── AlNi_static_LDA.tar.gz
│ ├── electron_microscopy
│ │ └── test_files.tar.gz
│ ├── yaml
│ │ └── test_yaml.yaml
│ ├── xml
│ │ └── test_xml.xml
│ ├── json
│ │ └── test_json.json
│ ├── crystal_structure
│ │ ├── diamond.cif
│ │ ├── Ce3VO16.cif
│ │ ├── Al2O3.cif
│ │ └── C13H22O3.cif
│ ├── tdb
│ │ ├── test_PbTe.TDB
│ │ ├── test_AuSi.TDB
│ │ └── PbSSeTe_Na.TDB
│ └── cif
│ │ └── 1548397.cif
├── test_version.py
├── test_image.py
├── test_ase.py
├── test_adapter.py
├── test_crystal_structure.py
├── conftest.py
├── test_file.py
├── test_base.py
├── test_csv.py
├── test_filename.py
├── test_tdb.py
├── test_dft.py
├── test_json.py
├── test_yaml.py
├── test_xml.py
└── test_utils.py
├── .python-version
├── MANIFEST.in
├── docs
├── requirements.txt
├── source
│ ├── api
│ │ └── scythe.rst
│ ├── index.rst
│ ├── extractors.rst
│ ├── goals.rst
│ ├── conf.py
│ ├── user-guide.rst
│ └── contributor-guide.rst
├── Makefile
└── make.bat
├── test-requirements.txt
├── scythe
├── __init__.py
├── adapters
│ ├── __init__.py
│ └── base.py
├── version.py
├── testing.py
├── image.py
├── schemas
│ └── file.json
├── json.py
├── yaml.py
├── xml.py
├── tdb.py
├── filename.py
├── crystal_structure.py
├── ase.py
├── file.py
├── utils
│ ├── grouping.py
│ ├── __init__.py
│ └── interface.py
├── csv.py
├── dft.py
└── base.py
├── setup.cfg
├── .gitignore
├── readthedocs.yaml
├── .github
└── workflows
│ ├── release.yml
│ └── test-suite-and-docs.yml
├── README.md
├── pyproject.toml
└── LICENSE
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.8.12
2 | 3.9.12
3 | 3.10.4
4 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include scythe/schemas *.json
2 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>5
2 | sphinx-rtd-theme
3 |
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | flake8
4 | diff-cover
5 |
--------------------------------------------------------------------------------
/scythe/__init__.py:
--------------------------------------------------------------------------------
1 | from scythe.version import __version__ # noqa: F401
2 |
--------------------------------------------------------------------------------
/scythe/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | """Functions and classes related to adapters"""
2 |
--------------------------------------------------------------------------------
/tests/data/image/dog2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/materials-data-facility/scythe/HEAD/tests/data/image/dog2.jpeg
--------------------------------------------------------------------------------
/tests/data/fail_file.dat:
--------------------------------------------------------------------------------
1 | This is a test file that is designed to fail most extraction, because it contains no data.
2 |
--------------------------------------------------------------------------------
/tests/data/test.csv:
--------------------------------------------------------------------------------
1 | city,location
2 | london,"51.50,-0.11"
3 | paris,"48.85,2.30"
4 | rome,"41.90,12.49"
5 | utopia,N/A
6 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
4 | [flake8]
5 | exclude = .git,*.egg*,src/*,.*
6 | max-line-length = 160
7 |
--------------------------------------------------------------------------------
/tests/data/pwscf/NaF.scf.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/materials-data-facility/scythe/HEAD/tests/data/pwscf/NaF.scf.tar.gz
--------------------------------------------------------------------------------
/tests/test_version.py:
--------------------------------------------------------------------------------
1 | from scythe import version
2 |
3 |
def test_version():
    """The installed package exposes its version string."""
    assert isinstance(version.__version__, str)
6 |
--------------------------------------------------------------------------------
/tests/data/vasp/AlNi_static_LDA.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/materials-data-facility/scythe/HEAD/tests/data/vasp/AlNi_static_LDA.tar.gz
--------------------------------------------------------------------------------
/tests/data/electron_microscopy/test_files.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/materials-data-facility/scythe/HEAD/tests/data/electron_microscopy/test_files.tar.gz
--------------------------------------------------------------------------------
/tests/data/yaml/test_yaml.yaml:
--------------------------------------------------------------------------------
1 | compost: CN25
2 | dict1:
3 | field1: value1
4 | field2: 2
5 | dict2:
6 | nested1:
7 | field1: true
8 | field3: value3
9 | na_val: na
10 |
--------------------------------------------------------------------------------
/tests/data/xml/test_xml.xml:
--------------------------------------------------------------------------------
1 |
2 | value12bazvalue3CN25na
3 |
--------------------------------------------------------------------------------
/scythe/version.py:
--------------------------------------------------------------------------------
1 | # we target 3.8+, so this should be okay without fallback to importlib_metadata
2 | import importlib.metadata
3 |
4 | # single source of truth for package version,
5 | # see https://packaging.python.org/en/latest/single_source_version/
6 |
7 | __version__ = importlib.metadata.version('scythe-extractors')
8 |
--------------------------------------------------------------------------------
/tests/data/json/test_json.json:
--------------------------------------------------------------------------------
1 | {
2 | "dict1": {
3 | "field1": "value1",
4 | "field2": 2
5 | },
6 | "dict2": {
7 | "nested1": {
8 | "field1": true,
9 | "field3": "value3"
10 | }
11 | },
12 | "compost": "CN25",
13 | "na_val": "na"
14 | }
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | *.swo
4 | *.ipynb_checkpoints
5 | Untitled*.ipynb
6 | *.cache*
7 |
8 | *.DS_Store
9 |
10 | */build/*
11 | docs/_build
12 | scythe/schemas/doc/
13 | */dist/*
14 | *.egg*
15 | src/*
16 | dist/
17 | .tox
18 |
19 | coverage_html/
20 | coverage.xml
21 | *.coverage*
22 | *.pytest_cache*
23 | .vscode
24 | .venv
25 | .idea
26 | test_files/
27 | *.xpr
28 |
--------------------------------------------------------------------------------
/docs/source/api/scythe.rst:
--------------------------------------------------------------------------------
1 | scythe
2 | ======
3 |
4 | Documentation for the non-parser functions in ``scythe``.
5 |
6 | scythe.adapters.base
7 | --------------------
8 |
9 | .. automodule:: scythe.adapters.base
10 | :members:
11 |
12 | scythe.utils.interface
13 | ----------------------
14 |
15 | .. automodule:: scythe.utils.interface
16 | :members:
17 |
18 | scythe.utils.grouping
19 | ---------------------
20 |
21 | .. automodule:: scythe.utils.grouping
22 | :members:
--------------------------------------------------------------------------------
/tests/test_image.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 |
4 | from scythe.image import ImageExtractor
5 |
6 |
@pytest.fixture
def test_image():
    """Path to the sample JPEG shipped with the test data."""
    return os.path.join(os.path.dirname(__file__), 'data', 'image', 'dog2.jpeg')
10 |
11 |
def test_parse(test_image):
    """ImageExtractor reports format, dimensions, megapixels and shape."""
    p = ImageExtractor()
    assert (p.extract([test_image]) == {'image': {'format': 'JPEG', 'height': 1000,
                                                  'megapixels': 1.91, 'width': 1910,
                                                  'shape': [1000, 1910, 3]}})
17 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = source
8 | BUILDDIR = build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | version: 2
3 |
4 | # Set the version of Python and other tools you might need
5 | build:
6 | os: ubuntu-22.04
7 | tools:
8 | python: "3.10"
9 |
10 | # Build documentation in the docs/ directory with Sphinx
11 | sphinx:
12 | configuration: docs/source/conf.py
13 |
14 | # If using Sphinx, optionally build your docs in additional formats such as PDF
15 | formats:
16 | - pdf
17 |
18 | # Optionally declare the Python requirements required to build your docs
19 | python:
20 | install:
21 | - requirements: docs/requirements.txt
22 | - method: pip
23 | path: .
24 | extra_requirements:
25 | - all
--------------------------------------------------------------------------------
/scythe/testing.py:
--------------------------------------------------------------------------------
1 | """Parsers used for testing purposes"""
2 |
3 | from scythe.base import BaseExtractor
4 | from typing import Iterable
5 | import os
6 |
7 |
class NOOPExtractor(BaseExtractor):
    """Report whether each file in a group exists; used for debugging.

    Not a true "noop" extractor: it still checks that the extractor can
    see each file, making it a "verify access, then do nothing" extractor.
    """

    def extract(self, group: Iterable[str], context: dict = None):
        # Map each path in the group to whether it exists on disk.
        return {path: os.path.exists(path) for path in group}

    def version(self):
        return '0.0.1'

    def implementors(self):
        return ['Logan Ward ']
24 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to Scythe's documentation!
2 | =======================================
3 |
4 | Scythe is a library of tools that generate summaries of the data contained in scientific
5 | data files. The goal of Scythe is to provide a shared resources of these tool to avoid
6 | duplication of effort between the many emerging scientific databases. Each extractor is designed to
7 | generate the sum of all data needed by each of these databases with a uniform API so that specific
8 | projects can write simple adaptors for their needs.
9 |
10 | Source Code: https://github.com/materials-data-facility/Scythe
11 |
12 | .. toctree::
13 | :maxdepth: 2
14 | :caption: Contents:
15 |
16 | goals
17 | user-guide
18 | contributor-guide
19 | extractors
20 | api/scythe
21 |
--------------------------------------------------------------------------------
/tests/test_ase.py:
--------------------------------------------------------------------------------
1 | from scythe.ase import ASEExtractor
2 | from math import isclose
3 | import pytest
4 | import os
5 |
6 |
@pytest.fixture
def ase():
    """Path to a Gaussian log file for the ASE extractor test."""
    # NOTE(review): 'data/gaussian/' does not appear in the test-data tree
    # shown here — confirm this fixture file is shipped or unpacked.
    return os.path.join(os.path.dirname(__file__), 'data', 'gaussian', 'molecule.log')
10 |
11 |
@pytest.fixture
def parser():
    """Fresh ASEExtractor instance for each test."""
    return ASEExtractor()
15 |
16 |
def test_ase(parser, ase):
    """ASEExtractor pulls formula, forces and energy from a Gaussian log."""
    output = parser.extract(ase)

    # Check the chemical formula
    assert output['chemical_formula'] == "C38H14N8O12"

    # Check the shape of the force outputs. There should be
    # 72 atoms and forces in 3 directions
    assert len(output['forces'][0]) == 72
    assert len(output['forces'][0][0]) == 3

    assert isclose(output['energy'], -76063.21525532556)
29 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Publish to PyPi
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | publish:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v2
12 |
13 | - name: Install pypa/build
14 | run: >-
15 | python -m
16 | pip install
17 | build
18 | --user
19 |
20 | - name: Build a binary wheel and a source tarball
21 | run: >-
22 | python -m
23 | build
24 | --sdist
25 | --wheel
26 | --outdir dist/
27 | .
28 |
29 | - name: pypi-publish
30 | if: startsWith(github.ref, 'refs/tags')
31 | uses: pypa/gh-action-pypi-publish@release/v1
32 | with:
33 | password: ${{ secrets.PYPI_TOKEN }}
34 |
--------------------------------------------------------------------------------
/scythe/image.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 |
3 | from scythe.base import BaseSingleFileExtractor
4 |
5 |
class ImageExtractor(BaseSingleFileExtractor):
    """Retrieves basic information about an image"""

    def _extract_file(self, file_path, context=None):
        # Open lazily via PIL; only header metadata is needed.
        img = Image.open(file_path)
        width = img.width
        height = img.height
        band_count = len(img.getbands())
        metadata = {
            "width": width,
            "height": height,
            "format": img.format,
            "megapixels": (width * height) / 1000000,
            # Shape follows the numpy convention: rows, cols, channels.
            "shape": [height, width, band_count],
        }
        return {"image": metadata}

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.2'
30 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/scythe/schemas/file.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/schema#",
3 | "description": "Output from the Scythe File Parser",
4 | "type": "object",
5 | "properties": {
6 | "mime_type": {
7 | "type": "string",
8 | "description": "MIME type of the file. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types"
9 | },
10 | "length": {
11 | "type": "integer",
12 | "description": "File size in bytes"
13 | },
14 | "filename": {
15 | "type": "string",
16 | "description": "Name of the file, without the path information"
17 | },
18 | "path": {
19 | "type": "string",
20 | "description": "Complete path of the file, including directory"
21 | },
22 | "hash": {
23 | "type": "string",
24 | "description": "SHA512 hash of the file contents"
25 | }
26 | },
27 | "additionalProperties": false,
28 | "required": ["mime_type", "length", "filename"]
29 | }
30 |
--------------------------------------------------------------------------------
/tests/test_adapter.py:
--------------------------------------------------------------------------------
1 | from scythe.adapters.base import NOOPAdapter, GreedySerializeAdapter
2 | from scythe.testing import NOOPExtractor
3 |
4 |
def test_compatibility(): 
    """Adapter/extractor version compatibility check behaves as documented."""
    adapter = NOOPAdapter()
    parser = NOOPExtractor()

    # Make sure `None` is always compatible
    assert adapter.version() is None
    assert adapter.check_compatibility(parser)

    # Make sure giving the adapter the same version number works
    adapter.version = lambda: parser.version()
    assert adapter.check_compatibility(parser)

    # Make sure giving it a different version number breaks compatibility
    adapter.version = lambda: parser.version() + '1'
    assert not adapter.check_compatibility(parser)
20 |
21 |
def test_greedy_adapter_unserializable():
    """Non-JSON-serializable values (bytes) are replaced with a placeholder string."""
    adapter = GreedySerializeAdapter()
    unserializable_bytes = {'key': b'\x03\xdd'}
    s = adapter.transform(unserializable_bytes)
    assert s == '{"key": "<>"}'
27 |
--------------------------------------------------------------------------------
/tests/test_crystal_structure.py:
--------------------------------------------------------------------------------
1 | from scythe.crystal_structure import CrystalStructureExtractor
2 | from math import isclose
3 | import pytest
4 | import os
5 |
6 |
@pytest.fixture
def cif():
    """Path to the sample CIF file in the test data."""
    return os.path.join(os.path.dirname(__file__), 'data', 'cif', '1548397.cif')
10 |
11 |
@pytest.fixture
def parser():
    """Fresh CrystalStructureExtractor instance for each test."""
    return CrystalStructureExtractor()
15 |
16 |
def test_cif(parser, cif):
    """Extractor reports composition, space group, volume and atom count for a CIF."""
    output = parser.extract(cif)

    # Check the volume and number of atoms, which is a float
    assert isclose(output['crystal_structure']['volume'], 101836.44086588411)
    assert isclose(output['crystal_structure']['number_of_atoms'], 5070.0)

    # Check everything else (floats removed so dict equality is exact)
    del output['crystal_structure']['volume']
    del output['crystal_structure']['number_of_atoms']
    assert output == {'material': {'composition': 'Co270H1680C1872N324O924'},
                      'crystal_structure': {'space_group_number': 146,
                                            'stoichiometry': 'A45B54C154D280E312'}}
30 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import tarfile
2 | import pathlib
3 |
4 | tar_f = pathlib.Path(__file__).parent / 'data' / 'electron_microscopy' / \
5 | 'test_files.tar.gz'
6 |
7 |
def pytest_sessionstart(session):
    """
    Called after the Session object has been created and
    before performing collection and entering the run test loop.

    Unpack the compressed electron_microscopy test files.
    """
    # NOTE(review): tarfile.extractall does not sanitize member paths
    # (path traversal risk); acceptable here only because the archive is
    # part of this repository, not untrusted input.
    with tarfile.open(tar_f, 'r:gz') as tar:
        tar.extractall(path=pathlib.Path(tar_f).parent)
17 |
18 |
def pytest_sessionfinish(session, exitstatus):
    """
    Called after whole test run finished, right before
    returning the exit status to the system.

    Remove the unpacked test files.
    """
    # Re-read the archive's member list so we know exactly what was unpacked.
    with tarfile.open(tar_f, 'r:gz') as tar:
        member_names = tar.getnames()

    base_dir = pathlib.Path(__file__).parent / 'data' / 'electron_microscopy'
    for name in member_names:
        unpacked = base_dir / name
        if unpacked.is_file():
            unpacked.unlink()
34 |
--------------------------------------------------------------------------------
/scythe/json.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from mdf_toolbox import translate_json
4 |
5 | from scythe.base import BaseSingleFileExtractor
6 |
7 |
class JSONExtractor(BaseSingleFileExtractor):
    """Extracts fields in JSON into a user-defined new schema."""

    def _extract_file(self, path, context=None):
        """Context used:
            mapping (dict): Required. The mapping of desired_fields: existing_fields,
                using dot notation. For example:
                {"good_schema.good_field": "oldSchema.longpath.nestedDicts.old_field"}
            na_values (list of str): Values to treat as N/A. Default None.
        """
        mapping = context.get("mapping")
        if not mapping:
            raise ValueError("Mapping is required for the JSONExtractor.")
        with open(path) as handle:
            parsed = json.load(handle)
        # Remap the parsed document onto the caller-supplied schema.
        return translate_json(parsed, mapping,
                              na_values=context.get("na_values", None))

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.1'
30 |
--------------------------------------------------------------------------------
/scythe/yaml.py:
--------------------------------------------------------------------------------
1 | from mdf_toolbox import translate_json
2 | import yaml
3 |
4 | from scythe.base import BaseSingleFileExtractor
5 |
6 |
class YAMLExtractor(BaseSingleFileExtractor):
    """Extracts fields in YAML into a user-defined new schema in JSON."""

    def _extract_file(self, path, context=None):
        """Context used:
            mapping (dict): Required. The mapping of desired_fields: existing_fields,
                using dot notation. For example:
                {"good_schema.good_field": "oldSchema.longpath.nestedDicts.old_field"}
            na_values (list of str): Values to treat as N/A. Default None.
        """
        mapping = context.get("mapping")
        if not mapping:
            raise ValueError("Mapping is required for the YAMLExtractor.")
        # safe_load avoids executing arbitrary YAML tags.
        with open(path) as handle:
            parsed = yaml.safe_load(handle)
        return translate_json(parsed, mapping,
                              na_values=context.get("na_values", None))

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.1'
29 |
--------------------------------------------------------------------------------
/tests/data/crystal_structure/diamond.cif:
--------------------------------------------------------------------------------
1 | # generated using pymatgen
2 | data_C
3 | _symmetry_space_group_name_H-M 'P 1'
4 | _cell_length_a 3.57370926
5 | _cell_length_b 3.57370926
6 | _cell_length_c 3.57370926
7 | _cell_angle_alpha 90.00000000
8 | _cell_angle_beta 90.00000000
9 | _cell_angle_gamma 90.00000000
10 | _symmetry_Int_Tables_number 1
11 | _chemical_formula_structural C
12 | _chemical_formula_sum C8
13 | _cell_volume 45.64126285
14 | _cell_formula_units_Z 8
15 | loop_
16 | _symmetry_equiv_pos_site_id
17 | _symmetry_equiv_pos_as_xyz
18 | 1 'x, y, z'
19 | loop_
20 | _atom_site_type_symbol
21 | _atom_site_label
22 | _atom_site_symmetry_multiplicity
23 | _atom_site_fract_x
24 | _atom_site_fract_y
25 | _atom_site_fract_z
26 | _atom_site_occupancy
27 | C C1 1 0.250000 0.250000 0.250000 1
28 | C C2 1 0.000000 0.000000 0.000000 1
29 | C C3 1 0.250000 0.750000 0.750000 1
30 | C C4 1 0.000000 0.500000 0.500000 1
31 | C C5 1 0.750000 0.250000 0.750000 1
32 | C C6 1 0.500000 0.000000 0.500000 1
33 | C C7 1 0.750000 0.750000 0.250000 1
34 | C C8 1 0.500000 0.500000 0.000000 1
35 |
--------------------------------------------------------------------------------
/scythe/xml.py:
--------------------------------------------------------------------------------
1 | from mdf_toolbox import translate_json
2 | import xmltodict
3 |
4 | from scythe.base import BaseSingleFileExtractor
5 |
6 |
class XMLExtractor(BaseSingleFileExtractor):
    """Extracts fields in XML into a user-defined new schema in JSON."""

    def _extract_file(self, path, context=None):
        """Context used:
            mapping (dict): Required. The mapping of desired_fields: existing_fields,
                using dot notation. For example:
                {"good_schema.good_field": "oldSchema.longpath.nestedDicts.old_field"}
            na_values (list of str): Values to treat as N/A. Default None.
        """
        mapping = context.get("mapping")
        if not mapping:
            raise ValueError("Mapping is required for the XMLExtractor.")
        # Convert the XML document into nested dicts, then remap the fields.
        with open(path) as handle:
            parsed = xmltodict.parse(handle.read())
        return translate_json(parsed, mapping,
                              na_values=context.get("na_values", None))

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.1'
29 |
--------------------------------------------------------------------------------
/.github/workflows/test-suite-and-docs.yml:
--------------------------------------------------------------------------------
1 | name: Build Status
2 | on: [push, pull_request]
3 | jobs:
4 | run_test_suite:
5 | name: ${{ matrix.os }}-py${{ matrix.python-version }}
6 | env:
7 | target_dir: './docs/_build'
8 | strategy:
9 | fail-fast: false
10 | matrix:
11 | python-version: ["3.8", "3.9", "3.10"]
12 | os: [ubuntu-latest, macos-latest] #, windows-latest]
13 | runs-on: ${{ matrix.os }}
14 | steps:
15 | - name: Check out the repo at master branch (for diff-cover)
16 | uses: actions/checkout@v3
17 | with:
18 | ref: master
19 |
20 | - uses: actions/checkout@v3
21 |
22 | - uses: actions/setup-python@v2
23 | with:
24 | python-version: ${{ matrix.python-version }}
25 |
26 | - name: Install package
27 | run: |
28 | pip install -e.[all]
29 | pip install -r test-requirements.txt
30 |
31 | - name: Flake8
32 | run: flake8
33 |
34 | - name: Run tests
35 | run: pytest
36 |
37 | - name: Run diff-cover
38 | run: diff-cover coverage.xml --compare-branch=origin/master --diff-range-notation '..'
39 |
40 | - name: Upload coverage to Codecov
41 | uses: codecov/codecov-action@v2
42 |
43 | - name: Build documentation
44 | run: |
45 | cd docs
46 | pip install -r requirements.txt
47 | make html
48 |
--------------------------------------------------------------------------------
/scythe/tdb.py:
--------------------------------------------------------------------------------
1 | import os
2 | # pycalphad and hyperspy imports require this env var set
3 | # Triggers E402: module level import not at top of file, so noqa set for other imports
4 | os.environ["MPLBACKEND"] = "agg"
5 | import pycalphad # noqa: E402
6 |
7 | from scythe.base import BaseSingleFileExtractor # noqa: E402
8 |
9 |
class TDBExtractor(BaseSingleFileExtractor):
    """Extract metadata from a Thermodynamic Database (TDB) file.

    Built atop `PyCALPHAD `_.
    """

    def _extract_file(self, path, context=None):
        """Read a TDB file and summarize its elements and phases.

        Returns:
            dict: Record with ``material.composition`` (alphanumeric element
            symbols, capitalized and concatenated) and ``calphad.phases``
            (list of phase names); each key is included only when non-empty.
        """
        # Attempt to read the file
        calphad_db = pycalphad.Database(path)

        # Normalize element symbols (e.g. "FE" -> "Fe"); skip entries that
        # are not purely alphanumeric (e.g. vacancy pseudo-elements).
        composition = ''.join(
            element.capitalize()
            for element in calphad_db.elements
            if element.isalnum()
        )

        phases = list(calphad_db.phases.keys())

        # Create record, omitting sections that came back empty
        record = {}
        if composition:
            record["material"] = {'composition': composition}
        if phases:
            record["calphad"] = {'phases': phases}
        return record

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.1'
48 |
--------------------------------------------------------------------------------
/scythe/filename.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 |
4 | from mdf_toolbox import flatten_json
5 |
6 | from scythe.base import BaseSingleFileExtractor
7 |
8 |
class FilenameExtractor(BaseSingleFileExtractor):
    """Extracts metadata in a filename, according to user-supplied patterns."""

    def _extract_file(self, path, context=None):
        """Context used:
            mapping (dict): Required. The mapping of desired_fields: regex_pattern,
                using dot notation. For example:
                {"material.composition": "^[a-zA-Z]{3,4}"}
            na_values (list of str): Values to treat as N/A. Default None.
        """
        if not context.get("mapping"):
            raise ValueError("Mapping is required for the FilenameExtractor.")

        filename = os.path.basename(path)
        record = {}
        for json_path, pattern in flatten_json(context["mapping"]).items():
            match = re.search(pattern, filename)
            if not match:
                continue
            # Walk (creating as needed) the nested dicts for every field
            # except the last, then store the matched text under that key.
            *parents, leaf = json_path.split(".")
            node = record
            for field in parents:
                node = node.setdefault(field, {})
            node[leaf] = match.group()
        return record

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.1'
44 |
--------------------------------------------------------------------------------
/tests/test_file.py:
--------------------------------------------------------------------------------
1 | from scythe.file import GenericFileExtractor
2 | import pytest
3 | import os
4 |
5 |
def test_file():
    """GenericFileExtractor reports mime type, size, name, path, hash and
    (when libmagic is available) a human-readable data_type description."""
    my_file = os.path.join(os.path.dirname(__file__), 'data', 'image', 'dog2.jpeg')
    parser = GenericFileExtractor(store_path=True, compute_hash=True)
    output = parser.extract([my_file])
    expected = {
        'mime_type': 'image/jpeg',
        'length': 269360,
        'filename': 'dog2.jpeg',
        'path': my_file,
        'data_type': 'JPEG image data, JFIF standard 1.01, resolution (DPI), '
                     'density 300x300, segment length 16, Exif Standard: [TIFF '
                     'image data, little-endian, direntries=2, GPS-Data], '
                     'baseline, precision 8, 1910x1000, frames 3',
        'sha512': '1f47ed450ad23e92caf1a0e5307e2af9b13edcd7735ac9685c9f21c'
                  '9faec62cb95892e890a73480b06189ed5b842d8b265c5e47cc6cf27'
                  '9d281270211cff8f90'}

    # be defensive against data_type, which will only be present if the user has libmagic installed
    if 'data_type' not in output:
        # Drop the libmagic-derived keys, verify the rest, then xfail so the
        # missing-libmagic condition is visible in the test report.
        del expected['data_type']
        del expected['mime_type']
        assert output == expected
        assert isinstance(parser.schema, dict)
        pytest.xfail("'data_type' was not present in the parser output, most likely because "
                     "libmagic is not properly installed")

    # data_type wording varies between libmagic versions, so check substrings
    # rather than the full string, then compare everything else exactly.
    for i in ['JPEG image data', 'density 300x300', 'TIFF image data',
              '1910x1000']:
        assert i in output['data_type']
    del output['data_type']
    del expected['data_type']
    assert output == expected
    assert isinstance(parser.schema, dict)
39 |
--------------------------------------------------------------------------------
/tests/test_base.py:
--------------------------------------------------------------------------------
1 | from scythe.base import BaseExtractor, BaseSingleFileExtractor
2 | from glob import glob
3 | import pytest
4 | import os
5 |
6 |
class FakeParser(BaseExtractor):
    # Minimal BaseExtractor stub: echoes back the file group it was given.

    def extract(self, group, context=None):
        return {'group': list(group)}

    def implementors(self):
        return ['Logan Ward']

    def version(self):
        return '0.0.0'
17 |
18 |
class FakeSingleParser(BaseSingleFileExtractor):
    # Minimal single-file stub: returns only the directory of the given path.

    def _extract_file(self, path, context=None):
        return {'dirname': os.path.dirname(path)}

    def implementors(self):
        return ['Logan Ward']

    def version(self):
        return '0.0.0'
29 |
30 |
@pytest.fixture
def directory():
    """Directory containing this test module."""
    head, _tail = os.path.split(__file__)
    return head
34 |
35 |
@pytest.fixture
def parser():
    """A fresh FakeParser for each test."""
    return FakeParser()
39 |
40 |
@pytest.fixture
def my_files(directory):
    """Every regular file beneath the test directory, found recursively."""
    pattern = os.path.join(directory, '**', '*')
    return list(filter(os.path.isfile, glob(pattern, recursive=True)))
45 |
46 |
def test_group(parser, directory, my_files):
    # The base extractor places each file into its own 1-tuple group
    expected = set(zip(my_files))
    assert set(parser.group(my_files)) == expected
50 |
51 |
def test_parse_dir(caplog, parser, directory, my_files):
    # One metadata record should come back per file in the directory
    records = list(parser.extract_directory(directory))
    assert len(records) == len(my_files)
54 |
55 |
def test_citations(parser):
    # The base class supplies an empty citation list by default
    assert parser.citations() == []
58 |
59 |
def test_single_file(directory):
    parser = FakeSingleParser()
    # A bare path and a one-element list must both be accepted
    assert parser.extract(__file__) == {'dirname': directory}
    assert parser.extract([__file__]) == {'dirname': directory}
    # More than one file is an error for a single-file extractor
    with pytest.raises(ValueError):
        parser.extract(['/fake/file.in', '/fake/file.out'])
66 |
--------------------------------------------------------------------------------
/tests/test_csv.py:
--------------------------------------------------------------------------------
1 | from scythe.csv import CSVExtractor
2 | import os
3 |
4 | csv_file = os.path.join(os.path.dirname(__file__), 'data', 'test.csv')
5 |
6 |
def test_csv():
    extractor = CSVExtractor()

    # Schema expected from inference on the sample file
    base_schema = {'fields': [{'name': 'city', 'type': 'string',
                               'format': 'default'},
                              {'name': 'location', 'type': 'geopoint',
                               'format': 'default'}],
                   'missingValues': ['']}

    # Records are returned by default and cast with the inferred schema
    output = extractor.extract([csv_file])
    records = output['records']
    assert len(records) == 4
    assert isinstance(records[0], dict)
    assert isinstance(records[0]['location'], list)
    assert isinstance(records[0]['location'][0], float)
    assert isinstance(records[-1]['location'], str)  # Would fail schema
    assert output['schema'] == base_schema

    # Turning off record return drops the 'records' key entirely
    extractor.return_records = False
    assert 'records' not in extractor.extract([csv_file])

    # Declared missing values are merged into the schema and become None
    extractor.return_records = True
    output = extractor.extract([csv_file], {'na_values': ['N/A']})
    assert output['schema'] == dict(base_schema, missingValues=['', 'N/A'])
    assert output['records'][-1]['location'] is None

    # Exercise the remaining informational methods
    assert any('https://github.com/frictionlessdata/tableschema-py' in x
               for x in extractor.citations())
    extractor.implementors()
    extractor.version()
41 |
--------------------------------------------------------------------------------
/tests/test_filename.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 | from scythe.filename import FilenameExtractor
6 |
7 |
@pytest.fixture
def test_files():
    """Names to extract metadata from.

    These are not files on disk, because no data is read from the files directly.
    """
    data_path = os.path.join(os.path.dirname(__file__), 'data', 'filename', "O2foo:bar")
    return ["He_abcdeffoo:FOO.txt", "Al123Smith_et_al.and_co.data", data_path]
14 |
15 |
@pytest.fixture
def extractor():
    """A fresh FilenameExtractor for each test."""
    return FilenameExtractor()
19 |
20 |
@pytest.fixture
def mappings():
    """Regex mappings from dotted metadata keys to filename fragments."""
    return [{
        "material.composition": "^.{2}",  # First two chars are always composition
        "custom.foo": "foo:.{3}",  # 3 chars after foo is value of foo
        "custom.ext": "\\..{3,4}$"  # 3 or 4 char extension
    }]
28 |
29 |
def test_filename(extractor, test_files, mappings):
    # Expected metadata for each entry of test_files, in order
    expected = [
        {'custom': {'ext': '.txt', 'foo': 'foo:FOO'},
         'material': {'composition': 'He'}},
        {'custom': {'ext': '.data'},
         'material': {'composition': 'Al'}},
        {'custom': {'foo': 'foo:bar'},
         'material': {'composition': 'O2'}},
    ]

    # Each test name should yield exactly the metadata its mapping selects
    for path, record in zip(test_files, expected):
        assert extractor.extract(path, context={"mapping": mappings[0]}) == record

    # Test failure modes
    # No mapping provided
    with pytest.raises(Exception):
        extractor.extract(test_files[0])
64 |
--------------------------------------------------------------------------------
/tests/test_tdb.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from mdf_toolbox import insensitive_comparison as eqi
4 | import pytest
5 |
6 | from scythe.tdb import TDBExtractor
7 |
8 |
@pytest.fixture
def test_files():
    """Paths of the example TDB files used by the extraction tests."""
    tdb_dir = os.path.join(os.path.dirname(__file__), 'data', 'tdb')
    return [os.path.join(tdb_dir, name)
            for name in ('PbSSeTe_Na.TDB', 'test_AuSi.TDB', 'test_PbTe.TDB')]
14 |
15 |
@pytest.fixture
def fail_file():
    """A file the extractor should fail to parse."""
    return os.path.join(os.path.dirname(__file__), 'data', 'fail_file.dat')
19 |
20 |
@pytest.fixture
def extractor():
    """A fresh TDBExtractor for each test."""
    return TDBExtractor()
24 |
25 |
def test_tdb(extractor, test_files, fail_file):
    # Expected (composition, phase list) for each file, in test_files order
    expected = [
        ("VaPbSNaTeSe", ['LIQUID', 'FCC_A1', 'HALITE', 'HEXAGONAL_A8',
                         'ORTHORHOMBIC_S', 'BCC_A2', 'NA2TE', 'NATE', 'NATE3',
                         'NA2SE', 'NASE', 'NASE2', 'NA2S', 'NAS', 'NAS2']),
        ("SiVaAu", ['LIQUID', 'BCC_A2', 'CBCC_A12', 'CUB_A13',
                    'DIAMOND_A4', 'FCC_A1', 'HCP_A3', 'HCP_ZN']),
        ("TeVaPb", ['LIQUID', 'PBTE', 'HEXAGONAL_A8', 'RHOMBOHEDRAL_A7']),
    ]
    for path, (composition, phases) in zip(test_files, expected):
        output = extractor.extract(path)
        assert eqi(output["material"]["composition"], composition, string_insensitive=True)
        assert eqi(output["calphad"]["phases"], phases)

    # Test failure modes
    with pytest.raises(Exception):
        extractor.extract(fail_file)
44 |
--------------------------------------------------------------------------------
/tests/test_dft.py:
--------------------------------------------------------------------------------
1 | from scythe.dft import DFTExtractor
2 | from shutil import copy
3 | from glob import glob
4 | import tarfile
5 | import pytest
6 | import os
7 |
# Archived example calculations; the fixtures below unpack each into a
# temporary directory before parsing
vasp_tar = os.path.join(os.path.dirname(__file__), 'data',
                        'vasp', 'AlNi_static_LDA.tar.gz')
pwscf_tar = os.path.join(os.path.dirname(__file__), 'data',
                         'pwscf', 'NaF.scf.tar.gz')
12 |
13 |
@pytest.fixture
def vasp_dir(tmpdir):
    """Unpack VASP tar into a temporary directory"""
    with tarfile.open(vasp_tar) as archive:
        archive.extractall(tmpdir)
    return str(tmpdir)
20 |
21 |
@pytest.fixture
def parser():
    """A DFTExtractor with the quality report disabled"""
    return DFTExtractor(quality_report=False)
25 |
26 |
@pytest.fixture
def multi_vasp_dir(vasp_dir):
    """VASP directory with two calculations with different extensions"""
    # Fix: collapse the redundant nested os.path.join into a single call
    calc_dir = os.path.join(vasp_dir, 'AlNi_static_LDA')
    for f in glob(os.path.join(calc_dir, '*')):
        # Duplicate each output file under a '.2' extension so the
        # grouping logic sees a second, distinct calculation
        if os.path.isfile(f):
            copy(f, f + '.2')
    return str(vasp_dir)
34 |
35 |
@pytest.fixture
def pwscf_dir(tmpdir):
    """Unpack the PWSCF example tarball into a temporary directory"""
    with tarfile.open(pwscf_tar) as archive:
        archive.extractall(tmpdir)
    return str(tmpdir)
41 |
42 |
def test_single_vasp_calc(parser, vasp_dir):
    metadata = list(parser.extract_directory(vasp_dir))
    # Exactly one calculation, returned as a (file-group, metadata) tuple
    assert len(metadata) == 1
    assert isinstance(metadata[0], tuple)
    group, record = metadata[0]
    assert isinstance(group, list)
    assert isinstance(record, dict)
49 |
50 |
def test_multivasp_calc(parser: DFTExtractor, multi_vasp_dir):
    metadata = list(parser.extract_directory(multi_vasp_dir))
    # The duplicated files should be detected as a second calculation
    assert len(metadata) == 2
    group, record = metadata[0]
    assert isinstance(group, list)
    assert isinstance(record, dict)
56 |
57 |
def test_pwscf(parser: DFTExtractor, pwscf_dir):
    # A single PWSCF calculation should be found in the unpacked directory
    assert len(list(parser.extract_directory(pwscf_dir))) == 1
61 |
--------------------------------------------------------------------------------
/scythe/crystal_structure.py:
--------------------------------------------------------------------------------
1 | from pymatgen.io.ase import AseAtomsAdaptor
2 | from pymatgen.core import Structure
3 | from ase.io import read
4 |
5 | from scythe.base import BaseSingleFileExtractor
6 |
7 |
class CrystalStructureExtractor(BaseSingleFileExtractor):
    """Extract information about a crystal structure from many types of files.

    Uses either ASE or Pymatgen on the back end"""

    def _extract_file(self, path, context=None):
        """Read a structure file and summarize its composition and lattice.

        Args:
            path (str): Path to the structure file
            context: Unused by this extractor
        Returns:
            (dict): Record with ``material`` and ``crystal_structure`` blocks
        Raises:
            ValueError: If neither ASE nor Pymatgen can read the file
        """
        material = {}
        crystal_structure = {}
        # Attempt to read the file
        try:
            # Read with ASE
            ase_res = read(path)
            # Check data read, validate crystal structure.
            # NOTE: this ValueError is caught by the `except` below on purpose,
            # routing empty or non-periodic ASE reads to the Pymatgen fallback
            if not ase_res or not all(ase_res.get_pbc()):
                raise ValueError("No valid data")
            else:
                # Convert ASE Atoms to Pymatgen Structure
                pmg_s = AseAtomsAdaptor.get_structure(ase_res)
        # ASE failed to read file
        except Exception:
            try:
                # Read with Pymatgen
                pmg_s = Structure.from_file(path)
            except Exception:
                # Can't read file with either library
                raise ValueError('File not readable by pymatgen or ase: {}'.format(path))

        # Parse material block
        material["composition"] = pmg_s.formula.replace(" ", "")

        # Parse crystal_structure block
        crystal_structure["space_group_number"] = pmg_s.get_space_group_info()[1]
        crystal_structure["number_of_atoms"] = float(pmg_s.composition.num_atoms)
        crystal_structure["volume"] = float(pmg_s.volume)
        crystal_structure["stoichiometry"] = pmg_s.composition.anonymized_formula

        record = {}
        if material:
            record["material"] = material
        if crystal_structure:
            record["crystal_structure"] = crystal_structure
        return record

    def implementors(self):
        return ['Jonathon Gaff']

    def version(self):
        return '0.0.1'
56 |
--------------------------------------------------------------------------------
/docs/source/extractors.rst:
--------------------------------------------------------------------------------
1 | Available Extractors
2 | ====================
3 |
4 | These pages detail all of the extractors currently available in Scythe.
5 |
6 | Quick Summary
7 | ~~~~~~~~~~~~~
8 |
9 | The extractors that are configured to work with the stevedore plugin are:
10 |
11 | .. list-plugins:: scythe.extractor
12 |
13 |
14 | Detailed Listing
15 | ~~~~~~~~~~~~~~~~
16 |
17 | Generic File Extractors
18 | -----------------------
19 |
20 | Extractors that work for any kind of file
21 |
22 | .. automodule:: scythe.file
23 | :members:
24 | :exclude-members: implementors, schema, version, group
25 |
26 | Image Extractors
27 | ----------------
28 |
29 | Extractors that read image data
30 |
31 | .. automodule:: scythe.image
32 | :members:
33 | :exclude-members: implementors, schema, version, group
34 |
35 | Electron Microscopy Extractors
36 | ------------------------------
37 |
Extractors that read electron microscopy data of various sorts (images, spectra, spectrum images,
etc.) using the `HyperSpy <https://hyperspy.org>`_ package.
40 |
41 | .. automodule:: scythe.electron_microscopy
42 | :members:
43 | :exclude-members: implementors, schema, version, group
44 |
45 | Atomistic Data Extractors
46 | -------------------------
47 |
48 | Extractors related to data files that encode atom-level structure
49 |
50 | .. automodule:: scythe.crystal_structure
51 | :members:
52 | :exclude-members: implementors, schema, version, group
53 |
54 | .. automodule:: scythe.ase
55 | :members:
56 | :noindex:
57 | :exclude-members: implementors, schema, version, group
58 |
59 | Calculation Extractors
60 | ----------------------
61 |
62 | Extractors that retrieve results from calculations
63 |
64 | .. automodule:: scythe.dft
65 | :members:
66 | :exclude-members: implementors, schema, version, group
67 |
68 | .. automodule:: scythe.ase
69 | :members:
70 | :noindex:
71 | :exclude-members: implementors, schema, version, group
72 |
73 | Structured Data Files
74 | ---------------------
75 |
76 | Extractors that read data from structured files
77 |
78 | .. automodule:: scythe.csv
79 | :members:
80 | :exclude-members: implementors, schema, version, group
81 |
--------------------------------------------------------------------------------
/scythe/ase.py:
--------------------------------------------------------------------------------
1 | import json
2 | import datetime
3 | from ase.io.jsonio import create_ndarray
4 | from ase.io import read, write
5 | from io import StringIO
6 | import numpy as np
7 |
8 | from scythe.base import BaseSingleFileExtractor
9 |
10 |
def object_hook(dct):
    """Custom decoder for ASE JSON objects

    Does everything *except* reconstitute the JSON object and
    also converts numpy arrays to lists

    Adapted from ase.io.jsonio

    Args:
        dct (dict): Dictionary to reconstitute to an ASE object
    """
    if '__datetime__' in dct:
        # Timestamps are stored in ISO format with microseconds
        return datetime.datetime.strptime(dct['__datetime__'], '%Y-%m-%dT%H:%M:%S.%f')

    if '__complex__' in dct:
        # Stored as a (real, imag) pair
        real_imag = dct['__complex__']
        return complex(*real_imag)

    if '__ndarray__' in dct:
        return create_ndarray(*dct['__ndarray__'])

    if '__complex_ndarray__' in dct:
        # No longer used (only here for backwards compatibility):
        real, imag = (np.array(part) for part in dct['__complex_ndarray__'])
        return real + 1j * imag

    return dct
37 |
38 |
class ASEExtractor(BaseSingleFileExtractor):
    """Parse information from atomistic simulation input files using ASE.

    ASE can read many file types. These can be found at https://wiki.fysik.dtu.dk/ase/ase/io/io.html

    Metadata are generated as ASE JSON DB format: https://wiki.fysik.dtu.dk/ase/ase/db/db.html
    """

    def _extract_file(self, path, context=None):
        # ASE emits its JSON DB format only by "writing a file,"
        # so write into an in-memory buffer instead of touching disk
        buffer = StringIO()
        atoms = read(path)
        write(images=atoms, format="json", filename=buffer)
        db_record = json.loads(buffer.getvalue(), object_hook=object_hook)

        # Select the first record.
        # TODO: Test this against multiple records
        record = db_record['1']
        record['chemical_formula'] = atoms.get_chemical_formula()
        return record

    def implementors(self):
        return ['Ben Blaiszik ']

    def version(self):
        return '0.0.1'
68 |
--------------------------------------------------------------------------------
/scythe/file.py:
--------------------------------------------------------------------------------
1 | from scythe.base import BaseSingleFileExtractor
2 | from hashlib import sha512
3 | from warnings import warn
4 | import json
5 | import os
6 |
7 |
# libmagic (via the python-magic wrapper) is optional: it powers the MIME and
# data-type detection below, but everything else works without it
try:
    import magic
except ImportError as e:
    # python-magic raises ImportError both when the wrapper itself is missing
    # and when the wrapper is installed but the native libmagic library is not;
    # distinguish the two so the user gets the right instructions
    if 'failed to find libmagic' in str(e):
        warn('The libmagic library is not installed. '
             'See: https://github.com/ahupp/python-magic#installation')
    else:
        # Fix: the warning previously said "call:" a URL; "see:" is the right verb
        warn('The python wrapper for libmagic is not installed. '
             'If desired, see: https://github.com/ahupp/python-magic#installation')
    magic = None
18 |
19 |
class GenericFileExtractor(BaseSingleFileExtractor):
    """Gather basic file information"""

    def __init__(self, store_path=True, compute_hash=True):
        """
        Args:
            store_path (bool): Whether to record the path of the file
            compute_hash (bool): Whether to compute the hash of a file
        """
        super().__init__()
        self.store_path = store_path
        self.compute_hash = compute_hash

    def _extract_file(self, path, context=None):
        # Size and base name are always available
        output = {
            "length": os.path.getsize(path),
            "filename": os.path.basename(path),
        }

        # MIME and human-readable type descriptions require libmagic
        if magic is not None:
            output["mime_type"] = magic.from_file(path, mime=True)
            output["data_type"] = magic.from_file(path)

        if self.store_path:
            output['path'] = path

        if self.compute_hash:
            # Hash the file in 64 KiB chunks to bound memory use
            sha = sha512()
            with open(path, 'rb') as fp:
                for chunk in iter(lambda: fp.read(65536), b''):
                    sha.update(chunk)
            output['sha512'] = sha.hexdigest()
        return output

    def implementors(self):
        return ['Logan Ward']

    def version(self):
        return '0.0.1'

    @property
    def schema(self):
        """JSON schema for this extractor's output, loaded from the package data."""
        schema_path = os.path.join(os.path.dirname(__file__), 'schemas', 'file.json')
        with open(schema_path) as fp:
            return json.load(fp)
67 |
--------------------------------------------------------------------------------
/tests/test_json.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 | from scythe.json import JSONExtractor
6 |
7 |
@pytest.fixture
def test_files():
    """Paths of the JSON documents used by the extraction tests."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data', 'json')
    return [os.path.join(data_dir, 'test_json.json')]
11 |
12 |
@pytest.fixture
def fail_file():
    """A file the extractor should fail to parse."""
    return os.path.join(os.path.dirname(__file__), 'data', 'fail_file.dat')
16 |
17 |
@pytest.fixture
def extractor():
    """A fresh JSONExtractor for each test."""
    return JSONExtractor()
21 |
22 |
@pytest.fixture
def mappings():
    # Two equivalent mapping styles: nested dictionaries and dotted flat keys.
    # Each maps output metadata fields to dotted paths in the source document.
    return [{
        "custom": {
            "foo": "dict1.field1",
            "bar": "dict2.nested1.field1",
            "missing": "na_val"
        },
        "material": {
            "composition": "compost"
        }
    }, {
        "custom.foo": "dict1.field1",
        "custom.bar": "dict2.nested1.field1",
        "custom.missing": "na_val",
        "material.composition": "compost"
    }]
40 |
41 |
def test_json(extractor, test_files, fail_file, mappings):
    # Expected output when "na" is NOT treated as a missing value
    output_na_unset = {
        "material": {"composition": "CN25"},
        "custom": {"foo": "value1", "bar": True, "missing": "na"}
    }
    # Expected output when "na" is declared missing: the field is dropped
    output_na_set = {
        "material": {"composition": "CN25"},
        "custom": {"foo": "value1", "bar": True}
    }

    # Both mapping styles must give identical results
    for mapping in mappings:
        assert extractor.extract(test_files[0], context={"mapping": mapping}) == output_na_unset

    assert extractor.extract(test_files[0], context={"mapping": mappings[0],
                                                     "na_values": ["na"]}) == output_na_set
    assert extractor.extract(test_files[0], context={"mapping": mappings[1],
                                                     "na_values": "na"}) == output_na_set

    # Test failure modes
    with pytest.raises(Exception):
        extractor.extract(fail_file)
    # No mapping provided
    with pytest.raises(Exception):
        extractor.extract(test_files[0])
77 |
--------------------------------------------------------------------------------
/tests/test_yaml.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 | from scythe.yaml import YAMLExtractor
6 |
7 |
@pytest.fixture
def test_files():
    """Paths of the YAML documents used by the extraction tests."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data', 'yaml')
    return [os.path.join(data_dir, 'test_yaml.yaml')]
11 |
12 |
@pytest.fixture
def fail_file():
    """A file the extractor should fail to parse."""
    return os.path.join(os.path.dirname(__file__), 'data', 'fail_file.dat')
16 |
17 |
@pytest.fixture
def extractor():
    """A fresh YAMLExtractor for each test."""
    return YAMLExtractor()
21 |
22 |
@pytest.fixture
def mappings():
    # Two equivalent mapping styles: nested dictionaries and dotted flat keys.
    # Each maps output metadata fields to dotted paths in the source document.
    return [{
        "custom": {
            "foo": "dict1.field1",
            "bar": "dict2.nested1.field1",
            "missing": "na_val"
        },
        "material": {
            "composition": "compost"
        }
    }, {
        "custom.foo": "dict1.field1",
        "custom.bar": "dict2.nested1.field1",
        "custom.missing": "na_val",
        "material.composition": "compost"
    }]
40 |
41 |
def test_yaml(extractor, test_files, fail_file, mappings):
    # Expected output when "na" is NOT treated as a missing value
    output_na_unset = {
        "material": {"composition": "CN25"},
        "custom": {"foo": "value1", "bar": True, "missing": "na"}
    }
    # Expected output when "na" is declared missing: the field is dropped
    output_na_set = {
        "material": {"composition": "CN25"},
        "custom": {"foo": "value1", "bar": True}
    }

    # Both mapping styles must give identical results
    for mapping in mappings:
        assert extractor.extract(test_files[0], context={"mapping": mapping}) == output_na_unset

    assert extractor.extract(test_files[0], context={"mapping": mappings[0],
                                                     "na_values": ["na"]}) == output_na_set
    assert extractor.extract(test_files[0], context={"mapping": mappings[1],
                                                     "na_values": "na"}) == output_na_set

    # Test failure modes
    with pytest.raises(Exception):
        extractor.extract(fail_file)
    # No mapping provided
    with pytest.raises(Exception):
        extractor.extract(test_files[0])
77 |
--------------------------------------------------------------------------------
/tests/test_xml.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 |
5 | from scythe.xml import XMLExtractor
6 |
7 |
@pytest.fixture
def test_files():
    """Paths of the XML documents used by the extraction tests."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data', 'xml')
    return [os.path.join(data_dir, 'test_xml.xml')]
11 |
12 |
@pytest.fixture
def fail_file():
    """A file the extractor should fail to parse."""
    return os.path.join(os.path.dirname(__file__), 'data', 'fail_file.dat')
16 |
17 |
@pytest.fixture
def extractor():
    """A fresh XMLExtractor for each test."""
    return XMLExtractor()
21 |
22 |
@pytest.fixture
def mappings():
    # Two equivalent mapping styles: nested dictionaries and dotted flat keys.
    # Paths include the XML document's "root" element.
    return [{
        "custom": {
            "foo": "root.dict1.field1",
            "bar": "root.dict2.nested1.field1",
            "missing": "root.na_val"
        },
        "material": {
            "composition": "root.compost"
        }
    }, {
        "custom.foo": "root.dict1.field1",
        "custom.bar": "root.dict2.nested1.field1",
        "custom.missing": "root.na_val",
        "material.composition": "root.compost"
    }]
40 |
41 |
def test_xml(extractor, test_files, fail_file, mappings):
    # Expected output when "na" is NOT treated as a missing value
    output_na_unset = {
        "material": {"composition": "CN25"},
        "custom": {"foo": "value1", "bar": "baz", "missing": "na"}
    }
    # Expected output when "na" is declared missing: the field is dropped
    output_na_set = {
        "material": {"composition": "CN25"},
        "custom": {"foo": "value1", "bar": "baz"}
    }

    # Both mapping styles must give identical results
    for mapping in mappings:
        assert extractor.extract(test_files[0], context={"mapping": mapping}) == output_na_unset

    assert extractor.extract(test_files[0], context={"mapping": mappings[0],
                                                     "na_values": ["na"]}) == output_na_set
    assert extractor.extract(test_files[0], context={"mapping": mappings[1],
                                                     "na_values": "na"}) == output_na_set

    # Test failure modes
    with pytest.raises(Exception):
        extractor.extract(fail_file)
    # No mapping provided
    with pytest.raises(Exception):
        extractor.extract(test_files[0])
77 |
--------------------------------------------------------------------------------
/docs/source/goals.rst:
--------------------------------------------------------------------------------
1 | Project Goals
2 | =============
3 |
4 | The goal of Scythe is to minimize the amount of code duplication between scientific databases.
5 | Many databases rely on custom software to extract information from scientific files and transform that data into a standardized format.
6 | Automation or analysis software also require extracting information from files.
7 | While the data needs of application vary, they all rely on similar algorithms to extract information from the
8 | same types of files.
9 | *Scythe is designed to be a shared repository for these algorithms*.
10 |
11 | The core of Scythe is a collection of "extractors" which each generate simplified, standardized
12 | data from a certain class of files. For example, the
13 | :class:`~scythe.electron_microscopy.ElectronMicroscopyExtractor` produces structured data from
14 | file types specific to brands of electron microscopes.
15 |
16 | Each extractor does not necessarily generate data in a format needed by any tool. Rather, the extractors
17 | are designed to produce *all* of the information needed by all projects that utilize the
18 | libraries. In this way, the extractors can service every user without modification.
19 |
20 | What Does Scythe *Do*?
21 | ---------------------------
22 |
23 | Scythe is designed to provide the answer to two limited questions:
24 |
25 | 1. *Which files can I parse with a certain tool?*
26 | Scythe provides tools for quickly finding files of a certain type
27 |
28 | 2. *What information does a set of files contain?*
29 | Scythe provides a library of tools that transform data into a simpler formats
30 |
31 | What Does Scythe *Not Do*?
32 | -------------------------------
33 |
34 | There are several questions that are specifically out-of-scope for Scythe:
35 |
36 | 1. *How do I get access to files that I want to parse?*
37 | Scythe does not solve the data transfer problem
38 | 2. *How can I parse large numbers of files reliably?*
39 | Scythe is not a distributed workflow engine, but is designed to integrate with one
40 | for extracting metadata from large filesystems.
41 | 3. *How can I translate data into the schema needed for my application?*
42 | The goal of Scythe is to go from opaque to well-documented formats. We recommend
43 | implementing separate "adapter" classes to transform Scythe metadata to your
44 | specific requirements.
45 |
See our
`"how to use Scythe" documentation <https://materialsio.readthedocs.io/en/latest/user-guide.html>`_
for more detail on how to integrate Scythe into an application that provides these
intentionally-missing features.
50 |
--------------------------------------------------------------------------------
/scythe/utils/grouping.py:
--------------------------------------------------------------------------------
1 | """Utilities for implementing grouping operations"""
2 | from typing import Union, List, Iterable, Tuple
3 | from operator import itemgetter
4 | from pathlib import Path
5 | import itertools
6 | import os
7 |
8 |
def preprocess_paths(paths: Union[str, Path, List[str], List[Path]]) -> List[str]:
    """Transform paths to absolute paths

    Designed to be used to simplify grouping logic

    Args:
        paths: A single path or list of paths, as strings or Path objects
    Returns:
        (List[str]): The same paths, user-expanded and made absolute
    """

    # Wrap a bare path in a list so the logic below always sees a list
    if isinstance(paths, (str, Path)):
        paths = [paths]

    # Expand any '~' markers, then absolutize each path
    expanded = (os.path.expanduser(p) for p in paths)
    return [os.path.abspath(p) for p in expanded]
26 |
27 |
def group_by_postfix(files: Iterable[str], vocabulary: List[str]) -> Iterable[List[str]]:
    """Group files that have a common ending

    Finds all filenames that begin with a prefix from a
    user-provided vocabulary and end with the same post-fix.

    For example, consider a directory that contains files A.1, B.1, A.2, B.2, and C.1.
    If a user provides a vocabulary of ['a', 'b'], the parser will return
    groups (A.1, B.1) and (A.2, B.2).
    If a user provides a vocabulary of ['a', 'b', 'c'], the parser will
    return groups (A.1, B.1, C.1) and (A.2, B.2), because grouping is by
    the shared post-fix ('.1' or '.2'), not by the matched prefix.

    See :class:`scythe.dft.DFTExtractor` for an example usage.

    Args:
        files ([str]): List of files to be grouped
        vocabulary ([str]): List of known starts for the file. Filenames are
            lower-cased before matching, so entries should be lower-case.
    Yields:
        (List[str]): Groups of files to be parsed together
    """
    # Fixes vs. the previous revision: the return annotation promised tuples but
    # the function yields lists, and the docstring example for ['A', 'B', 'C']
    # contradicted the (directory, post-fix) grouping key actually used below.

    # TODO (lw): This function could be more flexible, but let's add features on demand

    # Find the files that start with a term from the user-defined vocabulary
    matchable_files = []  # List of (path, type, (dir, postfix))
    for filename in files:
        # Find if the filename matches a known type
        name = os.path.basename(filename)
        name_lower = name.lower()
        matches = [name_lower.startswith(n) for n in vocabulary]
        if not any(matches):
            continue

        # Strip the matched prefix to get the post-fix (e.g., the extension)
        vtype = vocabulary[matches.index(True)]
        ext = name[len(vtype):]
        d = os.path.dirname(filename)

        # Add to the list
        matchable_files.append((filename, vtype, (d, ext)))

    # Group files that share both a directory and a post-fix
    sort_key = itemgetter(2)
    for _, group in itertools.groupby(sorted(matchable_files, key=sort_key),
                                      key=sort_key):
        yield [x[0] for x in group]
75 |
--------------------------------------------------------------------------------
/scythe/csv.py:
--------------------------------------------------------------------------------
1 | from scythe.base import BaseSingleFileExtractor
2 | from tableschema.exceptions import CastError
3 | from tableschema import Table
4 | from typing import List
5 | import logging
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
class CSVExtractor(BaseSingleFileExtractor):
    """Describe the contents of a comma-separated value (CSV) file

    The context dictionary for the CSV parser includes several fields:
        - ``schema``: Dictionary defining the schema for this dataset, following that of
          FrictionlessIO
        - ``na_values``: Any values that should be interpreted as missing
    """

    def __init__(self, return_records=True, **kwargs):
        """
        Args:
            return_records (bool): Whether to return each row in the CSV file
        Keyword:
            All kwargs as passed to `TableSchema's infer
            <https://github.com/frictionlessdata/tableschema-py>`_ method
        """
        # Fix: initialize the base class, consistent with the other extractors
        super().__init__()
        self.return_records = return_records
        self.infer_kwargs = kwargs

    def _extract_file(self, path: str, context=None):
        """Infer the schema of a CSV file and, optionally, return its rows.

        Args:
            path (str): Path to the CSV file
            context (dict): Optional ``schema`` and ``na_values`` settings
        Returns:
            (dict): ``schema`` plus, if enabled, the cast ``records``
        Raises:
            ValueError: If ``na_values`` is provided but is not a list
        """
        # Set the default value
        if context is None:
            context = dict()

        # Load in the table, honoring a user-provided schema if any
        table = Table(path, schema=context.get('schema', None))

        # Infer the table's schema from its contents
        table.infer(**self.infer_kwargs)

        # Merge user-declared missing values into the schema
        if 'na_values' in context:
            if not isinstance(context['na_values'], list):
                raise ValueError('context["na_values"] must be a list')
            table.schema.descriptor['missingValues'] = sorted(set([''] + context['na_values']))
            table.schema.commit()

        # Store the schema
        output = {'schema': table.schema.descriptor}

        # If desired, store the data
        if self.return_records:
            headers = table.schema.headers
            records = []
            failed_records = 0
            for row in table.iter(keyed=False, cast=False):
                try:
                    row = table.schema.cast_row(row)
                except CastError:
                    # Keep the un-cast row rather than dropping it
                    failed_records += 1

                # TODO (wardlt): Use json output from tableschema once it's supported
                #  https://github.com/frictionlessdata/tableschema-py/issues/213
                # NOTE(review): eval(repr(...)) round-trips the row through Python
                # source text; it only works for builtin value types and should be
                # replaced when tableschema gains native JSON output
                records.append(eval(repr(dict(zip(headers, row)))))
            if failed_records > 0:
                logger.warning(f'{failed_records} records failed casting with schema')
            output['records'] = records

        return output

    def implementors(self) -> List[str]:
        return ['Logan Ward']

    def citations(self) -> List[str]:
        return ["https://github.com/frictionlessdata/tableschema-py"]

    def version(self) -> str:
        return '0.0.1'
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Scythe
2 |
3 | [](https://github.com/materials-data-facility/Scythe/actions/workflows/test-suite-and-docs.yml)
4 | [](https://materials-data-facility.github.io/Scythe)
5 | [](https://codecov.io/gh/materials-data-facility/Scythe)
6 | [](https://github.com/materials-data-facility/Scythe/commits/master)
7 | [](https://badge.fury.io/py/scythe-extractors)
8 | [](https://github.com/materials-data-facility/Scythe/graphs/contributors)
9 |
10 | Scythe is a library of tools that generate summaries of the data contained in scientific data files.
11 | The goal of Scythe is to provide a shared resource of these tools ("extractors") to avoid duplication of effort between the many emerging materials databases.
12 | Each extractor is designed to generate the sum of all data needed by each of these databases with a uniform API so that specific projects can write simple adaptors for their needs.
13 |
14 | ## Installation
15 |
16 | Install using an up-to-date version of `pip` on version 3.8 or higher of Python:
17 |
18 | ```bash
19 | pip install scythe-extractors
20 | ```
21 |
22 | Each specific extractor module has its own set of required libraries.
23 | Given that some modules have extensive dependencies, we do not install all of them automatically.
24 | You can install them either module-by-module using the pip "extras" installation (e.g.,
25 | `pip install "scythe-extractors[image]"`),
26 | or install all extractors with
27 | `pip install "scythe-extractors[all]"`.
28 |
29 | ## Development/Contribution
30 |
31 | If you wish to develop new features using Scythe, please consult the
32 | [Contributor Guide](https://materialsio.readthedocs.io/en/latest/contributor-guide.html) that will
33 | walk you through installing [Poetry](https://python-poetry.org/) and the Scythe dependencies.
34 |
35 | ## Documentation
36 |
37 | * Complete documentation for Scythe is on [Read the Docs](https://materialsio.readthedocs.io/en/latest/).
38 | * [List of Available Extractors](https://materialsio.readthedocs.io/en/latest/extractors.html)
39 |
40 | ## Support
41 |
42 | This work was performed in partnership with [Citrine Informatics](https://citrine.io/).
43 | This was also performed under financial assistance award 70NANB14H012 from U.S. Department of Commerce, National Institute of Standards and Technology as part of the Center for Hierarchical Material Design (CHiMaD).
44 | This work was also supported by the National Science Foundation as part of the Midwest Big Data Hub under NSF Award Number: 1636950 "BD Spokes: SPOKE: MIDWEST: Collaborative: Integrative Materials Design (IMaD): Leverage, Innovate, and Disseminate".
45 |
--------------------------------------------------------------------------------
/scythe/adapters/base.py:
--------------------------------------------------------------------------------
1 | """Base classes for adapters"""
2 |
3 | import json
4 | from abc import abstractmethod
5 | from typing import Any, Union
6 |
7 | import numpy as np
8 |
9 | from scythe.base import BaseExtractor
10 |
11 |
class BaseAdapter:
    """Base class for tools that convert metadata from one form into another"""

    @abstractmethod
    def transform(self, metadata: dict, context: Union[None, dict] = None) -> Any:
        """Convert a metadata record into a new representation

        Args:
            metadata (dict): Metadata to transform
            context (dict): Optional context information used during transformation
        Returns:
            The transformed metadata; may be any type of object.
            ``None`` corresponding
        """

    def check_compatibility(self, parser: BaseExtractor) -> bool:
        """Determine whether this adapter may be used with a given parser

        Args:
            parser (BaseExtractor): Parser to evaluate
        Returns:
            (bool) Whether this parser is compatible
        """
        target = self.version()
        if target is None:
            # Version-agnostic adapters are compatible with any parser
            return True
        mine = tuple(map(int, target.split('.')))
        theirs = tuple(map(int, parser.version().split('.')))
        return mine == theirs

    def version(self) -> Union[None, str]:
        """Version of the parser that an adapter was created for

        Returns:
            (str) Version of parser this adapter was designed for,
            or ``None`` if not applicable
        """
        return None
51 |
52 |
class NOOPAdapter(BaseAdapter):
    """Pass-through adapter that returns the metadata unchanged

    Used for testing purposes"""

    def transform(self, metadata: dict, context=None) -> dict:
        # Nothing to do: hand the record back untouched
        return metadata
60 |
61 |
class SerializeAdapter(BaseAdapter):
    """Converts the metadata to a string by serializing with JSON"""

    def transform(self, metadata: dict, context=None) -> str:
        serialized = json.dumps(metadata)
        return serialized
67 |
68 |
class GreedySerializeAdapter(BaseAdapter):
    """Converts the metadata to a string by serializing with JSON, making some (hopefully) informed
    choices about what to do with various types commonly seen, and otherwise reporting that the
    data type could not be serialized. May not work in all situations, but should cover a large
    number of cases."""
    @staticmethod
    def default(o):
        """Fallback serializer handed to ``json.dumps`` for non-JSON-native types

        Args:
            o: Object that ``json.dumps`` could not serialize natively
        Returns:
            A JSON-serializable stand-in for ``o``, or a placeholder string
            naming the type that could not be handled
        """
        if isinstance(o, np.void):
            # numpy structured void scalars carry no usable value
            return None
        elif isinstance(o, (np.ndarray, np.generic)):
            # Arrays and numpy scalars convert cleanly to Python lists/scalars
            return o.tolist()
        elif isinstance(o, bytes):
            try:
                return o.decode()
            except UnicodeDecodeError:
                # Not valid text; fall through to the placeholder below
                pass

        # Fix: the original computed type_name but never used it (and kept a dead
        # `success` flag that was never set); include the type in the placeholder
        # so users can tell what failed to serialize
        type_name = o.__class__.__name__
        return f"<<non-serializable: {type_name}>>"

    def transform(self, metadata: dict, context=None) -> str:
        """Serialize metadata to a JSON string, substituting placeholders where needed"""
        return json.dumps(metadata, default=GreedySerializeAdapter.default)
94 |
--------------------------------------------------------------------------------
/scythe/dft.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Iterable, Tuple, List
2 | from scythe.utils.grouping import preprocess_paths, group_by_postfix
3 | from scythe.base import BaseExtractor
4 | from dfttopif import files_to_pif
5 | from operator import itemgetter
6 | import itertools
7 | import os
8 |
9 |
# List of files that are known to the VASP parser, matched by lowercase basename.
# NOTE(review): "wavcar" is presumably a deliberate misspelling guard alongside
# "wavecar" — confirm before removing
_vasp_file_names = ["outcar", "incar", "chgcar", "wavecar", "wavcar", "oszicar", "ibzcar",
                    "kpoints", "doscar", "poscar", "contcar", "vasp_run.xml", "xdatcar"]
13 |
14 |
class DFTExtractor(BaseExtractor):
    """Extract metadata from Density Functional Theory calculation results

    Relies on the ``dfttopif`` parser to build a PIF record from each group of
    calculation files
    """

    def __init__(self, quality_report=False):
        """Initialize the extractor

        Args:
            quality_report (bool): Whether to generate a quality report
        """
        self.quality_report = quality_report

    def group(self, files: Union[str, List[str]], directories: List[str] = None,
              context: dict = None):
        """Cluster the input files into probable calculations"""
        # Standardize the paths and track which are still unclaimed
        remaining = set(preprocess_paths(files))

        # VASP grouping is reliable: claim matched files so they are not re-used
        for vasp_group in self._group_vasp(remaining):
            remaining.difference_update(vasp_group)
            yield vasp_group

        # PWSCF grouping is heuristic, so its files are not claimed
        yield from self._group_pwscf(remaining)

    def _group_vasp(self, files: Iterable[str]) -> Iterable[Tuple[str, ...]]:
        """Find groupings of files associated with VASP calculations

        Files whose names start with a known VASP file name (not case sensitive)
        are grouped with any files sharing the same postfix (e.g., "OUTCAR.1"
        and "INCAR.1" end up in the same group)

        Args:
            files ([str]): List of files to be grouped
        Yields:
            ((files)): List of VASP files from the same calculation
        """
        yield from group_by_postfix(files, _vasp_file_names)

    def _group_pwscf(self, files: Iterable[str]) -> Iterable[Tuple[str, ...]]:
        """Assemble groups of files that are potentially PWSCF calculations

        Args:
            files ([str]): List of files to be grouped
        Yields:
            ((str)): Groups of potential-pwscf files
        """
        # For now, we just group files by directory
        # TODO (lw): Find files that have PWSCF flags in them
        # TODO (lw): Read PWSCF input files to know the save directory
        keyed = sorted((os.path.dirname(f), f) for f in files)
        for _, members in itertools.groupby(keyed, key=itemgetter(0)):
            yield [pair[1] for pair in members]

    def extract(self, group: Iterable[str], context: dict = None):
        """Run dfttopif on a group of files and return the PIF as a dictionary"""
        pif = files_to_pif(group, quality_report=self.quality_report)
        return pif.as_dictionary()

    def implementors(self):
        return ['Logan Ward ']

    def version(self):
        return '0.0.1'
82 |
--------------------------------------------------------------------------------
/tests/data/crystal_structure/Ce3VO16.cif:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #$Date: 2016-03-02 09:51:31 +0200 (Wed, 02 Mar 2016) $
3 | #$Revision: 177123 $
4 | #$URL: svn://www.crystallography.net/cod/cif/5/91/02/5910204.cif $
5 | #------------------------------------------------------------------------------
6 | #
7 | # This file is available in the Crystallography Open Database (COD),
8 | # http://www.crystallography.net/
9 | #
10 | # All data on this site have been placed in the public domain by the
11 | # contributors.
12 | #
13 | data_5910204
14 | loop_
15 | _publ_author_name
16 | 'Wyckoff, R. W. G.'
17 | _publ_section_title
18 | ;
19 | Pages 15 & 17 from the Structure of Crystals, vol. 3 by Wyckoff R W G.
20 | published by Interscience Publishers, Inc. in 1951
21 | ;
22 | _journal_name_full 'The Structure of Crystals'
23 | _journal_page_first 15
24 | _journal_page_last 17
25 | _journal_volume 3
26 | _journal_year 1951
27 | _chemical_formula_structural CeVO4
28 | _chemical_formula_sum 'Ce O4 V'
29 | _space_group_IT_number 141
30 | _symmetry_cell_setting tetragonal
31 | _symmetry_Int_Tables_number 141
32 | _symmetry_space_group_name_Hall '-I 4bd 2'
33 | _symmetry_space_group_name_H-M 'I 41/a m d :2'
34 | _audit_creation_date 2006-30-06
35 | _audit_creation_method
36 | ;
37 | Pages 15 & 17 from the Structure of Crystals, vol. 3 by Wyckoff R W G.
38 | published by Interscience Publishers, Inc. in 1951
39 | ;
40 | _audit_update_record
41 | 'created by Girish Upreti, Portland State University'
42 | _cell_angle_alpha 90
43 | _cell_angle_beta 90
44 | _cell_angle_gamma 90
45 | _cell_length_a 7.399
46 | _cell_length_b 7.399
47 | _cell_length_c 6.496
48 | _cell_volume 355.625
49 | _cod_original_sg_symbol_H-M 'I 41/a m d'
50 | _cod_original_formula_sum 'Ce V O4'
51 | _cod_database_code 5910204
52 | loop_
53 | _symmetry_equiv_pos_as_xyz
54 | x,y,z
55 | -y+1/4,x+3/4,z+1/4
56 | y+1/4,-x+1/4,z+3/4
57 | x,-y,-z
58 | -x,y+1/2,-z
59 | -x,-y+1/2,z
60 | y+1/4,x+3/4,-z+1/4
61 | -y+1/4,-x+1/4,-z+3/4
62 | -x,-y,-z
63 | y+3/4,-x+1/4,-z+3/4
64 | -y+3/4,x+3/4,-z+1/4
65 | -x,y,z
66 | x,-y+1/2,z
67 | x,y+1/2,-z
68 | -y+3/4,-x+1/4,z+3/4
69 | y+3/4,x+3/4,z+1/4
70 | x+1/2,y+1/2,z+1/2
71 | -y+3/4,x+1/4,z+3/4
72 | y+3/4,-x+3/4,z+1/4
73 | x+1/2,-y+1/2,-z+1/2
74 | -x+1/2,y,-z+1/2
75 | -x+1/2,-y,z+1/2
76 | y+3/4,x+1/4,-z+3/4
77 | -y+3/4,-x+3/4,-z+1/4
78 | -x+1/2,-y+1/2,-z+1/2
79 | y+1/4,-x+3/4,-z+1/4
80 | -y+1/4,x+1/4,-z+3/4
81 | -x+1/2,y+1/2,z+1/2
82 | x+1/2,-y,z+1/2
83 | x+1/2,y,-z+1/2
84 | -y+1/4,-x+3/4,z+1/4
85 | y+1/4,x+1/4,z+3/4
86 | loop_
87 | _atom_site_fract_x
88 | _atom_site_fract_y
89 | _atom_site_fract_z
90 | _atom_site_label
91 | 0.00000 0.00000 0.00000 Ce1
92 | 0.00000 0.50000 0.25000 Ce2
93 | 0.50000 0.00000 0.75000 Ce3
94 | 0.50000 0.50000 0.50000 Ce4
95 | 0.00000 0.00000 0.50000 V1
96 | 0.00000 0.50000 0.75000 V2
97 | 0.50000 0.00000 0.25000 V3
98 | 0.50000 0.50000 0.00000 V4
99 | 0.00000 0.20000 0.34000 O1
100 | 0.00000 -0.20000 0.34000 O2
101 | 0.20000 0.00000 -0.34000 O3
102 | -0.20000 0.00000 -0.34000 O4
103 | 0.00000 0.70000 -0.09000 O5
104 | 0.00000 0.30000 -0.09000 O6
105 | -0.20000 0.50000 0.59000 O7
106 |
--------------------------------------------------------------------------------
/tests/data/crystal_structure/Al2O3.cif:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #$Date: 2017-10-13 02:32:00 +0300 (Fri, 13 Oct 2017) $
3 | #$Revision: 201954 $
4 | #$URL: file:///home/coder/svn-repositories/cod/cif/1/00/00/1000017.cif $
5 | #------------------------------------------------------------------------------
6 | #
7 | # This file is available in the Crystallography Open Database (COD),
8 | # http://www.crystallography.net/
9 | #
10 | # All data on this site have been placed in the public domain by the
11 | # contributors.
12 | #
13 | data_1000017
14 | loop_
15 | _publ_author_name
16 | 'Tsirelson, V G'
17 | 'Antipin, M Y'
18 | 'Gerr, R G'
19 | 'Ozerov, R P'
20 | 'Struchkov, Y T'
21 | _publ_section_title
22 | ;
23 | Ruby structure peculiarities derived from X-ray data. Localization of
24 | chromium atoms and electron deformation density
25 | ;
26 | _journal_coden_ASTM PSSABA
27 | _journal_name_full
28 | ;
29 | Physica Status Solidi, Sectio A: Applied Research
30 | ;
31 | _journal_page_first 425
32 | _journal_page_last 433
33 | _journal_paper_doi 10.1002/pssa.2210870204
34 | _journal_volume 87
35 | _journal_year 1985
36 | _chemical_formula_structural 'Al2 O3'
37 | _chemical_formula_sum 'Al2 O3'
38 | _chemical_name_mineral Corundum
39 | _chemical_name_systematic 'Aluminium oxide'
40 | _space_group_IT_number 167
41 | _symmetry_cell_setting trigonal
42 | _symmetry_space_group_name_Hall '-R 3 2"c'
43 | _symmetry_space_group_name_H-M 'R -3 c :H'
44 | _audit_creation_date 102-05-16
45 | _cell_angle_alpha 90
46 | _cell_angle_beta 90
47 | _cell_angle_gamma 120
48 | _cell_formula_units_Z 6
49 | _cell_length_a 4.7606(5)
50 | _cell_length_b 4.7606(5)
51 | _cell_length_c 12.994(1)
52 | _cell_volume 255.0
53 | _refine_ls_R_factor_all 0.063
54 | _cod_original_sg_symbol_H-M 'R -3 c'
55 | _cod_database_code 1000017
56 | loop_
57 | _symmetry_equiv_pos_as_xyz
58 | x,y,z
59 | -y,x-y,z
60 | y-x,-x,z
61 | -y,-x,1/2+z
62 | x,x-y,1/2+z
63 | y-x,y,1/2+z
64 | -x,-y,-z
65 | y,y-x,-z
66 | x-y,x,-z
67 | y,x,1/2-z
68 | -x,y-x,1/2-z
69 | x-y,-y,1/2-z
70 | 1/3+x,2/3+y,2/3+z
71 | 2/3+x,1/3+y,1/3+z
72 | 1/3-y,2/3+x-y,2/3+z
73 | 2/3-y,1/3+x-y,1/3+z
74 | 1/3-x+y,2/3-x,2/3+z
75 | 2/3-x+y,1/3-x,1/3+z
76 | 1/3-y,2/3-x,1/6+z
77 | 2/3-y,1/3-x,5/6+z
78 | 1/3+x,2/3+x-y,1/6+z
79 | 2/3+x,1/3+x-y,5/6+z
80 | 1/3-x+y,2/3+y,1/6+z
81 | 2/3-x+y,1/3+y,5/6+z
82 | 1/3-x,2/3-y,2/3-z
83 | 2/3-x,1/3-y,1/3-z
84 | 1/3+y,2/3-x+y,2/3-z
85 | 2/3+y,1/3-x+y,1/3-z
86 | 1/3+x-y,2/3+x,2/3-z
87 | 2/3+x-y,1/3+x,1/3-z
88 | 1/3+y,2/3+x,1/6-z
89 | 2/3+y,1/3+x,5/6-z
90 | 1/3-x,2/3-x+y,1/6-z
91 | 2/3-x,1/3-x+y,5/6-z
92 | 1/3+x-y,2/3-y,1/6-z
93 | 2/3+x-y,1/3-y,5/6-z
94 | loop_
95 | _atom_site_label
96 | _atom_site_type_symbol
97 | _atom_site_symmetry_multiplicity
98 | _atom_site_Wyckoff_symbol
99 | _atom_site_fract_x
100 | _atom_site_fract_y
101 | _atom_site_fract_z
102 | _atom_site_occupancy
103 | _atom_site_attached_hydrogens
104 | _atom_site_calc_flag
105 | O1 O2- 18 e 0.69365(3) 0. 0.25 1. 0 d
106 | Al1 Al3+ 12 c 0. 0. 0.35217(1) 1. 0 d
107 | loop_
108 | _atom_type_symbol
109 | _atom_type_oxidation_number
110 | O2- -2.000
111 | Al3+ 3.000
112 | loop_
113 | _cod_related_entry_id
114 | _cod_related_entry_database
115 | _cod_related_entry_code
116 | 1 ChemSpider 8164808
117 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "scythe-extractors"
3 | version = "0.1.1"
4 | description = "A library of tools that generate summaries of the data contained in scientific data files"
5 | authors = ["Materials Data Facility "]
6 | license = "Apache"
7 | readme = "README.md"
8 | repository = "https://github.com/materials-data-facility/scythe"
9 |
10 | packages = [
11 | { include = "scythe" },
12 | ]
13 |
14 | [tool.poetry.dependencies]
15 | python = ">=3.8.0,<3.11"
16 | mdf-toolbox = "^0.5.3"
17 | stevedore = "^3.5.0"
18 | pandas = "^1.4.2"
19 | llvmlite = "^0.38.0"
20 | numba = "^0.55"
21 |
22 | ase = { version = "~3.19", optional = true }
23 | pymatgen = { version = "^2022.3.24", optional = true }
24 | tableschema = { version = "^1,<2", optional = true }
25 | dfttopif = { version = "^1.1.0", optional = true }
26 | hyperspy = { version = "^1.4.1", optional = true }
27 | python-magic = { version = "^0.4.15", optional = true }
28 | Pillow = { version = "^9.0.1", optional = true }
29 | xmltodict = { version = "^0.12.0", optional = true }
30 | pycalphad = { version = "^0.10.0", optional = true }
31 |
32 | [tool.poetry.dev-dependencies]
33 | flake8 = "^3.9.2" # pinned due to incompatibility with flake8 v4 and sphinx
34 | pytest = "^7.1.1"
35 | coveralls = "^3.3.1"
36 | pytest-cov = "^3.0.0"
37 | tox = "^3.25.0"
38 | Sphinx = "^4.5.0"
39 | sphinx-rtd-theme = "^1.0.0"
40 | diff-cover = "^6.4.5"
41 |
42 | [tool.poetry.extras]
43 | ase = ['ase']
44 | crystal_structure = ['pymatgen', 'ase']
45 | csv = ['tableschema']
46 | dft = ['dfttopif']
47 | electron_microscopy = ['hyperspy']
48 | file = ['python-magic']
49 | image = ['Pillow']
50 | tdb = ['pycalphad']
51 | xml = ['xmltodict']
52 | # to make it easy to add all extras, maintain the list below as the sum
53 | # of all the dependencies above
54 | all = ['ase',
55 | 'pymatgen',
56 | 'tableschema',
57 | 'dfttopif',
58 | 'hyperspy',
59 | 'python-magic',
60 | 'Pillow',
61 | 'xmltodict',
62 | 'pycalphad']
63 |
64 | [tool.poetry.plugins]
65 |
66 | [tool.poetry.plugins."scythe.extractor"]
67 | "ase" = "scythe.ase:ASEExtractor"
68 | "crystal" = "scythe.crystal_structure:CrystalStructureExtractor"
69 | "csv" = "scythe.csv:CSVExtractor"
70 | "dft" = "scythe.dft:DFTExtractor"
71 | "em" = "scythe.electron_microscopy:ElectronMicroscopyExtractor"
72 | "filename" = "scythe.filename:FilenameExtractor"
73 | "generic" = "scythe.file:GenericFileExtractor"
74 | "image" = "scythe.image:ImageExtractor"
75 | "json" = "scythe.json:JSONExtractor"
76 | "noop" = "scythe.testing:NOOPExtractor"
77 | "tdb" = "scythe.tdb:TDBExtractor"
78 | "xml" = "scythe.xml:XMLExtractor"
79 | "yaml" = "scythe.yaml:YAMLExtractor"
80 |
81 | [tool.poetry.plugins."scythe.adapter"]
82 | "noop" = "scythe.adapters.base:NOOPAdapter"
83 | "serialize" = "scythe.adapters.base:SerializeAdapter"
84 | "greedy_serialize" = "scythe.adapters.base:GreedySerializeAdapter"
85 |
86 | [build-system]
87 | requires = ["poetry-core>=1.0.0"]
88 | build-backend = "poetry.core.masonry.api"
89 |
90 | [tool.pytest.ini_options]
91 | addopts = "--ignore=.venv --ignore=.tox --cov=scythe --cov-report html --cov-report term-missing --cov-report=xml"
92 | testpaths = ['tests']
93 |
94 | [tool.coverage.run]
95 | omit = [".tox/*"]
96 | dynamic_context = "test_function"
97 |
98 | [tool.coverage.report]
99 | show_missing = true
100 |
101 | [tool.coverage.html]
102 | directory = "coverage_html"
103 | show_contexts = true
104 |
105 | [tool.tox]
106 | legacy_tox_ini = """
107 | [tox]
108 | isolated_build = true
109 | envlist = py3{8,9,10}
110 |
111 | [testenv]
112 | passenv = *
113 | whitelist_externals = poetry
114 | commands =
115 | poetry install -E all
116 | poetry run flake8
117 | poetry run pytest
118 | poetry run diff-cover coverage.xml --compare-branch=origin/master
119 |
120 | [testenv:docs]
121 | passenv = *
122 | envlist = py310
123 | description = invoke sphinx-build to build the HTML docs
124 | commands =
125 | poetry install -E all
126 | poetry run flake8
127 | poetry run python -m sphinx.cmd.build ./docs/source ./docs/_build -n -E -a -j auto -b html
128 | """
129 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from scythe.utils.interface import (get_available_extractors, run_extractor,
2 | get_available_adapters, run_all_extractors_on_directory,
3 | ExtractResult)
4 | from scythe.utils import set_nested_dict_value
5 | from scythe.image import ImageExtractor
6 | import pytest
7 | import json
8 | import os
9 |
10 |
11 | cwd = os.path.dirname(__file__)
12 |
13 |
def test_list_parsers():
    """The 'image' extractor should be discoverable as a registered plugin"""
    extractors = get_available_extractors()
    assert 'image' in extractors
16 |
17 |
def test_execute_parser():
    """Running by name must match calling the extractor directly; the noop adapter is transparent"""
    image = os.path.join(cwd, 'data', 'image', 'dog2.jpeg')
    direct_result = ImageExtractor().extract([image])
    assert direct_result == run_extractor('image', [image])
    assert run_extractor('image', [image], adapter='noop') == run_extractor('image', [image])
22 |
23 |
def test_run_all_parsers():
    """End-to-end check of run_all_extractors_on_directory: result shape, adapter
    application, adapter mapping, error handling, and extractor filtering."""
    path = os.path.join(cwd, 'data', 'image')
    output = list(run_all_extractors_on_directory(path))
    # Each result is a 3-tuple: (file group, extractor name, metadata dict)
    assert len(output) > 0
    assert len(output[0]) == 3
    assert isinstance(output[0][0], tuple)
    assert isinstance(output[0][1], str)
    assert isinstance(output[0][2], dict)

    # Re-run parsers with adapters: noop must be transparent, serialize must
    # produce JSON strings that round-trip to the unadapted output
    output_noop = list(run_all_extractors_on_directory(path, default_adapter='noop'))
    assert output == output_noop
    output_json = list(run_all_extractors_on_directory(path, default_adapter='serialize'))
    assert output == [ExtractResult(x.group, x.extractor, json.loads(x.metadata)) for x in output_json]

    # Test the matching: adapter_map applies per-extractor, default_adapter covers the rest
    output_matching = list(run_all_extractors_on_directory(path, adapter_map={'file': 'serialize'}))
    assert all(isinstance(x.metadata, str if x.extractor == 'file' else dict)
               for x in output_matching)
    output_matching = list(run_all_extractors_on_directory(path, adapter_map={'file': 'noop'},
                                                          default_adapter='serialize'))
    assert all(isinstance(x.metadata, str if x.extractor != 'file' else dict)
               for x in output_matching)

    # This matching test fails if we have other packages with adapters on the system
    # NOTE(review): pyproject.toml also registers 'greedy_serialize', so this
    # branch may never execute — confirm the guard set is still correct
    adapters = set(get_available_adapters().keys())
    if adapters == {'noop', 'serialize'}:
        output_matching = list(run_all_extractors_on_directory(path, adapter_map='match',
                                                               default_adapter='serialize'))
        assert all(isinstance(x.metadata, str if x.extractor != 'noop' else dict)
                   for x in output_matching)

    # Test the error case: only 'match' is a valid adapter_map string
    with pytest.raises(ValueError):
        list(run_all_extractors_on_directory(path, adapter_map='matching',
                                             default_adapter='serialize'))

    # Test specifying parsers: include/exclude lists filter which extractors run,
    # and combining them (or naming an unknown extractor) is an error
    assert set([x.extractor for x in output]).issuperset(['image', 'generic'])
    output_limit = list(run_all_extractors_on_directory(path, exclude_extractors=['image']))
    assert 'image' not in [x.extractor for x in output_limit]
    output_limit = list(run_all_extractors_on_directory(path, include_extractors=['image']))
    assert set([x.extractor for x in output_limit]) == {'image'}
    with pytest.raises(ValueError):
        list(run_all_extractors_on_directory(path, include_extractors=['image'],
                                             exclude_extractors=['image']))
    with pytest.raises(ValueError):
        list(run_all_extractors_on_directory(path, include_extractors=['totally-not-a-parser']))
72 |
73 |
def test_list_adapters():
    """The 'noop' adapter should be discoverable as a registered plugin"""
    adapters = get_available_adapters()
    assert 'noop' in adapters
76 |
77 |
def test_set_nested_dict():
    """Exercise set_nested_dict_value: no-op on None, path creation, and override handling"""
    def fresh():
        # Build a pristine copy of the starting dictionary
        return {
            'key1': 'val1',
            'key2': {
                'key2.1': 'val2.1',
                'key2.2': 'val2.2'}
        }

    reference = fresh()
    target = fresh()

    # Setting None should leave the dictionary untouched
    set_nested_dict_value(target, ('key3', 'key3.1'), None)
    assert reference == target

    # A real value creates the nested path
    set_nested_dict_value(target, ('key3', 'key3.1'), 4)
    assert target == {**fresh(), 'key3': {'key3.1': 4}}

    # Without override, an existing value is preserved
    set_nested_dict_value(target, ('key3', 'key3.1'), 5, override=False)
    assert target == {**fresh(), 'key3': {'key3.1': 4}}

    # With override, the existing value is replaced
    set_nested_dict_value(target, ('key3', 'key3.1'), 5, override=True)
    assert target == {**fresh(), 'key3': {'key3.1': 5}}
121 |
--------------------------------------------------------------------------------
/tests/data/tdb/test_PbTe.TDB:
--------------------------------------------------------------------------------
1 | $ Reference: "Ab initio study of intrinsic point defects in PbTe: an insight into phase
2 | $ stability", Bajaj, S., et al. Acta Materialia 92, 2015, doi: 10.1016/j.actamat.2015.03.034
3 |
4 | ELEMENT VA VACANCY 0.0000E+00 0.0000E+00 0.0000E+00!
5 | ELEMENT /- ELECTRON_GAS 0.0000E+00 0.0000E+00 0.0000E+00!
6 | ELEMENT PB FCC_A1 2.072E+02 6.870E+03 6.48E+01!
7 | ELEMENT TE HEXAGONAL_A8 1.2760E+02 6.1212E+03 4.9497E+01!
8 |
9 |
10 | SPECIES PBTE_L PB1TE1 !
11 |
12 |
13 | FUNCTION GHSERPB 298.15 -7650.09+101.7*T-24.5242*T*LN(T)
14 | -0.00365895*T**2-2.4395E-007*T**3; 600.61 Y
15 | -10531.1+154.243*T-32.4914*T*LN(T)
16 | +0.00154613*T**2+8.05448E+025*T**(-9); 1200 Y
17 | 4157.62+53.1391*T-18.9641*T*LN(T)
18 | -0.00288294*T**2+9.8144E-008*T**3-2.69676E+006*T**(-1)+8.05448E+025*T**(-9); 2100 N !
19 | FUNCTION GHSERTE 2.98150E+02 -10544.679+183.372894*T-35.6687*T*LN(T)
20 | +.01583435*T**2-5.240417E-06*T**3+155015*T**(-1); 7.22660E+02 Y
21 | +9160.595-129.265373*T+13.004*T*LN(T)-.0362361*T**2+5.006367E-06*T**3
22 | -1286810*T**(-1); 1.15000E+03 Y
23 | -12781.349+174.901226*T-32.5596*T*LN(T); 1.60000E+03 N !
24 | FUNCTION GLIQTE 2.98150E+02 -17554.731+685.877639*T
25 | -126.318*T*LN(T)+.2219435*T**2-9.42075E-05*T**3+827930*T**(-1);
26 | 6.26490E+02 Y
27 | -3165763.48+46756.357*T-7196.41*T*LN(T)+7.09775*T**2-.00130692833*T**3
28 | +2.58051E+08*T**(-1); 7.22660E+02 Y
29 | +180326.959-1500.57909*T+202.743*T*LN(T)-.142016*T**2+1.6129733E-05*T**3
30 | -24238450*T**(-1); 1.15000E+03 Y
31 | +6328.687+148.708299*T-32.5596*T*LN(T); 1.60000E+03 N REF0 !
32 | FUNCTION GLIQPB 298.15 -2977.96+93.9496*T-24.5242*T*LN(T)
33 | -0.00365895*T**2-2.4395E-007*T**3-6.019E-019*T**7; 600.61 Y
34 | -5677.96+146.176*T-32.4914*T*LN(T)
35 | +0.00154613*T**2; 1200 Y
36 | 9010.75+45.0719*T-18.9641*T*LN(T)
37 | -0.00288294*T**2+9.8144E-008*T**3-2.69676E+006*T**(-1); 2100 N !
38 |
39 |
40 | PHASE LIQUID % 1 1 !
41 | CONSTITUENT LIQUID :PB,TE,PBTE_L : !
42 | PARAMETER G(LIQUID,PB;0) 298.15 -2977.96+93.9496*T-24.5242*T*LN(T)
43 | -0.00365895*T**2-2.4395E-007*T**3-6.019E-019*T**7; 600.61 Y
44 | -5677.96+146.176*T-32.4914*T*LN(T)
45 | +0.00154613*T**2; 1200 Y
46 | 9010.75+45.0719*T-18.9641*T*LN(T)
47 | -0.00288294*T**2+9.8144E-008*T**3-2.69676E+006*T**(-1); 2100 N !
48 | PARAMETER G(LIQUID,TE;0) 2.98150E+02 -17554.731+685.877639*T
49 | -126.318*T*LN(T)+.2219435*T**2-9.42075E-05*T**3+827930*T**(-1);
50 | 6.26490E+02 Y
51 | -3165763.48+46756.357*T-7196.41*T*LN(T)+7.09775*T**2-.00130692833*T**3
52 | +2.58051E+08*T**(-1); 7.22660E+02 Y
53 | +180326.959-1500.57909*T+202.743*T*LN(T)-.142016*T**2+1.6129733E-05*T**3
54 | -24238450*T**(-1); 1.15000E+03 Y
55 | +6328.687+148.708299*T-32.5596*T*LN(T); 1.60000E+03 N REF0 !
56 | PARAMETER G(LIQUID,PBTE_L;0) 2.98150E+02 GLIQPB#+GLIQTE#-61700+18.9*T; 2.00000E+03 N REF0 !
57 | PARAMETER G(LIQUID,PB,PBTE_L;0) 2.98150E+02 15965.83-3.8*T; 2.00000E+03 N REF0 !
58 | PARAMETER G(LIQUID,PB,PBTE_L;1) 2.98150E+02 3681.91; 2.00000E+03 N REF0 !
59 | PARAMETER G(LIQUID,TE,PBTE_L;0) 2.98150E+02 -6216.19+5.56*T; 2.00000E+03 N REF0 !
60 | PARAMETER G(LIQUID,TE,PBTE_L;1) 2.98150E+02 1174.92; 2.00000E+03 N REF0 !
61 |
62 | $OPTIMIZATION P1 -200000 -86518; -25000 N !
63 | $OPTIMIZATION P2 -250 -25.657; 30 N !
64 | $OPTIMIZATION P3 -200000 -85071; -25000 N !
65 | $OPTIMIZATION P4 -250 -23; 50 N !
66 | PHASE PBTE % 2 1 1 !
67 | CONSTITUENT PBTE :PB,VA:TE,VA:!
68 | PARAMETER G(PBTE,PB:TE;0) 298.15 GHSERPB+GHSERTE-65055+5.87*T; 2000 N !
69 | PARAMETER G(PBTE,PB:VA;0) 298.15 GHSERPB+174091.2; 2000 N !
70 | PARAMETER G(PBTE,VA:TE;0) 298.15 GHSERTE+157960.355; 2000 N !
71 | PARAMETER G(PBTE,PB,VA:TE;0) 298.15 -103462.5-5.2714*T; 2000 N !
72 | PARAMETER G(PBTE,PB,VA:TE;1) 298.15 -12000+5.5*T; 2000 N !
73 | PARAMETER G(PBTE,PB:VA,TE;0) 298.15 -84750.59-28.0930*T; 2000 N !
74 | PARAMETER G(PBTE,PB:VA,TE;1) 298.15 -8000+4.5*T; 2000 N !
75 | PARAMETER G(PBTE,PB,VA:TE,VA;0) 298.15 -62405.5-9.919*T; 2000 N !
76 |
77 | PHASE HEXAGONAL_A8 % 1 1 !
78 | CONSTITUENT HEXAGONAL_A8 :TE:!
79 | PARAMETER G(HEXAGONAL_A8,TE;0) 298.15 -10544.7+183.373*T-35.6687*T*LN(T)
80 | +0.0158344*T**2-5.24042E-006*T**3+155015*T**(-1); 722.66 Y
81 | 9160.59-129.265*T+13.004*T*LN(T)
82 | -0.0362361*T**2+5.00637E-006*T**3-1.28681E+006*T**(-1); 1150 Y
83 | -12781.3+174.901*T-32.5596*T*LN(T); 1600 N !
84 |
85 | PHASE RHOMBOHEDRAL_A7 % 1 1.0 !
86 | CONSTITUENT RHOMBOHEDRAL_A7 :PB,TE:!
87 | PARAMETER G(RHOMBOHEDRAL_A7,PB;0) 298.15 -7350.09+102.7*T-24.5242*T*LN(T)
88 | -0.00365895*T**2-2.4395E-007*T**3; 600.61 Y
89 | -10231.1+155.243*T-32.4914*T*LN(T)
90 | +0.00154613*T**2+8.05448E+025*T**(-9); 1200 Y
91 | 4457.62+54.1391*T-18.9641*T*LN(T)
92 | -0.00288294*T**2+9.8144E-008*T**3-2.69676E+006*T**(-1)+8.05448E+025*T**(-9); 2100 N !
93 | PARAMETER G(RHOMBOHEDRAL_A7,TE;0) 2.98150E+02 +500+GHSERTE#;
94 | 2.00000E+03 N REF0 !
95 |
96 |
97 | LIST_OF_REFERENCES
98 | NUMBER SOURCE
99 | !
100 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/scythe/base.py:
--------------------------------------------------------------------------------
1 | from typing import List, Iterator, Tuple, Iterable, Union, Sequence
2 | from abc import ABC, abstractmethod
3 | import logging
4 | import os
5 |
6 | from scythe.utils.grouping import preprocess_paths
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | class BaseExtractor(ABC):
12 | """Abstract base class for a metadata extractor
13 |
14 | This class defines the interface for all extractors in Scythe. Each new extractor must
15 | implement the :meth:`parse`, :meth:`version`, and :meth:`implementors` functions. The
16 | :meth:`group` method should be overridden to generate smart groups of file (e.g., associating
17 | the inputs and outputs to the same calculation) :meth:`citations` can be used if there
18 | are papers that should be cited if the extractor is used as part of a scientific publication.
19 |
20 | See the `Scythe Contributor Guide `_ for further details.
21 | """
22 |
def identify_files(self, path: str, context: dict = None) -> \
        Iterator[Tuple[str]]:
    """Locate all groups of files likely to be compatible with this extractor

    Walks the directory tree under ``path`` and applies :meth:`group` to the
    contents of each directory to decide which files belong together.

    Args:
        path (str): Root of directory to group together
        context (dict): Context about the files
    Yields:
        ([str]) Groups of eligible files
    """
    for parent, child_dirs, child_files in os.walk(path):
        # Expand entries into full paths before grouping
        dir_paths = [os.path.join(parent, name) for name in child_dirs]
        file_paths = [os.path.join(parent, name) for name in child_files]
        yield from self.group(file_paths, dir_paths, context)
45 |
def extract_directory(self, path: str, context: dict = None) -> \
        Iterator[Tuple[Tuple[str], dict]]:
    """Run extractor on all appropriate files in a directory

    Groups whose extraction raises an exception are silently skipped.

    Args:
        path (str): Root of directory to extract metadata from
        context (dict): Context about the files
    Yields:
        ([str], dict): Tuple of the group identity and the metadata unit
    """
    for file_group in self.identify_files(path, context):
        try:
            record = self.extract(file_group, context)
        except Exception:
            # Extractors are routinely pointed at files they cannot parse;
            # such failures are expected and skipped
            continue
        yield file_group, record
66 |
@abstractmethod
def extract(self, group: Iterable[str], context: dict = None) -> dict:
    """Extract metadata from a group of files

    A group of files is a set of 1 or more files that describe the same object
    and will be used together to create a single metadata record.

    Note: exceptions raised by implementations are swallowed by
    :meth:`extract_directory`, which skips the failing group.

    Arguments:
        group ([str]): A list of one or more files that should be parsed together
        context (dict): Context about the files

    Returns:
        (dict): The parsed results, in JSON-serializable format.
    """
81 |
82 | def group(self, files: Union[str, List[str]], directories: List[str] = None,
83 | context: dict = None) -> Iterator[Tuple[str, ...]]:
84 | """Identify a groups of files and directories that should be parsed together
85 |
86 | Will create groups using only the files and directories included as input.
87 |
88 | The files of files are _all_ files that could be read by this extractor,
89 | which may include many false positives.
90 |
91 | Args:
92 | files (str or [str]): List of files to consider grouping
93 | directories ([str]): Any directories to consider group as well
94 | context (dict): Context about the files
95 | Yields:
96 | ((str)): Groups of files
97 | """
98 |
99 | # Make sure file paths are strings or Path-like objects
100 | files = preprocess_paths(files)
101 |
102 | # Default: Every file is in its own group
103 | for f in files:
104 | yield f,
105 |
106 | def citations(self) -> List[str]:
107 | """Citation(s) and reference(s) for this extractor
108 |
109 | Returns:
110 | ([str]): each element should be a string citation in BibTeX format
111 | """
112 | return []
113 |
    @abstractmethod
    def implementors(self) -> List[str]:
        """List of implementors of the extractor

        These people are the points-of-contact for addressing errors or modifying the extractor

        Returns:
            ([str]): List of implementors in the form "FirstName LastName <email>"
        """
123 |
    @abstractmethod
    def version(self) -> str:
        """Return the version of the extractor

        Utility operations and adapters compare this version to warn when
        previously-generated metadata is out of date.

        Returns:
            (str): Version of the extractor
        """
131 |
132 | @property
133 | def schema(self) -> dict:
134 | """Schema for the output of the extractor"""
135 | return {
136 | "$schema": "http://json-schema.org/schema#"
137 | }
138 |
139 |
class BaseSingleFileExtractor(BaseExtractor):
    """Base class for extractors that only ever consider a single file at a time

    Instead of implementing :meth:`extract`, implement :meth:`_extract_file`"""

    @abstractmethod
    def _extract_file(self, path: str, context=None):
        """Generate the metadata for a single file

        Args:
            path (str): Path to the file
            context (dict): Optional context information about the file
        Returns:
            (dict): Metadata for the file
        """

    def extract(self, group: Union[str, Sequence[str]], context=None):
        """Extract metadata from a single file

        Args:
            group (str or [str]): A single path, or a group of exactly one path
            context (dict): Optional context information about the file
        Returns:
            (dict): Metadata for the file
        Raises:
            ValueError: If the group does not contain exactly one file
        """
        # Error catching: allows for single files to be passed not as list
        if isinstance(group, str):
            return self._extract_file(group, context)

        # The group must contain exactly one file. Checking `!= 1` (rather
        # than only `> 1`) also turns an empty group into a clear ValueError
        # instead of an obscure IndexError below.
        if len(group) != 1:
            raise ValueError('Extractor only takes a single file at a time')

        return self._extract_file(group[0], context)
166 |
--------------------------------------------------------------------------------
/tests/data/tdb/test_AuSi.TDB:
--------------------------------------------------------------------------------
1 | $ Reference: "Phase stability in nanoscale material systems: extension from bulk phase diagrams", Bajaj, S. et al,
2 | $ Nanoscale 7, 2015, doi: 10.1039/C5NR01535A
3 | $ Database file written 2013- 8-24
4 | $ From database: PURE4
5 | ELEMENT /- ELECTRON_GAS 0.0000E+00 0.0000E+00 0.0000E+00!
6 | ELEMENT VA VACUUM 0.0000E+00 0.0000E+00 0.0000E+00!
7 | ELEMENT AU FCC_A1 1.9697E+02 6.0166E+03 4.7488E+01!
8 | ELEMENT SI DIAMOND_A4 2.8085E+01 3.2175E+03 1.8820E+01!
9 |
10 |
11 | FUNCTION GHSERAU 2.98150E+02 -6938.856+106.830098*T-22.75455*T*LN(T)
12 | -.00385924*T**2+3.79625E-07*T**3-25097*T**(-1); 9.29400E+02 Y
13 | -93586.481+1021.69543*T-155.706745*T*LN(T)+.08756015*T**2
14 | -1.1518713E-05*T**3+10637210*T**(-1); 1.33733E+03 Y
15 | +314067.829-2016.37825*T+263.252259*T*LN(T)-.118216828*T**2
16 | +8.923844E-06*T**3-67999832*T**(-1); 1.73580E+03 Y
17 | -12133.783+165.272524*T-30.9616*T*LN(T); 3.20000E+03 N !
18 | FUNCTION GHSERSI 2.98150E+02 -8162.609+137.236859*T-22.8317533*T*LN(T)
19 | -.001912904*T**2-3.552E-09*T**3+176667*T**(-1); 1.68700E+03 Y
20 | -9457.642+167.281367*T-27.196*T*LN(T)-4.20369E+30*T**(-9);
21 | 3.60000E+03 N !
22 | FUNCTION UN_ASS 298.15 0; 300 N !
23 |
24 | TYPE_DEFINITION % SEQ *!
25 | DEFINE_SYSTEM_DEFAULT ELEMENT 2 !
26 | DEFAULT_COMMAND DEF_SYS_ELEMENT VA /- !
27 |
28 |
29 | PHASE LIQUID:L % 1 1.0 !
30 | CONSTITUENT LIQUID:L :AU,SI : !
31 |
32 | PARAMETER G(LIQUID,AU;0) 2.98150E+02 +5613.144+97.444232*T
33 | -22.75455*T*LN(T)-.00385924*T**2+3.79625E-07*T**3-25097*T**(-1);
34 | 9.29400E+02 Y
35 | -81034.481+1012.30956*T-155.706745*T*LN(T)+.08756015*T**2
36 | -1.1518713E-05*T**3+10637210*T**(-1); 1.33733E+03 Y
37 | +326619.829-2025.76412*T+263.252259*T*LN(T)-.118216828*T**2
38 | +8.923844E-06*T**3-67999832*T**(-1); 1.73580E+03 Y
39 | +418.217+155.886658*T-30.9616*T*LN(T); 3.20000E+03 N REF1 !
40 | PARAMETER G(LIQUID,SI;0) 2.98150E+02 +42533.751+107.13742*T
41 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1)
42 | +2.09307E-21*T**7; 1.68700E+03 Y
43 | +40370.523+137.722298*T-27.196*T*LN(T); 3.60000E+03 N REF1 !
44 | PARAMETER G(LIQUID,AU,SI;0) 2.98150E+02 -24103.3028-15.13883*T;
45 | 6.00000E+03 N REF0 !
46 | PARAMETER G(LIQUID,AU,SI;1) 2.98150E+02 -29375.2777+1.1065*T;
47 | 6.00000E+03 N REF0 !
48 | PARAMETER G(LIQUID,AU,SI;2) 2.98150E+02 -13032.2412; 6.00000E+03 N
49 | REF0 !
50 |
51 |
52 | TYPE_DEFINITION & GES A_P_D BCC_A2 MAGNETIC -1.0 4.00000E-01 !
53 | PHASE BCC_A2 %& 2 1 3 !
54 | CONSTITUENT BCC_A2 :AU,SI : VA : !
55 |
56 | PARAMETER G(BCC_A2,AU:VA;0) 2.98150E+02 -2688.856+105.730098*T
57 | -22.75455*T*LN(T)-.00385924*T**2+3.79625E-07*T**3-25097*T**(-1);
58 | 9.29400E+02 Y
59 | -89336.481+1020.59543*T-155.706745*T*LN(T)+.08756015*T**2
60 | -1.1518713E-05*T**3+10637210*T**(-1); 1.33733E+03 Y
61 | +318317.829-2017.47825*T+263.252259*T*LN(T)-.118216828*T**2
62 | +8.923844E-06*T**3-67999832*T**(-1); 1.73580E+03 Y
63 | -7883.783+164.172524*T-30.9616*T*LN(T); 3.20000E+03 N REF1 !
64 | PARAMETER G(BCC_A2,SI:VA;0) 2.98150E+02 +38837.391+114.736859*T
65 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1);
66 | 1.68700E+03 Y
67 | +37542.358+144.781367*T-27.196*T*LN(T)-4.20369E+30*T**(-9); 3.60000E+03
68 | N REF1 !
69 |
70 |
71 | TYPE_DEFINITION ' GES A_P_D CBCC_A12 MAGNETIC -3.0 2.80000E-01 !
72 | PHASE CBCC_A12 %' 2 1 1 !
73 | CONSTITUENT CBCC_A12 :SI : VA : !
74 |
75 | PARAMETER G(CBCC_A12,SI:VA;0) 2.98150E+02 +42045.391+116.859859*T
76 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1);
77 | 1.68700E+03 Y
78 | +40750.358+146.904367*T-27.196*T*LN(T)-4.20369E+30*T**(-9); 3.60000E+03
79 | N REF1 !
80 |
81 |
82 | PHASE CUB_A13 % 2 1 1 !
83 | CONSTITUENT CUB_A13 :SI : VA : !
84 |
85 | PARAMETER G(CUB_A13,SI:VA;0) 2.98150E+02 +39116.391+116.859859*T
86 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1);
87 | 1.68700E+03 Y
88 | +37821.358+146.904367*T-27.196*T*LN(T)-4.20369E+30*T**(-9); 3.60000E+03
89 | N REF1 !
90 |
91 |
92 | PHASE DIAMOND_A4 % 1 1.0 !
93 | CONSTITUENT DIAMOND_A4 :AU,SI : !
94 |
95 | PARAMETER G(DIAMOND_A4,AU;0) 2.98150E+02 +GHSERAU#+12552+20.61589*T;
96 | 6.00000E+03 N REF0 !
97 | PARAMETER G(DIAMOND_A4,SI;0) 2.98150E+02 +GHSERSI#; 3.60000E+03 N
98 | REF1 !
99 | PARAMETER G(DIAMOND_A4,AU,SI;0) 2.98150E+02 40000; 6.00000E+03 N
100 | REF0 !
101 |
102 |
103 | TYPE_DEFINITION ( GES A_P_D FCC_A1 MAGNETIC -3.0 2.80000E-01 !
104 | PHASE FCC_A1 %( 2 1 1 !
105 | CONSTITUENT FCC_A1 :AU,SI : VA : !
106 |
107 | PARAMETER G(FCC_A1,AU:VA;0) 2.98150E+02 +GHSERAU#; 3.20000E+03 N REF1 !
108 | PARAMETER G(FCC_A1,SI:VA;0) 2.98150E+02 +42837.391+115.436859*T
109 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1);
110 | 1.68700E+03 Y
111 | +41542.358+145.481367*T-27.196*T*LN(T)-4.20369E+30*T**(-9); 3.60000E+03
112 | N REF1 !
113 | PARAMETER G(FCC_A1,AU,SI:VA;0) 2.98150E+02 2000; 6.00000E+03 N REF0 !
114 |
115 |
116 | TYPE_DEFINITION ) GES A_P_D HCP_A3 MAGNETIC -3.0 2.80000E-01 !
117 | PHASE HCP_A3 %) 2 1 .5 !
118 | CONSTITUENT HCP_A3 :AU,SI : VA : !
119 |
120 | PARAMETER G(HCP_A3,AU:VA;0) 2.98150E+02 -6698.106+108.430098*T
121 | -22.75455*T*LN(T)-.00385924*T**2+3.79625E-07*T**3-25097*T**(-1);
122 | 9.29400E+02 Y
123 | -93345.731+1023.29543*T-155.706745*T*LN(T)+.08756015*T**2
124 | -1.1518713E-05*T**3+10637210*T**(-1); 1.33733E+03 Y
125 | +314308.579-2014.77825*T+263.252259*T*LN(T)-.118216828*T**2
126 | +8.923844E-06*T**3-67999832*T**(-1); 1.73580E+03 Y
127 | -11893.033+166.872524*T-30.9616*T*LN(T); 3.20000E+03 N REF1 !
128 | PARAMETER G(HCP_A3,SI:VA;0) 2.98150E+02 +41037.391+116.436859*T
129 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1);
130 | 1.68700E+03 Y
131 | +39742.358+146.481367*T-27.196*T*LN(T)-4.20369E+30*T**(-9); 3.60000E+03
132 | N REF1 !
133 |
134 |
135 | PHASE HCP_ZN % 2 1 .5 !
136 | CONSTITUENT HCP_ZN :SI : VA : !
137 |
138 | PARAMETER G(HCP_ZN,SI:VA;0) 2.98150E+02 +41038.391+116.436859*T
139 | -22.8317533*T*LN(T)-.001912904*T**2-3.552E-09*T**3+176667*T**(-1);
140 | 1.68700E+03 Y
141 | +39743.358+146.481367*T-27.196*T*LN(T)-4.20369E+30*T**(-9); 3.60000E+03
142 | N REF1 !
143 |
144 | LIST_OF_REFERENCES
145 | NUMBER SOURCE
146 | REF1 'PURE4 - SGTE Pure Elements (Unary) Database (Version 4.6),
147 | developed by SGTE (Scientific Group Thermodata Europe), 1991-2008,
148 | and provided by TCSAB (Jan. 2008). '
149 | !
150 |
151 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | # import os
16 | # import sys
17 | # sys.path.insert(0, os.path.abspath('.'))
18 |
19 | from scythe import __version__
20 |
21 |
# -- Project information -----------------------------------------------------

project = 'Scythe'
copyright = '2019 - 2022, Materials Data Facility Team, Citrine Informatics'
author = 'Materials Data Facility Team, Citrine Informatics'

# The short X.Y version
version = __version__
# The full version, including alpha/beta/rc tags
release = version


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'stevedore.sphinxext',
    'sphinx.ext.autosectionlabel',
    'sphinx.ext.githubpages'
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE: Sphinx expects a lowercase language code such as 'en'; the previous
# value 'EN-US' is not a recognized code and triggers warnings on newer Sphinx.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
80 |
81 |
# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'Scythedoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'Scythe.tex', 'Scythe Documentation',
     'Materials Data Facility Team, Citrine Informatics', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# NOTE: the man page name previously used the pre-rename project name
# 'materialsio'; it now matches the Scythe naming used everywhere else
# in this configuration (e.g. ``project`` and ``htmlhelp_basename``).
man_pages = [
    (master_doc, 'scythe', 'Scythe Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'Scythe', 'Scythe Documentation',
     author, 'Scythe', 'One line description of project.',
     'Miscellaneous'),
]
166 |
167 |
# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']


# -- Extension configuration -------------------------------------------------

# Concatenate the class docstring and the __init__ docstring when
# autodoc documents a class.
autoclass_content = "both"

# -- Options for intersphinx extension ---------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'python': ('https://docs.python.org/3/', None)}

# -- Options for todo extension ----------------------------------------------

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
199 |
--------------------------------------------------------------------------------
/scythe/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Union, Tuple, Any, Callable, Optional, List, TypedDict
2 | import logging
3 |
4 | logger = logging.getLogger(__name__)
5 |
6 |
def get_nested_dict_value_by_path(nest_dict: Dict,
                                  path: Union[Tuple, str],
                                  cast: Optional[Callable] = None) -> Any:
    """Get the value from within a nested dictionary structure by traversing
    into the dictionary as deep as that path found and returning that value

    Args:
        nest_dict: A dictionary of dictionaries that is to be queried
        path: A string or tuple that specifies the subsequent keys needed to
            get to a value within `nest_dict`. If a string, the value will
            return just from the first level (mostly for convenience)
        cast: A function that (if provided) will be applied to the value. This
            helps with serialization. If it raises an error, the value will be
            returned as is without conversion

    Returns:
        The value at the path within the nested dictionary; if there's no
        value there, return ``None``
    """
    sub_dict = nest_dict

    # Allow a bare string as shorthand for a single-element path
    if isinstance(path, str):
        path = (path,)

    for key in path:
        try:
            sub_dict = sub_dict[key]
        except (KeyError, TypeError, IndexError):
            # KeyError: the key is missing; TypeError/IndexError: an
            # intermediate value is not indexable by this key, so the path
            # does not exist -- both mean "no value there"
            return None

    # coerce empty values to None
    if sub_dict in ({}, [], '', None):
        return None

    if cast is None:
        return sub_dict

    # noinspection PyBroadException
    try:
        return cast(sub_dict)
    except Exception as e:
        logger.warning(f"Exception encountered when casting value using {cast}: {e}; returning "
                       f"value as is without casting")
        return sub_dict
51 |
52 |
def set_nested_dict_value(nest_dict: Dict, path: Tuple,
                          value: Any, override: Optional[bool] = False, ):
    """Set a value within a nested dictionary structure by walking down the
    dictionary along ``path`` and assigning ``value`` at the final key.
    A ``value`` of ``None`` is a no-op.
    Cribbed from https://stackoverflow.com/a/13688108/1435788

    Args:
        nest_dict: A dictionary of dictionaries that is to be queried
        path: A tuple (or other iterable type) that specifies the subsequent
            keys needed to get to a value within `nest_dict`
        value: The value which will be given to the path in the nested
            dictionary
        override: If the value is already present, this flag controls whether
            to override its existing value
    """
    if value is None:
        return

    # Check whether a (non-empty) value already lives at the target path
    existing = get_nested_dict_value_by_path(nest_dict, path)

    # Walk down to the parent of the final key, creating levels as needed
    parent = nest_dict
    for key in path[:-1]:
        parent = parent.setdefault(key, {})

    # Write only when the slot was empty, or when overriding was requested
    if existing is None or override:
        parent[path[-1]] = value
78 |
79 |
def set_nested_dict_value_with_units(nest_dict: Dict, path: Tuple,
                                     value: Any, units: Optional[str] = None,
                                     override: bool = False,
                                     fn: Optional[Callable] = None):
    """Same as :func:`~scythe.utils.set_nested_dict_value`, but stores the
    value as a dictionary with keys ``'value'`` and (optionally) ``'units'``.
    If ``fn`` is supplied, it is applied to the value before storing it.

    Args:
        nest_dict: A dictionary of dictionaries that is to be queried
        path: A tuple (or other iterable type) that specifies the subsequent
            keys needed to get to a value within ``nest_dict``.
        value: The value which will be given to the path in the nested
            dictionary
        units: If provided, will set the value at the given path to the
            provided units
        override: Whether to override a value if there is one already present
            at the path given
        fn: A callable function to apply to the value; can be used (for example)
            to convert a value form one unit to another, or any other purpose
    """
    # Nothing to store
    if value is None:
        return

    processed = fn(value) if fn is not None else value
    payload = {'value': processed}
    if units is not None:
        payload['units'] = units
    set_nested_dict_value(nest_dict, path, payload, override)
109 |
110 |
# type definition for the mapping dictionaries
# (``Optional[X]`` already means ``Union[X, None]``, so the previous
# ``Optional[Union[None, X]]`` spellings were redundant)
MappingElements = TypedDict('MappingElements',
                            {'source_dict': Dict,
                             'source_path': Union[str, Tuple[str, ...]],
                             'dest_dict': Dict,
                             'dest_path': Union[str, Tuple[str, ...]],
                             'cast_fn': Optional[Callable],
                             'units': Optional[str],
                             'conv_fn': Optional[Callable],
                             'override': Optional[bool]})
"""TypedDict: A TypedDict to specify the exact types expected when creating a
mapping dictionary to map metadata from one place to another.
"""
124 |
125 |
def map_dict_values(mapping: List[MappingElements]):
    """
    Helper method to map values from one dictionary into another.
    Inspired by the implementation in :func:`hyperspy.io.dict2signal`

    For each mapping we need a source dict and destination dict, then for
    each term, the source path, the destination path, the cast function,
    the units to set, and potentially a conversion function

    Args:
        mapping: should be a list of dicts, for example:
            [
                {'source_dict': source,
                 'dest_dict': dest,
                 'source_path': ('source', 'path',),
                 'dest_path': ('dest', 'path',),
                 'cast_fn': float,
                 'units': str,
                 'conv_fn': lambda x: x,
                 'override': bool}
            ]
    """
    for m in mapping:
        # Fill in defaults for the optional keys so the lookups below
        # cannot fail
        m.setdefault('cast_fn', None)
        m.setdefault('units', None)
        m.setdefault('conv_fn', None)
        # Bug fix: 'override' is read below but was never defaulted, so
        # mappings that omitted it raised a KeyError; default to False,
        # matching set_nested_dict_value_with_units
        m.setdefault('override', False)

        value = get_nested_dict_value_by_path(
            nest_dict=m['source_dict'],
            path=m['source_path'],
            cast=m['cast_fn'])
        set_nested_dict_value_with_units(
            nest_dict=m['dest_dict'], path=m['dest_path'], value=value,
            units=m['units'], fn=m['conv_fn'], override=m['override'])
158 |
159 |
def standardize_unit(u: str) -> str:
    """
    Helper method to convert typically seen unit representations into a
    standardized representation from QUDT
    (http://www.qudt.org/doc/DOC_VOCAB-UNITS.html). This is
    non-exhaustive, and may need to be updated as more types of units are
    encountered

    Args:
        u: The unit representation to convert

    Returns:
        The unit in a QUDT-standard representation (if known; otherwise just
        returns the unit representation as provided)
    """
    qudt_names = {
        # length
        'km': 'KiloM', 'cm': 'CentiM', 'm': 'M', 'mm': 'MilliM',
        'µm': 'MicroM', 'um': 'MicroM', 'nm': 'NanoM', 'pm': 'PicoM',
        'Å': 'ANGSTROM',
        # current
        'A': 'A', 'mA': 'MilliA', 'nA': 'NanoA', 'pA': 'PicoA',
        'µA': 'MicroA', 'uA': 'MicroA',
        # energy
        'eV': 'EV', 'GeV': 'GigaEV', 'keV': 'KiloEV', 'MeV': 'MegaEV',
        # mass
        'g': 'GM', 'kg': 'KiloGM',
        # potential
        'V': 'V', 'kV': 'KiloV', 'MV': 'MegaV', 'mV': 'MilliV',
        'uV': 'MicroV', 'µV': 'MicroV',
        # inverse lengths
        '1/nm': 'PER-NanoM', '1/mm': 'PER-MilliM', '1/m': 'PER-M',
        '1/cm': 'PER-CentiM', '1/um': 'PER-MicroM', '1/µm': 'PER-MicroM',
        '1/pm': 'PER-PicoM'
    }
    # Unknown units pass through unchanged
    return qudt_names.get(u, u)
199 |
--------------------------------------------------------------------------------
/docs/source/user-guide.rst:
--------------------------------------------------------------------------------
1 | User Guide
2 | ==========
3 |
4 | In this part of the guide, we show a simple example of using a Scythe extractor and discuss the
5 | full functionality of an extractor.
6 |
7 | Installing Scythe (for users)
8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 |
10 | Installing Scythe should be as easy as a single ``pip`` command. Assuming you have a
11 | version of Python that is 3.8 or higher, running::
12 |
13 | pip install scythe-extractors
14 |
15 | Should get the basics of Scythe installed. By default however, only a small subset of
16 | extractors will be installed (this is done so you do not need to install all the dependencies of
17 | extractors you may never use). To install additional extractors, you can specify "extras" at install time
18 | using the ``[...]`` syntax for ``pip``. For example, if you want to install all the extractors
19 | bundled with Scythe (and their dependencies), run::
20 |
    pip install scythe-extractors[all]
22 |
23 | This will pull in many more packages, but also enable as many extractors as possible. Check the list
24 | under ``[tool.poetry.extras]`` in ``pyproject.toml`` to see all the options you can specify in
25 | the brackets of the ``pip install`` command.
26 |
27 |
28 | Discovering an extractor
29 | ~~~~~~~~~~~~~~~~~~~~~~~~
30 |
31 | Scythe uses `stevedore `_ to manage
32 | a collection of extractors, and has a utility function for listing available extractors::
33 |
34 | from scythe.utils.interface import get_available_extractors
35 | print(get_available_extractors())
36 |
37 | This snippet will print a dictionary of extractors installed on your system. Both extractors that are
38 | part of the Scythe base package and those defined by other packages will be included in this
39 | list.
40 |
41 | Simple Interface
42 | ~~~~~~~~~~~~~~~~
43 |
44 | The methods in :mod:`scythe.utils.interface` are useful for most applications. As an
45 | example, we illustrate the use of :class:`scythe.file.GenericFileExtractor`, which is
46 | available through the ``'generic'`` extractor plugin::
47 |
48 | from scythe.utils.interface import execute_extractor
49 | print(execute_extractor('generic', ['pyproject.toml']))
50 |
51 |
52 | The above snippet creates the extractor object and runs it on a file named ``pyproject.toml``. Run
53 | in the root directory of the Scythe, it would produce output similar to the following,
54 | likely with a different ``sha512`` value if the contents of that file have changed since this
55 | documentation was written:
56 |
57 | .. code:: json
58 |
59 | [{
60 | "data_type": "ASCII text",
61 | "filename": "pyproject.toml",
62 | "length": 2421,
63 | "mime_type": "text/plain",
64 | "path": "pyproject.toml",
65 | "sha512": "a7eb382c4a3e6cf469656453f9ff2e3c1ac2c02c9c2ba31c3d569a09883e2b2471801c39125dafb7c13bfcaf9cf6afbab92afa4c053c0c93a4c8c59acad1b85b"
66 | }]
67 |
68 | The other pre-built parsing function provides the ability to run all extractors on all files in a
69 | directory::
70 |
71 | from scythe.utils.interface import run_all_extractors
72 | gen = run_all_extractors('.')
73 | for record in gen:
74 | print(record)
75 |
76 | A third route for using ``scythe`` is to employ the ``get_extractor`` operation to access a
77 | specific extractor, and then use its class interface (described below)::
78 |
79 | from scythe.utils.interface import get_extractor
80 | extractor = get_extractor('generic')
    gen = extractor.extract_directory('.')
82 | for record in gen:
83 | print(record)
84 |
85 |
86 | Advanced Usage: Adding Context
87 | ++++++++++++++++++++++++++++++
88 |
89 | The function interface for Scythe supports using "context" and "adapters" to provide
additional information to extractors (see `Integrating Scythe into Applications <#id1>`_). Here, we describe the purpose
91 | of context and how to use it in our interface.
92 |
Context is information about the data held in a file that is not contained within the file
itself. Examples include human-friendly descriptions of column names or which values actually
represent a missing measurement in a tabular data file (e.g., CSV files). A limited number of
96 | extractors support context and this information can be provided via the ``execute_extractor``
97 | function::
98 |
99 | execute_extractor('csv', 'tests/data/test.csv', context={'na_values': ['N/A']})
100 |
101 |
102 | The types of context information used by an extractor, if any, is described in the
103 | `documentation for each extractor `_.
104 |
105 | The ``run_all_extractors_on_directory`` function has several options for providing context to the
106 | extractors. These options include specifying "global context" to be passed to every extractor or
107 | adapter and ways of limiting the metadata to specific extractors. See
108 | :meth:`scythe.utils.interface.run_all_extractors_on_directory` for further details on the
109 | syntax for this command.
110 |
111 | .. note::
112 |
113 | *Context is still an experimental feature and APIs are subject to change*
114 |
115 |
116 | Class Interface
117 | ~~~~~~~~~~~~~~~
118 |
119 | The class API of extractors provide access to more detailed features of individual extractors. The
120 | functionality of an extractor is broken into several simple operations.
121 |
122 | Initializing an extractor
123 | +++++++++++++++++++++++++
124 |
125 | The first step to using an extractor is to initialize it. Most extractors do not have any options for
126 | the initializer, so you can create them with::
127 |
128 | extractor = Extractor()
129 |
130 | Some extractors require configuration options that define how the extractor runs, such as the location
131 | of a non-Python executable.
132 |
133 | Parsing Method
134 | ++++++++++++++
135 |
The main operation for any extractor is the data extraction operation: ``extract``.

In most cases, the ``extract`` operation takes the path to a file and returns a summary of the
data the file holds::

    metadata = extractor.extract(['/my/file'])

Some extractors take multiple files that describe the same object (e.g., the input and output files
of a simulation) and use them to generate a single metadata record::

    metadata = extractor.extract(['/my/file.in', '/my/file.out'])

The `grouping method <#grouping-files>`_ for these extractors provides logic to identify groups of
related files.

Some extractors also can use information that is not contained within the files themselves, which can
be provided to the extractor as a "context"::

    metadata = extractor.extract(['/my/file1'], context={'headers': {'temp': 'temperature'}})
155 |
156 | The documentation for the extractor should indicate valid types of context information.
157 |
158 | Grouping Files
159 | ++++++++++++++
160 |
161 | Extractors also provide the ability to quickly find groups of associated files: ``group``.
162 | The ``group`` operation takes path or list of files and, optionally, directories and generates
163 | a list of files that should be treated together when parsing::
164 |
165 | extractor.group(['input.file', 'output.file', 'unrelated']) # -> [('input.file', 'output.file'), ('unrelated',)]
166 |
167 | Parsing Entire Directories
168 | ++++++++++++++++++++++++++
169 |
``scythe`` also provides a utility operation to extract metadata from all groups of valid files in a directory::

    metadata = list(extractor.extract_directory('.'))

``extract_directory`` is a generator function, so we use ``list`` here to turn the output into a list
format.
176 |
177 | Attribution Functions
178 | +++++++++++++++++++++
179 |
180 | Two functions, ``citations`` and ``implementors``, are available to determine who contributed an
181 | extractor. ``implementors`` returns the list of people who created an extractor, who are likely the
182 | points-of-contact for support. ``citations`` indicates if any publications are available that
183 | describe the underlying methods and should be referenced in scientific articles.
184 |
185 | Full Extractor API
186 | ++++++++++++++++++
187 |
188 | The full API for the extractors are described as a Python abstract class:
189 |
190 | .. autoclass:: scythe.base.BaseExtractor
191 | :members:
192 | :member-order: bysource
193 |
194 | Integrating Scythe into Applications
195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 |
197 | Scythe is designed to create a documented, JSON-format version of scientific files, but
198 | these files might not yet be in a form useful for your application. We recommend an "adapter"
199 | approach to post-process these "generic JSON" files that can actually be used for your application.
200 |
201 | BaseAdapter
202 | +++++++++++
203 |
204 | The ``BaseAdapter`` class defines the interface for all adapters.
205 |
206 | .. autoclass:: scythe.adapters.base.BaseAdapter
207 | :member-order: bysource
208 | :noindex:
209 | :members:
210 |
211 | Adapters must fulfill a single operation, ``transform``, which renders metadata from one of the
212 | Scythe extractors into a new form. There are no restrictions on the output for this function,
213 | except that ``None`` indicates that there is no valid transformation for an object.
214 |
215 | The ``check_compatibility`` and ``version`` methods provide a route for marking which versions of
216 | an extractor are compatible with an adapter. ``scythe`` uses the version in utility operations
217 | to provide warnings to users about when an adapter is out-of-date.
218 |
219 | Using Adapters
220 | ++++++++++++++
221 |
222 | The same utility operations `described above <#simple-interface>`_ support using adapters. The
223 | ``execute_extractor`` function has an argument, ``adapter``, that takes the name of the adapter as
224 | an input and causes the parsing operation to run the adapter after parsing. The
225 | ``run_all_extractors`` function also has arguments (e.g., ``adapter_map``) that associate each
226 | extractor with the adapter needed to run after parsing.
227 |
228 | As an example, we will demonstrate an adapter that comes packaged with Scythe:
229 | :class:`scythe.adapters.base.SerializeAdapter`
230 | The serialize adapter is registered using ``stevedore`` as the name "serialize". To use it after
231 | all extractors::
232 |
233 | from scythe.utils.interface import run_all_extractors
234 | gen = run_all_extractors('.', default_adapter='serialize')
235 |
236 | Implementing Adapters
237 | +++++++++++++++++++++
238 |
239 | Any new adapters must inherit from the ``BaseAdapter`` class defined above. You only need
240 | implement the ``transform`` operation.
241 |
242 | Once the adapter is implemented, you need to put it in a project that is installable via pip. See
243 | `python docs <https://docs.python.org/3.7/distutils/setupscript.html>`_ for a detailed tutorial or
244 | copy the structure used by the
245 | `MDF's adapter library <https://github.com/materials-data-facility/mdf-materialsio-adapters>`_.
246 |
247 | Then, register the adapter with ``stevedore`` by adding it as an entry point in your project's
248 | ``setup.py`` or ``pyproject.toml`` file. See the
249 | `stevedore documentation for more detail <https://docs.openstack.org/stevedore/latest/>`_.
250 | We recommend using the same name for an adapter as the extractor it is designed for so that
251 | ``scythe`` can auto-detect the adapters associated with each extractor.
252 |
253 | Examples of Tools Using Scythe
254 | +++++++++++++++++++++++++++++++++++
255 |
256 | Materials Data Facility:
257 | https://github.com/materials-data-facility/mdf-materialsio-adapters
258 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/docs/source/contributor-guide.rst:
--------------------------------------------------------------------------------
1 | Contributor Guide
2 | =================
3 |
4 | Setting up development environment
5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 |
7 | Scythe makes use of the `Poetry `_ project to manage
8 | dependencies and packaging. To install the latest version of Scythe, first install poetry
9 | following `their documentation `_. Once that's
10 | done, clone/download the Scythe repository locally from
11 | `Github `_. Change into that directory
12 | and run ``poetry install`` (it would be a good idea to create a new virtual environment for your
13 | project first too, so as to not mix dependencies with your system environment).
14 |
15 | By default, only a small subset of extractors will be installed (this is done so that you do not
16 | need to install all the dependencies of extractors you may never use). To install additional
17 | extractors, you can specify "extras" at install time using ``poetry``. Any of the values specified
18 | in the ``[tool.poetry.extras]`` section of ``pyproject.toml`` can be provided, including ``all``,
19 | which will install all bundled extractors and their dependencies. For example::
20 |
21 | poetry install -E all
22 |
23 | Poetry will create a dedicated virtual environment for the project and the Scythe code will
24 | be installed in "editable" mode, so any changes you make to the code will be reflected when
25 | running tests, importing extractors, etc. It will use the default version of python available.
26 | Scythe is currently developed and tested against Python versions 3.8.12, 3.9.12, and 3.10.4.
27 | We recommend using the `pyenv `_ project to manage
28 | various python versions on your system if this does not match your system version of Python. It
29 | is required to use ``tox`` as well (see next paragraph). Make sure you install the versions
30 | specified in the ``.python-version`` file by running commands such as ``pyenv install 3.8.12`` etc.
31 |
32 | Additionally, the project uses `tox `_ to simplify common tasks and
33 | to be able to run tests in isolated environments. This will be installed automatically as a
34 | development package when running the ``poetry install`` command above. It can be used to run the
35 | test suite with common settings, as well as building the documentation. For example, to
36 | run the full Scythe test suite on all three versions of Python targeted, just run::
37 |
38 | poetry run tox
39 |
40 | To build the HTML documentation (will be placed inside the ``./docs/_build/`` folder), run::
41 |
42 | poetry run tox -e docs
43 |
44 | For the sake of speed, if you would like to focus your testing on just one Python version, you can
45 | temporarily override the environment list from ``pyproject.toml`` with an environment variable.
46 | For example, to only run the test/coverage suite on Python 3.8.X, run::
47 |
48 | TOXENV=py38 poetry run tox
49 |
50 | Check out the ``[tool.tox]`` section of the ``pyproject.toml`` file to view how these tasks are
51 | configured, and the `tox documentation `_ on how to add your
52 | own custom tasks, if needed.
53 |
54 | Finally, Scythe uses ``flake8`` to enforce code styles, which will be run for you
55 | automatically when using ``tox`` as defined above. Any code-style errors, such as lines longer
56 | than 100 characters, trailing whitespace, etc. will be flagged when running ``poetry run tox``.
57 |
58 | The next part of the Scythe guide details how to add a new extractor to the ecosystem.
59 |
60 | Step 1: Implement the Extractor
61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
62 |
63 | Creating a new extractor is accomplished by implementing the
64 | `BaseExtractor `_ abstract class. If you are new to Scythe, we
65 | recommend reviewing the `User Guide `_ first to learn about
66 | the available methods of BaseExtractor. Minimally, you need only implement the ``extract``,
67 | ``version``, and ``implementors`` operations for a new extractor. Each of these methods (and any
68 | other methods you override) must be stateless, so that running the operation does not change the
69 | behavior of the extractor.
70 |
71 | We also have subclasses of ``BaseExtractor`` that are useful for common types of extractors:
72 |
73 | - ``BaseSingleFileExtractor``: Extractors that only ever evaluate a single file at a time
74 |
75 | Class Attributes and Initializer
76 | --------------------------------
77 |
78 | The ``BaseExtractor`` class supports configuration options as Python class attributes.
79 | These options are intended to define the behavior of an extractor for a particular environment
80 | (e.g., paths of required executables) or for a particular application (e.g., turning off unneeded
81 | features). We recommend limiting these options to be only JSON-serializable data types and for
82 | all to be defined in the ``__init__`` function to simplify text-based configuration files.
83 |
84 | The initializer function should check if an extractor has access to all required external tools, and
85 | throw exceptions if not. For example, an extractor that relies on calling an external command-line
86 | tool should check whether the package is installed. In general, extractors should fail during
87 | initialization and not during the parsing operation if the system is misconfigured.
88 |
89 | Implementing ``extract``
90 | ------------------------
91 |
92 | The ``extract`` method contains the core logic of a Scythe extractor: rendering a summary of a
93 | group of data files. We do not specify any particular schema for the output but we do recommend
94 | best practices:
95 |
96 |
97 | #. *Summaries must be JSON-serializable.*
98 | Limiting to JSON data types ensures summaries are readable by most software without special
99 | libraries. JSON documents are also able to be documented easily.
100 |
101 | #. *Human-readability is desirable.*
102 | JSON summaries should be understandable to users without expert-level knowledge of the data.
103 | Avoid unfamiliar acronyms, such as names of variables in a specific simulation code or settings
104 | specific to a certain brand of instrument.
105 |
106 | #. *Adhere closely to the original format.*
107 | If feasible, try to stay close to the original data format of a file or the output of a library
108 | used for parsing. Deviating from already existing formats complicates modifications to an extractor.
109 |
110 | #. *Always return a dictionary.*
111 | If an extractor can return multiple records from a single file group, return the list as an element
112 | of the dictionary. Any metadata that pertains to each of the sub-records should be stored as
113 | a distinct element rather than being duplicated in each sub-record.
114 |
115 |
116 | We also have recommendations for the extractor behavior:
117 |
118 | #. *Avoid configuration options that change only output format.*
119 | Extractors can take configuration options that alter the output format, but configurations
120 | should be used sparingly. A good use of configuration would be to disable complex parsing
121 | operations if unneeded. A bad use of configuration would be to change the output to match a
122 | different schema. Operations that significantly alter the form but not the content of a
123 | summary should be implemented as adapters.
124 |
125 | #. *Consider whether context should be configuration.*
126 | Settings that are identical for each file could be better suited as configuration settings
127 | than as context.
128 |
129 | Implementing ``group``
130 | ----------------------
131 |
132 | The ``group`` operation finds all sets of files in a user-provided list of files and directories
133 | that should be parsed together. Implementing ``group`` is optional. Implementing a new ``group``
134 | method is required only when the default behavior of "each file is its own group" (i.e., the
135 | extractor only treats files individually) is incorrect.
136 |
137 | The ``group`` operation should not require access to the content of the files or directories to
138 | determine groupings. Being able to determine file groups via only file names improves performance
139 | and allows for determining groups of parsable files without needing to download them from remote
140 | systems.
141 |
142 | Files are allowed to appear in more than one group, but we recommend generating only the largest
143 | valid group of files to minimize the same metadata being generated multiple times.
144 |
145 | It is important to note that file groups are specific to an extractor. Groupings of files that
146 | are meaningful to one extractor need not be meaningful to another. For that reason, limit the
147 | definition of groups to sets of files that can be parsed together without consideration to what
148 | other information makes the files related (e.g., being in the same directory).
149 |
150 | Another appropriate use of the ``group`` operation is to filter out files which are very unlikely
151 | to parse correctly. For example, a PDF extractor could identify only files with a ".pdf" extension.
152 | However, we recommend using filtering sparingly to ensure no files are missed.
153 |
154 | Implementing ``citations`` and ``implementors``
155 | -----------------------------------------------
156 |
157 | The ``citations`` and ``implementors`` methods identify additional resources describing an extractor
158 | and provide credit to contributors. ``implementors`` is required, as this operation is also used
159 | to identify points-of-contact for support requests.
160 |
161 | ``citations`` should return a list of BibTeX-format references.
162 |
163 | ``implementors`` should return a list of people and, optionally, their contact information
164 | in the form: "FirstName LastName ".
165 |
166 | Implementing ``version``
167 | ------------------------
168 |
169 | We require using `semantic versioning `_ for specifying the version of extractors.
170 | As the API of the extractor should remain unchanged, use versioning to indicate changes in available
171 | options or the output schema. The ``version`` operation should return the version of the extractor.
172 |
173 |
174 | Step 2: Document the Extractor
175 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 |
177 | The docstring for an extractor must start with a short, one sentence summary of the extractor, which
178 | will be used by our autodocumentation tooling. The rest of the documentation should describe what
179 | types of files are compatible, what context information can be used, and
180 | summarize what types of metadata are generated.
181 |
182 | .. todo:: Actually write these descriptors for the available extractors
183 |
184 | The Scythe project uses JSON documents as the output for all extractors and
185 | `JSON Schema `_ to describe the content of the documents. The
186 | BaseExtractor class includes a property, ``schema``, that stores a description of the output format.
187 | We recommend writing your description as a separate file and having the ``schema`` property read
188 | and output the contents of this file. See the
189 | `GenericFileExtractor source code `_
190 | for an example.
191 |
192 |
193 | Step 3: Register the Extractor
194 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195 |
196 | Preferred Route: Adding the Extractor to Scythe
197 | -----------------------------------------------
198 |
199 | If your extractor has the same dependencies as existing extractors, add it to the existing module with
200 | the same dependencies.
201 |
202 | If your extractor has new dependencies, create a new module for your extractor in ``scythe``, and
203 | then add the requirements as a new key in the ``[tool.poetry.extras]`` section of ``pyproject
204 | .toml``, following the other extractor examples in that section. Next, add your extractor to
205 | ``docs/source/extractors.rst`` by adding an ``.. automodule::`` statement that refers to your new
206 | module (again, following the existing pattern).
207 |
208 | Scythe uses ``stevedore`` to simplify access to the extractors. After implementing and
209 | documenting the extractor, add it to the ``[tool.poetry.plugins."scythe.extractor"]`` section of the
210 | ``pyproject.toml`` file for Scythe. See
211 | `stevedore documentation for more information `_
212 | (these docs reference ``setup.py``, but the equivalent can be done via plugins in ``pyproject
213 | .toml``; follow the existing structure if you're unsure, and ask for help from the developers if
214 | you run into issues).
215 |
216 |
217 | Alternative Route: Including Extractors from Other Libraries
218 | ------------------------------------------------------------
219 |
220 | If an extractor would be better suited as part of a different library, you can still register it as an
221 | extractor with Scythe by altering your ``pyproject.toml`` file. Add an entry point with the
222 | namespace ``"scythe.extractor"`` and point to the class object following the
223 | `stevedore documentation `_.
224 | Adding the entry point will let Scythe use your extractor if your library is installed in the
225 | same Python environment as Scythe.
226 |
227 | .. todo:: Provide a public listing of scythe-compatible software.
228 |
229 | So that people know where to find these external libraries
230 |
--------------------------------------------------------------------------------
/tests/data/crystal_structure/C13H22O3.cif:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #$Date: 2015-01-07 18:25:02 +0200 (Wed, 07 Jan 2015) $
3 | #$Revision: 129439 $
4 | #$URL: file:///home/coder/svn-repositories/cod/cif/1/00/00/1000018.cif $
5 | #------------------------------------------------------------------------------
6 | #
7 | # This file is available in the Crystallography Open Database (COD),
8 | # http://www.crystallography.net/
9 | #
10 | # All data on this site have been placed in the public domain by the
11 | # contributors.
12 | #
13 | data_1000018
14 | loop_
15 | _publ_author_name
16 | 'Mondal, Swastik'
17 | 'Mukherjee, Monika'
18 | 'Roy, Arnab'
19 | 'Mukherjee, Debabrata'
20 | 'Helliwell, Madeleine'
21 | _publ_section_title
22 | ;
23 | (1SR,2RS,5RS,6SR,8RS)-7,7-dimethyltricyclo[6.2.1.0^1,6^]undecane-2,5,6-triol:
24 | a supramolecular framework built from O-H...O hydrogen bonds
25 | ;
26 | _journal_coeditor_code SK1554
27 | _journal_issue 8
28 | _journal_name_full 'Acta Crystallographica, Section C'
29 | _journal_page_first o474
30 | _journal_page_last o476
31 | _journal_volume 58
32 | _journal_year 2002
33 | _chemical_formula_moiety 'C13 H22 O3'
34 | _chemical_formula_sum 'C13 H22 O3'
35 | _chemical_formula_weight 226.31
36 | _chemical_melting_point 453
37 | _chemical_name_systematic
38 | ;
39 | (1SR,2RS,5RS,6SR,8RS)-7,7-dimethyltricyclo[6.2.1.0^1,6^]undecane-2,5,6-triol
40 | ;
41 | _symmetry_cell_setting triclinic
42 | _symmetry_space_group_name_Hall '-P 1'
43 | _symmetry_space_group_name_H-M 'P -1'
44 | _atom_sites_solution_hydrogens geom
45 | _atom_sites_solution_primary direct
46 | _atom_sites_solution_secondary difmap
47 | _audit_creation_method SHELXL97
48 | _cell_angle_alpha 82.470(10)
49 | _cell_angle_beta 77.560(10)
50 | _cell_angle_gamma 89.460(10)
51 | _cell_formula_units_Z 4
52 | _cell_length_a 9.812(2)
53 | _cell_length_b 11.1410(10)
54 | _cell_length_c 11.443(2)
55 | _cell_measurement_reflns_used 18
56 | _cell_measurement_temperature 293(2)
57 | _cell_measurement_theta_max 8.1
58 | _cell_measurement_theta_min 6.9
59 | _cell_volume 1210.8(3)
60 | _computing_cell_refinement 'MSC/AFC Diffractometer Control Software'
61 | _computing_data_collection
62 | ;
63 | MSC/AFC Diffractometer Control Software (Molecular Structure Corporation, 1995)
64 | ;
65 | _computing_data_reduction
66 | 'TEXSAN (Molecular Structure Corporation, 1995)'
67 | _computing_molecular_graphics
68 | 'ZORTEP (Zsolnai, 1995) and WinGX (Farrugia, 1999)'
69 | _computing_publication_material 'SHELXL97 and PARST (Nardelli, 1995)'
70 | _computing_structure_refinement 'SHELXL97 (Sheldrick, 1997)'
71 | _computing_structure_solution 'MULTAN-88 (Debaerdemaeker et al., 1988)'
72 | _diffrn_ambient_temperature 293(2)
73 | _diffrn_measured_fraction_theta_full 0.996
74 | _diffrn_measured_fraction_theta_max 0.996
75 | _diffrn_measurement_device_type 'Rigaku AFC-5R'
76 | _diffrn_measurement_method \w/2\q
77 | _diffrn_radiation_monochromator graphite
78 | _diffrn_radiation_source 'fine-focus sealed tube'
79 | _diffrn_radiation_type MoK\a
80 | _diffrn_radiation_wavelength 0.71070
81 | _diffrn_reflns_av_R_equivalents 0.015
82 | _diffrn_reflns_av_sigmaI/netI 0.039
83 | _diffrn_reflns_limit_h_max 11
84 | _diffrn_reflns_limit_h_min -11
85 | _diffrn_reflns_limit_k_max 0
86 | _diffrn_reflns_limit_k_min -13
87 | _diffrn_reflns_limit_l_max 13
88 | _diffrn_reflns_limit_l_min -13
89 | _diffrn_reflns_number 4499
90 | _diffrn_reflns_theta_full 25.00
91 | _diffrn_reflns_theta_max 25.00
92 | _diffrn_reflns_theta_min 1.84
93 | _diffrn_standards_decay_% -1.09
94 | _diffrn_standards_interval_count 150
95 | _diffrn_standards_number 3
96 | _exptl_absorpt_coefficient_mu 0.086
97 | _exptl_absorpt_correction_type none
98 | _exptl_crystal_colour colourless
99 | _exptl_crystal_density_diffrn 1.241
100 | _exptl_crystal_density_meas ?
101 | _exptl_crystal_density_method .
102 | _exptl_crystal_description block
103 | _exptl_crystal_F_000 496
104 | _exptl_crystal_size_max 0.5
105 | _exptl_crystal_size_mid 0.4
106 | _exptl_crystal_size_min 0.3
107 | _refine_diff_density_max 0.24
108 | _refine_diff_density_min -0.21
109 | _refine_ls_extinction_coef none
110 | _refine_ls_extinction_method none
111 | _refine_ls_goodness_of_fit_ref 1.101
112 | _refine_ls_hydrogen_treatment noref
113 | _refine_ls_matrix_type full
114 | _refine_ls_number_parameters 299
115 | _refine_ls_number_reflns 4259
116 | _refine_ls_number_restraints 0
117 | _refine_ls_restrained_S_all 1.101
118 | _refine_ls_R_factor_all 0.094
119 | _refine_ls_R_factor_gt 0.054
120 | _refine_ls_shift/su_max 0.014
121 | _refine_ls_shift/su_mean 0.000
122 | _refine_ls_structure_factor_coef Fsqd
123 | _refine_ls_weighting_details
124 | 'calc w = 1/[\s^2^(Fo^2^)+(0.0577P)^2^+0.9623P] where P=(Fo^2^+2Fc^2^)/3'
125 | _refine_ls_weighting_scheme calc
126 | _refine_ls_wR_factor_gt 0.132
127 | _refine_ls_wR_factor_ref 0.156
128 | _reflns_number_gt 2972
129 | _reflns_number_total 4259
130 | _reflns_threshold_expression I>2\s(I)
131 | _cod_duplicate_entry 2012894
132 | _cod_depositor_comments
133 | ;
134 | The following automatic conversions were performed:
135 |
136 | '_chemical_melting_point' value '453K' was changed to '453' - the
137 | value should be numeric and without a unit designator.
138 |
139 | Automatic conversion script
140 | Id: cif_fix_values 1646 2011-03-28 12:23:43Z adriana
141 | ;
142 | _cod_database_code 1000018
143 | loop_
144 | _symmetry_equiv_pos_as_xyz
145 | 'x, y, z'
146 | '-x, -y, -z'
147 | loop_
148 | _atom_site_label
149 | _atom_site_fract_x
150 | _atom_site_fract_y
151 | _atom_site_fract_z
152 | _atom_site_U_iso_or_equiv
153 | _atom_site_adp_type
154 | _atom_site_calc_flag
155 | _atom_site_refinement_flags
156 | _atom_site_occupancy
157 | _atom_site_type_symbol
158 | C1A 0.7141(3) 0.2593(2) 0.9184(2) 0.0303(6) Uani d . 1 C
159 | C2A 0.6901(3) 0.2018(2) 0.8106(2) 0.0336(6) Uani d . 1 C
160 | H2A 0.7796 0.1978 0.7535 0.040 Uiso calc R 1 H
161 | C3A 0.6296(3) 0.0742(3) 0.8510(3) 0.0410(7) Uani d . 1 C
162 | H3A1 0.6222 0.0370 0.7806 0.049 Uiso calc R 1 H
163 | H3A2 0.5363 0.0784 0.9002 0.049 Uiso calc R 1 H
164 | C4A 0.7192(3) -0.0050(2) 0.9236(3) 0.0383(7) Uani d . 1 C
165 | H4A1 0.6725 -0.0828 0.9531 0.046 Uiso calc R 1 H
166 | H4A2 0.8078 -0.0191 0.8709 0.046 Uiso calc R 1 H
167 | C5A 0.7465(3) 0.0526(2) 1.0304(2) 0.0306(6) Uani d . 1 C
168 | H5A 0.6579 0.0596 1.0880 0.037 Uiso calc R 1 H
169 | C6A 0.8121(3) 0.1796(2) 0.9847(2) 0.0281(6) Uani d . 1 C
170 | C7A 0.8437(3) 0.2627(3) 1.0796(2) 0.0383(7) Uani d . 1 C
171 | C8A 0.7786(3) 0.3848(3) 1.0373(3) 0.0409(7) Uani d . 1 C
172 | H8A 0.8228 0.4562 1.0561 0.049 Uiso calc R 1 H
173 | C9A 0.6201(3) 0.3799(3) 1.0801(3) 0.0490(8) Uani d . 1 C
174 | H9A1 0.5797 0.4578 1.0608 0.059 Uiso calc R 1 H
175 | H9A2 0.5926 0.3545 1.1664 0.059 Uiso calc R 1 H
176 | C10A 0.5768(3) 0.2844(3) 1.0080(3) 0.0396(7) Uani d . 1 C
177 | H10A 0.5062 0.3157 0.9651 0.048 Uiso calc R 1 H
178 | H10B 0.5408 0.2113 1.0610 0.048 Uiso calc R 1 H
179 | C11A 0.7907(3) 0.3824(2) 0.9017(2) 0.0363(7) Uani d . 1 C
180 | H11A 0.8866 0.3808 0.8573 0.044 Uiso calc R 1 H
181 | H11B 0.7417 0.4481 0.8651 0.044 Uiso calc R 1 H
182 | C12A 1.0010(3) 0.2824(3) 1.0680(3) 0.0561(9) Uani d . 1 C
183 | H12A 1.0422 0.3190 0.9875 0.084 Uiso calc R 1 H
184 | H12B 1.0435 0.2058 1.0843 0.084 Uiso calc R 1 H
185 | H12C 1.0156 0.3345 1.1248 0.084 Uiso calc R 1 H
186 | C13A 0.7803(4) 0.2134(3) 1.2117(3) 0.0587(10) Uani d . 1 C
187 | H13A 0.6836 0.1927 1.2200 0.088 Uiso calc R 1 H
188 | H13B 0.7882 0.2742 1.2623 0.088 Uiso calc R 1 H
189 | H13C 0.8294 0.1426 1.2355 0.088 Uiso calc R 1 H
190 | O1A 0.59473(19) 0.2694(2) 0.74991(18) 0.0453(5) Uani d . 1 O
191 | H1A 0.6369 0.3255 0.7029 0.068 Uiso calc R 1 H
192 | O2A 0.84002(19) -0.01982(17) 1.08956(18) 0.0385(5) Uani d . 1 O
193 | H2A1 0.7973 -0.0787 1.1316 0.058 Uiso calc R 1 H
194 | O3A 0.93661(17) 0.16502(17) 0.89587(15) 0.0319(4) Uani d . 1 O
195 | H3A 0.9861 0.1147 0.9245 0.048 Uiso calc R 1 H
196 | C1B 0.7940(3) 0.6323(2) 0.5335(2) 0.0300(6) Uani d . 1 C
197 | C2B 0.8156(3) 0.5029(2) 0.5043(2) 0.0330(6) Uani d . 1 C
198 | H2B 0.9074 0.4977 0.4509 0.040 Uiso calc R 1 H
199 | C3B 0.7034(3) 0.4690(2) 0.4412(3) 0.0359(6) Uani d . 1 C
200 | H3B1 0.6133 0.4672 0.4967 0.043 Uiso calc R 1 H
201 | H3B2 0.7205 0.3884 0.4186 0.043 Uiso calc R 1 H
202 | C4B 0.6993(3) 0.5582(3) 0.3280(3) 0.0396(7) Uani d . 1 C
203 | H4B1 0.7835 0.5499 0.2673 0.048 Uiso calc R 1 H
204 | H4B2 0.6203 0.5372 0.2956 0.048 Uiso calc R 1 H
205 | C5B 0.6875(3) 0.6889(2) 0.3516(2) 0.0313(6) Uani d . 1 C
206 | H5B 0.5961 0.6992 0.4039 0.038 Uiso calc R 1 H
207 | C6B 0.8003(2) 0.7213(2) 0.4158(2) 0.0280(6) Uani d . 1 C
208 | C7B 0.7980(3) 0.8496(2) 0.4626(3) 0.0362(7) Uani d . 1 C
209 | C8B 0.8150(3) 0.8138(3) 0.5955(3) 0.0426(7) Uani d . 1 C
210 | H8B 0.8596 0.8772 0.6269 0.051 Uiso calc R 1 H
211 | C9B 0.6780(3) 0.7643(3) 0.6783(3) 0.0531(9) Uani d . 1 C
212 | H9B1 0.6023 0.8200 0.6727 0.064 Uiso calc R 1 H
213 | H9B2 0.6867 0.7480 0.7617 0.064 Uiso calc R 1 H
214 | C10B 0.6553(3) 0.6465(3) 0.6274(3) 0.0387(7) Uani d . 1 C
215 | H10C 0.5762 0.6530 0.5887 0.046 Uiso calc R 1 H
216 | H10D 0.6393 0.5781 0.6909 0.046 Uiso calc R 1 H
217 | C11B 0.8977(3) 0.6965(3) 0.5892(2) 0.0391(7) Uani d . 1 C
218 | H11C 0.9049 0.6565 0.6682 0.047 Uiso calc R 1 H
219 | H11D 0.9896 0.7082 0.5365 0.047 Uiso calc R 1 H
220 | C12B 0.9221(3) 0.9312(3) 0.3914(3) 0.0434(7) Uani d . 1 C
221 | H12D 1.0079 0.8942 0.4026 0.065 Uiso calc R 1 H
222 | H12E 0.9155 1.0087 0.4201 0.065 Uiso calc R 1 H
223 | H12F 0.9203 0.9417 0.3071 0.065 Uiso calc R 1 H
224 | C13B 0.6646(3) 0.9206(3) 0.4549(3) 0.0571(9) Uani d . 1 C
225 | H13D 0.6635 0.9908 0.4956 0.086 Uiso calc R 1 H
226 | H13E 0.5845 0.8700 0.4927 0.086 Uiso calc R 1 H
227 | H13F 0.6625 0.9453 0.3718 0.086 Uiso calc R 1 H
228 | O1B 0.8074(2) 0.41898(19) 0.61185(18) 0.0459(6) Uani d . 1 O
229 | H1B 0.8856 0.3947 0.6156 0.069 Uiso calc R 1 H
230 | O2B 0.6974(2) 0.76823(19) 0.23988(18) 0.0438(5) Uani d . 1 O
231 | H2B1 0.6220 0.7690 0.2199 0.066 Uiso calc R 1 H
232 | O3B 0.93444(17) 0.70004(17) 0.33988(16) 0.0320(4) Uani d . 1 O
233 | H3B 0.9459 0.7466 0.2767 0.048 Uiso calc R 1 H
234 | loop_
235 | _atom_site_aniso_label
236 | _atom_site_aniso_U_11
237 | _atom_site_aniso_U_22
238 | _atom_site_aniso_U_33
239 | _atom_site_aniso_U_12
240 | _atom_site_aniso_U_13
241 | _atom_site_aniso_U_23
242 | C1A 0.0273(13) 0.0295(14) 0.0315(14) 0.0018(11) -0.0054(11) 0.0037(11)
243 | C2A 0.0269(14) 0.0401(16) 0.0325(14) -0.0007(12) -0.0084(11) 0.0033(12)
244 | C3A 0.0406(16) 0.0428(17) 0.0409(17) -0.0081(13) -0.0133(13) -0.0019(13)
245 | C4A 0.0371(16) 0.0312(15) 0.0445(17) -0.0042(12) -0.0060(13) -0.0019(13)
246 | C5A 0.0257(13) 0.0296(14) 0.0328(14) 0.0024(11) -0.0027(11) 0.0031(11)
247 | C6A 0.0261(13) 0.0300(14) 0.0249(13) -0.0015(11) -0.0016(10) 0.0018(11)
248 | C7A 0.0480(17) 0.0338(15) 0.0349(15) 0.0013(13) -0.0119(13) -0.0068(12)
249 | C8A 0.0490(18) 0.0320(16) 0.0428(17) 0.0007(13) -0.0105(14) -0.0076(13)
250 | C9A 0.055(2) 0.0422(18) 0.0481(18) 0.0172(15) -0.0045(15) -0.0102(15)
251 | C10A 0.0314(15) 0.0378(16) 0.0446(17) 0.0053(12) -0.0009(13) 0.0004(13)
252 | C11A 0.0376(16) 0.0318(15) 0.0367(15) 0.0021(12) -0.0067(12) 0.0037(12)
253 | C12A 0.058(2) 0.056(2) 0.067(2) -0.0009(17) -0.0346(18) -0.0182(18)
254 | C13A 0.097(3) 0.0470(19) 0.0328(17) 0.0111(19) -0.0162(18) -0.0059(14)
255 | O1A 0.0314(11) 0.0587(14) 0.0440(12) -0.0064(9) -0.0166(9) 0.0144(10)
256 | O2A 0.0324(10) 0.0347(11) 0.0427(11) 0.0016(8) -0.0055(9) 0.0120(9)
257 | O3A 0.0251(9) 0.0370(11) 0.0300(10) 0.0044(8) -0.0032(8) 0.0045(8)
258 | C1B 0.0242(13) 0.0374(15) 0.0271(13) -0.0014(11) -0.0049(11) 0.0000(11)
259 | C2B 0.0262(13) 0.0340(15) 0.0342(14) -0.0005(11) -0.0033(11) 0.0067(12)
260 | C3B 0.0363(15) 0.0290(14) 0.0426(16) -0.0040(12) -0.0084(13) -0.0047(12)
261 | C4B 0.0429(17) 0.0405(17) 0.0394(16) -0.0067(13) -0.0183(13) -0.0040(13)
262 | C5B 0.0286(14) 0.0351(15) 0.0296(14) -0.0008(11) -0.0094(11) 0.0030(11)
263 | C6B 0.0220(13) 0.0332(14) 0.0275(13) 0.0007(11) -0.0035(10) -0.0023(11)
264 | C7B 0.0359(15) 0.0307(15) 0.0418(16) -0.0041(12) -0.0072(13) -0.0058(12)
265 | C8B 0.0457(18) 0.0488(18) 0.0337(15) -0.0103(14) -0.0038(13) -0.0145(13)
266 | C9B 0.055(2) 0.059(2) 0.0406(18) -0.0038(16) 0.0060(15) -0.0172(16)
267 | C10B 0.0327(15) 0.0457(17) 0.0336(15) -0.0046(13) 0.0003(12) -0.0023(13)
268 | C11B 0.0366(15) 0.0540(19) 0.0258(14) -0.0103(13) -0.0080(12) 0.0011(13)
269 | C12B 0.0503(18) 0.0342(16) 0.0468(18) -0.0105(13) -0.0133(14) -0.0040(13)
270 | C13B 0.053(2) 0.0416(19) 0.079(2) 0.0132(15) -0.0128(18) -0.0182(18)
271 | O1B 0.0318(11) 0.0540(13) 0.0431(12) 0.0025(10) -0.0043(9) 0.0194(10)
272 | O2B 0.0408(12) 0.0500(13) 0.0408(11) -0.0068(10) -0.0198(9) 0.0126(9)
273 | O3B 0.0263(9) 0.0381(11) 0.0277(10) 0.0012(8) -0.0020(8) 0.0033(8)
274 | loop_
275 | _atom_type_symbol
276 | _atom_type_description
277 | _atom_type_scat_dispersion_real
278 | _atom_type_scat_dispersion_imag
279 | _atom_type_scat_source
280 | C C 0.0033 0.0016 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
281 | H H 0.0000 0.0000 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
282 | O O 0.0106 0.0060 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
283 | _journal_paper_doi 10.1107/S0108270102010843
284 |
--------------------------------------------------------------------------------
/scythe/utils/interface.py:
--------------------------------------------------------------------------------
1 | """Utilities for working with extractors from other applications"""
2 |
3 | from stevedore.extension import ExtensionManager
4 | from stevedore.driver import DriverManager
5 | from typing import Iterator, Union, Dict, List
6 | from collections import namedtuple
7 | from copy import deepcopy
8 |
9 | from scythe.adapters.base import BaseAdapter
10 | from scythe.base import BaseExtractor
11 | import logging
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 | ExtractResult = namedtuple('ExtractResult', ['group', 'extractor', 'metadata'])
16 |
17 |
18 | def _output_plugin_info(mgr: ExtensionManager) -> dict:
19 | """Gets information about all plugins attached to a particular manager
20 |
21 | Args:
22 | mgr (ExtensionManager): Plugin manager
23 | Returns:
24 | (dict): Dictionary where keys are plugin ids and values are descriptions
25 | """
26 |
27 | output = {}
28 | for name, ext in mgr.items():
29 | plugin = ext.plugin()
30 | output[name] = {
31 | 'description': plugin.__doc__.split("\n")[0],
32 | 'version': plugin.version(),
33 | 'class': ext.entry_point_target
34 | }
35 | return output
36 |
37 |
def get_available_extractors():
    """List every extractor plugin registered under ``scythe.extractor``

    Returns:
        [dict]: Descriptions of available extractors
    """
    return _output_plugin_info(ExtensionManager(namespace='scythe.extractor'))
50 |
51 |
def get_available_adapters() -> dict:
    """Describe every adapter plugin registered under ``scythe.adapter``

    Returns:
        (dict) Where keys are adapter names and values are descriptions
    """

    # Query the plugin registry for the adapter namespace
    mgr = ExtensionManager(
        namespace='scythe.adapter',
    )
    return _output_plugin_info(mgr)
60 |
61 |
62 | def _get_adapter_map(adapter_map: str, extractors: list) -> dict:
63 | """Helper function to generate 'adapter map'
64 |
65 | Adapter map is a list of extractors and names of the appropriate adapters
66 | to use to format their output.
67 |
68 | Args:
69 | adapter_map (str): string argument for adapters.
70 | - 'match' means just find adapters with same names as corresponding extractors.
71 | extractors ([str]): list of extractors
72 | Returns:
73 | (dict) where keys are adapter names extractor/adapter names and values are adapter objects.
74 | """
75 | if adapter_map is None:
76 | adapter_map = {}
77 | elif adapter_map == 'match':
78 | adapters = get_available_adapters()
79 | adapter_map = dict((x, x) for x in extractors if x in adapters)
80 | elif not isinstance(adapter_map, dict):
81 | raise ValueError('Adapter map must be a dict, None, or `matching`')
82 |
83 | # Give it to the user
84 | return adapter_map
85 |
86 |
def get_extractor_and_adapter_contexts(name, global_context, extractor_context, adapter_context):
    """Build the merged contexts for one extractor/adapter pair.

    Each merged context starts from a deep copy of ``global_context``, then
    layers on the ``@all`` entry of the specific context, then the entry
    keyed by ``name`` — later layers override earlier ones.

    Args:
        name (str): adapter/extractor name.
        global_context (dict): Context of the files, used for every extractor and adapter
        extractor_context (dict): Context used for extractors. Key is the name of the
            extractor, value is the context. ``@all`` applies to every extractor
        adapter_context (dict): Context used for adapters. Key is the name of the
            adapter, value is the context. ``@all`` applies to every adapter
    Returns:
        (dict, dict): Tuple of (extractor context, adapter context)
    """

    def _merge(specific_context):
        # Deep-copy so later updates never mutate the shared global context
        merged = deepcopy(global_context)
        merged.update(specific_context.get('@all', {}))
        merged.update(specific_context.get(name, {}))
        return merged

    return _merge(extractor_context), _merge(adapter_context)
112 |
113 |
def _get_extractor_list(to_include: list, to_exclude: list) -> list:
    """Resolve the set of extractors to run from include/exclude lists

    Args:
        to_include ([str]): Predefined list of extractors to run. Only these will be used.
            Mutually exclusive with `exclude_extractors`.
        to_exclude ([str]): List of extractors to exclude.
            Mutually exclusive with `include_extractors`.
    Returns:
        List of all applicable extractors
    Raises:
        ValueError: If both lists are given, or an included extractor is unknown
    """

    available = get_available_extractors()
    if to_include is not None and to_exclude is not None:
        raise ValueError('Including and excluding extractors are mutually exclusive')

    if to_include is not None:
        # Every requested extractor must actually be installed
        unknown = set(to_include).difference(available.keys())
        if len(unknown) > 0:
            raise ValueError('Some extractors are missing: ' + ' '.join(unknown))
        return to_include

    if to_exclude is not None:
        return list(set(available.keys()).difference(to_exclude))

    return available
138 |
139 |
def get_extractor(name: str) -> BaseExtractor:
    """Load an extractor object

    Args:
        name (str): Name of extractor
    Returns:
        Requested extractor
    """

    # Resolve and instantiate the named plugin from the extractor namespace
    driver_mgr = DriverManager(
        namespace='scythe.extractor',
        name=name,
        invoke_on_load=True
    )
    return driver_mgr.driver
153 |
154 |
def get_adapter(name: str) -> BaseAdapter:
    """Load an adapter

    Args:
        name (str): Name of adapter
    Returns:
        (BaseAdapter) Requested adapter
    """
    return DriverManager(
        namespace='scythe.adapter',
        name=name,
        invoke_on_load=True
    ).driver
173 |
174 |
def run_extractor(name, group, context=None, adapter=None):
    """Invoke a extractor on a certain group of files

    Args:
        name (str): Name of the extractor
        group ([str]): Paths to group of files to be parsed
        context (dict): Context of the files, used in adapter and extractor
        adapter (str): Name of adapter to use to transform metadata
    Returns:
        ([dict]): Metadata generated by the extractor
    """
    metadata = get_extractor(name).extract(group, context)
    if adapter is None:
        return metadata
    # Post-process the raw metadata through the requested adapter
    return get_adapter(adapter).transform(metadata, context=context)
191 |
192 |
def run_all_extractors_on_directory(directory: str, global_context=None,
                                    adapter_context: Union[None, dict] = None,
                                    extractor_context: Union[None, dict] = None,
                                    include_extractors: Union[None, List[str]] = None,
                                    exclude_extractors: Union[None, List] = None,
                                    adapter_map: Union[None, str, Dict[str, str]] = None,
                                    default_adapter: Union[None, str] = None) \
        -> Iterator[ExtractResult]:
    """Run all applicable extractors over every file group found in a directory

    Args:
        directory (str): Path to directory to be parsed
        global_context (dict): Context of the files, used for every extractor and adapter
        adapter_context (dict): Context used for adapters. Key is the name of the adapter,
            value is the context. The key ``@all`` is used for context used for every adapter
        extractor_context (dict): Context used for extractors. Key is the name of the extractor,
            value is the context. The key ``@all`` is used for context used for every extractor
        include_extractors ([str]): Predefined list of extractors to run. Only these will be used.
            Mutually exclusive with `exclude_extractors`.
        exclude_extractors ([str]): List of extractors to exclude.
            Mutually exclusive with `include_extractors`.
        adapter_map (str, dict): Map of extractor name to the desired adapter.
            Use 'match' to find adapters with the same names
        default_adapter (str): Adapter to use if no other adapter is defined
    Yields:
        (ExtractResult): Tuple of (1) group of files, (2) name of extractor, (3) metadata
    """

    # Load in default arguments
    if global_context is None:
        global_context = dict()
    if adapter_context is None:
        adapter_context = dict()
    if extractor_context is None:
        extractor_context = dict()

    # Get the list of extractors
    extractors = _get_extractor_list(include_extractors, exclude_extractors)

    # Make the adapter map
    adapter_map = _get_adapter_map(adapter_map=adapter_map, extractors=extractors)

    # Get the list of known extractors
    for name in extractors:
        # Get the extractor and adapter
        extractor = get_extractor(name)
        adapter_name = adapter_map.get(name, default_adapter)
        if adapter_name is not None:
            adapter = get_adapter(adapter_name)
        else:
            adapter = None

        my_extractor_context, my_adapter_context = get_extractor_and_adapter_contexts(name,
                                                                                      global_context,
                                                                                      extractor_context,
                                                                                      adapter_context)

        for group, metadata in extractor.extract_directory(directory, context=my_extractor_context):
            # Run the adapter, if defined
            if adapter is not None:
                try:
                    metadata = adapter.transform(metadata, my_adapter_context)
                except Exception as e:
                    # A failing adapter skips this group but does not halt the
                    # whole directory scan; the failure is only logged
                    logger.warning(f'Adapter for {extractor} failed with caught exception: {e}')
                    continue
            # Skip groups whose adapter returned None (nothing to report)
            if metadata is None:
                continue

            yield ExtractResult(group, name, metadata)
262 |
263 |
def run_all_extractors_on_group(group,
                                adapter_map=None,
                                global_context=None,
                                adapter_context: Union[None, dict] = None,
                                extractor_context: Union[None, dict] = None,
                                include_extractors: Union[None, List[str]] = None,
                                exclude_extractors: Union[None, List] = None,
                                default_adapter: Union[None, str] = None):
    """Extract metadata from a single file group with every applicable extractor.

    A single-group counterpart to ``run_all_extractors_on_directory``: each
    selected extractor is run over ``group`` via ``run_extractor`` and the
    results are yielded in the same ``ExtractResult`` format.

    Args:
        group ([str]): Paths to group of files to be parsed
        adapter_map (str, dict): Map of extractor name to the desired adapter.
            Use 'match' to find adapters with the same names
        global_context (dict): Context of the files, used for every extractor and adapter
        adapter_context (dict): Context used for adapters. Key is the name of the adapter,
            value is the context. The key ``@all`` is used for context used for every adapter
        extractor_context (dict): Context used for extractors. Key is the name of the extractor,
            value is the context. The key ``@all`` is used for context used for every extractor
        include_extractors ([str]): Predefined list of extractors to run. Only these will be used.
            Mutually exclusive with `exclude_extractors`.
        exclude_extractors ([str]): List of extractors to exclude.
            Mutually exclusive with `include_extractors`.
        default_adapter (str): Adapter to use if no other adapter is defined
    Yields:
        (ExtractResult): Tuple of (1) group of files, (2) name of extractor, (3) metadata
    """

    # Substitute empty dictionaries for any context left unspecified
    global_context = dict() if global_context is None else global_context
    adapter_context = dict() if adapter_context is None else adapter_context
    extractor_context = dict() if extractor_context is None else extractor_context

    # Decide which extractors to run and which adapter each one maps to
    extractors = _get_extractor_list(include_extractors, exclude_extractors)
    adapter_map = _get_adapter_map(adapter_map=adapter_map, extractors=extractors)

    for name in extractors:
        extractor_ctx, _adapter_ctx = get_extractor_and_adapter_contexts(
            name, global_context, extractor_context, adapter_context)

        # NOTE: run_extractor hands ``extractor_ctx`` to the adapter as well,
        # so the adapter-specific context computed above is not used here
        metadata = run_extractor(name, group, context=extractor_ctx,
                                 adapter=adapter_map.get(name, default_adapter))

        yield ExtractResult(group, name, metadata)
322 |
--------------------------------------------------------------------------------
/tests/data/tdb/PbSSeTe_Na.TDB:
--------------------------------------------------------------------------------
1 | $ Database file written 1900- 1-**
2 | $ From database: User data 1900.01.**
3 | ELEMENT /- ELECTRON_GAS 0.0000E+00 0.0000E+00 0.0000E+00!
4 | ELEMENT VA VACUUM 0.0000E+00 0.0000E+00 0.0000E+00!
5 | ELEMENT NA BCC_A2 2.2990E+01 6.4475E+03 5.1447E+01 !
6 | ELEMENT PB FCC_A1 2.0720E+02 6.8785E+03 6.4785E+01!
7 | ELEMENT S ORTHORHOMBIC_S 3.2066E+01 0.0000E+00 0.0000E+00!
8 | ELEMENT SE HEXAGONAL_A8 7.8960E+01 5.5145E+03 4.1966E+01!
9 | ELEMENT TE HEXAGONAL_A8 1.2760E+02 6.1212E+03 4.9497E+01!
10 |
11 | SPECIES PB2 PB2!
12 | SPECIES PBS PB1S1!
13 | SPECIES PBSE PB1SE1!
14 | SPECIES PBTE PB1TE1!
15 | SPECIES TE+2 TE1/+2!
16 | SPECIES TE2 TE2!
17 | SPECIES TE3 TE3!
18 | SPECIES TE4 TE4!
19 | SPECIES TE5 TE5!
20 | SPECIES TE6 TE6!
21 | SPECIES TE7 TE7!
22 | SPECIES VA+1 VA1/+1!
23 | SPECIES VA+2 VA1/+2!
24 | SPECIES VA-1 VA1/-1!
25 | SPECIES VA-2 VA1/-2!
26 | SPECIES NA2TE NA2TE1!
27 | SPECIES NATE3 NA1TE3!
28 |
29 |
30 | FUNCTION GHSERPB 298.15 -7650.085+101.700244*T-24.5242231*T*LN(T)
31 | -.00365895*T**2-2.4395E-07*T**3; 600.61 Y
32 | -10531.095+154.243182*T-32.4913959*T*LN(T)+.00154613*T**2
33 | +8.05448E+25*T**(-9); 1200 Y
34 | +4157.616+53.139072*T-18.9640637*T*LN(T)-.002882943*T**2
35 | +9.8144E-08*T**3-2696755*T**(-1)+8.05448E+25*T**(-9); 2100 N !
36 | FUNCTION GLIQPB 298.15 +GHSERPB#+4672.124-7.750683*T-6.019E-19*T**7;
37 | 600.61 Y
38 | -5677.958+146.176046*T-32.4913959*T*LN(T)+.00154613*T**2; 1200 Y
39 | +9010.753+45.071937*T-18.9640637*T*LN(T)-.002882943*T**2
40 | +9.8144E-08*T**3-2696755*T**(-1); 2100 N !
41 | FUNCTION GLIQTE 298.15 -17554.731+685.877639*T-126.318*T*LN(T)
42 | +.2219435*T**2-9.42075E-05*T**3+827930*T**(-1); 626.49 Y
43 | -3165763.48+46756.357*T-7196.41*T*LN(T)+7.09775*T**2-.00130692833*T**3
44 | +2.58051E+08*T**(-1); 722.66 Y
45 | +180326.959-1500.57909*T+202.743*T*LN(T)-.142016*T**2
46 | +1.6129733E-05*T**3-24238450*T**(-1); 1150 Y
47 | +6328.687+148.708299*T-32.5596*T*LN(T); 1600 N !
48 | FUNCTION GBCCPB 298.15 +GHSERPB#+2400-1.1*T; 2100 N !
49 | FUNCTION GHCPPB 298.15 +GHSERPB#+300+T; 2100 N !
50 | FUNCTION GHSERTE 298.15 -10544.679+183.372894*T-35.6687*T*LN(T)
51 | +.01583435*T**2-5.240417E-06*T**3+155015*T**(-1); 722.66 Y
52 | +9160.595-129.265373*T+13.004*T*LN(T)-.0362361*T**2+5.006367E-06*T**3
53 | -1286810*T**(-1); 1150 Y
54 | -12781.349+174.901226*T-32.5596*T*LN(T); 1600 N !
55 | FUNCTION GHSERS 298.15 -5198.294+53.913855*T-10.726*T*LN(T)
56 | -.0273801*T**2+8.179537E-06*T**3; 368.30 Y
57 | -6475.706+94.182332*T-17.8693298*T*LN(T)-.010936877*T**2
58 | +1.406467E-06*T**3+36871*T**(-1); 1300 Y
59 | -12485.546+188.304687*T-32*T*LN(T); 1301 N !
60 | FUNCTION GLIQS 298.15 -4196.575+85.63027*T-17.413*T*LN(T)
61 | -.00993935*T**2-7.0062E-08*T**3+1250*T**(-1); 335 Y
62 | +1790361.98-44195.4514*T+7511.61943*T*LN(T)-13.9855175*T**2
63 | +.0048387386*T**3-79880891*T**(-1); 388.36 Y
64 | -876313.954+23366.873*T-4028.756*T*LN(T)+7.954595*T**2
65 | -.00290851333*T**3+33980035*T**(-1); 432.25 Y
66 | +454088.687-7814.67023*T+1237.001*T*LN(T)-1.5607295*T**2
67 | +3.59883667E-04*T**3-31765395*T**(-1); 500 Y
68 | +18554.561-144.895285*T+16.535*T*LN(T)-.0454119*T**2+8.327402E-06*T**3
69 | -2705030*T**(-1); 700 Y
70 | +21243.126-113.298877*T+9.944*T*LN(T)-.0288384*T**2+3.791365E-06*T**3
71 | -3507570*T**(-1); 900 Y
72 | +16117.849-32.79523*T-2.425*T*LN(T)-.01712545*T**2+1.84974E-06*T**3
73 | -3215170*T**(-1); 1300 Y
74 | -6461.814+175.590536*T-32*T*LN(T); 1301 N !
75 | FUNCTION GBCCS 298.15 +105000+GHSERS#; 1301 N !
76 | FUNCTION GFCCS 298.15 +105000+GHSERS#; 1301 N !
77 | FUNCTION GHSERSE 298.15 -9376.371+174.205877*T-33.6527*T*LN(T)
78 | +.02424314*T**2-1.5318461E-05*T**3+102249*T**(-1); 494 Y
79 | -37546.134+507.111538*T-81.2006585*T*LN(T)+.037144892*T**2
80 | -5.611026E-06*T**3+2614263*T**(-1); 800 Y
81 | -12193.47+197.770166*T-35.1456*T*LN(T); 1000 N !
82 | FUNCTION GLIQSE 298.15 +50533.347-1178.28824*T+194.107439*T*LN(T)
83 | -.390268991*T**2+1.19219297E-04*T**3-2224398*T**(-1); 494 Y
84 | -5228.304+183.72559*T-35.1456*T*LN(T); 1000 N !
85 | FUNCTION GPBS 298.15 -98000+8.6*T+GHSERS#+GHSERPB#; 6000 N !
86 | FUNCTION GPBTE 298.15 -128362.29+432.84959*T-52.848901*T*LN(T)+GHSERPB#
87 | +GHSERTE#; 6000 N !
88 | FUNCTION GPBSE 298.15 -99783.25+22.58*T+GHSERPB#+GHSERSE#; 6000 N !
89 | FUNCTION GVA 298.15 +2.3*R#*T; 6000 N !
90 | FUNCTION UN_ASS 298.15 +0.0; 300 N !
91 |
92 | $ Na
93 | $ -------------------------------------
94 | FUNCTION GHSERNA 200.00
95 | -11989.434+260.548732*T-51.0393608*T*LN(T)+72.306633E-3*T**2
96 | -43.638283E-6*T**3+132154*T**(-1); 370.87 Y
97 | -11009.884+199.619999*T-38.1198801*T*LN(T)+9.745854E-3*T**2-1.70664E-6*T**3
98 | +34342*T**(-1)+165.071E21*T**(-9); 2300.00 N !
99 |
100 | FUNCTION GLIQNA 200 2581.02-6.95218*T-276.132E-20*T**7+GHSERNA; 370.87 Y
101 | -8400.44+192.587343*T-38.1198801*T*LN(T)+9.745854E-3*T**2-1.70664E-6*T**3
102 | +34342*T**(-1); 2300.00 N !
103 |
104 | FUNCTION GHCPNA 200 -104+2*T+GHSERNA; 2300 N !
105 |
106 | FUNCTION GFCCNA 200 -50+1.3*T+GHSERNA; 2300 N !
107 | $ -------------------------------------
108 |
109 | TYPE_DEFINITION % SEQ *!
110 | DEFINE_SYSTEM_DEFAULT ELEMENT 2 !
111 | DEFAULT_COMMAND DEF_SYS_ELEMENT VA /- !
112 |
113 |
114 | PHASE LIQUID:L % 1 1.0 !
115 | CONSTITUENT LIQUID:L :PB,PBS,PBSE,PBTE,S,SE,TE,NA,NA2TE,NATE3 : !
116 |
117 | PARAMETER G(LIQUID,PB;0) 298.15 +GLIQPB#; 2100 N REF0 !
118 | PARAMETER G(LIQUID,PBS;0) 298.15 +GLIQPB#+GLIQS#-60000-6*T;
119 | 3000 N REF0 !
120 | PARAMETER G(LIQUID,PBSE;0) 298.15 +GLIQPB#+GLIQSE#-91032.45
121 | +29.88*T; 3000 N REF0 !
122 | PARAMETER G(LIQUID,PBTE;0) 298.15 +GLIQPB#+GLIQTE#-60870.273
123 | +18.088152*T; 3000 N REF0 !
124 | PARAMETER G(LIQUID,S;0) 298.15 +GLIQS#; 1600 N REF0 !
125 | PARAMETER G(LIQUID,SE;0) 298.15 +GLIQSE#; 1600 N REF0 !
126 | PARAMETER G(LIQUID,TE;0) 298.15 +GLIQTE#; 1600 N REF0 !
127 | PARAMETER G(LIQUID,PB,PBS;0) 298.15 +21000; 3000 N REF0 !
128 | PARAMETER G(LIQUID,PB,PBS;1) 298.15 -2*T; 3000 N REF0 !
129 | PARAMETER G(LIQUID,PB,PBTE;0) 298.15 +20634.752-9.7324602*T;
130 | 3000 N REF0 !
131 | PARAMETER G(LIQUID,PB,PBTE;1) 298.15 +7.858817; 3000 N REF0 !
132 | PARAMETER G(LIQUID,PB,PBSE;0) 298.15 +19500.32; 3000 N REF0 !
133 | PARAMETER G(LIQUID,PB,PBSE;1) 298.15 -1003.26+.58*T; 3000 N REF0 !
134 | PARAMETER G(LIQUID,PB,PBSE;2) 298.15 +8352.21-5.64*T; 3000 N
135 | REF0 !
136 | PARAMETER G(LIQUID,PBS,S;0) 298.15 +4500; 3000 N REF0 !
137 | PARAMETER G(LIQUID,PBS,S;1) 298.15 -11000+2*T; 3000 N REF0 !
138 | PARAMETER G(LIQUID,PBS,S;2) 298.15 +8000+7*T; 3000 N REF0 !
139 | PARAMETER G(LIQUID,PBS,PBTE;0) 298.15 -7046.79603; 6000 N REF0 !
140 | PARAMETER G(LIQUID,PBS,PBTE;1) 298.15 -12692.7833; 6000 N REF0 !
141 | PARAMETER G(LIQUID,PBS,PBSE;0) 298.15 -17214.314+13.5254896*T;
142 | 6000 N REF0 !
143 | PARAMETER G(LIQUID,PBSE,SE;0) 298.15 +17503.24-7.95*T; 3000 N
144 | REF0 !
145 | PARAMETER G(LIQUID,PBSE,SE;1) 298.15 -4201.24+1.42*T; 3000 N
146 | REF0 !
147 | PARAMETER G(LIQUID,PBSE,SE;2) 298.15 +16498.24-3.61*T; 3000 N
148 | REF0 !
149 | PARAMETER G(LIQUID,PBSE,PBTE;0) 298.15 +9090.74243; 6000 N REF0 !
150 | PARAMETER G(LIQUID,PBTE,TE;0) 298.15 -4167.4859-2.660869*T; 3000
151 | N REF0 !
152 | PARAMETER G(LIQUID,PBTE,TE;1) 298.15 +3500.9393; 3000 N REF0 !
153 | PARAMETER G(LIQUID,NA;0) 298.15 +GLIQNA#; 3000 N REF0 !
154 | PARAMETER G(LIQUID,NA2TE;0) 298.15 +2*GLIQNA#+GLIQTE#-320000
155 | +52*T; 3000 N REF0 !
156 | PARAMETER G(LIQUID,NATE3;0) 298.15 +GLIQNA#+3*GLIQTE#-185000
157 | +49*T; 3000 N REF0 !
158 | PARAMETER G(LIQUID,NA,NA2TE;0) 298.15 +14000+4*T; 3000 N REF0 !
159 | PARAMETER G(LIQUID,NA,NA2TE;1) 298.15 +16000-12*T; 3000 N REF0 !
160 | PARAMETER G(LIQUID,NA,NA2TE;2) 298.15 +4000; 3000 N REF0 !
161 | PARAMETER G(LIQUID,NA2TE,TE;0) 298.15 -40000-4*T; 3000 N REF0 !
162 | PARAMETER G(LIQUID,NA2TE,TE;1) 298.15 +2000+4*T; 3000 N REF0 !
163 |
164 |
165 | TYPE_DEFINITION & GES A_P_D FCC_A1 MAGNETIC -3.0 2.80000E-01 !
166 | PHASE FCC_A1 %& 2 1 1 !
167 | CONSTITUENT FCC_A1 :PB : VA : !
168 |
169 | PARAMETER G(FCC_A1,PB:VA;0) 298.15 +GHSERPB#; 2100 N REF0 !
170 |
171 |
172 | PHASE HALITE % 2 1 1 !
173 | CONSTITUENT HALITE :PB,NA,VA : S,SE,TE,VA : !
174 |
175 | PARAMETER G(HALITE,PB:S;0) 298.15 +GPBS#; 6000 N REF0 !
176 | PARAMETER G(HALITE,VA:S;0) 298.15 +GHSERS#+GVA#; 6000 N REF0 !
177 | PARAMETER G(HALITE,PB:SE;0) 298.15 +GPBSE#; 6000 N REF0 !
178 | PARAMETER G(HALITE,VA:SE;0) 298.15 +GHSERSE#+GVA#; 6000 N REF0 !
179 | PARAMETER G(HALITE,PB:TE;0) 298.15 +GPBTE#; 6000 N REF0 !
180 | PARAMETER G(HALITE,VA:TE;0) 298.15 +GHSERTE#+GVA#; 6000 N REF0 !
181 | PARAMETER G(HALITE,PB:VA;0) 298.15 +GHSERPB#+GVA#; 6000 N REF0 !
182 | PARAMETER G(HALITE,VA:VA;0) 298.15 +2*GVA#; 6000 N REF0 !
183 | PARAMETER G(HALITE,PB,VA:S;0) 298.15 +38570.1008+4.42495068*T;
184 | 6000 N REF0 !
185 | PARAMETER G(HALITE,PB:S,TE;0) 298.15 +38232.7229-20.9138489*T;
186 | 3600 N REF0 !
187 | PARAMETER G(HALITE,PB:S,TE;1) 298.15 +4372.80543; 3600 N REF0 !
188 | PARAMETER G(HALITE,PB:S,SE;0) 298.15 +3206.19881; 3600 N REF0 !
189 | PARAMETER G(HALITE,PB:S,SE;1) 298.15 +590.16994; 6000 N REF0 !
190 | PARAMETER G(HALITE,PB:S,SE,TE;0) 298.15 -4.5249796E+03; 3000 N REF0 !
191 | PARAMETER G(HALITE,PB:S,SE,TE;1) 298.15 3.0769838E+04; 3000 N REF0 !
192 | PARAMETER G(HALITE,PB:S,SE,TE;2) 298.15 -2.3660755E+04; 3000 N REF0 !
193 | PARAMETER G(HALITE,PB:S,VA;0) 298.15 +38944.7237+4.62924619*T;
194 | 6000 N REF0 !
195 | PARAMETER G(HALITE,PB,VA:SE;0) 298.15 +37953.5572; 6000 N REF0 !
196 | PARAMETER G(HALITE,PB:SE,TE;0) 298.15 +10250.9638; 3600 N REF0 !
197 | PARAMETER G(HALITE,PB:SE,TE;1) 298.15 +1348.60571; 3600 N REF0 !
198 | PARAMETER G(HALITE,PB:SE,VA;0) 298.15 +45585.0811; 6000 N REF0 !
199 | PARAMETER G(HALITE,PB,VA:TE;0) 298.15 +38070.478+2.39964988*T;
200 | 6000 N REF0 !
201 | PARAMETER G(HALITE,PB:TE,VA;0) 298.15 +72780.5624-10.7972522*T;
202 | 6000 N REF0 !
203 | PARAMETER G(HALITE,NA,PB:TE;0) 298.15 -8.2090710E+04
204 | +3.1004837E+01*T; 6000 N REF0 !
205 | PARAMETER G(HALITE,NA:VA;0) 298.15 +GHSERNA#+GVA#; 6000 N REF0 !
206 | PARAMETER G(HALITE,NA:TE;0) 298.15 +GHSERNA#+GHSERTE#
207 | -74873.8344; 6000 N REF0 !
208 | PARAMETER G(HALITE,NA,PB:SE;0) 298.15 -7.84156946E+04; 6000 N REF0 !
209 | PARAMETER G(HALITE,NA,PB:S;0) 298.15 -1.3192696E+05; 6000 N REF0 !
210 | PARAMETER G(HALITE,NA:S;0) 298.15 -61729.2889+GHSERNA#
211 | +GHSERS#; 6000 N REF0 !
212 | PARAMETER G(HALITE,NA:SE;0) 298.15 -82803.3063+GHSERNA#
213 | +GHSERSE#; 6000 N REF0 !
214 |
215 | PHASE HEXAGONAL_A8 % 1 1.0 !
216 | CONSTITUENT HEXAGONAL_A8 :SE,TE : !
217 |
218 | PARAMETER G(HEXAGONAL_A8,SE;0) 298.15 +GHSERSE#; 1600 N REF0 !
219 | PARAMETER G(HEXAGONAL_A8,TE;0) 298.15 +GHSERTE#; 1600 N REF0 !
220 |
221 |
222 | PHASE ORTHORHOMBIC_S % 1 1.0 !
223 | CONSTITUENT ORTHORHOMBIC_S :S : !
224 |
225 | PARAMETER G(ORTHORHOMBIC_S,S;0) 298.15 +GHSERS#; 1301 N REF0 !
226 |
227 | TYPE_DEFINITION & GES A_P_D BCC_A2 MAGNETIC -1.0 4.00000E-01 !
228 | PHASE BCC_A2 %& 2 1 3 !
229 | CONSTITUENT BCC_A2 :NA%,PB : VA% : !
230 |
231 | PARAMETER G(BCC_A2,NA:VA;0) 298.15 +GHSERNA#; 6000 N REF0 !
232 | PARAMETER G(BCC_A2,PB:VA;0) 298.15 +GBCCPB#; 2100 N REF0 !
233 |
234 | PHASE NA2TE % 2 2 1 !
235 | CONSTITUENT NA2TE :NA : TE : !
236 |
237 | PARAMETER G(NA2TE,NA:TE;0) 298.15 +2*GHSERNA#+GHSERTE#-330000
238 | +39.5*T; 6000 N REF0 !
239 |
240 |
241 | PHASE NATE % 2 1 1 !
242 | CONSTITUENT NATE :NA : TE : !
243 |
244 | PARAMETER G(NATE,NA:TE;0) 298.15 +GHSERNA#+GHSERTE#-175000
245 | +25.6*T; 6000 N REF0 !
246 |
247 |
248 | PHASE NATE3 % 2 1 3 !
249 | CONSTITUENT NATE3 :NA : TE : !
250 |
251 | PARAMETER G(NATE3,NA:TE;0) 298.15 +GHSERNA#+3*GHSERTE#-180000
252 | +7.9*T; 6000 N REF0 !
253 |
254 | PHASE NA2SE % 2 2 1 !
255 | CONSTITUENT NA2SE :NA : SE : !
256 |
257 | PARAMETER G(NA2SE,NA:SE;0) 298.15 -332011.423+2*GHSERNA#
258 | +GHSERSE#; 6000 N REF0 !
259 |
260 |
261 | PHASE NASE % 2 1 1 !
262 | CONSTITUENT NASE :NA : SE : !
263 |
264 | PARAMETER G(NASE,NA:SE;0) 298.15 -171553.708+GHSERSE#
265 | +GHSERNA#; 6000 N REF0 !
266 |
267 |
268 | PHASE NASE2 % 2 1 2 !
269 | CONSTITUENT NASE2 :NA : SE : !
270 |
271 | PARAMETER G(NASE2,NA:SE;0) 298.15 -177728.87+2*GHSERSE#
272 | +GHSERNA#; 6000 N REF0 !
273 |
274 | PHASE NA2S % 2 2 1 !
275 | CONSTITUENT NA2S :NA : S : !
276 |
277 | PARAMETER G(NA2S,NA:S;0) 298.15 -360957.493+2*GHSERNA#
278 | +GHSERS#; 6000 N REF0 !
279 |
280 |
281 | PHASE NAS % 2 1 1 !
282 | CONSTITUENT NAS :NA : S : !
283 |
284 | PARAMETER G(NAS,NA:S;0) 298.15 -199727.883+GHSERS#
285 | +GHSERNA#; 6000 N REF0 !
286 |
287 |
288 | PHASE NAS2 % 2 1 2 !
289 | CONSTITUENT NAS2 :NA : S : !
290 |
291 | PARAMETER G(NAS2,NA:S;0) 298.15 -235910.47+2*GHSERS#
292 | +GHSERNA#; 6000 N REF0 !
293 |
294 |
295 | LIST_OF_REFERENCES
296 | NUMBER SOURCE
297 | !
298 |
--------------------------------------------------------------------------------
/tests/data/cif/1548397.cif:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | #$Date: 2017-12-06 04:31:51 +0200 (Wed, 06 Dec 2017) $
3 | #$Revision: 204011 $
4 | #$URL: file:///home/coder/svn-repositories/cod/cif/1/54/83/1548397.cif $
5 | #------------------------------------------------------------------------------
6 | #
7 | # This file is available in the Crystallography Open Database (COD),
8 | # http://www.crystallography.net/
9 | #
10 | # All data on this site have been placed in the public domain by the
11 | # contributors.
12 | #
13 | data_1548397
14 | loop_
15 | _publ_author_name
16 | 'Feng, Rui'
17 | 'Jia, Yan-Yuan'
18 | 'Li, Zhao-Yang'
19 | 'Chang, Ze'
20 | 'Bu, Xian-He'
21 | _publ_section_title
22 | ;
23 | Enhancing the stability and porosity of penetrated metal--organic
24 | frameworks through the insertion of coordination sites
25 | ;
26 | _journal_name_full 'Chemical Science'
27 | _journal_paper_doi 10.1039/C7SC04192F
28 | _journal_year 2018
29 | _chemical_formula_sum 'C36 H20 Co2 N6 O15'
30 | _chemical_formula_weight 894.44
31 | _chemical_name_systematic
32 | ;
33 |
34 | ?
35 |
36 | ;
37 | _chemical_properties_physical Heat-sensitive
38 | _space_group_IT_number 227
39 | _symmetry_cell_setting cubic
40 | _symmetry_space_group_name_Hall '-F 4vw 2vw 3'
41 | _symmetry_space_group_name_H-M 'F d -3 m :2'
42 | _atom_sites_solution_hydrogens geom
43 | _atom_sites_solution_primary direct
44 | _atom_sites_solution_secondary difmap
45 | _audit_creation_method SHELXL-97
46 | _audit_update_record
47 | ;
48 | 2017-09-25 deposited with the CCDC.
49 | 2017-12-05 downloaded from the CCDC.
50 | ;
51 | _cell_angle_alpha 90.00
52 | _cell_angle_beta 90.00
53 | _cell_angle_gamma 90.00
54 | _cell_formula_units_Z 48
55 | _cell_length_a 46.6983(3)
56 | _cell_length_b 46.6983(3)
57 | _cell_length_c 46.6983(3)
58 | _cell_measurement_reflns_used 2483
59 | _cell_measurement_temperature 293(2)
60 | _cell_measurement_theta_max 61.2060
61 | _cell_measurement_theta_min 3.1350
62 | _cell_volume 101836.4(11)
63 | _computing_cell_refinement 'Bruker FRAMBO'
64 | _computing_data_collection 'Bruker FRAMBO'
65 | _computing_data_reduction 'Bruker SAINT'
66 | _computing_molecular_graphics 'Bruker SHELXTL'
67 | _computing_publication_material 'Bruker SHELXTL'
68 | _computing_structure_refinement 'SHELXL-97 (Sheldrick, 1997)'
69 | _computing_structure_solution 'SHELXS-97 (Sheldrick, 1990)'
70 | _diffrn_ambient_temperature 293(2)
71 | _diffrn_measured_fraction_theta_full 0.962
72 | _diffrn_measured_fraction_theta_max 0.962
73 | _diffrn_measurement_device_type 'multiwire proportional'
74 | _diffrn_measurement_method 'phi and omega scans'
75 | _diffrn_radiation_monochromator graphite
76 | _diffrn_radiation_source 'fine-focus sealed tube'
77 | _diffrn_radiation_type CuK\a
78 | _diffrn_radiation_wavelength 1.54178
79 | _diffrn_reflns_av_R_equivalents 0.1501
80 | _diffrn_reflns_av_sigmaI/netI 0.0721
81 | _diffrn_reflns_limit_h_max 43
82 | _diffrn_reflns_limit_h_min -38
83 | _diffrn_reflns_limit_k_max 20
84 | _diffrn_reflns_limit_k_min -49
85 | _diffrn_reflns_limit_l_max 36
86 | _diffrn_reflns_limit_l_min -39
87 | _diffrn_reflns_number 16582
88 | _diffrn_reflns_theta_full 54.94
89 | _diffrn_reflns_theta_max 54.94
90 | _diffrn_reflns_theta_min 3.79
91 | _exptl_absorpt_coefficient_mu 3.377
92 | _exptl_absorpt_correction_T_max 0.4409
93 | _exptl_absorpt_correction_T_min 0.3761
94 | _exptl_absorpt_correction_type multi-scan
95 | _exptl_absorpt_process_details SADABS
96 | _exptl_crystal_colour RED
97 | _exptl_crystal_density_diffrn 0.700
98 | _exptl_crystal_density_method 'not measured'
99 | _exptl_crystal_description BLOCK
100 | _exptl_crystal_F_000 21696
101 | _exptl_crystal_recrystallization_method
102 | 'Re-crystallisation from solvent: DMF, CH3CN and H2O'
103 | _exptl_crystal_size_max 0.36
104 | _exptl_crystal_size_mid 0.32
105 | _exptl_crystal_size_min 0.29
106 | _refine_diff_density_max 1.656
107 | _refine_diff_density_min -0.622
108 | _refine_diff_density_rms 0.156
109 | _refine_ls_extinction_coef 0.000072(17)
110 | _refine_ls_extinction_expression Fc^*^=kFc[1+0.001xFc^2^\l^3^/sin(2\q)]^-1/4^
111 | _refine_ls_extinction_method SHELXL
112 | _refine_ls_goodness_of_fit_ref 1.059
113 | _refine_ls_hydrogen_treatment constr
114 | _refine_ls_matrix_type full
115 | _refine_ls_number_parameters 158
116 | _refine_ls_number_reflns 2913
117 | _refine_ls_number_restraints 105
118 | _refine_ls_restrained_S_all 1.081
119 | _refine_ls_R_factor_all 0.2094
120 | _refine_ls_R_factor_gt 0.1495
121 | _refine_ls_shift/su_max 0.002
122 | _refine_ls_shift/su_mean 0.000
123 | _refine_ls_structure_factor_coef Fsqd
124 | _refine_ls_weighting_details
125 | 'calc w=1/[\s^2^(Fo^2^)+(0.3300P)^2^+0.0000P] where P=(Fo^2^+2Fc^2^)/3'
126 | _refine_ls_weighting_scheme calc
127 | _refine_ls_wR_factor_gt 0.3603
128 | _refine_ls_wR_factor_ref 0.4440
129 | _reflns_number_gt 1649
130 | _reflns_number_total 2913
131 | _reflns_threshold_expression >2sigma(I)
132 | _cod_data_source_file c7sc04192f2.cif
133 | _cod_data_source_block aaa
134 | _cod_depositor_comments
135 | ;
136 | The following automatic conversions were performed:
137 |
138 | '_symmetry_cell_setting' value 'Cubic' changed to 'cubic' according
139 | to
140 | /home/data/users/saulius/crontab/automatic-downloads/rss-feeds/RSC/lib/dictionaries/cif_core.dic
141 | dictionary named 'cif_core.dic' version 2.4.2 from 2011-04-26.
142 |
143 | '_exptl_absorpt_correction_type' value 'MULTI-SCAN' changed to
144 | 'multi-scan' according to
145 | /home/data/users/saulius/crontab/automatic-downloads/rss-feeds/RSC/lib/dictionaries/cif_core.dic
146 | dictionary named 'cif_core.dic' version 2.4.2 from 2011-04-26.
147 |
148 | '_refine_ls_hydrogen_treatment' value 'CONSTR' changed to 'constr'
149 | according to
150 | /home/data/users/saulius/crontab/automatic-downloads/rss-feeds/RSC/lib/dictionaries/cif_core.dic
151 | dictionary named 'cif_core.dic' version 2.4.2 from 2011-04-26.
152 |
153 | Automatic conversion script
154 | Id: cif_fix_values 4973 2017-02-22 13:04:09Z antanas
155 | ;
156 | _cod_original_sg_symbol_H-M Fd-3m
157 | _cod_database_code 1548397
158 | loop_
159 | _symmetry_equiv_pos_as_xyz
160 | 'x, y, z'
161 | '-x+3/4, -y+1/4, z+1/2'
162 | '-x+1/4, y+1/2, -z+3/4'
163 | 'x+1/2, -y+3/4, -z+1/4'
164 | 'z, x, y'
165 | 'z+1/2, -x+3/4, -y+1/4'
166 | '-z+3/4, -x+1/4, y+1/2'
167 | '-z+1/4, x+1/2, -y+3/4'
168 | 'y, z, x'
169 | '-y+1/4, z+1/2, -x+3/4'
170 | 'y+1/2, -z+3/4, -x+1/4'
171 | '-y+3/4, -z+1/4, x+1/2'
172 | 'y+3/4, x+1/4, -z+1/2'
173 | '-y, -x, -z'
174 | 'y+1/4, -x+1/2, z+3/4'
175 | '-y+1/2, x+3/4, z+1/4'
176 | 'x+3/4, z+1/4, -y+1/2'
177 | '-x+1/2, z+3/4, y+1/4'
178 | '-x, -z, -y'
179 | 'x+1/4, -z+1/2, y+3/4'
180 | 'z+3/4, y+1/4, -x+1/2'
181 | 'z+1/4, -y+1/2, x+3/4'
182 | '-z+1/2, y+3/4, x+1/4'
183 | '-z, -y, -x'
184 | 'x, y+1/2, z+1/2'
185 | '-x+3/4, -y+3/4, z+1'
186 | '-x+1/4, y+1, -z+5/4'
187 | 'x+1/2, -y+5/4, -z+3/4'
188 | 'z, x+1/2, y+1/2'
189 | 'z+1/2, -x+5/4, -y+3/4'
190 | '-z+3/4, -x+3/4, y+1'
191 | '-z+1/4, x+1, -y+5/4'
192 | 'y, z+1/2, x+1/2'
193 | '-y+1/4, z+1, -x+5/4'
194 | 'y+1/2, -z+5/4, -x+3/4'
195 | '-y+3/4, -z+3/4, x+1'
196 | 'y+3/4, x+3/4, -z+1'
197 | '-y, -x+1/2, -z+1/2'
198 | 'y+1/4, -x+1, z+5/4'
199 | '-y+1/2, x+5/4, z+3/4'
200 | 'x+3/4, z+3/4, -y+1'
201 | '-x+1/2, z+5/4, y+3/4'
202 | '-x, -z+1/2, -y+1/2'
203 | 'x+1/4, -z+1, y+5/4'
204 | 'z+3/4, y+3/4, -x+1'
205 | 'z+1/4, -y+1, x+5/4'
206 | '-z+1/2, y+5/4, x+3/4'
207 | '-z, -y+1/2, -x+1/2'
208 | 'x+1/2, y, z+1/2'
209 | '-x+5/4, -y+1/4, z+1'
210 | '-x+3/4, y+1/2, -z+5/4'
211 | 'x+1, -y+3/4, -z+3/4'
212 | 'z+1/2, x, y+1/2'
213 | 'z+1, -x+3/4, -y+3/4'
214 | '-z+5/4, -x+1/4, y+1'
215 | '-z+3/4, x+1/2, -y+5/4'
216 | 'y+1/2, z, x+1/2'
217 | '-y+3/4, z+1/2, -x+5/4'
218 | 'y+1, -z+3/4, -x+3/4'
219 | '-y+5/4, -z+1/4, x+1'
220 | 'y+5/4, x+1/4, -z+1'
221 | '-y+1/2, -x, -z+1/2'
222 | 'y+3/4, -x+1/2, z+5/4'
223 | '-y+1, x+3/4, z+3/4'
224 | 'x+5/4, z+1/4, -y+1'
225 | '-x+1, z+3/4, y+3/4'
226 | '-x+1/2, -z, -y+1/2'
227 | 'x+3/4, -z+1/2, y+5/4'
228 | 'z+5/4, y+1/4, -x+1'
229 | 'z+3/4, -y+1/2, x+5/4'
230 | '-z+1, y+3/4, x+3/4'
231 | '-z+1/2, -y, -x+1/2'
232 | 'x+1/2, y+1/2, z'
233 | '-x+5/4, -y+3/4, z+1/2'
234 | '-x+3/4, y+1, -z+3/4'
235 | 'x+1, -y+5/4, -z+1/4'
236 | 'z+1/2, x+1/2, y'
237 | 'z+1, -x+5/4, -y+1/4'
238 | '-z+5/4, -x+3/4, y+1/2'
239 | '-z+3/4, x+1, -y+3/4'
240 | 'y+1/2, z+1/2, x'
241 | '-y+3/4, z+1, -x+3/4'
242 | 'y+1, -z+5/4, -x+1/4'
243 | '-y+5/4, -z+3/4, x+1/2'
244 | 'y+5/4, x+3/4, -z+1/2'
245 | '-y+1/2, -x+1/2, -z'
246 | 'y+3/4, -x+1, z+3/4'
247 | '-y+1, x+5/4, z+1/4'
248 | 'x+5/4, z+3/4, -y+1/2'
249 | '-x+1, z+5/4, y+1/4'
250 | '-x+1/2, -z+1/2, -y'
251 | 'x+3/4, -z+1, y+3/4'
252 | 'z+5/4, y+3/4, -x+1/2'
253 | 'z+3/4, -y+1, x+3/4'
254 | '-z+1, y+5/4, x+1/4'
255 | '-z+1/2, -y+1/2, -x'
256 | '-x, -y, -z'
257 | 'x-3/4, y-1/4, -z-1/2'
258 | 'x-1/4, -y-1/2, z-3/4'
259 | '-x-1/2, y-3/4, z-1/4'
260 | '-z, -x, -y'
261 | '-z-1/2, x-3/4, y-1/4'
262 | 'z-3/4, x-1/4, -y-1/2'
263 | 'z-1/4, -x-1/2, y-3/4'
264 | '-y, -z, -x'
265 | 'y-1/4, -z-1/2, x-3/4'
266 | '-y-1/2, z-3/4, x-1/4'
267 | 'y-3/4, z-1/4, -x-1/2'
268 | '-y-3/4, -x-1/4, z-1/2'
269 | 'y, x, z'
270 | '-y-1/4, x-1/2, -z-3/4'
271 | 'y-1/2, -x-3/4, -z-1/4'
272 | '-x-3/4, -z-1/4, y-1/2'
273 | 'x-1/2, -z-3/4, -y-1/4'
274 | 'x, z, y'
275 | '-x-1/4, z-1/2, -y-3/4'
276 | '-z-3/4, -y-1/4, x-1/2'
277 | '-z-1/4, y-1/2, -x-3/4'
278 | 'z-1/2, -y-3/4, -x-1/4'
279 | 'z, y, x'
280 | '-x, -y+1/2, -z+1/2'
281 | 'x-3/4, y+1/4, -z'
282 | 'x-1/4, -y, z-1/4'
283 | '-x-1/2, y-1/4, z+1/4'
284 | '-z, -x+1/2, -y+1/2'
285 | '-z-1/2, x-1/4, y+1/4'
286 | 'z-3/4, x+1/4, -y'
287 | 'z-1/4, -x, y-1/4'
288 | '-y, -z+1/2, -x+1/2'
289 | 'y-1/4, -z, x-1/4'
290 | '-y-1/2, z-1/4, x+1/4'
291 | 'y-3/4, z+1/4, -x'
292 | '-y-3/4, -x+1/4, z'
293 | 'y, x+1/2, z+1/2'
294 | '-y-1/4, x, -z-1/4'
295 | 'y-1/2, -x-1/4, -z+1/4'
296 | '-x-3/4, -z+1/4, y'
297 | 'x-1/2, -z-1/4, -y+1/4'
298 | 'x, z+1/2, y+1/2'
299 | '-x-1/4, z, -y-1/4'
300 | '-z-3/4, -y+1/4, x'
301 | '-z-1/4, y, -x-1/4'
302 | 'z-1/2, -y-1/4, -x+1/4'
303 | 'z, y+1/2, x+1/2'
304 | '-x+1/2, -y, -z+1/2'
305 | 'x-1/4, y-1/4, -z'
306 | 'x+1/4, -y-1/2, z-1/4'
307 | '-x, y-3/4, z+1/4'
308 | '-z+1/2, -x, -y+1/2'
309 | '-z, x-3/4, y+1/4'
310 | 'z-1/4, x-1/4, -y'
311 | 'z+1/4, -x-1/2, y-1/4'
312 | '-y+1/2, -z, -x+1/2'
313 | 'y+1/4, -z-1/2, x-1/4'
314 | '-y, z-3/4, x+1/4'
315 | 'y-1/4, z-1/4, -x'
316 | '-y-1/4, -x-1/4, z'
317 | 'y+1/2, x, z+1/2'
318 | '-y+1/4, x-1/2, -z-1/4'
319 | 'y, -x-3/4, -z+1/4'
320 | '-x-1/4, -z-1/4, y'
321 | 'x, -z-3/4, -y+1/4'
322 | 'x+1/2, z, y+1/2'
323 | '-x+1/4, z-1/2, -y-1/4'
324 | '-z-1/4, -y-1/4, x'
325 | '-z+1/4, y-1/2, -x-1/4'
326 | 'z, -y-3/4, -x+1/4'
327 | 'z+1/2, y, x+1/2'
328 | '-x+1/2, -y+1/2, -z'
329 | 'x-1/4, y+1/4, -z-1/2'
330 | 'x+1/4, -y, z-3/4'
331 | '-x, y-1/4, z-1/4'
332 | '-z+1/2, -x+1/2, -y'
333 | '-z, x-1/4, y-1/4'
334 | 'z-1/4, x+1/4, -y-1/2'
335 | 'z+1/4, -x, y-3/4'
336 | '-y+1/2, -z+1/2, -x'
337 | 'y+1/4, -z, x-3/4'
338 | '-y, z-1/4, x-1/4'
339 | 'y-1/4, z+1/4, -x-1/2'
340 | '-y-1/4, -x+1/4, z-1/2'
341 | 'y+1/2, x+1/2, z'
342 | '-y+1/4, x, -z-3/4'
343 | 'y, -x-1/4, -z-1/4'
344 | '-x-1/4, -z+1/4, y-1/2'
345 | 'x, -z-1/4, -y-1/4'
346 | 'x+1/2, z+1/2, y'
347 | '-x+1/4, z, -y-3/4'
348 | '-z-1/4, -y+1/4, x-1/2'
349 | '-z+1/4, y, -x-3/4'
350 | 'z, -y-1/4, -x-1/4'
351 | 'z+1/2, y+1/2, x'
352 | loop_
353 | _atom_site_label
354 | _atom_site_type_symbol
355 | _atom_site_fract_x
356 | _atom_site_fract_y
357 | _atom_site_fract_z
358 | _atom_site_U_iso_or_equiv
359 | _atom_site_adp_type
360 | _atom_site_occupancy
361 | _atom_site_symmetry_multiplicity
362 | _atom_site_calc_flag
363 | _atom_site_refinement_flags
364 | _atom_site_disorder_assembly
365 | _atom_site_disorder_group
366 | Co2 Co 0.09666(17) 0.15054(17) 0.93636(8) 0.0352(10) Uani 0.25 1 d P A -1
367 | O6 O 0.0730(7) 0.1833(7) 0.9152(4) 0.058(8) Uani 0.25 1 d PU A -1
368 | H1W H 0.0754 0.1749 0.8992 0.086 Uiso 0.50 2 d SPR A -1
369 | H2W H 0.0659 0.2000 0.9136 0.086 Uiso 0.25 1 d PR A -1
370 | Co1 Co 0.1250 0.1250 1.00490(5) 0.0352(10) Uani 1 4 d S . .
371 | O1 O 0.12508(15) 0.16855(13) 1.00669(12) 0.061(2) Uani 1 1 d . . .
372 | O2 O 0.1173(3) 0.1832(2) 0.9609(2) 0.122(3) Uani 1 1 d U . .
373 | O3 O 0.1505(3) 0.3241(3) 0.9864(3) 0.162(5) Uani 1 1 d DU A .
374 | O4 O 0.1250 0.1250 1.0507(2) 0.087(5) Uani 1 4 d S . .
375 | H3W H 0.1079 0.1266 1.0568 0.130 Uiso 0.50 1 d PR . .
376 | O5 O 0.1250 0.1250 0.95858(19) 0.034(2) Uani 1 4 d S . .
377 | H5A H 0.1418 0.1293 0.9532 0.051 Uiso 0.25 1 d PR A .
378 | H5B H 0.1220 0.1079 0.9532 0.051 Uiso 0.25 1 d PR . .
379 | N1 N 0.1543(3) 0.3709(3) 0.9106(3) 0.107(3) Uani 1 1 d U A .
380 | N2 N 0.1268(4) 0.2886(2) 0.9614(2) 0.106(4) Uani 1 2 d SDU . .
381 | C1 C 0.1225(3) 0.1869(2) 0.9874(2) 0.080(4) Uani 1 1 d . A .
382 | C2 C 0.1242(3) 0.22567(18) 1.02433(18) 0.056(4) Uani 1 2 d S . .
383 | H2 H 0.1238 0.2116 1.0384 0.067 Uiso 1 2 calc SR . .
384 | C3 C 0.1245(2) 0.21790(19) 0.99607(18) 0.066(3) Uani 1 1 d . . .
385 | C4 C 0.1239(3) 0.2393(2) 0.9750(2) 0.083(4) Uani 1 1 d . A .
386 | H4A H 0.1219 0.2344 0.9558 0.099 Uiso 1 1 calc R . .
387 | C5 C 0.1263(4) 0.26725(17) 0.98275(17) 0.076(5) Uani 1 2 d SD . .
388 | C6 C 0.1375(4) 0.3154(3) 0.9647(4) 0.123(4) Uani 1 1 d DU . .
389 | C7 C 0.1331(4) 0.3342(3) 0.9384(3) 0.119(3) Uani 1 1 d U . .
390 | C8 C 0.1102(4) 0.3314(3) 0.9186(3) 0.101(6) Uani 1 2 d S . .
391 | H8 H 0.0921 0.3238 0.9262 0.121 Uiso 1 2 calc SR A .
392 | C9 C 0.1080(3) 0.3522(2) 0.8978(2) 0.077(5) Uani 1 2 d S . .
393 | H9 H 0.0887 0.3580 0.8920 0.092 Uiso 1 2 calc SR . .
394 | C10 C 0.1299(3) 0.3725(3) 0.8947(3) 0.107(4) Uani 1 1 d U . .
395 | C11 C 0.1554(4) 0.3522(3) 0.9324(4) 0.114(3) Uani 1 1 d U . .
396 | H11 H 0.1717 0.3514 0.9437 0.137 Uiso 1 1 calc R A .
397 | loop_
398 | _atom_site_aniso_label
399 | _atom_site_aniso_U_11
400 | _atom_site_aniso_U_22
401 | _atom_site_aniso_U_33
402 | _atom_site_aniso_U_23
403 | _atom_site_aniso_U_13
404 | _atom_site_aniso_U_12
405 | Co2 0.039(6) 0.047(6) 0.0199(12) 0.004(4) -0.004(4) -0.0137(9)
406 | O6 0.061(12) 0.052(12) 0.060(9) 0.009(8) -0.002(8) 0.000(9)
407 | Co1 0.039(6) 0.047(6) 0.0199(12) 0.004(4) -0.004(4) -0.0137(9)
408 | O1 0.107(6) 0.043(4) 0.035(3) -0.010(3) 0.009(3) -0.016(3)
409 | O2 0.203(8) 0.085(5) 0.078(6) -0.001(4) 0.004(5) -0.035(5)
410 | O3 0.172(8) 0.171(8) 0.143(7) 0.015(6) -0.018(6) -0.034(6)
411 | O4 0.121(8) 0.121(8) 0.019(6) 0.000 0.000 -0.011(10)
412 | O5 0.038(4) 0.038(4) 0.027(5) 0.000 0.000 -0.001(4)
413 | N1 0.100(5) 0.112(6) 0.109(6) 0.021(4) 0.001(4) 0.001(5)
414 | N2 0.128(7) 0.095(4) 0.095(4) 0.016(5) 0.005(4) -0.005(4)
415 | C1 0.148(12) 0.069(7) 0.023(5) -0.010(5) 0.013(6) -0.032(7)
416 | C2 0.098(11) 0.035(5) 0.035(5) 0.009(5) 0.009(4) -0.009(4)
417 | C3 0.129(10) 0.037(5) 0.031(5) 0.002(4) 0.016(5) -0.020(5)
418 | C4 0.140(11) 0.066(7) 0.042(6) 0.003(5) 0.004(6) -0.023(7)
419 | C5 0.146(16) 0.041(5) 0.041(5) 0.011(6) 0.018(6) -0.018(6)
420 | C6 0.139(6) 0.115(6) 0.116(6) 0.013(5) 0.009(5) -0.014(5)
421 | C7 0.128(6) 0.114(6) 0.116(6) 0.014(5) 0.014(5) -0.009(5)
422 | C8 0.072(11) 0.115(10) 0.115(10) 0.050(13) 0.024(7) -0.024(7)
423 | C9 0.051(9) 0.090(7) 0.090(7) 0.028(9) 0.014(5) -0.014(5)
424 | C10 0.098(6) 0.113(7) 0.110(7) 0.019(6) -0.004(5) -0.007(5)
425 | C11 0.116(6) 0.113(6) 0.113(6) 0.022(5) 0.002(5) -0.004(5)
426 | loop_
427 | _atom_type_symbol
428 | _atom_type_description
429 | _atom_type_scat_dispersion_real
430 | _atom_type_scat_dispersion_imag
431 | _atom_type_scat_source
432 | C C 0.0181 0.0091 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
433 | H H 0.0000 0.0000 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
434 | N N 0.0311 0.0180 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
435 | O O 0.0492 0.0322 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
436 | Co Co -2.3653 3.6143 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
437 | loop_
438 | _geom_angle_atom_site_label_1
439 | _geom_angle_atom_site_label_2
440 | _geom_angle_atom_site_label_3
441 | _geom_angle
442 | _geom_angle_site_symmetry_1
443 | _geom_angle_site_symmetry_3
444 | O2 Co2 O5 85.3(4) 133_655 .
445 | O2 Co2 N1 91.3(6) 133_655 71_545
446 | O5 Co2 N1 94.7(5) . 71_545
447 | O2 Co2 O6 101.8(10) 133_655 .
448 | O5 Co2 O6 169.1(13) . .
449 | N1 Co2 O6 93.4(9) 71_545 .
450 | O2 Co2 O2 107.7(6) 133_655 .
451 | O5 Co2 O2 81.6(4) . .
452 | N1 Co2 O2 160.1(6) 71_545 .
453 | O6 Co2 O2 88.3(11) . .
454 | O2 Co2 N1 166.7(6) 133_655 107_656
455 | O5 Co2 N1 91.4(5) . 107_656
456 | N1 Co2 N1 76.1(7) 71_545 107_656
457 | O6 Co2 N1 83.5(9) . 107_656
458 | O2 Co2 N1 84.4(6) . 107_656
459 | Co2 O6 H1W 90.8 . .
460 | Co2 O6 H2W 154.3 . .
461 | H1W O6 H2W 113.2 . .
462 | O1 Co1 O1 90.1(4) 110 50_454
463 | O1 Co1 O1 89.7(4) 110 .
464 | O1 Co1 O1 175.3(3) 50_454 .
465 | O1 Co1 O1 175.3(3) 110 133_655
466 | O1 Co1 O1 89.7(4) 50_454 133_655
467 | O1 Co1 O1 90.1(4) . 133_655
468 | O1 Co1 O4 87.64(16) 110 .
469 | O1 Co1 O4 87.64(16) 50_454 .
470 | O1 Co1 O4 87.64(16) . .
471 | O1 Co1 O4 87.64(16) 133_655 .
472 | O1 Co1 O5 92.36(16) 110 .
473 | O1 Co1 O5 92.36(16) 50_454 .
474 | O1 Co1 O5 92.36(16) . .
475 | O1 Co1 O5 92.36(16) 133_655 .
476 | O4 Co1 O5 180.000(3) . .
477 | C1 O1 Co1 131.0(6) . .
478 | C1 O2 Co2 137.5(8) . 133_655
479 | C1 O2 Co2 134.9(8) . .
480 | Co2 O2 Co2 3.0(4) 133_655 .
481 | Co1 O4 H3W 109.5 . .
482 | Co2 O5 Co2 5.2(6) 50_454 110
483 | Co2 O5 Co2 119.6(5) 50_454 .
484 | Co2 O5 Co2 119.3(5) 110 .
485 | Co2 O5 Co2 119.3(5) 50_454 133_655
486 | Co2 O5 Co2 119.6(5) 110 133_655
487 | Co2 O5 Co2 5.2(6) . 133_655
488 | Co2 O5 Co1 120.2(2) 50_454 .
489 | Co2 O5 Co1 120.2(2) 110 .
490 | Co2 O5 Co1 120.2(2) . .
491 | Co2 O5 Co1 120.2(2) 133_655 .
492 | Co2 O5 H5A 52.8 50_454 .
493 | Co2 O5 H5A 57.9 110 .
494 | Co2 O5 H5A 107.9 . .
495 | Co2 O5 H5A 103.5 133_655 .
496 | Co1 O5 H5A 107.2 . .
497 | Co2 O5 H5B 54.1 50_454 .
498 | Co2 O5 H5B 49.0 110 .
499 | Co2 O5 H5B 106.7 . .
500 | Co2 O5 H5B 111.0 133_655 .
501 | Co1 O5 H5B 107.2 . .
502 | H5A O5 H5B 106.9 . .
503 | C11 N1 C10 118.7(13) . .
504 | C11 N1 Co2 122.2(11) . 69_355
505 | C10 N1 Co2 118.7(9) . 69_355
506 | C11 N1 Co2 126.1(11) . 152_466
507 | C10 N1 Co2 114.7(9) . 152_466
508 | Co2 N1 Co2 3.9(5) 69_355 152_466
509 | C6 N2 C6 94.2(17) . 162_576
510 | C6 N2 C5 125.3(11) . .
511 | C6 N2 C5 125.3(11) 162_576 .
512 | O1 C1 O2 128.6(10) . .
513 | O1 C1 C3 117.5(8) . .
514 | O2 C1 C3 113.9(10) . .
515 | C3 C2 C3 120.7(11) . 162_576
516 | C3 C2 H2 119.6 . .
517 | C3 C2 H2 119.6 162_576 .
518 | C2 C3 C4 119.1(9) . .
519 | C2 C3 C1 120.9(8) . .
520 | C4 C3 C1 119.7(8) . .
521 | C5 C4 C3 119.8(10) . .
522 | C5 C4 H4A 120.1 . .
523 | C3 C4 H4A 120.1 . .
524 | C4 C5 C4 120.3(12) 162_576 .
525 | C4 C5 N2 119.5(7) 162_576 .
526 | C4 C5 N2 119.5(6) . .
527 | O3 C6 N2 125.0(16) . .
528 | O3 C6 C7 122.0(14) . .
529 | N2 C6 C7 112.9(15) . .
530 | O3 C6 C6 143.6(11) . 162_576
531 | N2 C6 C6 42.9(9) . 162_576
532 | C7 C6 C6 80.6(8) . 162_576
533 | C11 C7 C8 119.8(16) . .
534 | C11 C7 C7 106.8(10) . 162_576
535 | C8 C7 C7 58.3(8) . 162_576
536 | C11 C7 C6 114.7(16) . .
537 | C8 C7 C6 124.9(15) . .
538 | C7 C7 C6 99.4(8) 162_576 .
539 | C9 C8 C7 116.6(13) . 162_576
540 | C9 C8 C7 116.6(13) . .
541 | C7 C8 C7 63.3(15) 162_576 .
542 | C9 C8 H8 116.6 . .
543 | C7 C8 H8 116.6 162_576 .
544 | C7 C8 H8 116.6 . .
545 | C8 C9 C10 119.8(14) . .
546 | C8 C9 C10 119.8(14) . 162_576
547 | C10 C9 C10 48.0(12) . 162_576
548 | C8 C9 H9 117.1 . .
549 | C10 C9 H9 117.1 . .
550 | C10 C9 H9 117.1 162_576 .
551 | C10 C10 N1 110.1(8) 162_576 .
552 | C10 C10 C9 66.0(6) 162_576 .
553 | N1 C10 C9 121.1(13) . .
554 | C10 C10 C10 90.000(10) 162_576 139_545
555 | N1 C10 C10 115.1(7) . 139_545
556 | C9 C10 C10 123.4(9) . 139_545
557 | C10 C10 C10 52.1(8) 162_576 52_456
558 | N1 C10 C10 123.1(8) . 52_456
559 | C9 C10 C10 100.6(10) . 52_456
560 | C10 C10 C10 37.9(8) 139_545 52_456
561 | N1 C11 C7 121.9(16) . .
562 | N1 C11 H11 119.1 . .
563 | C7 C11 H11 119.1 . .
564 | loop_
565 | _geom_bond_atom_site_label_1
566 | _geom_bond_atom_site_label_2
567 | _geom_bond_distance
568 | _geom_bond_site_symmetry_2
569 | Co2 O2 1.987(13) 133_655
570 | Co2 O5 2.062(6) .
571 | Co2 N1 2.067(14) 71_545
572 | Co2 O6 2.13(2) .
573 | Co2 O2 2.137(13) .
574 | Co2 N1 2.181(14) 107_656
575 | O6 H1W 0.8497 .
576 | O6 H2W 0.8501 .
577 | Co1 O1 2.035(6) 110
578 | Co1 O1 2.035(6) 50_454
579 | Co1 O1 2.035(6) .
580 | Co1 O1 2.035(6) 133_655
581 | Co1 O4 2.140(12) .
582 | Co1 O5 2.163(9) .
583 | O1 C1 1.250(12) .
584 | O2 C1 1.274(13) .
585 | O2 Co2 1.987(13) 133_655
586 | O3 C6 1.248(14) .
587 | O4 H3W 0.8499 .
588 | O5 Co2 2.062(6) 50_454
589 | O5 Co2 2.062(6) 110
590 | O5 Co2 2.062(6) 133_655
591 | O5 H5A 0.8501 .
592 | O5 H5B 0.8500 .
593 | N1 C11 1.344(17) .
594 | N1 C10 1.362(17) .
595 | N1 Co2 2.067(14) 69_355
596 | N1 Co2 2.181(14) 152_466
597 | N2 C6 1.357(14) .
598 | N2 C6 1.357(14) 162_576
599 | N2 C5 1.408(14) .
600 | C1 C3 1.504(14) .
601 | C2 C3 1.369(11) .
602 | C2 C3 1.369(11) 162_576
603 | C2 H2 0.9300 .
604 | C3 C4 1.402(14) .
605 | C4 C5 1.359(13) .
606 | C4 H4A 0.9300 .
607 | C5 C4 1.359(13) 162_576
608 | C6 C7 1.52(2) .
609 | C6 C6 1.99(3) 162_576
610 | C7 C11 1.37(2) .
611 | C7 C8 1.42(2) .
612 | C7 C7 1.49(3) 162_576
613 | C8 C9 1.38(2) .
614 | C8 C7 1.42(2) 162_576
615 | C8 H8 0.9800 .
616 | C9 C10 1.401(17) .
617 | C9 C10 1.401(17) 162_576
618 | C9 H9 0.9800 .
619 | C10 C10 1.14(3) 162_576
620 | C10 C10 1.46(3) 139_545
621 | C10 C10 1.86(3) 52_456
622 | C11 H11 0.9300 .
623 | loop_
624 | _geom_torsion_atom_site_label_1
625 | _geom_torsion_atom_site_label_2
626 | _geom_torsion_atom_site_label_3
627 | _geom_torsion_atom_site_label_4
628 | _geom_torsion
629 | _geom_torsion_site_symmetry_1
630 | _geom_torsion_site_symmetry_4
631 | O1 Co1 O1 C1 99.8(10) 110 .
632 | O1 Co1 O1 C1 -172.6(10) 50_454 .
633 | O1 Co1 O1 C1 -84.9(10) 133_655 .
634 | O4 Co1 O1 C1 -172.6(10) . .
635 | O5 Co1 O1 C1 7.4(10) . .
636 | O2 Co2 O2 C1 30.1(18) 133_655 .
637 | O5 Co2 O2 C1 -52.2(14) . .
638 | N1 Co2 O2 C1 -132.7(17) 71_545 .
639 | O6 Co2 O2 C1 132.0(16) . .
640 | N1 Co2 O2 C1 -144.4(15) 107_656 .
641 | O2 Co2 O2 Co2 180.0(5) 133_655 133_655
642 | O5 Co2 O2 Co2 97.7(6) . 133_655
643 | N1 Co2 O2 Co2 17.2(18) 71_545 133_655
644 | O6 Co2 O2 Co2 -78.1(9) . 133_655
645 | N1 Co2 O2 Co2 5.5(6) 107_656 133_655
646 | O2 Co2 O5 Co2 128.5(5) 133_655 50_454
647 | N1 Co2 O5 Co2 37.5(4) 71_545 50_454
648 | O6 Co2 O5 Co2 -100(4) . 50_454
649 | O2 Co2 O5 Co2 -122.8(4) . 50_454
650 | N1 Co2 O5 Co2 -38.7(3) 107_656 50_454
651 | O2 Co2 O5 Co2 122.6(5) 133_655 110
652 | N1 Co2 O5 Co2 31.6(8) 71_545 110
653 | O6 Co2 O5 Co2 -106(3) . 110
654 | O2 Co2 O5 Co2 -128.7(5) . 110
655 | N1 Co2 O5 Co2 -44.6(8) 107_656 110
656 | O2 Co2 O5 Co2 -143.0(4) 133_655 133_655
657 | N1 Co2 O5 Co2 126.0(4) 71_545 133_655
658 | O6 Co2 O5 Co2 -12(4) . 133_655
659 | O2 Co2 O5 Co2 -34.3(3) . 133_655
660 | N1 Co2 O5 Co2 49.8(4) 107_656 133_655
661 | O2 Co2 O5 Co1 -51.5(5) 133_655 .
662 | N1 Co2 O5 Co1 -142.5(4) 71_545 .
663 | O6 Co2 O5 Co1 80(4) . .
664 | O2 Co2 O5 Co1 57.2(4) . .
665 | N1 Co2 O5 Co1 141.3(3) 107_656 .
666 | O1 Co1 O5 Co2 42.1(4) 110 50_454
667 | O1 Co1 O5 Co2 -48.1(4) 50_454 50_454
668 | O1 Co1 O5 Co2 131.9(4) . 50_454
669 | O1 Co1 O5 Co2 -137.9(4) 133_655 50_454
670 | O4 Co1 O5 Co2 -139(100) . 50_454
671 | O1 Co1 O5 Co2 48.1(4) 110 110
672 | O1 Co1 O5 Co2 -42.1(4) 50_454 110
673 | O1 Co1 O5 Co2 137.9(4) . 110
674 | O1 Co1 O5 Co2 -131.9(4) 133_655 110
675 | O4 Co1 O5 Co2 -133(100) . 110
676 | O1 Co1 O5 Co2 -137.9(4) 110 .
677 | O1 Co1 O5 Co2 131.9(4) 50_454 .
678 | O1 Co1 O5 Co2 -48.1(4) . .
679 | O1 Co1 O5 Co2 42.1(4) 133_655 .
680 | O4 Co1 O5 Co2 41(100) . .
681 | O1 Co1 O5 Co2 -131.9(4) 110 133_655
682 | O1 Co1 O5 Co2 137.9(4) 50_454 133_655
683 | O1 Co1 O5 Co2 -42.1(4) . 133_655
684 | O1 Co1 O5 Co2 48.1(4) 133_655 133_655
685 | O4 Co1 O5 Co2 47(100) . 133_655
686 | Co1 O1 C1 O2 5(2) . .
687 | Co1 O1 C1 C3 -178.1(7) . .
688 | Co2 O2 C1 O1 23(2) 133_655 .
689 | Co2 O2 C1 O1 25(2) . .
690 | Co2 O2 C1 C3 -154.3(11) 133_655 .
691 | Co2 O2 C1 C3 -152.0(10) . .
692 | C3 C2 C3 C4 -2(2) 162_576 .
693 | C3 C2 C3 C1 -176.5(9) 162_576 .
694 | O1 C1 C3 C2 -10.8(18) . .
695 | O2 C1 C3 C2 166.7(13) . .
696 | O1 C1 C3 C4 175.1(12) . .
697 | O2 C1 C3 C4 -7.4(17) . .
698 | C2 C3 C4 C5 7(2) . .
699 | C1 C3 C4 C5 -178.6(13) . .
700 | C3 C4 C5 C4 -12(3) . 162_576
701 | C3 C4 C5 N2 177.7(14) . .
702 | C6 N2 C5 C4 31(3) . 162_576
703 | C6 N2 C5 C4 158.6(16) 162_576 162_576
704 | C6 N2 C5 C4 -158.6(16) . .
705 | C6 N2 C5 C4 -31(3) 162_576 .
706 | C6 N2 C6 O3 -133.6(15) 162_576 .
707 | C5 N2 C6 O3 6(3) . .
708 | C6 N2 C6 C7 44(2) 162_576 .
709 | C5 N2 C6 C7 -176.0(16) . .
710 | C5 N2 C6 C6 140(3) . 162_576
711 | O3 C6 C7 C11 36(2) . .
712 | N2 C6 C7 C11 -142.4(16) . .
713 | C6 C6 C7 C11 -113.5(14) 162_576 .
714 | O3 C6 C7 C8 -153.1(17) . .
715 | N2 C6 C7 C8 29(2) . .
716 | C6 C6 C7 C8 57.8(13) 162_576 .
717 | O3 C6 C7 C7 149.2(16) . 162_576
718 | N2 C6 C7 C7 -28.9(15) . 162_576
719 | C6 C6 C7 C7 0.000(6) 162_576 162_576
720 | C11 C7 C8 C9 -16(2) . .
721 | C7 C7 C8 C9 -108.0(12) 162_576 .
722 | C6 C7 C8 C9 173.4(13) . .
723 | C11 C7 C8 C7 92.2(15) . 162_576
724 | C6 C7 C8 C7 -78.6(16) . 162_576
725 | C7 C8 C9 C10 -63.9(12) 162_576 .
726 | C7 C8 C9 C10 8.0(15) . .
727 | C7 C8 C9 C10 -8.0(15) 162_576 162_576
728 | C7 C8 C9 C10 63.9(12) . 162_576
729 | C11 N1 C10 C10 -83.8(13) . 162_576
730 | Co2 N1 C10 C10 103.6(8) 69_355 162_576
731 | Co2 N1 C10 C10 103.2(7) 152_466 162_576
732 | C11 N1 C10 C9 -10(2) . .
733 | Co2 N1 C10 C9 177.0(9) 69_355 .
734 | Co2 N1 C10 C9 176.7(9) 152_466 .
735 | C11 N1 C10 C10 176.3(11) . 139_545
736 | Co2 N1 C10 C10 3.7(12) 69_355 139_545
737 | Co2 N1 C10 C10 3.4(11) 152_466 139_545
738 | C11 N1 C10 C10 -141.0(14) . 52_456
739 | Co2 N1 C10 C10 46.4(18) 69_355 52_456
740 | Co2 N1 C10 C10 46.1(18) 152_466 52_456
741 | C8 C9 C10 C10 104.8(10) . 162_576
742 | C8 C9 C10 N1 4.9(17) . .
743 | C10 C9 C10 N1 -99.8(13) 162_576 .
744 | C8 C9 C10 C10 177.7(5) . 139_545
745 | C10 C9 C10 C10 72.9(7) 162_576 139_545
746 | C8 C9 C10 C10 144.6(7) . 52_456
747 | C10 C9 C10 C10 39.8(10) 162_576 52_456
748 | C10 N1 C11 C7 2(2) . .
749 | Co2 N1 C11 C7 174.7(12) 69_355 .
750 | Co2 N1 C11 C7 174.4(12) 152_466 .
751 | C8 C7 C11 N1 11(2) . .
752 | C7 C7 C11 N1 73.6(16) 162_576 .
753 | C6 C7 C11 N1 -177.3(14) . .
754 |
--------------------------------------------------------------------------------