├── .conda-recipe
│   ├── meta.yaml
│   ├── post-link.bat
│   ├── post-link.sh
│   ├── pre-unlink.bat
│   └── pre-unlink.sh
├── .git-blame-ignore-revs
├── .gitattributes
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── python-package.yml
│       └── python-publish.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .travis.yml
├── 3rdparty
│   └── h5py-LICENSE
├── LICENSE
├── MANIFEST.in
├── README.md
├── codecov.yml
├── docs
│   ├── Makefile
│   ├── attributes.rst
│   ├── conf.py
│   ├── dataset.rst
│   ├── environment.yml
│   ├── file.rst
│   ├── group.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── plugins.rst
│   └── raw.rst
├── environment.yml
├── examples
│   ├── axona_to_exdir.py
│   ├── exdirio.py
│   ├── usecase_exdir.py
│   └── usecase_h5py.py
├── exdir
│   ├── __init__.py
│   ├── _version.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── attribute.py
│   │   ├── constants.py
│   │   ├── dataset.py
│   │   ├── exdir_file.py
│   │   ├── exdir_object.py
│   │   ├── group.py
│   │   ├── mode.py
│   │   ├── raw.py
│   │   └── validation.py
│   ├── plugin_interface
│   │   ├── __init__.py
│   │   └── plugin_interface.py
│   ├── plugins
│   │   ├── __init__.py
│   │   ├── git_lfs.py
│   │   ├── numpy_attributes.py
│   │   └── quantities.py
│   ├── static
│   │   └── index.js
│   └── utils
│       ├── __init__.py
│       ├── display.py
│       └── path.py
├── jupyter-config
│   ├── jupyter_notebook_config.d
│   │   └── exdir.json
│   └── nbconfig
│       └── notebook.d
│           └── exdir.json
├── libs
│   └── travis-conda-scripts
│       ├── .gitignore
│       ├── .gitrepo
│       ├── LICENSE
│       ├── README.md
│       └── scripts
│           ├── build.sh
│           ├── config.sh
│           ├── doctest.sh
│           ├── install_package.sh
│           ├── travis_install.sh
│           └── upload.sh
├── postBuild
├── requirements.in
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
│   ├── benchmarks
│   │   ├── benchmarks.ipynb
│   │   ├── benchmarks.py
│   │   └── profiling.py
│   ├── conftest.py
│   ├── test_attr.py
│   ├── test_dataset.py
│   ├── test_file.py
│   ├── test_group.py
│   ├── test_help_functions.py
│   ├── test_numpy_attributes.py
│   ├── test_object.py
│   ├── test_plugins.py
│   ├── test_quantities.py
│   └── test_raw.py
└── versioneer.py

/.conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 |   name: exdir
3 |   version: {{ environ.get("GIT_STRING", "0.0.0_custom")|replace("v", "")|replace("-", "_") }}
4 | 
5 | source:
6 |   git_url: ..
7 | 
8 | build:
9 |   noarch: python
10 |   script: python -m pip install --no-deps --ignore-installed .
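  # --no-deps and --ignore-installed keep pip from resolving or reinstalling
  # dependencies itself; dependency handling is left to conda via the
  # requirements section below.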
11 | 12 | requirements: 13 | host: 14 | - python 15 | - setuptools 16 | - pip 17 | - jupyter 18 | - notebook 19 | 20 | run: 21 | - python 22 | - setuptools 23 | - numpy 24 | - scipy 25 | - ruamel_yaml 26 | - pyyaml 27 | - pathlib # [py2k] 28 | - enum34 # [py2k] 29 | - six # [py2k] 30 | - notebook 31 | 32 | test: 33 | requires: 34 | - pytest 35 | - pytest-benchmark 36 | - h5py 37 | - six 38 | - coverage 39 | - codecov 40 | - sphinx 41 | - quantities 42 | - jupyter 43 | - notebook 44 | 45 | source_files: 46 | - tests 47 | - docs 48 | imports: 49 | - exdir 50 | commands: 51 | - python -c "import exdir; print(exdir.__version__)" 52 | - coverage run --source tests -m py.test -s 53 | - coverage report # [py3k] 54 | - codecov # [py3k] 55 | - cd docs ; make doctest # [py3k] 56 | 57 | about: 58 | home: https://github.com/CINPLA/exdir/ 59 | license: MIT 60 | license_file: LICENSE 61 | -------------------------------------------------------------------------------- /.conda-recipe/post-link.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | "%PREFIX%\Scripts\jupyter-nbextension.exe" enable exdir --py --sys-prefix >> "%PREFIX%\.messages.txt" 2>&1 && if errorlevel 1 exit 1 4 | -------------------------------------------------------------------------------- /.conda-recipe/post-link.sh: -------------------------------------------------------------------------------- 1 | "${PREFIX}/bin/jupyter-nbextension" enable exdir --py --sys-prefix >> "${PREFIX}/.messages.txt" 2>&1 2 | -------------------------------------------------------------------------------- /.conda-recipe/pre-unlink.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | "%PREFIX%\Scripts\jupyter-nbextension.exe" uninstall exdir --py --sys-prefix >> "%PREFIX%\.messages.txt" 2>&1 && if errorlevel 1 exit 1 4 | -------------------------------------------------------------------------------- /.conda-recipe/pre-unlink.sh: -------------------------------------------------------------------------------- 1 | "${PREFIX}/bin/jupyter-nbextension" uninstall exdir --py --sys-prefix >> "${PREFIX}/.messages.txt" 2>&1 2 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # pyupgrade 2 | d2a29a2c36b92d1e77ac291a42ece5d8a455a69b 3 | 0f081e455c6082b8f43bdc1ce8d03359e13cf5be 4 | fec69ce479a84bac8270b5f1c95ae6344c9f797a 5 | 59dc3c129b40a7992b5d1fab5e5f04705662f71a 6 | 2d7173ec9277da9eb59982c03c7a6e4e06a49759 7 | 7cfe37c6e20df0417248451dd919aeabaed1349a 8 | b8563af7487d4ea9cda48d5beb49b7354feda5db 9 | 26b7b0c40d317341168090353f85e87e1d9b9f51 10 | c2ffbb7204424f6af32bb8f150e86c8f51bd9eb9 11 | 55f776114a9b5ef96d2504e1118ee74d75f34cc0 12 | 160475c6680a33a0c57535e7275cc6dad473c4ef 13 | 0419e92e29ecb98eec10cbd2534022563dbf44e5 14 | 6048a44d34a9398bda910563f5f85f1a43714319 15 | 328b8843108c427c6153d2e3e2bf32ce41682433 16 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | exdir/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: 
daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | groups: 10 | all: 11 | patterns: 12 | - "*" 13 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [dev] 9 | pull_request: 10 | branches: [dev] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: ['3.10', '3.11', '3.12'] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install --requirement requirements.txt 30 | python -m pip install . 31 | - name: Test with pytest 32 | run: | 33 | python -m pytest 34 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: 3.x 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | python -m pip install build setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python -m build 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | fail_fast: false 3 | repos: 4 | - repo: meta 5 | hooks: 6 | - id: check-hooks-apply 7 | - id: check-useless-excludes 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: "v4.6.0" 10 | hooks: 11 | - id: check-yaml 12 | - repo: https://github.com/asottile/pyupgrade 13 | rev: v3.16.0 14 | hooks: 15 | - id: pyupgrade 16 | args: [--py37-plus] 17 | - repo: https://github.com/Mateusz-Grzelinski/actionlint-py 18 | rev: "v1.7.1.15" 19 | hooks: 20 | - id: actionlint 21 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | conda: 2 | file: docs/environment.yml 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3.6 3 | matrix: 4 | include: 5 | - env: "PYTHON_VERSION=2.7" 6 | - env: "PYTHON_VERSION=3.6" 7 | - env: "PYTHON_VERSION=3.7" 8 | install: 9 | - bash -ex libs/travis-conda-scripts/scripts/travis_install.sh 10 | script: 11 | - source libs/travis-conda-scripts/scripts/config.sh 12 | - bash -ex libs/travis-conda-scripts/scripts/build.sh 13 | matrix: 14 | include: 15 | - env: PYTHON_VERSION=3.6 16 | - env: PYTHON_VERSION=3.7 17 | deploy: 18 | - provider: script 19 | skip_cleanup: true 20 | script: bash -ex libs/travis-conda-scripts/scripts/upload.sh cinpla main 21 | on: 22 | tags: true 23 | - provider: script 24 | skip_cleanup: true 25 | script: bash -ex libs/travis-conda-scripts/scripts/upload.sh cinpla dev 26 | on: 27 | branch: dev 28 | - provider: pypi 29 | on: 30 | tags: true 31 | user: dragly 32 | password: 33 | secure: W14+M6tpBrajbsUprYQChdUURYob0T4L1yp77kELusI5hkTubDBLc7gqQ4AE5DAF3NPGgYuTQ4ntdv+Sji8ZinIyMPt/kVbKbtH0AJusKEH/y+F1AFlxUJ82V7j++5Lu5kuz4uFAHmkdlkh5ARfbJC2fb49aBsWrd6e4g3d87O5kFt0GsLtOUXuXRkhZ9cVQo7FIH9KbjG4v70su3ijJa3NdG02kp9SujsX44CQJI8aAfU17nLUhbOVyHtmTrVhH/Ao22trk1rn9UeKqc0/Upbmm0yegRk0FBcm+t7cEJvW2zWx0BuF9mWyvgvsm5HkvqcYbv/USEzvalCOcGWxs2FGqnzFKmKDSRF1zix9bhyI7wab+zPRW67Al1YK+7Yt1FtEh/AVJqWdzevYcOihHderhS4OnRFtwqEBXRK9Av8hWnVkCqR7fit3f27m3sEqsCTMqcXoaZLRiJXKuLM9hY+uWPq+mM5yTJ5tBhdudsfXY3lLRTIyrQSYmh1zRge8OWqME3nMnccO6Mvatl+c2/aimUTFe7pfdEgseddkLkKyk0z+sE7Z43o18yBKL1K6BvyQth9yKF8uHq7aSjR1rfGb5qj3632Zjf1Zzp+4pETJ3bkLggfQmRc8s4hFo/riCOwMrnrJ/VITg23hO5Me4grh8bPh8S9E4ydSb4KPbtu4= 
34 | -------------------------------------------------------------------------------- /3rdparty/h5py-LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Notice and Statement for the h5py Project 2 | =================================================== 3 | 4 | Copyright (c) 2008-2013 Andrew Collette and contributors 5 | http://www.h5py.org 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are 10 | met: 11 | 12 | a. Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 15 | b. Redistributions in binary form must reproduce the above copyright 16 | notice, this list of conditions and the following disclaimer in the 17 | documentation and/or other materials provided with the 18 | distribution. 19 | 20 | c. Neither the name of the author nor the names of contributors may 21 | be used to endorse or promote products derived from this software 22 | without specific prior written permission. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 30 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 CINPLA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include versioneer.py
2 | include exdir/_version.py
3 | recursive-include jupyter-config *.json
4 | recursive-include exdir/static *.js
5 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
2 | [![codecov](https://codecov.io/gh/CINPLA/exdir/branch/dev/graph/badge.svg)](https://codecov.io/gh/CINPLA/exdir)
3 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/CINPLA/exdir/dev?filepath=tests%2Fbenchmarks%2Fbenchmarks.ipynb)
4 | 
5 | *Important*: The reference implementation contained in this repository is intended for
6 | feedback and as a basis for future library implementations.
7 | It is not ready for production use.
8 | 
9 | # Experimental Directory Structure #
10 | 
11 | Experimental Directory Structure (exdir) is a proposed, open specification for
12 | experimental pipelines.
13 | Exdir is currently a prototype published to invite researchers to give feedback on
14 | the standard.
15 | 
16 | Exdir is a hierarchical format based on open standards.
17 | It is inspired by existing formats such as HDF5 and NumPy,
18 | and attempts to solve some of the problems associated with these while
19 | retaining their benefits.
20 | The development of exdir owes a great deal to the efforts of others to standardize
21 | data formats in science in general and neuroscience in particular, among them
22 | the Klusta Kwik Team and Neurodata Without Borders.
23 | 
24 | ## Installation
25 | 
26 | Exdir can be installed with Anaconda:
27 | 
28 |     conda install exdir -c cinpla -c conda-forge
29 | 
30 | ## Usage
31 | 
32 | The following code creates an Exdir directory with a group and a dataset:
33 | 
34 | ```
35 | import numpy as np
36 | import exdir
37 | 
38 | experiment = exdir.File("experiment.exdir")
39 | group = experiment.create_group("group")
40 | data = np.arange(10)
41 | dataset = group.create_dataset("dataset", data=data)
42 | ```
43 | 
44 | The data can later be retrieved using the same keys:
45 | 
46 | ```
47 | group = experiment["group"]
48 | dataset = group["dataset"]
49 | print(dataset)
50 | ```
51 | 
52 | Attributes can be added to all objects, including files, groups and datasets:
53 | 
54 | ```
55 | group.attrs["room_number"] = 1234
56 | dataset.attrs["recording_date"] = "2018-02-04"
57 | ```
58 | 
59 | See the [documentation](https://exdir.readthedocs.io) for more information.
60 | 
61 | ## Benchmarks ##
62 | 
63 | See [benchmarks.ipynb](tests/benchmarks/benchmarks.ipynb).
64 | 
65 | A [live version](https://mybinder.org/v2/gh/CINPLA/exdir/dev?filepath=tests%2Fbenchmarks%2Fbenchmarks.ipynb)
66 | can be explored using Binder.
67 | 
68 | ## Quick introduction ##
69 | 
70 | Exdir is not a file format in itself, but rather a standardized folder structure.
71 | The abstract data model is almost equivalent to that of HDF5,
72 | with groups, datasets, and attributes.
73 | This was done to simplify the transition between the two formats.
74 | However, data in Exdir is not stored in a single file,
75 | but rather in multiple files within the hierarchy.
76 | The metadata is stored in a restricted version of the YAML 1.2 format
77 | and the binary data in version 2.0 of the NumPy `.npy` format.
78 | 
79 | Here is an example structure:
80 | 
81 | ```
82 | example.exdir (File, folder)
83 | │   attributes.yaml (-, file)
84 | │   exdir.yaml (-, file)
85 | │
86 | ├── dataset1 (Dataset, folder)
87 | │   ├── data.npy (-, file)
88 | │   ├── attributes.yaml (-, file)
89 | │   └── exdir.yaml (-, file)
90 | │
91 | └── group1 (Group, folder)
92 |     ├── attributes.yaml (-, file)
93 |     ├── exdir.yaml (-, file)
94 |     │
95 |     ├── dataset3 (Dataset, folder)
96 |     │   ├── data.npy (-, file)
97 |     │   ├── attributes.yaml (-, file)
98 |     │   └── exdir.yaml (-, file)
99 |     │
100 |     ├── link1 (Link, folder)
101 |     │   └── exdir.yaml (-, file)
102 |     │
103 |     └── dataset4 (Dataset, folder)
104 |         ├── data.npy (-, file)
105 |         ├── attributes.yaml (-, file)
106 |         ├── exdir.yaml (-, file)
107 |         │
108 |         └── raw (Raw, folder)
109 |             ├── image0001.tif (-, file)
110 |             ├── image0002.tif (-, file)
111 |             └── ...
112 | ```
113 | 
114 | The above structure shows the name of the object, the type of the object in exdir and
115 | the type of the object on the file system as follows:
116 | 
117 | ```
118 | [name] ([exdir type], [file system type])
119 | ```
120 | 
121 | A dash (-) indicates that the object doesn't have a separate internal
122 | representation in the format, but is used indirectly.
123 | It is however explicitly stored in the file system.
124 | 
125 | The above structure shows that `example.exdir` is simply a folder in
126 | the file system, but when read by an exdir parser, it appears as a `File`.
127 | The `File` is the root object of any structure.
128 | The metadata of the `File` is stored in a file named exdir.yaml.
129 | This is internal to exdir.
130 | Attributes of the `File` are stored in a file named attributes.yaml.
131 | This is optional.
132 | 
133 | Below the file, multiple objects may appear, among them `Dataset`s and `Group`s.
134 | Both `Dataset`s and `Group`s are stored as folders in the file system.
135 | Both have their metadata stored in files named exdir.yaml.
136 | These are not visible as files within the exdir format, but appear simply as
137 | the metadata for the `Dataset`s and `Group`s.
138 | 
139 | If there is any additional data associated with a dataset,
140 | it may (optionally) be stored in a folder named `raw`.
141 | This differs from HDF5, but allows storing raw data from experiments (such as
142 | TIFF images from an external microscopy system) alongside the data
143 | converted to the NumPy format.
144 | 
145 | ## Goals and benefits ##
146 | 
147 | By reusing the structure of HDF5, exdir should be familiar to researchers that
148 | have experience with this format.
149 | However, by not storing the data in a single file,
150 | the data is much less prone to corruption.
151 | Further, HDF5 is not optimal for modifications, parallelization or data
152 | exploration.
153 | 
154 | By storing the data in separate files, we gain the protection that modern file
155 | systems offer against data corruption.
156 | The data is more easily accessible in parallel computing and is stored in
157 | a well-known and well-tested format.
158 | It is easier to explore the data using standard command-line tools or simply
159 | the file explorer.
160 | 
161 | However, we intend to develop a graphical user interface along the lines of
162 | HDFView that allows simple data exploration.
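Even without such a tool, the contents remain readable with standard libraries
alone. As a minimal sketch (assuming the `example.exdir` structure above and the
`numpy` and `pyyaml` packages), a dataset and its attributes can be read
directly from disk:

```
import numpy as np
import yaml

# The binary data of dataset1 is a plain NumPy file.
data = np.load("example.exdir/dataset1/data.npy")

# The attributes of dataset1 are plain YAML.
with open("example.exdir/dataset1/attributes.yaml") as f:
    attributes = yaml.safe_load(f)

print(data.shape, attributes)
```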
163 | 
164 | ## Principles ##
165 | 
166 | - Exdir should be based on existing open standards
167 | - Exdir should not solve problems that have already been solved, such as storing binary data
168 | - Exdir should be lightweight
169 | 
170 | ## Background ##
171 | 
172 | Exdir was designed to meet a need at the Centre for Integrative
173 | Neuroplasticity (CINPLA) at the University of Oslo for a format that
174 | fits the experimental pipeline.
175 | While researching the different options, we found that the neuroscience
176 | community had several formats for storing experimental data.
177 | A large effort at standardizing the format in the community was spawned by
178 | Neurodata Without Borders (NWB).
179 | An initial version of the NWB format was published, based on the HDF5 format.
180 | However, shortly after the first publication of NWB, concerns were voiced
181 | about the HDF5 format by the developers of the klusta project [1].
182 | They had been using HDF5 as the underlying file format for their software suite
183 | and started seeing problems with the file format among their users.
184 | They saw multiple problems with HDF5 in the form of data corruption, performance
185 | issues, bugs and poor support for parallelization.
186 | 
187 | HDF5 is not optimal for modifications.
188 | This is not a problem if you only store data from acquisition,
189 | as this shouldn't be changed.
190 | However, for analysis it is often necessary to modify the data multiple times as
191 | different methods and parameters are tested.
192 | At the same time, it is beneficial to keep the analysed data stored together
193 | with the acquisition data.
194 | 
195 | [1] http://cyrille.rossant.net/moving-away-hdf5/
196 | 
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | ignore:
2 |   - "exdir/_version.py"
3 |   - "exdir/__init__.py"
4 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS    =
6 | SPHINXBUILD   = sphinx-build
7 | SPHINXPROJ    = exdir
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/attributes.rst:
--------------------------------------------------------------------------------
1 | .. _attributes:
2 | 
3 | Attributes
4 | ==========
5 | 
6 | .. autoclass:: exdir.core.Attribute
7 |     :members:
8 |     :undoc-members:
9 |     :show-inheritance:
10 | 
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # exdir documentation build configuration file, created by
4 | # sphinx-quickstart on Fri Feb 3 09:52:17 2017.
5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | import os 24 | import re 25 | import sys 26 | # import exdir 27 | sys.path.insert(0, os.path.abspath("../")) 28 | 29 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 30 | if not on_rtd: # only import and set the theme if we're building docs locally 31 | try: 32 | import sphinx_rtd_theme 33 | html_theme = "sphinx_rtd_theme" 34 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 35 | except ImportError: 36 | print("RTD theme not installed, using default") 37 | pass 38 | 39 | # -- General configuration ------------------------------------------------ 40 | 41 | # If your documentation needs a minimal Sphinx version, state it here. 42 | # 43 | # needs_sphinx = '1.0' 44 | 45 | # Add any Sphinx extension module names here, as strings. They can be 46 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 47 | # ones. 48 | extensions = ['sphinx.ext.autodoc', 49 | 'sphinx.ext.doctest', 50 | 'sphinx.ext.todo', 51 | 'sphinx.ext.coverage', 52 | 'sphinx.ext.mathjax', 53 | 'sphinx.ext.ifconfig', 54 | 'sphinx.ext.viewcode', 55 | 'sphinx.ext.intersphinx', 56 | 'sphinx.ext.autosummary', 57 | 'sphinx.ext.napoleon'] 58 | 59 | viewcode_import = True 60 | 61 | # Napoleon options 62 | napoleon_google_docstring = False 63 | napoleon_numpy_docstring = True 64 | napoleon_include_init_with_doc = False 65 | napoleon_use_param = False 66 | napoleon_use_ivar = True 67 | 68 | autosummary_generate = True 69 | 70 | napoleon_include_special_with_doc = False 71 | napoleon_use_admonition_for_notes = False 72 | 73 | # Add any paths that contain templates here, relative to this directory. 74 | templates_path = ['_templates'] 75 | 76 | # The suffix(es) of source filenames. 77 | # You can specify multiple suffix as a list of string: 78 | # 79 | # source_suffix = ['.rst', '.md'] 80 | source_suffix = '.rst' 81 | 82 | # The master toctree document. 83 | master_doc = 'index' 84 | 85 | # General information about the project. 86 | project = 'exdir' 87 | copyright = '2018, Svenn-Arne Dragly, Milad H. Mobarhan, Mikkel E. Lepperød' 88 | author = 'Svenn-Arne Dragly, Milad H. Mobarhan, Mikkel E. Lepperød' 89 | 90 | # The version info for the project you're documenting, acts as replacement for 91 | # |version| and |release|, also used in various other places throughout the 92 | # built documents. 93 | # 94 | # The full version, including alpha/beta/rc tags. 95 | # release = exdir.__version__.replace("v", "") 96 | # print("Release", release) 97 | # # The short X.Y version. 98 | # version = release 99 | # result = re.match(r"\d+\.\d+\.?\d*", release) 100 | # if result: 101 | # version = result.group() 102 | # print("Version", version) 103 | 104 | # The language for content autogenerated by Sphinx. Refer to documentation 105 | # for a list of supported languages. 
106 | # 107 | # This is also used if you do content translation via gettext catalogs. 108 | # Usually you set "language" from the command line for these cases. 109 | language = None 110 | 111 | # List of patterns, relative to source directory, that match files and 112 | # directories to ignore when looking for source files. 113 | # This patterns also effect to html_static_path and html_extra_path 114 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 115 | 116 | # The name of the Pygments (syntax highlighting) style to use. 117 | # pygments_style = 'sphinx' 118 | 119 | # If true, `todo` and `todoList` produce output, else they produce nothing. 120 | todo_include_todos = True 121 | 122 | 123 | # -- Options for HTML output ---------------------------------------------- 124 | 125 | # The theme to use for HTML and HTML Help pages. See the documentation for 126 | # a list of builtin themes. 127 | # 128 | # html_theme = 'alabaster' 129 | 130 | # Theme options are theme-specific and customize the look and feel of a theme 131 | # further. For a list of options available for each theme, see the 132 | # documentation. 133 | # 134 | # html_theme_options = {} 135 | 136 | # Add any paths that contain custom static files (such as style sheets) here, 137 | # relative to this directory. They are copied after the builtin static files, 138 | # so a file named "default.css" will overwrite the builtin "default.css". 139 | # html_static_path = ['_static'] 140 | 141 | 142 | # -- Options for HTMLHelp output ------------------------------------------ 143 | 144 | # Output file base name for HTML help builder. 145 | htmlhelp_basename = 'exdirdoc' 146 | 147 | 148 | # -- Options for LaTeX output --------------------------------------------- 149 | 150 | latex_elements = { 151 | # The paper size ('letterpaper' or 'a4paper'). 152 | # 153 | # 'papersize': 'letterpaper', 154 | 155 | # The font size ('10pt', '11pt' or '12pt'). 156 | # 157 | # 'pointsize': '10pt', 158 | 159 | # Additional stuff for the LaTeX preamble. 160 | # 161 | # 'preamble': '', 162 | 163 | # Latex figure (float) alignment 164 | # 165 | # 'figure_align': 'htbp', 166 | } 167 | 168 | # Grouping the document tree into LaTeX files. List of tuples 169 | # (source start file, target name, title, 170 | # author, documentclass [howto, manual, or own class]). 171 | latex_documents = [ 172 | (master_doc, 'exdir.tex', 'exdir Documentation', 173 | 'Svenn-Arne Dragly, Milad H. Mobarhan, Mikkel E. Lepperød', 'manual'), 174 | ] 175 | 176 | 177 | # -- Options for manual page output --------------------------------------- 178 | 179 | # One entry per manual page. List of tuples 180 | # (source start file, name, description, authors, manual section). 181 | man_pages = [ 182 | (master_doc, 'exdir', 'exdir Documentation', 183 | [author], 1) 184 | ] 185 | 186 | 187 | # -- Options for Texinfo output ------------------------------------------- 188 | 189 | # Grouping the document tree into Texinfo files. List of tuples 190 | # (source start file, target name, title, author, 191 | # dir menu entry, description, category) 192 | texinfo_documents = [ 193 | (master_doc, 'exdir', 'exdir Documentation', 194 | author, 'exdir', 'One line description of project.', 195 | 'Miscellaneous'), 196 | ] 197 | -------------------------------------------------------------------------------- /docs/dataset.rst: -------------------------------------------------------------------------------- 1 | .. _dataset: 2 | 3 | Datasets 4 | ======== 5 | 6 | This is data set class. 
It is implemented by the class :py:class:`exdir.core.Dataset`:
7 | 
8 | .. autoclass:: exdir.core.Dataset
9 |     :members:
10 |     :undoc-members:
11 |     :show-inheritance:
12 | 
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | channels:
2 |   - conda-forge
3 | 
4 | dependencies:
5 |   - python=3.5
6 |   - setuptools
7 |   - numpy
8 |   - scipy
9 |   - ruamel_yaml
10 | 
--------------------------------------------------------------------------------
/docs/file.rst:
--------------------------------------------------------------------------------
1 | .. _file:
2 | 
3 | File Objects
4 | ============
5 | 
6 | .. autoclass:: exdir.core.File
7 |     :members:
8 |     :undoc-members:
9 |     :show-inheritance:
10 | 
--------------------------------------------------------------------------------
/docs/group.rst:
--------------------------------------------------------------------------------
1 | .. _group:
2 | 
3 | Groups
4 | ======
5 | .. autoclass:: exdir.core.Group
6 |     :members:
7 |     :undoc-members:
8 |     :show-inheritance:
9 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. exdir documentation master file, created by
2 |    sphinx-quickstart on Fri Feb 3 09:52:17 2017.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | Welcome to Exdir's documentation!
7 | =================================
8 | 
9 | The Experimental Directory Structure (Exdir) is a proposed, open file format specification for experimental pipelines.
10 | Exdir uses the same abstractions as HDF5 and is compatible with the HDF5 Abstract Data Model, but stores data and metadata in directories instead of in a single file.
11 | Exdir uses file system directories to represent the hierarchy, with metadata stored in human-readable YAML files, datasets stored in binary NumPy files, and raw data stored directly in subdirectories.
12 | Furthermore, storing data in multiple files makes it easier for version control systems to track.
13 | Exdir is not a file format in itself, but a specification for organizing files in a directory structure.
14 | With the publication of Exdir, we invite the scientific community to join the development to create an open specification that will serve as many needs as possible and as a foundation for open access to and exchange of data.
15 | 
16 | Exdir is described in detail in our research paper:
17 | 
18 | `Experimental Directory Structure (Exdir): An Alternative to HDF5 Without Introducing a New File Format <https://www.frontiersin.org/articles/10.3389/fninf.2018.00016/full>`_.
19 | 
20 | .. toctree::
21 |     :maxdepth: 1
22 |     :hidden:
23 | 
24 |     installation
25 |     file
26 |     group
27 |     dataset
28 |     raw
29 |     attributes
30 |     plugins
31 | 
32 | Specification
33 | -------------
34 | 
35 | Exdir is not a file format in itself, but rather a specification for a directory structure
36 | with NumPy and YAML files.
37 | 
38 | ..
code-block:: text
39 | 
40 |     example.exdir (File, folder)
41 |     │   attributes.yaml (-, file)
42 |     │   exdir.yaml (-, file)
43 |     │
44 |     ├── dataset1 (Dataset, folder)
45 |     │   ├── data.npy (-, file)
46 |     │   ├── attributes.yaml (-, file)
47 |     │   └── exdir.yaml (-, file)
48 |     │
49 |     └── group1 (Group, folder)
50 |         ├── attributes.yaml (-, file)
51 |         ├── exdir.yaml (-, file)
52 |         │
53 |         └── dataset2 (Dataset, folder)
54 |             ├── data.npy (-, file)
55 |             ├── attributes.yaml (-, file)
56 |             ├── exdir.yaml (-, file)
57 |             │
58 |             └── raw (Raw, folder)
59 |                 ├── image0001.tif (-, file)
60 |                 ├── image0002.tif (-, file)
61 |                 └── ...
62 | 
63 | The above structure shows the name of the object, the type of the object in exdir and
64 | the type of the object on the file system as follows:
65 | 
66 | .. code-block:: text
67 | 
68 |     [name] ([exdir type], [file system type])
69 | 
70 | A dash (-) indicates that the object doesn't have a separate internal
71 | representation in the format, but is used indirectly.
72 | It is however explicitly stored in the file system.
73 | 
74 | 
75 | Install
76 | -------
77 | 
78 | With `Anaconda <https://www.anaconda.com/download>`_ or
79 | `Miniconda <https://docs.conda.io/en/latest/miniconda.html>`_::
80 | 
81 |     conda install -c cinpla exdir
82 | 
83 | For more, see :ref:`installation`.
84 | 
85 | Quick usage example
86 | -------------------
87 | 
88 | .. testsetup::
89 | 
90 |     import os
91 |     import shutil
92 |     if os.path.exists("mytestfile.exdir"):
93 |         shutil.rmtree("mytestfile.exdir")
94 | 
95 | 
96 | .. doctest::
97 | 
98 |     >>> import exdir
99 |     >>> import numpy as np
100 |     >>> f = exdir.File("mytestfile.exdir")
101 | 
102 | The :ref:`File object <file>` points to the root folder in the exdir file
103 | structure.
104 | You can add groups and datasets to it.
105 | 
106 | .. doctest::
107 | 
108 |     >>> my_group = f.require_group("my_group")
109 |     >>> a = np.arange(100)
110 |     >>> dset = f.require_dataset("my_data", data=a)
111 | 
112 | These can later be accessed with square brackets:
113 | 
114 | .. doctest::
115 | 
116 |     >>> f["my_data"][10]
117 |     10
118 | 
119 | Groups can hold other groups or datasets:
120 | 
121 | .. doctest::
122 | 
123 |     >>> subgroup = my_group.require_group("subgroup")
124 |     >>> subdata = subgroup.require_dataset("subdata", data=a)
125 | 
126 | Datasets support array-style slicing:
127 | 
128 | .. doctest::
129 | 
130 |     >>> dset[0:100:10]
131 |     memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
132 | 
133 | Attributes can be added to files, groups and datasets:
134 | 
135 | .. doctest::
136 | 
137 |     >>> f.attrs["description"] = "My first exdir file"
138 |     >>> my_group.attrs["meaning_of_life"] = 42
139 |     >>> dset.attrs["trial_number"] = 12
140 |     >>> f.attrs["description"]
141 |     'My first exdir file'
142 | 
143 | 
144 | Core concepts
145 | -------------
146 | An exdir object contains two types of objects: `datasets`, which are
147 | array-like collections of data, and `groups`, which are directories containing
148 | datasets and other groups.
149 | 
150 | An exdir directory is created by:
151 | 
152 | .. testsetup::
153 | 
154 |     import os
155 |     import shutil
156 |     if os.path.exists("myfile.exdir"):
157 |         shutil.rmtree("myfile.exdir")
158 | 
159 | 
160 | .. doctest::
161 | 
162 |     >>> import exdir
163 |     >>> import numpy as np
164 |     >>> f = exdir.File("myfile.exdir", "w")
165 | 
166 | The :ref:`File object <file>` contains many useful methods including :py:meth:`exdir.core.Group.require_dataset`:
167 | 
168 | >>> data = np.arange(100)
169 | >>> dset = f.require_dataset("mydataset", data=data)
170 | 
171 | The created object is not an array but :ref:`an exdir dataset <dataset>`.
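Under the hood, the values live in a plain NumPy file, so (as a sketch, assuming
the default layout described in the specification above) they can also be read
back without exdir at all:

.. code-block:: python

    >>> import numpy as np
    >>> np.load("myfile.exdir/mydataset/data.npy")[:5]
    array([0, 1, 2, 3, 4])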
172 | Like NumPy arrays, datasets have a shape:
173 | 
174 | >>> dset.shape
175 | (100,)
176 | 
177 | Array-style slicing is also supported:
178 | 
179 | >>> dset[0]
180 | 0
181 | >>> dset[10]
182 | 10
183 | >>> dset[0:100:10]
184 | memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
185 | 
186 | For more, see :ref:`file` and :ref:`dataset`.
187 | 
188 | 
189 | Groups and hierarchical organization
190 | ------------------------------------
191 | 
192 | Every object in an exdir directory has a name, and they're arranged in a POSIX-style hierarchy with ``/``-separators:
193 | 
194 | >>> dset.name
195 | '/mydataset'
196 | 
197 | The "directories" in this system are called :ref:`groups <group>`.
198 | The :ref:`File object <file>` we created is itself a group, in this case the `root group`, named ``/``:
199 | 
200 | >>> f.name
201 | '/'
202 | 
203 | Creating a subgroup is done by using the :py:meth:`exdir.core.Group.require_group` method:
204 | 
205 | >>> grp = f.require_group("subgroup")
206 | 
207 | All :py:class:`exdir.core.Group` objects also have the ``require_*`` methods like File:
208 | 
209 | >>> dset2 = grp.require_dataset("another_dataset", data=data)
210 | >>> dset2.name
211 | '/subgroup/another_dataset'
212 | 
213 | .. By the way, you don't have to create all the intermediate groups manually.
214 | .. Specifying a full path works just fine:
215 | ..
216 | ..
217 | .. >>> dset3 = f.create_dataset('subgroup2/dataset_three', (10,))
218 | .. >>> dset3.name
219 | .. '/subgroup2/dataset_three'
220 | 
221 | You retrieve objects in the file using the item-retrieval syntax:
222 | 
223 | >>> dataset_three = f['subgroup/another_dataset']
224 | 
225 | Iterating over a group provides the names of its members:
226 | 
227 | >>> for name in f:
228 | ...     print(name)
229 | mydataset
230 | subgroup
231 | 
232 | 
233 | Containership testing also uses names:
234 | 
235 | 
236 | >>> "mydataset" in f
237 | True
238 | >>> "somethingelse" in f
239 | False
240 | 
241 | You can even use full path names:
242 | 
243 | >>> "subgroup/another_dataset" in f
244 | True
245 | >>> "subgroup/somethingelse" in f
246 | False
247 | 
248 | There are also the familiar :py:meth:`exdir.core.Group.keys`, :py:meth:`exdir.core.Group.values`, :py:meth:`exdir.core.Group.items` and
249 | :py:meth:`exdir.core.Group.iter` methods, as well as :py:meth:`exdir.core.Group.get`.
250 | 
251 | 
252 | .. Since iterating over a group only yields its directly-attached members,
253 | .. iterating over an entire file is accomplished with the ``Group`` methods
254 | .. ``visit()`` and ``visititems()``, which take a callable:
255 | ..
256 | ..
257 | ..
258 | .. >>> def printname(name):
259 | .. ...     print(name)
260 | .. >>> f.visit(printname)
261 | .. mydataset
262 | .. subgroup
263 | .. subgroup/another_dataset
264 | .. subgroup2
265 | .. subgroup2/dataset_three
266 | 
267 | For more, see :ref:`group`.
268 | 
269 | 
270 | 
271 | Attributes
272 | ----------
273 | 
274 | With exdir you can store metadata right next to the data it describes.
275 | All groups and datasets can have attributes, which are described by :py:class:`exdir.core.Attribute`.
276 | 
277 | Attributes are accessed through the ``attrs`` proxy object, which again
278 | implements the dictionary interface:
279 | 
280 | >>> dset.attrs['temperature'] = 99.5
281 | >>> dset.attrs['temperature']
282 | 99.5
283 | >>> 'temperature' in dset.attrs
284 | True
285 | 
286 | For more, see :ref:`attributes`.
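Because attributes are backed by YAML, dictionary values can be stored and read
back as nested attributes. As a sketch (not part of the doctests above), using
the same ``dset``:

.. code-block:: python

    >>> dset.attrs["info"] = {"unit": "mV", "channel": 3}
    >>> dset.attrs["info"]["unit"]
    'mV'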
287 | 
288 | Acknowledgements
289 | ----------------
290 | 
291 | The development of Exdir owes a great deal to other standardization efforts in science in general and neuroscience in particular,
292 | among them the contributors to HDF5, NumPy, YAML, PyYAML, ruamel-yaml, SciPy, Klusta Kwik, NeuralEnsemble, and Neurodata Without Borders.
293 | 
294 | References
295 | ----------
296 | 
297 | * :ref:`genindex`
298 | * :ref:`search`
299 | 
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 | 
3 | .. _installation:
4 | 
5 | Installation
6 | ============
7 | 
8 | Pre-configured installation (recommended)
9 | -----------------------------------------
10 | It’s strongly recommended that you use Anaconda to install exdir along with its compiled dependencies.
11 | 
12 | With `Anaconda <https://www.anaconda.com/download>`_ or
13 | `Miniconda <https://docs.conda.io/en/latest/miniconda.html>`_::
14 | 
15 |     conda install -c cinpla exdir
16 | 
--------------------------------------------------------------------------------
/docs/plugins.rst:
--------------------------------------------------------------------------------
1 | .. _plugins:
2 | 
3 | Plugins
4 | =======
5 | 
6 | The functionality of Exdir can be extended with plugins.
7 | These allow modifying the behavior of Exdir when enabled.
8 | For instance, dataset and attribute plugins can perform pre- and post-processing of data during
9 | reading and writing operations.
10 | Note that plugins do not change the underlying specifications of Exdir.
11 | Plugins are intended to perform verification of data consistency,
12 | and to provide convenient mapping from general in-memory objects to objects that can be stored in
13 | the Exdir format and back again.
14 | Some plugins are provided in the exdir.plugins module,
15 | while new plugins can be defined by Exdir users or package developers.
16 | 
17 | One of the built-in plugins provides experimental support for units using the `quantities` package:
18 | 
19 | .. code-block:: python
20 | 
21 |     >>> import exdir
22 |     >>> import exdir.plugins.quantities
23 |     >>> import numpy as np, quantities as pq
24 |     >>> f = exdir.File("test.exdir", plugins=[exdir.plugins.quantities])
25 |     >>> q = np.array([1, 2, 3]) * pq.mV
26 |     >>> dset_q = f.create_dataset("quantities_array", data=q)
27 |     >>> dset_q[:]
28 |     array([ 1.,  2.,  3.]) * mV
29 | 
30 | As shown in the above example, a plugin is enabled when creating a File object by passing the
31 | plugin to the plugins argument.
32 | 
33 | To create a custom plugin, one of the handler classes in `exdir.plugin_interface` must be inherited.
34 | The abstract handler classes are named after the object type you want to create a handler for.
35 | In this example we have a simplified `Quantity` class,
36 | which only contains a magnitude and a corresponding unit:
37 | 
38 | .. code-block:: python
39 | 
40 |     >>> class Quantity:
41 |     >>>     def __init__(self, magnitude, unit):
42 |     >>>         self.magnitude = magnitude
43 |     >>>         self.unit = unit
44 | 
45 | Below, we create a plugin that enables us to directly use a `Quantity` object as a `Dataset` in
46 | Exdir.
47 | We do this by inheriting from `exdir.plugin_interface.Dataset` and overloading `prepare_write` and
48 | `prepare_read`:
49 | 
50 | ..
code-block:: python 51 | 52 | >>> import exdir 53 | >>> class DatasetQuantity(exdir.plugin_interface.Dataset): 54 | >>> def prepare_write(self, dataset_data): 55 | >>> magnitude = dataset_data.data.magnitude 56 | >>> unit = dataset_data.data.unit 57 | >>> 58 | >>> dataset_data.data = magnitude 59 | >>> dataset_data.attrs = {"unit": unit} 60 | >>> 61 | >>> return dataset_data 62 | >>> 63 | >>> def prepare_read(self, dataset_data): 64 | >>> unit = dataset_data.attrs["unit"] 65 | >>> magnitude = dataset_data.data 66 | >>> 67 | >>> dataset_data.data = Quantity(magnitude, unit) 68 | >>> 69 | >>> return dataset_data 70 | 71 | The overloaded functions take `dataset_data` as an argument. 72 | This has the `data`, `attrs`, and `meta` properties. 73 | The property `attrs` is a dictionary with optional attributes, 74 | while `meta` is a dictionary with information about the plugin. 75 | 76 | In `prepare_write`, the magnitude and unit of the data is translated to a value (numeric or 77 | `numpy.ndarray`) and an attribute (dictionary-like) that then can be written to file. 78 | `prepare_read` receives the data from the NumPy file and the attributes from the YAML file, 79 | and uses these to reconstruct a `Quantity` object. 80 | 81 | We create a plugin that uses this handler as follows: 82 | 83 | .. code-block:: python 84 | 85 | >>> my_plugin = exdir.plugin_interface.Plugin( 86 | >>> name="dataset_quantity", 87 | >>> dataset_plugins=[DatasetQuantity()] 88 | >>> ) 89 | 90 | The plugin is enabled when opening a File by passing it to the plugins parameter: 91 | 92 | .. code-block:: python 93 | 94 | >>> f = exdir.File("test.exdir", plugins=[my_plugin]) 95 | >>> dset = f.create_dataset("test", data=Quantity(1.5, "meter")) 96 | 97 | -------------------------------------------------------------------------------- /docs/raw.rst: -------------------------------------------------------------------------------- 1 | .. _raw: 2 | 3 | Raw 4 | ====== 5 | .. autoclass:: exdir.core.Raw 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - defaults 4 | 5 | dependencies: 6 | - python 7 | - setuptools 8 | - pip 9 | - numpy 10 | - scipy 11 | - pyyaml 12 | - ruamel_yaml 13 | - pytest 14 | - pytest-benchmark 15 | - h5py 16 | - pandas 17 | -------------------------------------------------------------------------------- /examples/exdirio.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the implementation of the NEO IO for the exdir format. 3 | Depends on: scipy 4 | h5py >= 2.5.0 5 | numpy 6 | quantities 7 | Supported: Read 8 | Authors: Milad H. Mobarhan @CINPLA, 9 | Svenn-Arne Dragly @CINPLA, 10 | Mikkel E. 
Lepperød @CINPLA
11 | """
12 | 
13 | 
14 | import sys
15 | from neo.io.baseio import BaseIO
16 | from neo.core import (Segment, SpikeTrain, Unit, Epoch, AnalogSignal,
17 |                       ChannelIndex, Block, IrregularlySampledSignal)
18 | import neo.io.tools
19 | import numpy as np
20 | import quantities as pq
21 | import os
22 | import glob
23 | 
24 | python_version = sys.version_info.major
25 | if python_version == 2:
26 |     from future.builtins import str
27 | 
28 | 
29 | import exdir
30 | 
31 | class ExdirIO(BaseIO):
32 |     """
33 |     Class for reading/writing of the exdir format
34 |     """
35 | 
36 |     is_readable = True
37 |     is_writable = False
38 | 
39 |     supported_objects = [Block, Segment, AnalogSignal, ChannelIndex, SpikeTrain]
40 |     readable_objects = [Block, SpikeTrain]
41 |     writeable_objects = []
42 | 
43 |     has_header = False
44 |     is_streameable = False
45 | 
46 |     name = 'exdir'
47 |     description = 'This IO reads experimental data from an exdir folder'
48 | 
49 |     # mode can be 'file' or 'dir' or 'fake' or 'database'
50 |     # the main case is 'file', but some readers are based on a directory or a database
51 |     # this info is for GUI stuff also
52 |     mode = 'dir'
53 | 
54 |     def __init__(self, folder_path):
55 |         """
56 |         Arguments:
57 |             folder_path : the folder path
58 |         """
59 |         BaseIO.__init__(self)
60 |         self._absolute_folder_path = folder_path
61 |         self._path, relative_folder_path = os.path.split(folder_path)
62 |         self._base_folder, extension = os.path.splitext(relative_folder_path)
63 | 
64 |         if extension != ".exdir":
65 |             raise ValueError("folder extension must be '.exdir'")
66 | 
67 |         self._exdir_folder = exdir.File(folder=folder_path, mode="a")
68 | 
69 |         # TODO check if group exists
70 |         self._processing = self._exdir_folder.require_group("processing")
71 | 
72 | 
73 |     def read_block(self,
74 |                    lazy=False,
75 |                    cascade=True):
76 |         # TODO read block
77 |         blk = Block()
78 |         if cascade:
79 |             seg = Segment(file_origin=self._absolute_folder_path)
80 | 
81 |             for name in self._processing:
82 |                 if name == "Position":
83 |                     seg.irregularlysampledsignals += self.read_tracking(path="")
84 |                 if name == "LFP":
85 |                     seg.analogsignals += self.read_analogsignal(path="")
86 |                 if name == "EventWaveform":
87 |                     seg.spiketrains += self.read_spiketrain(path="")
88 | 
89 |                 for key in self._processing[name]:
90 |                     if key == "Position":
91 |                         seg.irregularlysampledsignals += self.read_tracking(path=name)
92 |                     if key == "LFP":
93 |                         seg.analogsignals += self.read_analogsignal(path=name)
94 |                     if key == "EventWaveform":
95 |                         seg.spiketrains += self.read_spiketrain(path=name)
96 | 
97 | 
98 |             #blk.channel_indexes = self._channel_indexes
99 | 
100 |             blk.segments += [seg]
101 | 
102 |             # TODO add duration
103 |             #seg.duration = self._duration
104 | 
105 |             # TODO May need to "populate_RecordingChannel"
106 | 
107 | 
108 |         #blk.create_many_to_one_relationship()
109 |         return blk
110 | 
111 | 
112 |     def read_analogsignal(self, path):
113 |         if len(path) == 0:
114 |             lfp_group = self._processing["LFP"]
115 |         else:
116 |             lfp_group = self._processing[path]["LFP"]
117 | 
118 |         analogsignals = []
119 | 
120 |         for key in lfp_group:
121 |             timeserie = lfp_group[key]
122 |             signal = timeserie["data"]
123 |             analogsignal = AnalogSignal(signal.data,
124 |                                         units=signal.attrs["unit"],
125 |                                         sampling_rate=pq.Quantity(timeserie.attrs["sample_rate"]["value"], timeserie.attrs["sample_rate"]["unit"]))
126 | 
127 |             analogsignals.append(analogsignal)
128 | 
129 |         # TODO: what about channel index
130 |         # TODO: read attrs?
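        # Note: compound attributes such as sample_rate are stored in the YAML
        # metadata as mappings with separate "value" and "unit" entries, which
        # is why they are reassembled into pq.Quantity objects above.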
131 | 
132 | 
133 |         return analogsignals
134 | 
135 |     def read_spiketrain(self, path):
136 |         # Read spike trains from the EventWaveform group
137 |         if len(path) == 0:
138 |             event_waveform_group = self._processing["EventWaveform"]
139 |         else:
140 |             event_waveform_group = self._processing[path]["EventWaveform"]
141 | 
142 |         spike_trains = []
143 | 
144 |         for key in event_waveform_group:
145 |             timeserie = event_waveform_group[key]
146 |             timestamps = timeserie["timestamps"]
147 |             waveforms = timeserie["waveforms"]
148 | 
149 |             spike_train = SpikeTrain(pq.Quantity(timestamps.data, timestamps.attrs["unit"]),
150 |                                      t_stop=pq.Quantity(timestamps.data[-1], timestamps.attrs["unit"]),
151 |                                      waveforms=pq.Quantity(waveforms.data, waveforms.attrs["unit"]))
152 | 
153 |             spike_trains.append(spike_train)
154 |             # TODO: read attrs?
155 | 
156 | 
157 |         return spike_trains
158 | 
159 | 
160 |     def read_epoch(self):
161 |         # TODO read epoch data
162 |         pass
163 | 
164 | 
165 |     def read_tracking(self, path):
166 |         """
167 |         Read tracking data.
168 |         """
169 |         if len(path) == 0:
170 |             pos_group = self._processing["Position"]
171 |         else:
172 |             pos_group = self._processing[path]["Position"]
173 |         irr_signals = []
174 |         for key in pos_group:
175 |             spot_group = pos_group[key]
176 |             times = spot_group["timestamps"]
177 |             coords = spot_group["data"]
178 |             irr_signal = IrregularlySampledSignal(name=pos_group[key].name,
179 |                                                   signal=coords.data,
180 |                                                   times=times.data,
181 |                                                   units=coords.attrs["unit"],
182 |                                                   time_units=times.attrs["unit"])
183 |             irr_signals.append(irr_signal)
184 |         return irr_signals
185 | 
186 | 
187 | if __name__ == "__main__":
188 |     import sys
189 |     testfile = "/tmp/test.exdir"
190 |     io = ExdirIO(testfile)
191 | 
192 |     block = io.read_block()
193 | 
194 |     from neo.io.hdf5io import NeoHdf5IO
195 | 
196 |     testfile = "/tmp/test_exdir_to_neo.h5"
197 |     try:
198 |         os.remove(testfile)
199 |     except OSError:
200 |         pass
201 |     hdf5io = NeoHdf5IO(testfile)
202 |     hdf5io.write(block)
203 | 
204 | 
--------------------------------------------------------------------------------
/examples/usecase_exdir.py:
--------------------------------------------------------------------------------
1 | # This is a usecase that shows how h5py can be swapped with exdir.
2 | # usecase_h5py.py shows the same usecase with h5py instead 3 | 4 | import exdir 5 | import numpy as np 6 | 7 | time = np.linspace(0, 100, 101) 8 | 9 | voltage_1 = np.sin(time) 10 | voltage_2 = np.sin(time) + 10 11 | 12 | 13 | f = exdir.File("experiments.exdir", "w") 14 | f.attrs['description'] = "This is a mock experiment with voltage values over time" 15 | 16 | # Creating group and datasets for experiment 1 17 | grp_1 = f.create_group("experiment_1") 18 | 19 | dset_time_1 = grp_1.create_dataset("time", data=time) 20 | dset_time_1.attrs['unit'] = "ms" 21 | 22 | dset_voltage_1 = grp_1.create_dataset("voltage", data=voltage_1) 23 | dset_voltage_1.attrs['unit'] = "mV" 24 | 25 | # Creating group and datasets for experiment 2 26 | grp_2 = f.create_group("experiment_2") 27 | 28 | dset_time_2 = grp_2.create_dataset("time", data=time) 29 | dset_time_2.attrs['unit'] = "ms" 30 | 31 | dset_voltage_2 = grp_2.create_dataset("voltage", data=voltage_2) 32 | dset_voltage_2.attrs['unit'] = "mV" 33 | 34 | # Creating group and subgroup for experiment 3 35 | grp_3 = f.create_group("experiment_invalid") 36 | 37 | # Looping through and accessing 38 | print("Experiments: ", list(f.keys())) 39 | for experiment in f.keys(): 40 | if "voltage" in f[experiment]: 41 | print(experiment) 42 | print(f[experiment]["voltage"]) 43 | print("First voltage:", f[experiment]["voltage"][0]) 44 | else: 45 | print(f"No voltage values for: {experiment}") 46 | 47 | 48 | 49 | 50 | # Creating and accessing a subgroup 51 | grp_4 = grp_3.create_group("subgroup") 52 | dset_time_4 = grp_4.create_dataset("time", data=time) 53 | 54 | print(f["experiment_invalid"]["subgroup"]["time"]) 55 | 56 | f.close() 57 | 58 | 59 | import exdir -------------------------------------------------------------------------------- /examples/usecase_h5py.py: -------------------------------------------------------------------------------- 1 | # This is a usecase that shows how h5py could be used. 
2 | # usecase_exdir.py shows the same use case with exdir instead.
3 | 
4 | import h5py
5 | import numpy as np
6 | 
7 | time = np.linspace(0, 100, 101)
8 | 
9 | voltage_1 = np.sin(time)
10 | voltage_2 = np.sin(time) + 10
11 | 
12 | 
13 | f = h5py.File("experiments.hdf5", "w")
14 | f.attrs['description'] = "This is a mock experiment with voltage values over time"
15 | 
16 | 
17 | # Creating group and datasets for experiment 1
18 | grp_1 = f.create_group("experiment_1")
19 | 
20 | dset_time_1 = grp_1.create_dataset("time", data=time)
21 | dset_time_1.attrs['unit'] = "ms"
22 | 
23 | dset_voltage_1 = grp_1.create_dataset("voltage", data=voltage_1)
24 | dset_voltage_1.attrs['unit'] = "mV"
25 | 
26 | 
27 | # Creating group and datasets for experiment 2
28 | grp_2 = f.create_group("experiment_2")
29 | 
30 | dset_time_2 = grp_2.create_dataset("time", data=time)
31 | dset_time_2.attrs['unit'] = "ms"
32 | 
33 | dset_voltage_2 = grp_2.create_dataset("voltage", data=voltage_2)
34 | dset_voltage_2.attrs['unit'] = "mV"
35 | 
36 | 
37 | # Creating a group with no voltage data ("experiment_invalid")
38 | grp_3 = f.create_group("experiment_invalid")
39 | 
40 | 
41 | 
42 | # Looping through and accessing
43 | print("Experiments: ", list(f.keys()))
44 | for experiment in f.keys():
45 |     if "voltage" in f[experiment]:
46 |         print(experiment)
47 |         print(f[experiment]["voltage"])
48 |         print("First voltage:", f[experiment]["voltage"][0])
49 |     else:
50 |         print(f"No voltage values for: {experiment}")
51 | 
52 | 
53 | 
54 | 
55 | # Creating and accessing a subgroup
56 | grp_4 = grp_3.create_group("subgroup")
57 | dset_time_4 = grp_4.create_dataset("time", data=time)
58 | 
59 | print(f["experiment_invalid"]["subgroup"]["time"])
60 | 
61 | f.close()
62 | 
63 | 
64 | 
-------------------------------------------------------------------------------- /exdir/__init__.py: --------------------------------------------------------------------------------

1 | from . import core
2 | from . import plugin_interface
3 | from . import plugins
4 | from .core import File, validation, Attribute, Dataset, Group, Raw, Object
5 | 
6 | # TODO remove versioneer
7 | from . import _version
8 | __version__ = _version.get_versions()['version']
9 | 
10 | # Jupyter extensions
11 | def _jupyter_server_extension_paths():
12 |     return [{
13 |         "module": "exdir"
14 |     }]
15 | 
16 | # Jupyter Extension points
17 | def _jupyter_nbextension_paths():
18 |     return [dict(
19 |         section="notebook",
20 |         # the path is relative to the `exdir` directory
21 |         src="static",
22 |         # directory in the `nbextension/` namespace
23 |         dest="exdir",
24 |         # _also_ in the `nbextension/` namespace
25 |         require="exdir/index")]
26 | 
27 | def load_jupyter_server_extension(nbapp):
28 |     nbapp.log.info("Exdir extension enabled!")
-------------------------------------------------------------------------------- /exdir/core/__init__.py: --------------------------------------------------------------------------------

1 | """
2 | .. module:: exdir.core
3 | .. moduleauthor:: Svenn-Arne Dragly, Milad H. Mobarhan, Mikkel E.
Lepperød, Simen Tennøe 4 | """ 5 | 6 | from .exdir_object import Object 7 | from .exdir_file import File 8 | from .attribute import Attribute 9 | from .dataset import Dataset 10 | from .group import Group 11 | from .raw import Raw 12 | -------------------------------------------------------------------------------- /exdir/core/attribute.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import exdir 3 | try: 4 | import ruamel_yaml as yaml 5 | except ImportError: 6 | import ruamel.yaml as yaml 7 | 8 | from .mode import assert_file_open, OpenMode, assert_file_writable 9 | 10 | def _quote_strings(value): 11 | if isinstance(value, str): 12 | return yaml.scalarstring.DoubleQuotedScalarString(value) 13 | else: 14 | try: 15 | new_result = {} 16 | for key, val in value.items(): 17 | new_result[key] = _quote_strings(val) 18 | return new_result 19 | except AttributeError: 20 | pass 21 | return value 22 | 23 | 24 | class Attribute: 25 | """ 26 | The attribute object is a dictionary-like object that is used to access 27 | the attributes stored in the :code:`attributes.yaml` file for a given 28 | Exdir Object. 29 | 30 | The Attribute object should not be created, but retrieved by accessing 31 | the :code:`.attrs` property of any Exdir Object, such as a Dataset, 32 | Group or File. 33 | """ 34 | 35 | # TODO remove METADATA mode and read/write metadata directly as YAML instead 36 | class _Mode(Enum): 37 | ATTRIBUTES = 1 38 | METADATA = 2 39 | 40 | def __init__(self, parent, mode, file, path=None): 41 | self.parent = parent 42 | self.mode = mode 43 | self.file = file 44 | self.path = path or [] 45 | 46 | def __getitem__(self, name=None): 47 | attrs = self._open_or_create() 48 | 49 | if self.mode == self._Mode.ATTRIBUTES: 50 | meta = self.parent.meta.to_dict() 51 | for plugin in self.file.plugin_manager.attribute_plugins.read_order: 52 | attribute_data = exdir.plugin_interface.AttributeData( 53 | attrs=attrs, 54 | meta=meta 55 | ) 56 | 57 | attribute_data = plugin.prepare_read(attribute_data) 58 | attrs = attribute_data.attrs 59 | meta.update(attribute_data.meta) 60 | 61 | for i in self.path: 62 | attrs = attrs[i] 63 | if name is not None: 64 | attrs = attrs[name] 65 | if isinstance(attrs, dict): 66 | return Attribute( 67 | self.parent, self.mode, self.file, self.path + [name] 68 | ) 69 | else: 70 | return attrs 71 | 72 | def __setitem__(self, name, value): 73 | attrs = self._open_or_create() 74 | key = name 75 | sub_attrs = attrs 76 | 77 | for i in self.path: 78 | sub_attrs = sub_attrs[i] 79 | sub_attrs[key] = value 80 | 81 | self._set_data(attrs) 82 | 83 | def __contains__(self, name): 84 | if self.file.io_mode == OpenMode.FILE_CLOSED: 85 | return False 86 | attrs = self._open_or_create() 87 | for i in self.path: 88 | attrs = attrs[i] 89 | return name in attrs 90 | 91 | def keys(self): 92 | """ 93 | Returns 94 | ------- 95 | a new view of the Attribute's keys. 96 | """ 97 | attrs = self._open_or_create() 98 | for i in self.path: 99 | attrs = attrs[i] 100 | return attrs.keys() 101 | 102 | def to_dict(self): 103 | """ 104 | Convert the Attribute into a standard Python dictionary. 
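
A rough usage sketch (assuming an open Exdir object ``obj``)::

    >>> obj.attrs["unit"] = "mV"
    >>> obj.attrs.to_dict()
    {'unit': 'mV'}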
105 | """ 106 | attrs = self._open_or_create() 107 | for i in self.path: # TODO check if this is necesary 108 | attrs = attrs[i] 109 | 110 | if self.mode == self._Mode.ATTRIBUTES: 111 | meta = self.parent.meta.to_dict() 112 | attribute_data = exdir.plugin_interface.AttributeData( 113 | attrs=attrs, 114 | meta=meta 115 | ) 116 | for plugin in self.file.plugin_manager.attribute_plugins.read_order: 117 | attribute_data = plugin.prepare_read(attribute_data) 118 | 119 | attrs = attribute_data.attrs 120 | 121 | return attrs 122 | 123 | def items(self): 124 | """ 125 | Returns 126 | ------- 127 | a new view of the Attribute's items. 128 | """ 129 | attrs = self._open_or_create() 130 | for i in self.path: 131 | attrs = attrs[i] 132 | return attrs.items() 133 | 134 | def values(self): 135 | """ 136 | Returns 137 | ------- 138 | a new view of the Attribute's values. 139 | """ 140 | attrs = self._open_or_create() 141 | for i in self.path: 142 | attrs = attrs[i] 143 | return attrs.values() 144 | 145 | def _set_data(self, attrs): 146 | assert_file_writable(self.file) 147 | plugins = self.file.plugin_manager.attribute_plugins.write_order 148 | 149 | if self.mode == self._Mode.ATTRIBUTES and len(plugins) > 0: 150 | meta = self.parent.meta.to_dict() 151 | for plugin in plugins: 152 | attribute_data = exdir.plugin_interface.AttributeData( 153 | attrs=attrs, 154 | meta=meta 155 | ) 156 | 157 | attribute_data = plugin.prepare_write(attribute_data) 158 | meta = attribute_data.meta 159 | attrs = attribute_data.attrs 160 | 161 | attribute_data_quoted = _quote_strings(attribute_data.attrs) 162 | self.parent.meta._set_data(meta) 163 | else: 164 | attribute_data_quoted = attrs 165 | 166 | with self.filename.open("w", encoding="utf-8") as attribute_file: 167 | yaml.YAML(typ="rt", pure=True).dump( 168 | attribute_data_quoted, 169 | attribute_file, 170 | ) 171 | 172 | # TODO only needs filename, make into free function 173 | def _open_or_create(self): 174 | assert_file_open(self.file) 175 | attrs = {} 176 | if self.filename.exists(): # NOTE str for Python 3.5 support 177 | with self.filename.open("r", encoding="utf-8") as meta_file: 178 | attrs = yaml.YAML(typ="safe", pure=True).load(meta_file) 179 | return attrs 180 | 181 | def __iter__(self): 182 | yield from self.keys() 183 | 184 | @property 185 | def filename(self): 186 | """ 187 | Returns 188 | ------- 189 | The filename of the :code:`attributes.yaml` file. 190 | """ 191 | assert_file_open(self.file) 192 | if self.mode == self._Mode.METADATA: 193 | return self.parent.meta_filename 194 | else: 195 | return self.parent.attributes_filename 196 | 197 | def __len__(self): 198 | return len(self.keys()) 199 | 200 | def update(self, value): 201 | """ 202 | Update the Attribute with the key/value pairs from :code:`value`, overwriting existing keys. 
203 | 204 | This function accepts either another Attribute object, a dictionary object or an iterable of key/value pairs 205 | """ 206 | for key in value: 207 | self[key] = value[key] 208 | 209 | def __str__(self): 210 | if self.file.io_mode == OpenMode.FILE_CLOSED: 211 | return "" 212 | string = "" 213 | for key in self: 214 | string += f"{key}: {self[key]}," 215 | return f"Attribute({self.parent.name}, {{{string}}})" 216 | 217 | def _repr_html_(self): 218 | if self.file.io_mode == OpenMode.FILE_CLOSED: 219 | return False 220 | return exdir.utils.display.html_attrs(self) 221 | 222 | def __repr__(self): 223 | if self.file.io_mode == OpenMode.FILE_CLOSED: 224 | return "" 225 | return f"Attributes of Exdir object '{self.parent.name}' at '{id(self)}'" 226 | -------------------------------------------------------------------------------- /exdir/core/constants.py: -------------------------------------------------------------------------------- 1 | # metadata 2 | EXDIR_METANAME = "exdir" 3 | TYPE_METANAME = "type" 4 | VERSION_METANAME = "version" 5 | 6 | # filenames 7 | META_FILENAME = "exdir.yaml" 8 | ATTRIBUTES_FILENAME = "attributes.yaml" 9 | RAW_FOLDER_NAME = "__raw__" 10 | 11 | # typenames 12 | DATASET_TYPENAME = "dataset" 13 | GROUP_TYPENAME = "group" 14 | FILE_TYPENAME = "file" 15 | -------------------------------------------------------------------------------- /exdir/core/dataset.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import numpy as np 3 | import exdir 4 | 5 | from . import exdir_object as exob 6 | from .mode import assert_file_open, OpenMode, assert_file_writable 7 | 8 | def _prepare_write(data, plugins, attrs, meta): 9 | for plugin in plugins: 10 | dataset_data = exdir.plugin_interface.DatasetData( 11 | data=data, 12 | attrs=attrs, 13 | meta=meta 14 | ) 15 | dataset_data = plugin.prepare_write(dataset_data) 16 | 17 | data = dataset_data.data 18 | attrs = dataset_data.attrs 19 | meta = dataset_data.meta 20 | 21 | if isinstance(data, (numbers.Number, tuple, str)): 22 | data = np.asarray(data, order="C") 23 | 24 | return data, attrs, meta 25 | 26 | 27 | def _dataset_filename(dataset_directory): 28 | return dataset_directory / "data.npy" 29 | 30 | 31 | class Dataset(exob.Object): 32 | """ 33 | Dataset class 34 | 35 | Warnings 36 | -------- 37 | This class modifies the view and it is possible to overwrite 38 | an existing dataset, which is different from the behavior in h5py. 39 | """ 40 | def __init__(self, root_directory, parent_path, object_name, file): 41 | super().__init__( 42 | root_directory=root_directory, 43 | parent_path=parent_path, 44 | object_name=object_name, 45 | file=file 46 | ) 47 | self._data_memmap = None 48 | self.plugin_manager = file.plugin_manager 49 | self.data_filename = str(_dataset_filename(self.directory)) 50 | 51 | def __getitem__(self, args): 52 | assert_file_open(self.file) 53 | if len(self._data.shape) == 0: 54 | values = self._data 55 | else: 56 | values = self._data[args] 57 | 58 | enabled_plugins = [plugin_module.name for plugin_module in self.plugin_manager.plugins] 59 | 60 | data = values 61 | 62 | if "plugins" in self.meta: 63 | for plugin_name in self.meta["plugins"]: 64 | if ("required" in self.meta["plugins"][plugin_name] 65 | and self.meta["plugins"][plugin_name]["required"] == True 66 | and plugin_name not in enabled_plugins): 67 | raise Exception( 68 | f"Plugin '{plugin_name}' was used to write '{self.name}', " 69 | "but is not enabled." 
70 | ) 71 | 72 | plugins = self.plugin_manager.dataset_plugins.read_order 73 | 74 | if len(plugins) > 0: 75 | meta = self.meta.to_dict() 76 | atts = self.attrs.to_dict() 77 | 78 | dataset_data = exdir.plugin_interface.DatasetData(data=values, 79 | attrs=self.attrs.to_dict(), 80 | meta=meta) 81 | for plugin in plugins: 82 | dataset_data = plugin.prepare_read(dataset_data) 83 | 84 | data = dataset_data.data 85 | 86 | return data 87 | 88 | def __setitem__(self, args, value): 89 | assert_file_writable(self.file) 90 | 91 | value, attrs, meta = _prepare_write( 92 | data=value, 93 | plugins=self.plugin_manager.dataset_plugins.write_order, 94 | attrs=self.attrs.to_dict(), 95 | meta=self.meta.to_dict() 96 | ) 97 | self._data[args] = value 98 | self.attrs = attrs 99 | self.meta._set_data(meta) 100 | 101 | def _reload_data(self): 102 | assert_file_open(self.file) 103 | for plugin in self.plugin_manager.dataset_plugins.write_order: 104 | plugin.before_load(self.data_filename) 105 | 106 | if self.file.io_mode == OpenMode.READ_ONLY: 107 | mmap_mode = "r" 108 | else: 109 | mmap_mode = "r+" 110 | 111 | try: 112 | self._data_memmap = np.load(self.data_filename, mmap_mode=mmap_mode, allow_pickle=False) 113 | self.file._open_datasets[self.name] = self 114 | except ValueError as e: 115 | # Could be that it is a Git LFS file. Let's see if that is the case and warn if so. 116 | with open(self.data_filename, encoding="utf-8") as f: 117 | test_string = "version https://git-lfs.github.com/spec/v1" 118 | contents = f.read(len(test_string)) 119 | if contents == test_string: 120 | raise OSError(f"The file '{self.data_filename}' is a Git LFS placeholder. " 121 | "Open the the Exdir File with the Git LFS plugin or run " 122 | "`git lfs fetch` first. ") 123 | else: 124 | raise e 125 | 126 | def _reset_data(self, value, attrs, meta): 127 | assert_file_open(self.file) 128 | self._data_memmap = np.lib.format.open_memmap( 129 | self.data_filename, 130 | mode="w+", 131 | dtype=value.dtype, 132 | shape=value.shape 133 | ) 134 | 135 | self._data_memmap[...] = value 136 | 137 | # update attributes and plugin metadata 138 | if attrs: 139 | self.attrs = attrs 140 | 141 | if meta: 142 | self.meta._set_data(meta) 143 | 144 | return 145 | 146 | def set_data(self, data): 147 | """ 148 | Warning 149 | ------- 150 | Deprecated convenience function. 151 | Use :code:`dataset.data = data` instead. 152 | """ 153 | raise DeprecationWarning( 154 | "set_data is deprecated. Use `dataset.data = data` instead." 155 | ) 156 | self.value = data 157 | 158 | @property 159 | def data(self): 160 | """ 161 | Property that gives access the entire dataset. 162 | Equivalent to calling :code:`dataset[:]`. 163 | 164 | Returns 165 | ------- 166 | numpy.memmap 167 | The entire dataset. 168 | """ 169 | assert_file_open(self.file) 170 | return self[:] 171 | 172 | @data.setter 173 | def data(self, value): 174 | assert_file_open(self.file) 175 | if self._data.shape != value.shape or self._data.dtype != value.dtype: 176 | value, attrs, meta = _prepare_write( 177 | data=value, 178 | plugins=self.plugin_manager.dataset_plugins.write_order, 179 | attrs=self.attrs.to_dict(), 180 | meta=self.meta.to_dict() 181 | ) 182 | self._reset_data(value, attrs, meta) 183 | return 184 | 185 | self[:] = value 186 | 187 | @property 188 | def shape(self): 189 | """ 190 | The shape of the dataset. 191 | Equivalent to calling :code:`dataset[:].shape`. 192 | 193 | Returns 194 | ------- 195 | tuple 196 | The shape of the dataset. 
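
A quick sketch (``f`` is an open Exdir file and ``np`` is NumPy; the
dataset name is hypothetical)::

    >>> dset = f.create_dataset("d", data=np.zeros((2, 3)))
    >>> dset.shape
    (2, 3)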
197 | """ 198 | return self[:].shape 199 | 200 | @property 201 | def size(self): 202 | """ 203 | The size of the dataset. 204 | Equivalent to calling :code:`dataset[:].size`. 205 | 206 | Returns 207 | ------- 208 | np.int64 209 | The size of the dataset. 210 | """ 211 | return self[:].size 212 | 213 | @property 214 | def dtype(self): 215 | """ 216 | The NumPy data type of the dataset. 217 | Equivalent to calling :code:`dataset[:].dtype`. 218 | 219 | Returns 220 | ------- 221 | numpy.dtype 222 | The NumPy data type of the dataset. 223 | """ 224 | return self[:].dtype 225 | 226 | @property 227 | def value(self): 228 | """ 229 | Convenience alias for the :code:`data` property. 230 | 231 | Warning 232 | ------- 233 | This property is only provided as a convenience to make the API 234 | interoperable with h5py. 235 | We recommend to use :code:`data` instead of :code:`value`. 236 | """ 237 | return self.data 238 | 239 | @value.setter 240 | def value(self, value): 241 | self.data = value 242 | 243 | def __len__(self): 244 | """ The size of the first axis. TypeError if scalar.""" 245 | assert_file_open(self.file) 246 | if len(self.shape) == 0: 247 | raise TypeError("Attempt to take len() of scalar dataset") 248 | return self.shape[0] 249 | 250 | def __iter__(self): 251 | """Iterate over the first axis. TypeError if scalar. 252 | WARNING: Modifications to the yielded data are *NOT* written to file. 253 | """ 254 | assert_file_open(self.file) 255 | 256 | if len(self.shape) == 0: 257 | raise TypeError("Can't iterate over a scalar dataset") 258 | 259 | for i in range(self.shape[0]): 260 | yield self[i] 261 | 262 | def __str__(self): 263 | return self.data.__str__() 264 | 265 | def __repr__(self): 266 | if self.file.io_mode == OpenMode.FILE_CLOSED: 267 | return "" 268 | return f"" 269 | 270 | @property 271 | def _data(self): 272 | assert_file_open(self.file) 273 | if self._data_memmap is None: 274 | self._reload_data() 275 | return self._data_memmap 276 | -------------------------------------------------------------------------------- /exdir/core/exdir_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import weakref 4 | import pathlib 5 | import warnings 6 | 7 | import exdir 8 | from . import exdir_object as exob 9 | from .group import Group 10 | from .. import utils 11 | from .mode import OpenMode 12 | from . import validation 13 | 14 | 15 | class File(Group): 16 | """ 17 | Exdir file object. 18 | A File is a special type of :class:`.Group`. 19 | See :class:`.Group` for documentation of inherited functions. 20 | 21 | To create a File, call the File constructor with the name of the File you wish to create: 22 | 23 | >>> import exdir 24 | >>> import numpy as np 25 | >>> f = exdir.File("mytestfile.exdir") 26 | 27 | The :code:`File` object :code:`f` now points to the root folder in the exdir file 28 | structure. 29 | You can add groups and datasets to it as follows: 30 | 31 | >>> my_group = f.require_group("my_group") 32 | >>> a = np.arange(100) 33 | >>> dset = f.require_dataset("my_data", data=a) 34 | 35 | The data is immediately written to disk. 36 | 37 | Parameters 38 | ---------- 39 | directory: 40 | Name of the directory to be opened or created as an Exdir File. 41 | mode: str, optional 42 | A file mode string that defines the read/write behavior. 43 | See open() for information about the different modes. 44 | allow_remove: bool 45 | Set to True if you want mode 'w' to remove existing trees if they 46 | exist. 
This False by default to avoid removing entire directory 47 | trees by mistake. 48 | name_validation: str, function, optional 49 | Set the validation mode for names. 50 | Can be a function that takes a name and returns True if the name 51 | is valid or one of the following built-in validation modes: 52 | 53 | - 'strict': only allow numbers, lowercase letters, underscore (_) and dash (-) 54 | - 'simple': allow numbers, lowercase letters, uppercase letters, underscore (_) and dash (-), check if any file exists with same name in any case. 55 | - 'thorough': verify if name is safe on all platforms, check if any file exists with same name in any case. 56 | - 'none': allows any filename 57 | 58 | The default is 'thorough'. 59 | plugins: list, optional 60 | A list of instantiated plugins or modules with a plugins() 61 | function that returns a list of plugins. 62 | 63 | """ 64 | 65 | def __init__(self, directory, mode=None, allow_remove=False, 66 | name_validation=None, plugins=None): 67 | self._open_datasets = weakref.WeakValueDictionary({}) 68 | directory = pathlib.Path(directory) #.resolve() 69 | if directory.suffix != ".exdir": 70 | directory = directory.with_suffix(directory.suffix + ".exdir") 71 | self.user_mode = mode = mode or 'a' 72 | recognized_modes = ['a', 'r', 'r+', 'w', 'w-', 'x'] 73 | if mode not in recognized_modes: 74 | raise ValueError( 75 | f"IO mode {mode} not recognized, " 76 | f"mode must be one of {recognized_modes}" 77 | ) 78 | 79 | self.plugin_manager = exdir.plugin_interface.plugin_interface.Manager(plugins) 80 | 81 | name_validation = name_validation or validation.thorough 82 | 83 | if isinstance(name_validation, str): 84 | if name_validation == 'simple': 85 | name_validation = validation.thorough 86 | elif name_validation == 'thorough': 87 | name_validation = validation.thorough 88 | elif name_validation == 'strict': 89 | name_validation = validation.strict 90 | elif name_validation == 'none': 91 | name_validation = validation.none 92 | else: 93 | raise ValueError( 94 | f'IO name rule "{name_validation}" not recognized, ' 95 | 'name rule must be one of "strict", "simple", ' 96 | '"thorough", "none"' 97 | ) 98 | 99 | warnings.warn( 100 | "WARNING: name_validation should be set to one of the functions in " 101 | "the exdir.validation module. " 102 | "Defining naming rule by string is no longer supported." 103 | ) 104 | 105 | self.name_validation = name_validation 106 | 107 | if mode == "r": 108 | self.io_mode = OpenMode.READ_ONLY 109 | else: 110 | self.io_mode = OpenMode.READ_WRITE 111 | 112 | super().__init__( 113 | root_directory=directory, 114 | parent_path=pathlib.PurePosixPath(""), 115 | object_name="", 116 | file=self 117 | ) 118 | 119 | already_exists = directory.exists() 120 | if already_exists: 121 | if not exob.is_nonraw_object_directory(directory): 122 | raise RuntimeError( 123 | f"Path '{directory}' already exists, but is not a valid exdir file." 124 | ) 125 | 126 | should_create_directory = False 127 | 128 | if mode == "r": 129 | if not already_exists: 130 | raise RuntimeError("File " + str(directory) + " does not exist.") 131 | elif mode == "r+": 132 | if not already_exists: 133 | raise RuntimeError("File " + str(directory) + " does not exist.") 134 | elif mode == "w": 135 | if already_exists: 136 | if allow_remove: 137 | shutil.rmtree(str(directory)) # NOTE str needed for Python 3.5 138 | else: 139 | raise RuntimeError( 140 | f"File {directory} already exists. We won't delete the entire tree " 141 | "by default. Add allow_remove=True to override." 
142 | ) 143 | should_create_directory = True 144 | elif mode == "w-" or mode == "x": 145 | if already_exists: 146 | raise RuntimeError("File " + str(directory) + " already exists.") 147 | should_create_directory = True 148 | elif mode == "a": 149 | if not already_exists: 150 | should_create_directory = True 151 | 152 | if should_create_directory: 153 | self.name_validation(directory.parent, directory.name) 154 | exob._create_object_directory(directory, exob._default_metadata(exob.FILE_TYPENAME)) 155 | 156 | def close(self): 157 | """ 158 | Closes the File object. 159 | Sets the OpenMode to FILE_CLOSED which denies access to any attribute or 160 | child 161 | """ 162 | import gc 163 | for name, data_set in self._open_datasets.items(): 164 | # there are no way to close the memmap other than deleting all 165 | # references to it, thus 166 | try: 167 | data_set._data_memmap.flush() 168 | data_set._data_memmap.setflags(write=False) # TODO does not work 169 | except AttributeError: 170 | pass 171 | # force garbage collection to clean weakrefs 172 | gc.collect() 173 | self.io_mode = OpenMode.FILE_CLOSED 174 | 175 | def __enter__(self): 176 | return self 177 | 178 | def __exit__(self, exc_type, exc_value, traceback): 179 | self.close() 180 | 181 | def create_group(self, name): 182 | """ 183 | Create a group with the given name or absolute path. 184 | 185 | See :class:`.Group` for more details. 186 | 187 | Note 188 | ---- 189 | Creating groups with absolute paths is only allowed on File objects and 190 | not on Group objects in general. 191 | """ 192 | path = utils.path.remove_root(name) 193 | 194 | return super().create_group(path) 195 | 196 | def require_group(self, name): 197 | """ 198 | Open an existing subgroup or create one if it does not exist. 199 | 200 | See :class:`.Group` for more details. 201 | 202 | Note 203 | ---- 204 | Creating groups with absolute paths is only allowed on File objects and 205 | not on Group objects in general. 206 | """ 207 | path = utils.path.remove_root(name) 208 | 209 | return super().require_group(path) 210 | 211 | def __getitem__(self, name): 212 | path = utils.path.remove_root(name) 213 | if len(path.parts) < 1: 214 | return self 215 | return super().__getitem__(path) 216 | 217 | def __contains__(self, name): 218 | path = utils.path.remove_root(name) 219 | return super().__contains__(path) 220 | 221 | def __repr__(self): 222 | if self.io_mode == OpenMode.FILE_CLOSED: 223 | return "" 224 | return f"" 225 | -------------------------------------------------------------------------------- /exdir/core/exdir_object.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | 4 | try: 5 | import ruamel_yaml as yaml 6 | except ImportError: 7 | import ruamel.yaml as yaml 8 | 9 | import exdir 10 | 11 | from .attribute import Attribute 12 | from .constants import * 13 | from .mode import assert_file_open, OpenMode 14 | 15 | 16 | def _resolve_path(path): 17 | return Path(path).resolve() 18 | 19 | 20 | def _assert_valid_name(name, container): 21 | """Check if name (dataset or group) is valid.""" 22 | container.file.name_validation(container.directory, name) 23 | 24 | 25 | def _create_object_directory(directory, metadata): 26 | """ 27 | Create object directory and meta file if directory 28 | don't already exist. 
29 | """ 30 | if directory.exists(): 31 | raise OSError("The directory '" + str(directory) + "' already exists") 32 | valid_types = [DATASET_TYPENAME, FILE_TYPENAME, GROUP_TYPENAME] 33 | typename = metadata[EXDIR_METANAME][TYPE_METANAME] 34 | if typename not in valid_types: 35 | raise ValueError(f"{typename} is not a valid typename") 36 | directory.mkdir() 37 | meta_filename = directory / META_FILENAME 38 | with meta_filename.open("w", encoding="utf-8") as meta_file: 39 | if metadata == _default_metadata(typename): 40 | # if it is the default, we know how to print it fast 41 | metadata_string = ('' 42 | f'{EXDIR_METANAME}:\n' 43 | f' {TYPE_METANAME}: "{typename}"\n' 44 | f' {VERSION_METANAME}: 1\n' 45 | '') 46 | else: 47 | from io import StringIO 48 | with StringIO() as buf: 49 | yaml.YAML(typ="safe", pure=True).dump(metadata, buf) 50 | metadata_string = buf.getvalue() 51 | 52 | meta_file.write(metadata_string) 53 | 54 | 55 | def _remove_object_directory(directory): 56 | """ 57 | Remove object directory and meta file if directory exist. 58 | """ 59 | if not directory.exists(): 60 | raise OSError("The directory '" + str(directory) + "' does not exist") 61 | assert is_inside_exdir(directory) 62 | shutil.rmtree(directory) 63 | 64 | 65 | def _default_metadata(typename): 66 | return { 67 | EXDIR_METANAME: { 68 | TYPE_METANAME: typename, 69 | VERSION_METANAME: 1 70 | } 71 | } 72 | 73 | 74 | def is_exdir_object(directory): 75 | """ 76 | WARNING: Does not test if inside exdir directory, 77 | only if the object can be an exdir object (i.e. a directory). 78 | """ 79 | return directory.is_dir() 80 | 81 | 82 | def is_nonraw_object_directory(directory): 83 | meta_filename = directory / META_FILENAME 84 | if not meta_filename.exists(): 85 | return False 86 | with meta_filename.open("r", encoding="utf-8") as meta_file: 87 | meta_data = yaml.YAML(typ="safe", pure=True).load(meta_file) 88 | 89 | if not isinstance(meta_data, dict): 90 | return False 91 | 92 | if EXDIR_METANAME not in meta_data: 93 | return False 94 | if TYPE_METANAME not in meta_data[EXDIR_METANAME]: 95 | return False 96 | valid_types = [DATASET_TYPENAME, FILE_TYPENAME, GROUP_TYPENAME] 97 | if meta_data[EXDIR_METANAME][TYPE_METANAME] not in valid_types: 98 | return False 99 | return True 100 | 101 | 102 | def is_raw_object_directory(directory): 103 | return is_exdir_object(directory) and not is_nonraw_object_directory(directory) 104 | 105 | 106 | def root_directory(path): 107 | """ 108 | Iterates upwards until a exdir.File object is found. 109 | 110 | returns: path to exdir.File or None if not found. 
111 | """ 112 | path = _resolve_path(path) 113 | found = False 114 | while not found: 115 | if path.parent == path: # parent is self 116 | return None 117 | valid = is_nonraw_object_directory(path) 118 | if not valid: 119 | path = path.parent 120 | continue 121 | 122 | meta_filename = path / META_FILENAME 123 | with meta_filename.open("r", encoding="utf-8") as meta_file: 124 | meta_data = yaml.YAML(typ="safe", pure=True).load(meta_file) 125 | if EXDIR_METANAME not in meta_data: 126 | path = path.parent 127 | continue 128 | exdir_meta = meta_data[EXDIR_METANAME] 129 | if TYPE_METANAME not in exdir_meta: 130 | path = path.parent 131 | continue 132 | if FILE_TYPENAME != exdir_meta[TYPE_METANAME]: 133 | path = path.parent 134 | continue 135 | found = True 136 | return path 137 | 138 | 139 | def is_inside_exdir(path): 140 | path = _resolve_path(path) 141 | return root_directory(path) is not None 142 | 143 | 144 | def assert_inside_exdir(path): 145 | path = _resolve_path(path) 146 | if not is_inside_exdir(path): 147 | raise RuntimeError("Path " + str(path) + " is not inside an Exdir repository.") 148 | 149 | 150 | def open_object(path): 151 | from . import exdir_file 152 | path = _resolve_path(path) 153 | assert_inside_exdir(path) 154 | root_dir = root_directory(path) 155 | object_name = path.relative_to(root_dir) 156 | object_name = object_name.as_posix() 157 | exdir_file = exdir_file.File(root_dir) 158 | if object_name == ".": 159 | return exdir_file 160 | return exdir_file[object_name] 161 | 162 | 163 | # NOTE This is in a separate file only because of circular imports between Object and Raw otherwise 164 | # TODO move this back to Object once circular imports are figured out 165 | 166 | class Object: 167 | """ 168 | Parent class for exdir Group and exdir dataset objects 169 | """ 170 | def __init__(self, root_directory, parent_path, object_name, file): 171 | self.root_directory = root_directory 172 | self.object_name = str(object_name) # NOTE could be path, so convert to str 173 | self.parent_path = parent_path 174 | self.relative_path = self.parent_path / self.object_name 175 | relative_name = str(self.relative_path) 176 | if relative_name == ".": 177 | relative_name = "" 178 | self.name = "/" + relative_name 179 | self.file = file 180 | 181 | @property # TODO consider warning if file is closed 182 | def directory(self): 183 | return self.root_directory / self.relative_path 184 | 185 | @property 186 | def attrs(self): 187 | assert_file_open(self.file) 188 | return Attribute( 189 | self, 190 | mode=Attribute._Mode.ATTRIBUTES, 191 | file=self.file, 192 | ) 193 | 194 | @attrs.setter 195 | def attrs(self, value): 196 | assert_file_open(self.file) 197 | self.attrs._set_data(value) 198 | 199 | @property 200 | def meta(self): 201 | assert_file_open(self.file) 202 | return Attribute( 203 | self, 204 | mode=Attribute._Mode.METADATA, 205 | file=self.file, 206 | ) 207 | 208 | @property # TODO consider warning if file is closed, 209 | def attributes_filename(self): 210 | return self.directory / ATTRIBUTES_FILENAME 211 | 212 | @property # TODO consider warning if file is closed 213 | def meta_filename(self): 214 | return self.directory / META_FILENAME 215 | 216 | def create_raw(self, name): 217 | from .raw import Raw 218 | assert_file_open(self.file) 219 | _assert_valid_name(name, self) 220 | directory_name = self.directory / name 221 | if directory_name.exists(): 222 | raise FileExistsError(f"'{name}' already exists in '{self}'") 223 | directory_name.mkdir() 224 | return Raw( 225 | 
root_directory=self.root_directory, 226 | parent_path=self.relative_path, 227 | object_name=name, 228 | file=self.file 229 | ) 230 | 231 | def require_raw(self, name): 232 | from .raw import Raw 233 | assert_file_open(self.file) 234 | directory_name = self.directory / name 235 | if directory_name.exists(): 236 | if is_nonraw_object_directory(directory_name): 237 | raise FileExistsError( 238 | f"Directory '{directory_name}' already exists, but is not raw." 239 | ) 240 | return Raw( 241 | root_directory=self.root_directory, 242 | parent_path=self.relative_path, 243 | object_name=name, 244 | file=self.file 245 | ) 246 | 247 | return self.create_raw(name) 248 | 249 | @property 250 | def parent(self): 251 | from .group import Group 252 | assert_file_open(self.file) 253 | if len(self.parent_path.parts) < 1: 254 | return None 255 | parent_name = self.parent_path.name 256 | parent_parent_path = self.parent_path.parent 257 | return Group( 258 | root_directory=self.root_directory, 259 | parent_path=parent_parent_path, 260 | object_name=parent_name, 261 | file=self.file 262 | ) 263 | 264 | def __eq__(self, other): 265 | if self.file.io_mode == OpenMode.FILE_CLOSED: 266 | return False 267 | if not isinstance(other, Object): 268 | return False 269 | return ( 270 | self.relative_path == other.relative_path and 271 | self.root_directory == other.root_directory 272 | ) 273 | 274 | def __bool__(self): 275 | if self.file.io_mode == OpenMode.FILE_CLOSED: 276 | return False 277 | return True 278 | 279 | def _repr_html_(self): 280 | if self.file.io_mode == OpenMode.FILE_CLOSED: 281 | return None 282 | return exdir.utils.display.html_tree(self) 283 | 284 | def __repr__(self): 285 | if self.file.io_mode == OpenMode.FILE_CLOSED: 286 | return "" 287 | return f"" 288 | -------------------------------------------------------------------------------- /exdir/core/mode.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, auto 2 | 3 | 4 | class OpenMode(Enum): 5 | READ_WRITE = auto() 6 | READ_ONLY = auto() 7 | FILE_CLOSED = auto() 8 | 9 | 10 | def assert_file_open(file_object): 11 | """ 12 | Decorator to check if the file is not closed. 13 | """ 14 | if file_object.io_mode == OpenMode.FILE_CLOSED: 15 | raise OSError("Unable to operate on closed File instance.") 16 | 17 | 18 | def assert_file_writable(file_object): 19 | """ 20 | Decorator to check if the file is not closed, 21 | and that it is not in read only mode. 22 | """ 23 | if file_object.io_mode == OpenMode.FILE_CLOSED: 24 | raise OSError("Unable to operate on closed File instance.") 25 | if file_object.io_mode == OpenMode.READ_ONLY: 26 | raise OSError("Cannot change data on file in read only 'r' mode") 27 | -------------------------------------------------------------------------------- /exdir/core/raw.py: -------------------------------------------------------------------------------- 1 | from . import exdir_object as exob 2 | 3 | 4 | class Raw(exob.Object): 5 | """ 6 | Raw objects are simple folders with any content. 7 | 8 | Raw objects currently have no features apart from showing their path. 
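
A minimal usage sketch (assuming an open Exdir file ``f``; the name is
hypothetical)::

    >>> raw = f.create_raw("acquisition")
    >>> raw.directory  # arbitrary files can be placed in this folder by hand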
9 | """ 10 | def __init__(self, root_directory, parent_path, object_name, file): 11 | super().__init__( 12 | root_directory=root_directory, 13 | parent_path=parent_path, 14 | object_name=object_name, 15 | file=file 16 | ) 17 | -------------------------------------------------------------------------------- /exdir/core/validation.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import os 3 | import sys 4 | from pathlib import Path, WindowsPath 5 | from unicodedata import category 6 | from . import constants as exob 7 | 8 | # `ntpath.isreserved` forbids ASCII control characters 9 | # (https://github.com/python/cpython/blob/7c016deae62308dd1b4e2767fc6abf04857c7843/Lib/ntpath.py#L325) 10 | # while `pathlib.PureWindowsPath.is_reserved` does not, so it is easiest to 11 | # forbid all control characters. 12 | if sys.version_info.minor < 13: 13 | from pathlib import PureWindowsPath 14 | 15 | def _is_reserved(path): 16 | return PureWindowsPath(path).is_reserved() or _contains_control_character(path) 17 | else: 18 | from ntpath import isreserved 19 | 20 | def _is_reserved(path): 21 | return isreserved(path) or _contains_control_character(path) 22 | 23 | VALID_CHARACTERS = ("abcdefghijklmnopqrstuvwxyz1234567890_-.") 24 | 25 | 26 | class NamingRule(Enum): 27 | SIMPLE = 1 28 | STRICT = 2 29 | THOROUGH = 3 30 | NONE = 4 31 | 32 | 33 | def _contains_control_character(s): 34 | return any(ch for ch in s if category(ch)[0] == "C") 35 | 36 | 37 | def _assert_unique(parent_path, name): 38 | try: 39 | name_str = str(name) 40 | except UnicodeEncodeError: 41 | name = name.encode('utf8') 42 | 43 | if (parent_path / name).exists(): 44 | raise RuntimeError( 45 | f"'{name}' already exists in '{parent_path}'" 46 | ) 47 | 48 | 49 | def _assert_nonempty(parent_path, name): 50 | try: 51 | name_str = str(name) 52 | except UnicodeEncodeError: 53 | name_str = name.encode('utf8') 54 | 55 | if len(name_str) < 1: 56 | raise NameError("Name cannot be empty.") 57 | 58 | 59 | def _assert_nonreserved(name): 60 | # NOTE ignore unicode errors, they are not reserved 61 | try: 62 | name_str = str(name) 63 | except UnicodeEncodeError: 64 | name_str = name.encode('utf8') 65 | 66 | reserved_names = [ 67 | exob.META_FILENAME, 68 | exob.ATTRIBUTES_FILENAME, 69 | exob.RAW_FOLDER_NAME 70 | ] 71 | 72 | if name_str in reserved_names: 73 | raise NameError( 74 | f"Name cannot be '{name_str}' because it is a reserved filename in Exdir." 75 | ) 76 | 77 | if _is_reserved(name_str): 78 | raise NameError( 79 | f"Name cannot be '{name_str}' because it is a reserved filename in Windows." 
80 | ) 81 | 82 | def _assert_valid_characters(name): 83 | try: 84 | name_str = str(name) 85 | except UnicodeEncodeError: 86 | name_str = name.encode('utf8') 87 | 88 | for char in name_str: 89 | if char not in VALID_CHARACTERS: 90 | raise NameError( 91 | f"Name '{name_str}' contains invalid character '{char}'.\n" 92 | f"Valid characters are:\n{VALID_CHARACTERS}" 93 | ) 94 | 95 | def unique(parent_path, name): 96 | _assert_nonempty(parent_path, name) 97 | _assert_unique(parent_path, name) 98 | 99 | 100 | def minimal(parent_path, name): 101 | _assert_nonempty(parent_path, name) 102 | _assert_nonreserved(name) 103 | _assert_unique(parent_path, name) 104 | 105 | 106 | def strict(parent_path, name): 107 | _assert_nonreserved(name) 108 | _assert_unique(parent_path, name) 109 | _assert_valid_characters(name) 110 | 111 | def thorough(parent_path, name): 112 | _assert_nonempty(parent_path, name) 113 | _assert_nonreserved(name) 114 | try: 115 | name_str = str(name) 116 | except UnicodeEncodeError: 117 | name_str = name.encode('utf8') 118 | name_lower = name_str.lower() 119 | _assert_valid_characters(name_lower) 120 | 121 | if isinstance(Path(parent_path), WindowsPath): 122 | # use _assert_unique if we're already on Windows, because it is much faster 123 | # than the test below 124 | _assert_unique(parent_path, name) 125 | return 126 | 127 | # os.listdir is much faster here than os.walk or parent_path.iterdir 128 | for item in os.listdir(str(parent_path)): 129 | if name_lower == item.lower(): 130 | raise RuntimeError( 131 | f"A directory with name (case independent) '{name}' already exists " 132 | " and cannot be made according to the naming rule 'thorough'." 133 | ) 134 | 135 | 136 | def none(parent_path, name): 137 | pass 138 | -------------------------------------------------------------------------------- /exdir/plugin_interface/__init__.py: -------------------------------------------------------------------------------- 1 | from .plugin_interface import (Plugin, DatasetData, AttributeData, Dataset, Attribute, Group, File, Raw) 2 | -------------------------------------------------------------------------------- /exdir/plugin_interface/plugin_interface.py: -------------------------------------------------------------------------------- 1 | import os 2 | import inspect 3 | 4 | 5 | class Plugin: 6 | def __init__(self, name, dataset_plugins=None, attribute_plugins=None, 7 | file_plugins=None, group_plugins=None, raw_plugins=None, 8 | write_before=None, write_after=None, 9 | read_before=None, read_after=None): 10 | self.name = name 11 | self.dataset_plugins = dataset_plugins or [] 12 | self.attribute_plugins = attribute_plugins or [] 13 | self.file_plugins = file_plugins or [] 14 | self.group_plugins = group_plugins or [] 15 | self.raw_plugins = raw_plugins or [] 16 | self.write_after = write_after or [] 17 | self.write_before = write_before or [] 18 | self.read_after = read_after or [] 19 | self.read_before = read_before or [] 20 | 21 | plugin_lists = [ 22 | self.dataset_plugins, 23 | self.attribute_plugins, 24 | self.file_plugins, 25 | self.group_plugins, 26 | self.raw_plugins 27 | ] 28 | 29 | for plugin_list in plugin_lists: 30 | for plugin in plugin_list: 31 | setattr(plugin, "_plugin_module", self) 32 | 33 | 34 | class DatasetData: 35 | """ 36 | Container class for dataset plugin data 37 | 38 | Parameters 39 | ---------- 40 | data : numeric or numpy.ndarray 41 | attrs : dictionary or dictionary-like 42 | meta : dictionary or dictionary-like 43 | 44 | """ 45 | def __init__(self, data, attrs, meta): 
46 | self.data = data 47 | self.attrs = attrs 48 | self.meta = meta 49 | 50 | 51 | class AttributeData: 52 | """ 53 | Container class for attribute plugin data 54 | 55 | Parameters 56 | ---------- 57 | attrs : dictionary or dictionary-like 58 | meta : dictionary or dictionary-like 59 | 60 | """ 61 | def __init__(self, attrs, meta): 62 | self.attrs = attrs 63 | self.meta = meta 64 | 65 | 66 | class Dataset: 67 | def before_load(self, dataset_path): 68 | """ 69 | Overload this function in your plugin implementation. 70 | 71 | It receives the path to the dataset NumPy file and allows you to 72 | perform operations before Exdir attempts loading the file. 73 | 74 | The function should not return anything. 75 | """ 76 | pass 77 | 78 | def prepare_read(self, dataset_data): 79 | """ 80 | Overload this function in your plugin implementation. 81 | 82 | It receives a exdir.plugin_interface.DatasetData which has its data 83 | from the NumPy file and the attributes from the YAML file. 84 | The plugin parses these and returns them in a reasonable format to be 85 | used by the user. 86 | 87 | The returned value should be exdir.plugin_interface.DatasetData. 88 | """ 89 | 90 | return dataset_data 91 | 92 | def prepare_write(self, dataset_data): 93 | """ 94 | Overload this function in your plugin implementation. 95 | 96 | It receives the value to be parsed by the plugin and returns an exdir.plugin_interface.DatasetData that is ready to be written to file. 97 | """ 98 | return dataset_data 99 | 100 | def write_before(self): 101 | """ 102 | Overload this function to return a list of plugin names that need to 103 | modify the data after this plugin. 104 | """ 105 | return [] 106 | 107 | def write_after(self): 108 | """ 109 | Overload this function to return a list of plugin names that need to 110 | modify the data before this plugin. 111 | """ 112 | return [] 113 | 114 | def read_before(self): 115 | """ 116 | Overload this function to return a list of plugin names that need to 117 | modify the data after this plugin. 118 | """ 119 | return [] 120 | 121 | def read_after(self): 122 | """ 123 | Overload this function to return a list of plugin names that need to 124 | modify the data before this plugin. 125 | """ 126 | return [] 127 | 128 | 129 | class Attribute: 130 | def prepare_read(self, attribute_data): 131 | """ 132 | Overload this function in your plugin implementation. 133 | 134 | It receives a exdir.plugin_interface.AttributeData which has its attributes 135 | from the YAML file. The plugin parses these and returns them in a reasonable 136 | format to be used by the user. 137 | 138 | The returned value should be exdir.plugin_interface.DatasetData. 139 | """ 140 | return attribute_data 141 | 142 | def prepare_write(self, attribute_data): 143 | """ 144 | Overload this function in your plugin implementation. 145 | 146 | It receives the attribute data to be parsed by the plugin and returns an exdir.plugin_interface.AttributeData that is ready to be written to file. 147 | """ 148 | return attribute_data 149 | 150 | def write_before(self): 151 | """ 152 | Overload this function to return a list of plugin names that need to 153 | modify the data after this plugin. 154 | """ 155 | return [] 156 | 157 | def write_after(self): 158 | """ 159 | Overload this function to return a list of plugin names that need to 160 | modify the data before this plugin. 
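
For example, returning ``["quantities"]`` makes this plugin run after the
bundled quantities plugin (a usage sketch mirroring the numpy_attributes
plugin in this repository).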
161 | """ 162 | return [] 163 | 164 | def read_before(self): 165 | """ 166 | Overload this function to return a list of plugin names that need to 167 | modify the data after this plugin. 168 | """ 169 | return [] 170 | 171 | def read_after(self): 172 | """ 173 | Overload this function to return a list of plugin names that need to 174 | modify the data before this plugin. 175 | """ 176 | return [] 177 | 178 | 179 | class Group: 180 | pass 181 | 182 | 183 | class File: 184 | pass 185 | 186 | 187 | class Raw: 188 | pass 189 | 190 | 191 | def solve_plugin_order(plugins, read_mode=False): 192 | available_plugins = plugins 193 | enabled_plugins = [plugin._plugin_module.name for plugin in plugins] 194 | 195 | plugin_map = {} 196 | dependency_map = {} 197 | 198 | for plugin in available_plugins: 199 | plugin_map[plugin._plugin_module.name] = plugin 200 | if read_mode: 201 | original = plugin._plugin_module.read_after 202 | else: 203 | original = plugin._plugin_module.write_after 204 | 205 | new_set = set() 206 | for other in original: 207 | if other in enabled_plugins: 208 | new_set.add(other) 209 | dependency_map[plugin._plugin_module.name] = new_set 210 | 211 | for plugin in available_plugins: 212 | if read_mode: 213 | original = plugin._plugin_module.read_before 214 | else: 215 | original = plugin._plugin_module.write_before 216 | for before in original: 217 | if before in dependency_map: 218 | dependency_map[before].add(plugin._plugin_module.name) 219 | 220 | queue = set(enabled_plugins) 221 | needed_plugins = set() 222 | while queue: 223 | new_queue = set() 224 | for name in queue: 225 | for dependency in dependency_map[name]: 226 | new_queue.add(dependency) 227 | needed_plugins.add(name) 228 | queue = new_queue 229 | 230 | # remove missing plugins from maps 231 | plugin_map = { 232 | name: v 233 | for name, v in plugin_map.items() 234 | if name in needed_plugins 235 | } 236 | dependency_map = { 237 | name: v 238 | for name, v in dependency_map.items() 239 | if name in needed_plugins 240 | } 241 | 242 | ordered_plugins = [] 243 | while dependency_map: 244 | ready = [ 245 | name 246 | for name, dependencies in dependency_map.items() 247 | if not dependencies 248 | ] 249 | 250 | if not ready: 251 | raise ValueError("Circular plugin dependency found!") 252 | 253 | for name in ready: 254 | del dependency_map[name] 255 | 256 | for dependencies in dependency_map.values(): 257 | dependencies.difference_update(ready) 258 | 259 | for name in ready: 260 | ordered_plugins.append(plugin_map[name]) 261 | 262 | return ordered_plugins 263 | 264 | 265 | class Manager: 266 | class Ordered: 267 | def __init__(self, plugins): 268 | self.write_order = solve_plugin_order(plugins, read_mode=False) 269 | self.read_order = solve_plugin_order(plugins, read_mode=True) 270 | 271 | def __init__(self, plugins): 272 | 273 | file_plugins = [] 274 | group_plugins = [] 275 | dataset_plugins = [] 276 | attribute_plugins = [] 277 | raw_plugins = [] 278 | 279 | if plugins is None: 280 | plugins = [] 281 | 282 | # make iterable if not already so 283 | try: 284 | _ = (e for e in plugins) 285 | except TypeError: 286 | plugins = [plugins] 287 | 288 | self.plugins = [] 289 | for plugin in plugins: 290 | if inspect.ismodule(plugin): 291 | self.plugins.extend(plugin.plugins()) 292 | else: 293 | self.plugins.append(plugin) 294 | 295 | for plugin in self.plugins: 296 | dataset_plugins.extend(plugin.dataset_plugins) 297 | attribute_plugins.extend(plugin.attribute_plugins) 298 | file_plugins.extend(plugin.file_plugins) 299 | 
group_plugins.extend(plugin.group_plugins) 300 | raw_plugins.extend(plugin.raw_plugins) 301 | 302 | self.dataset_plugins = self.Ordered(dataset_plugins) 303 | self.attribute_plugins = self.Ordered(attribute_plugins) 304 | self.file_plugins = self.Ordered(file_plugins) 305 | self.group_plugins = self.Ordered(group_plugins) 306 | self.raw_plugins = self.Ordered(raw_plugins) 307 | -------------------------------------------------------------------------------- /exdir/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import quantities 2 | # from . import numpy_attributes 3 | -------------------------------------------------------------------------------- /exdir/plugins/git_lfs.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import exdir.plugin_interface 3 | import pathlib 4 | import sys 5 | 6 | class DatasetPlugin(exdir.plugin_interface.Dataset): 7 | def __init__(self, verbose): 8 | self.verbose = verbose 9 | 10 | def before_load(self, dataset_path): 11 | path = pathlib.Path(dataset_path) 12 | parent_path = path.parent 13 | with open(dataset_path, "rb", encoding="utf-8") as f: 14 | test_string = b"version https://git-lfs.github.com/spec/v1" 15 | contents = f.read(len(test_string)) 16 | if contents == test_string: 17 | command = ['git', 'rev-parse', '--show-toplevel'] 18 | git_path = subprocess.check_output(command, cwd=str(parent_path), stderr=subprocess.STDOUT) 19 | git_path = pathlib.Path(git_path.decode('utf-8').rstrip()) 20 | relative_path = path.relative_to(git_path) 21 | if self.verbose: 22 | print(f"Fetching Git LFS object for {relative_path}") 23 | command = ['git', '-c', 'lfs.fetchexclude=""', 'lfs', 'pull', '-I', str(relative_path)] 24 | process = subprocess.Popen(command, cwd=str(git_path), stdout=subprocess.PIPE, stderr=subprocess.PIPE) 25 | if self.verbose: 26 | while not process.poll(): 27 | # Git LFS has fancy loading output - this doesn't work well in Jupyter, 28 | # so just replace carriage return with newline 29 | contents = process.stdout.read(1).decode('utf-8').replace('\r', '\n') 30 | if not contents: 31 | break 32 | sys.stdout.write(contents) 33 | sys.stdout.flush() 34 | 35 | process.communicate() 36 | 37 | 38 | class Plugin(exdir.plugin_interface.Plugin): 39 | def __init__(self, verbose=False): 40 | super().__init__("git_lfs", dataset_plugins=[DatasetPlugin(verbose)]) 41 | 42 | def plugins(): 43 | return _plugins(verbose=False) 44 | 45 | 46 | def _plugins(verbose): 47 | return [ 48 | exdir.plugin_interface.Plugin( 49 | "git_lfs", 50 | dataset_plugins=[DatasetPlugin(verbose)] 51 | ) 52 | ] 53 | -------------------------------------------------------------------------------- /exdir/plugins/numpy_attributes.py: -------------------------------------------------------------------------------- 1 | import exdir 2 | import quantities as pq 3 | import numpy as np 4 | 5 | 6 | def convert_from_list(data): 7 | if isinstance(data, dict): 8 | try: 9 | for key, value in data.items(): 10 | data[key] = convert_from_list(value) 11 | except AttributeError: 12 | pass 13 | elif isinstance(data, list): 14 | return np.array(data) 15 | return data 16 | 17 | 18 | def convert_to_list(data): 19 | if isinstance(data, np.ndarray): 20 | return data.tolist() 21 | elif isinstance(data, np.integer): 22 | return int(data) 23 | elif isinstance(data, np.float64): 24 | return float(data) 25 | else: 26 | try: 27 | new_result = {} 28 | for key, val in data.items(): 29 | new_key = 
convert_to_list(key) 30 | new_result[new_key] = convert_to_list(val) 31 | return new_result 32 | except AttributeError: 33 | pass 34 | 35 | return data 36 | 37 | 38 | class AttributePlugin(exdir.plugin_interface.Attribute): 39 | def prepare_write(self, attribute_data): 40 | attribute_data.attrs = convert_to_list(attribute_data.attrs) 41 | return attribute_data 42 | 43 | def prepare_read(self, attribute_data): 44 | attribute_data.attrs = convert_from_list(attribute_data.attrs) 45 | return attribute_data 46 | 47 | 48 | def plugins(): 49 | return [exdir.plugin_interface.Plugin( 50 | "numpy_attributes", 51 | attribute_plugins=[AttributePlugin()], 52 | read_after=["quantities"], 53 | write_after=["quantities"] 54 | )] 55 | -------------------------------------------------------------------------------- /exdir/plugins/quantities.py: -------------------------------------------------------------------------------- 1 | import exdir 2 | import quantities as pq 3 | import numpy as np 4 | 5 | from collections import defaultdict 6 | 7 | 8 | def convert_back_quantities(value): 9 | """Convert quantities back from dictionary.""" 10 | result = value 11 | if isinstance(value, dict): 12 | if "unit" in value and "value" in value and "uncertainty" in value: 13 | try: 14 | result = pq.UncertainQuantity(value["value"], 15 | value["unit"], 16 | value["uncertainty"]) 17 | except Exception: 18 | pass 19 | elif "unit" in value and "value" in value: 20 | try: 21 | result = pq.Quantity(value["value"], value["unit"]) 22 | except Exception: 23 | pass 24 | else: 25 | try: 26 | for key, value in result.items(): 27 | result[key] = convert_back_quantities(value) 28 | except AttributeError: 29 | pass 30 | 31 | return result 32 | 33 | 34 | def convert_quantities(value): 35 | """Convert quantities to dictionary.""" 36 | 37 | result = value 38 | if isinstance(value, pq.Quantity): 39 | result = { 40 | "value": value.magnitude.tolist(), 41 | "unit": value.dimensionality.string 42 | } 43 | if isinstance(value, pq.UncertainQuantity): 44 | assert value.dimensionality == value.uncertainty.dimensionality 45 | result["uncertainty"] = value.uncertainty.magnitude.tolist() 46 | elif isinstance(value, np.ndarray): 47 | result = value.tolist() 48 | elif isinstance(value, np.integer): 49 | result = int(value) 50 | elif isinstance(value, np.float64): 51 | result = float(value) 52 | else: 53 | # try if dictionary like objects can be converted if not return the 54 | # original object 55 | # Note, this might fail if .items() returns a strange combination of 56 | # objects 57 | try: 58 | new_result = {} 59 | for key, val in value.items(): 60 | new_key = convert_quantities(key) 61 | new_result[new_key] = convert_quantities(val) 62 | result = new_result 63 | except AttributeError: 64 | pass 65 | 66 | return result 67 | 68 | 69 | class DatasetPlugin(exdir.plugin_interface.Dataset): 70 | def prepare_read(self, dataset_data): 71 | values = dataset_data.data 72 | attrs = dataset_data.attrs 73 | 74 | if "unit" in attrs: 75 | item_dict = { 76 | "value": values, 77 | "unit": attrs["unit"] 78 | } 79 | if "uncertainty" in attrs: 80 | item_dict["uncertainty"] = attrs["uncertainty"] 81 | 82 | values = convert_back_quantities(item_dict) 83 | 84 | dataset_data.data = values 85 | 86 | return dataset_data 87 | 88 | def prepare_write(self, dataset_data): 89 | data = dataset_data.data 90 | attrs = {} 91 | 92 | meta = dataset_data.meta 93 | 94 | if isinstance(data, pq.Quantity): 95 | # TODO consider adding a helper class that wraps defaultdict and converts back 
again
96 |             if "plugins" not in meta:
97 |                 meta["plugins"] = {}
98 | 
99 |             if "quantities" not in meta["plugins"]:
100 |                 meta["plugins"]["quantities"] = {}
101 | 
102 |             meta["plugins"]["quantities"]["required"] = True
103 |             result = data.magnitude
104 |             attrs["unit"] = data.dimensionality.string
105 |             if isinstance(data, pq.UncertainQuantity):
106 |                 attrs["uncertainty"] = data.uncertainty
107 |         else:
108 |             result = data
109 | 
110 |         dataset_data.data = result  # use the unit-stripped magnitude computed above
111 |         dataset_data.attrs = attrs
112 |         dataset_data.meta = dict(meta)
113 | 
114 |         return dataset_data
115 | 
116 | 
117 | class AttributePlugin(exdir.plugin_interface.Attribute):
118 |     def prepare_read(self, attribute_data):
119 |         attribute_data.attrs = convert_back_quantities(attribute_data.attrs)
120 |         return attribute_data
121 | 
122 |     def prepare_write(self, attribute_data):
123 |         attribute_data.attrs = convert_quantities(attribute_data.attrs)
124 |         return attribute_data
125 | 
126 | 
127 | def plugins():
128 |     return [exdir.plugin_interface.Plugin(
129 |         "quantities",
130 |         dataset_plugins=[DatasetPlugin()],
131 |         attribute_plugins=[AttributePlugin()],
132 |         read_before=["numpy_attributes"],
133 |         write_before=["numpy_attributes"]
134 |     )]
-------------------------------------------------------------------------------- /exdir/static/index.js: --------------------------------------------------------------------------------

1 | /*
2 | CollapsibleLists.js
3 | 
4 | An object allowing lists to dynamically expand and collapse
5 | 
6 | Created by Kate Morley - http://code.iamkate.com/ - and released under the terms
7 | of the CC0 1.0 Universal legal code:
8 | 
9 | http://creativecommons.org/publicdomain/zero/1.0/legalcode
10 | 
11 | Modifications by Svenn-Arne Dragly.
12 | */
13 | 
14 | const exdir = (function() { // namespace
15 | const CollapsibleLists = (function(){
16 | 
17 | // Makes all lists with the class 'collapsibleList' collapsible. The
18 | // parameter is:
19 | //
20 | // doNotRecurse - true if sub-lists should not be made collapsible
21 | function apply(doNotRecurse){
22 |   [].forEach.call(document.getElementsByTagName('ul'), node => {
23 |     if (node.classList.contains('collapsibleList')){
24 |       applyTo(node, true);
25 |       if (!doNotRecurse){
26 |         [].forEach.call(node.getElementsByTagName('ul'), subnode => {
27 |           subnode.classList.add('collapsibleList')
28 |         });
29 |       }
30 |     }
31 |   })
32 | }
33 | 
34 | // Makes the specified list collapsible. The parameters are:
35 | //
36 | // node - the list element
37 | // doNotRecurse - true if sub-lists should not be made collapsible
38 | function applyTo(node, doNotRecurse){
39 |   [].forEach.call(node.getElementsByTagName('li'), li => {
40 |     if (!doNotRecurse || node === li.parentNode){
41 |       li.style.userSelect = 'none';
42 |       li.style.MozUserSelect = 'none';
43 |       li.style.msUserSelect = 'none';
44 |       li.style.WebkitUserSelect = 'none';
45 | 
46 |       li.addEventListener('click', handleClick.bind(null, li));
47 | 
48 |       toggle(li);
49 | 
50 |       // If it is the root node, expand it again
51 |       if (node === li.parentNode) {
52 |         toggle(li);
53 |       }
54 |     }
55 |   });
56 | }
57 | 
58 | // Handles a click. The parameter is:
59 | //
60 | // node - the node for which clicks are being handled
61 | function handleClick(node, e){
62 |   let li = e.target;
63 |   while (li.nodeName !== 'LI'){
64 |     li = li.parentNode;
65 |   }
66 | 
67 |   if (li === node){
68 |     toggle(node);
69 |   }
70 | }
71 | 
72 | // Opens or closes the unordered list elements directly within the
73 | // specified node.
The parameter is: 74 | // 75 | // node - the node containing the unordered list elements 76 | function toggle(node){ 77 | const open = node.classList.contains('collapsibleListClosed'); 78 | const uls = node.getElementsByTagName('ul'); 79 | 80 | [].forEach.call(uls, ul => { 81 | 82 | let li = ul; 83 | while (li.nodeName !== 'LI'){ 84 | li = li.parentNode; 85 | } 86 | 87 | if (li === node){ 88 | ul.style.display = (open ? 'block' : 'none'); 89 | } 90 | 91 | }); 92 | 93 | node.classList.remove('collapsibleListOpen'); 94 | node.classList.remove('collapsibleListClosed'); 95 | 96 | if (uls.length > 0){ 97 | node.classList.add('collapsibleList' + (open ? 'Open' : 'Closed')); 98 | } 99 | 100 | } 101 | 102 | return {apply, applyTo}; 103 | 104 | })(); 105 | 106 | return {CollapsibleLists}; 107 | })(); 108 | -------------------------------------------------------------------------------- /exdir/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import path, display 2 | -------------------------------------------------------------------------------- /exdir/utils/display.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import exdir 3 | 4 | 5 | def _build_tree(o): 6 | contents = "
<li>" 7 | if isinstance(o, exdir.core.File): 8 | name = o.root_directory.name 9 | else: 10 | name = o.object_name 11 | 12 | contents += f"{name} ({o.__class__.__name__})" 13 | if isinstance(o, exdir.core.Dataset): 14 | contents += f"<ul><li>Shape: {o.shape}</li><li>Type: {o.dtype}</li></ul>" 15 | else: 16 | try: 17 | keys = o.keys() 18 | inner_contents = "" 19 | for a in keys: 20 | inner_contents += _build_tree(o[a]) 21 | if inner_contents != "": 22 | contents += f"<ul>{inner_contents}</ul>" 23 | except AttributeError: 24 | pass 25 | 26 | contents += "</li>" 27 | 28 | return contents 29 | 30 | def html_tree(obj): 31 | from IPython.core.display import display, HTML 32 | import uuid 33 | 34 | ulid=uuid.uuid1() 35 | 36 | style = """ 37 | .collapsibleList li{ 38 | list-style-type : none; 39 | cursor : auto; 40 | } 41 | 42 | li.collapsibleListOpen{ 43 | list-style-type : circle; 44 | cursor : pointer; 45 | } 46 | 47 | li.collapsibleListClosed{ 48 | list-style-type : disc; 49 | cursor : pointer; 50 | } 51 | """ 52 | 53 | script = f""" 54 | var node = document.getElementById('{ulid}'); 55 | exdir.CollapsibleLists.applyTo(node); 56 | """ 57 | 58 | result = (f"<style>{style}</style>" 59 | f"<ul id='{ulid}' class='collapsibleList'>{_build_tree(obj)}</ul>" 60 | f"<script>{script}</script>" 61 | "") 62 | 63 | return result 64 | 65 | 66 | def _build_attrs_tree(key, value): 67 | contents = "<li>" 68 | contents += f"{key}: " 69 | try: 70 | items = value.items() 71 | inner_contents = "" 72 | for subkey, subvalue in items: 73 | inner_contents += _build_attrs_tree(subkey, subvalue) 74 | if inner_contents != "": 75 | contents += f"<ul>{inner_contents}</ul>" 76 | except AttributeError: 77 | contents += f"{value}" 78 | 79 | contents += "</li>" 80 | 81 | return contents 82 | 83 | 84 | def html_attrs(attributes): 85 | return f"<ul>{_build_attrs_tree('Attributes', attributes)}</ul>
    " 86 | -------------------------------------------------------------------------------- /exdir/utils/path.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | 4 | def name_to_asserted_group_path(name): 5 | path = pathlib.PurePosixPath(name) 6 | if path.is_absolute(): 7 | raise NotImplementedError( 8 | "Absolute paths are currently not supported and unlikely to be implemented." 9 | ) 10 | 11 | if len(path.parts) < 1 and str(name) != ".": 12 | raise NotImplementedError( 13 | "Getting an item on a group with path '" + name + "' " + 14 | "is not supported and unlikely to be implemented." 15 | ) 16 | 17 | return path 18 | 19 | 20 | def remove_root(name): 21 | path = pathlib.PurePosixPath(name) 22 | if path.is_absolute(): 23 | path = path.relative_to(path.root) 24 | return path 25 | -------------------------------------------------------------------------------- /jupyter-config/jupyter_notebook_config.d/exdir.json: -------------------------------------------------------------------------------- 1 | { 2 | "NotebookApp": { 3 | "nbserver_extensions": { 4 | "exdir": true 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /jupyter-config/nbconfig/notebook.d/exdir.json: -------------------------------------------------------------------------------- 1 | { 2 | "load_extensions": { 3 | "exdir/index": true 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/.gitrepo: -------------------------------------------------------------------------------- 1 | ; DO NOT EDIT (unless you know what you are doing) 2 | ; 3 | ; This subdirectory is a git "subrepo", and this file is maintained by the 4 | ; git-subrepo command. 
See https://github.com/git-commands/git-subrepo#readme 5 | ; 6 | [subrepo] 7 | remote = git@github.com:CINPLA/travis-conda-scripts.git 8 | branch = master 9 | commit = 02e37ccf390b322ad9b876d49674d1e605313740 10 | parent = 1ddd49fa7c04f124bfa5f4e1717e974c19ad0f58 11 | cmdver = 0.3.0 12 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/README.md: -------------------------------------------------------------------------------- 1 | # travis-conda-scripts 2 | Scripts to ease testing on travis 3 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/scripts/build.sh: -------------------------------------------------------------------------------- 1 | mkdir -p "$CONDA_BLD_PATH" 2 | conda config --set anaconda_upload no 3 | conda build . $EXTRA_CONDA_CHANNELS --python "$PYTHON_VERSION" --dirty --old-build-string 4 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/scripts/config.sh: -------------------------------------------------------------------------------- 1 | git fetch --unshallow # needed to get enough commits for tag, see travis-ci/travis-ci#3412 2 | export PATH="$HOME/miniconda/bin:$PATH" 3 | export GIT_DESCRIBE=$(git describe --always --tags --long) 4 | export GIT_TAG=$(git describe --always --tags --abbrev=0) 5 | export TAG_AND_TRAVIS_BUILD=${GIT_TAG}_b${TRAVIS_BUILD_NUMBER} 6 | export TAG_TRAVIS_BUILD_AND_COMMIT=${TAG_AND_TRAVIS_BUILD}_g${TRAVIS_COMMIT} 7 | export CONDA_BLD_PATH=/tmp/conda-bld 8 | export PACKAGE=$(conda build . --output --old-build-string --python "$PYTHON_VERSION" 2> conda-errors.log | tail -1) 9 | export EXTRA_CONDA_CHANNELS="-c defaults -c conda-forge -c cinpla" 10 | export GIT_STRING=${GIT_TAG} 11 | if [ -z $TRAVIS_TAG ]; then 12 | echo "INFO: No TRAVIS_TAG found, adding dev channel" 13 | export EXTRA_CONDA_CHANNELS="$EXTRA_CONDA_CHANNELS -c cinpla/label/dev" 14 | export GIT_STRING="${GIT_TAG}_latest" 15 | fi 16 | echo PATH $PATH 17 | echo GIT_DESCRIBE $GIT_DESCRIBE 18 | echo CONDA_BLD_PATH $CONDA_BLD_PATH 19 | echo GIT_TAG $GIT_TAG 20 | echo TAG_AND_TRAVIS_BUILD $TAG_AND_TRAVIS_BUILD 21 | echo TAG_TRAVIS_BUILD_AND_COMMIT $TAG_TRAVIS_BUILD_AND_COMMIT 22 | echo PACKAGE $PACKAGE 23 | echo CONDA_CHANNELS $EXTRA_CONDA_CHANNELS 24 | echo conda-errors: 25 | cat conda-errors.log 26 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/scripts/doctest.sh: -------------------------------------------------------------------------------- 1 | cd docs 2 | conda install sphinx 3 | make doctest 4 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/scripts/install_package.sh: -------------------------------------------------------------------------------- 1 | conda install --use-local "$1" python="$PYTHON_VERSION" $EXTRA_CONDA_CHANNELS 2 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/scripts/travis_install.sh: -------------------------------------------------------------------------------- 1 | if [[ "$PYTHON_VERSION" == "2.7" ]]; then 2 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; 3 | else 4 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 5 | fi 6 | bash miniconda.sh -b -p $HOME/miniconda 7 | export PATH="$HOME/miniconda/bin:$PATH" 8 | hash 
-r 9 | conda config --set always_yes yes --set changeps1 no 10 | conda update -q conda 11 | conda install -y conda-build anaconda-client 12 | -------------------------------------------------------------------------------- /libs/travis-conda-scripts/scripts/upload.sh: -------------------------------------------------------------------------------- 1 | if [ -z "$1" ]; then 2 | echo "ERROR: No channel provided" 3 | echo "Usage: upload.sh [label]" 4 | exit 1 5 | fi 6 | if [ $TRAVIS_TEST_RESULT -eq 0 ]; then 7 | LABEL=${2:-main} 8 | 9 | for TARBALL in $CONDA_BLD_PATH/noarch/*.tar.bz2; do 10 | echo "Uploading $TARBALL to anaconda with anaconda upload..." 11 | set +x # hide token 12 | anaconda -t "$CONDA_UPLOAD_TOKEN" upload -u "$1" --force "$TARBALL" -l "$LABEL" 13 | set -x 14 | done 15 | echo "Upload command complete!" 16 | else 17 | echo "Upload cancelled due to failed test." 18 | fi 19 | -------------------------------------------------------------------------------- /postBuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python setup.py install 3 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | # These are developer requirements needed for testing and CI 2 | h5py 3 | numpy 4 | pytest 5 | quantities 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | h5py==3.13.0 8 | # via -r requirements.in 9 | iniconfig==1.1.1 10 | # via pytest 11 | numpy==2.2.3 12 | # via 13 | # -r requirements.in 14 | # h5py 15 | # quantities 16 | packaging==20.9 17 | # via pytest 18 | pluggy==1.5.0 19 | # via pytest 20 | pyparsing==2.4.7 21 | # via packaging 22 | pytest==8.3.5 23 | # via -r requirements.in 24 | quantities==0.16.1 25 | # via -r requirements.in 26 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files.
5 | 6 | [versioneer] 7 | VCS = git 8 | style = pep440 9 | versionfile_source = exdir/_version.py 10 | versionfile_build = exdir/_version.py 11 | tag_prefix = v 12 | parentdir_prefix = 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import versioneer 3 | 4 | 5 | with open("README.md", encoding="utf-8") as handle: 6 | long_description = handle.read() 7 | 8 | setup( 9 | name="exdir", 10 | packages=find_packages(), 11 | include_package_data=True, 12 | version=versioneer.get_version(), 13 | cmdclass=versioneer.get_cmdclass(), 14 | data_files=[ 15 | # like `jupyter nbextension install --sys-prefix` 16 | ( 17 | "share/jupyter/nbextensions/exdir", 18 | [ 19 | "exdir/static/index.js", 20 | ], 21 | ), 22 | # like `jupyter nbextension enable --sys-prefix` 23 | ( 24 | "etc/jupyter/nbconfig/notebook.d", 25 | ["jupyter-config/nbconfig/notebook.d/exdir.json"], 26 | ), 27 | # like `jupyter serverextension enable --sys-prefix` 28 | ( 29 | "etc/jupyter/jupyter_notebook_config.d", 30 | ["jupyter-config/jupyter_notebook_config.d/exdir.json"], 31 | ), 32 | ], 33 | install_requires=[ 34 | "numpy>=1.20", 35 | "ruamel.yaml==0.18.10", 36 | ], 37 | python_requires=">=3.7", 38 | zip_safe=False, 39 | ) 40 | -------------------------------------------------------------------------------- /tests/benchmarks/benchmarks.py: -------------------------------------------------------------------------------- 1 | import exdir 2 | import pytest 3 | import os 4 | import shutil 5 | import time 6 | import numpy as np 7 | import h5py 8 | 9 | one_hundred_attributes = {} 10 | for i in range(200): 11 | one_hundred_attributes["hello" + str(i)] = "world" 12 | 13 | def benchmark(name, target, setup=None, teardown=None, iterations=1): 14 | print(f"Running {name}...") 15 | 16 | total_time = 0 17 | setup_teardown_start = time.time() 18 | for i in range(iterations): 19 | data = tuple() 20 | if setup is not None: 21 | data = setup() 22 | start_time = time.time() 23 | target(*data) 24 | end_time = time.time() 25 | total_time += end_time - start_time 26 | if teardown is not None: 27 | teardown(*data) 28 | setup_teardown_end = time.time() 29 | total_setup_teardown = setup_teardown_end - setup_teardown_start 30 | 31 | output = ( 32 | f"{name}\n" + 33 | ("-" * len(name)) + "\n" + 34 | f"Iterations:\n{iterations}\n" + 35 | f"Total time:\n{total_time}\n" + 36 | f"Total time (iterations + setup/teardown):\n{total_setup_teardown}\n" + 37 | f"Mean:\n{total_time / iterations}\n" 38 | ) 39 | 40 | print(output) 41 | 42 | 43 | def setup_exdir(): 44 | testpath = "/tmp/ramdisk/test.exdir" 45 | # testpath = tmpdir / "test.exdir" 46 | if os.path.exists(testpath): 47 | shutil.rmtree(testpath) 48 | f = exdir.File(testpath, name_validation=exdir.validation.none) 49 | return f, testpath 50 | 51 | 52 | def setup_h5py(): 53 | testpath = "/tmp/ramdisk/test.h5" 54 | # testpath = tmpdir / "test.h5" 55 | if os.path.exists(testpath): 56 | os.remove(testpath) 57 | f = h5py.File(testpath, "w") 58 | return f, testpath 59 | 60 | 61 | def benchmark_exdir(function, iterations=100): 62 | benchmark( 63 | "exdir_" + function.__name__, 64 | lambda f: function(f), 65 | setup_exdir, 66 | teardown_exdir, 67 | iterations=iterations 68 | ) 69 | 70 | 71 | def benchmark_h5py(function, iterations=100): 72 | benchmark( 73 | "h5py_" + function.__name__, 74 | lambda f: function(f), 75 | setup_h5py, 76 | teardown_h5py,
77 | iterations=iterations 78 | ) 79 | 80 | 81 | def teardown_exdir(f, testpath): 82 | f.close() 83 | shutil.rmtree(testpath) 84 | 85 | 86 | def teardown_h5py(f, testpath): 87 | os.remove(testpath) 88 | 89 | 90 | def add_few_attributes(obj): 91 | for i in range(5): 92 | obj.attrs["hello" + str(i)] = "world" 93 | 94 | 95 | def add_many_attributes(obj): 96 | for i in range(200): 97 | obj.attrs["hello" + str(i)] = "world" 98 | 99 | def add_many_attributes_single_operation(obj): 100 | obj.attrs = one_hundred_attributes 101 | 102 | def add_attribute_tree(obj): 103 | tree = {} 104 | for i in range(100): 105 | tree["hello" + str(i)] = "world" 106 | tree["intermediate"] = {} 107 | intermediate = tree["intermediate"] 108 | for level in range(10): 109 | level_str = "level" + str(level) 110 | intermediate[level_str] = {} 111 | intermediate = intermediate[level_str] 112 | intermediate = 42 113 | obj.attrs["test"] = tree 114 | 115 | 116 | def add_small_dataset(obj): 117 | data = np.zeros((100, 100, 100)) 118 | obj.create_dataset("foo", data=data) 119 | obj.close() 120 | 121 | 122 | def add_medium_dataset(obj): 123 | data = np.zeros((1000, 100, 100)) 124 | obj.create_dataset("foo", data=data) 125 | obj.close() 126 | 127 | 128 | def add_large_dataset(obj): 129 | data = np.zeros((1000, 1000, 100)) 130 | obj.create_dataset("foo", data=data) 131 | obj.close() 132 | 133 | 134 | def create_many_objects(obj): 135 | for i in range(5000): 136 | group = obj.create_group(f"group{i}") 137 | # data = np.zeros((10, 10, 10)) 138 | # group.create_dataset(f"dataset{i}", data=data) 139 | 140 | 141 | def iterate_objects(obj): 142 | i = 0 143 | for a in obj: 144 | i += 1 145 | return i 146 | 147 | 148 | def create_large_tree(obj, level=0): 149 | if level > 4: 150 | return 151 | for i in range(3): 152 | group = obj.create_group(f"group_{i}_{level}") 153 | data = np.zeros((10, 10, 10)) 154 | group.create_dataset(f"dataset_{i}_{level}", data=data) 155 | create_large_tree(group, level + 1) 156 | 157 | 158 | if not os.path.exists("/tmp/ramdisk"): 159 | print("Error: You need to set up a ramdisk at /tmp/ramdisk first:") 160 | print() 161 | print("    mkdir /tmp/ramdisk/") 162 | print("    sudo mount -t tmpfs -o size=2048M tmpfs /tmp/ramdisk/") 163 | print() 164 | print("Rerun this script after setting up the ramdisk.") 165 | raise SystemExit(1) 166 | benchmarks = [ 167 | (add_few_attributes, 100), 168 | (add_many_attributes, 10), 169 | #(add_attribute_tree, 100), 170 | (add_small_dataset, 100), 171 | (add_medium_dataset, 10), 172 | (add_large_dataset, 10), 173 | (create_many_objects, 3), 174 | (create_large_tree, 10), 175 | ] 176 | 177 | for function, iterations in benchmarks: 178 | benchmark_exdir(function, iterations) 179 | benchmark_h5py(function, iterations) 180 | 181 | benchmark_exdir(add_attribute_tree) 182 | benchmark_exdir(add_many_attributes_single_operation) 183 | 184 | def create_setup_many_objects(setup_function): 185 | def setup(): 186 | obj, path = setup_function() 187 | create_many_objects(obj) 188 | return obj, path 189 | return setup 190 | 191 | benchmark( 192 | "exdir_iteration", 193 | lambda obj, path: iterate_objects(obj), 194 | create_setup_many_objects(setup_exdir), 195 | teardown_exdir, 196 | iterations=2 197 | ) 198 | 199 | benchmark( 200 | "h5py_iteration", 201 | lambda obj, path: iterate_objects(obj), 202 | create_setup_many_objects(setup_h5py), 203 | teardown_h5py, 204 | iterations=2 205 | ) 206 | 207 | def partial_write(dataset): 208 | dataset[320:420, 0:100, 0:100] = np.ones((100, 100, 100)) 209 | 210 | def
create_setup_dataset(setup_function): 211 | def setup(): 212 | f, path = setup_function() 213 | data = np.zeros((1000, 100, 100)) 214 | dataset = f.create_dataset("foo", data=data) 215 | return dataset, f, path 216 | return setup 217 | 218 | benchmark( 219 | "exdir_partial", 220 | lambda dataset, f, path: partial_write(dataset), 221 | create_setup_dataset(setup_exdir), 222 | lambda dataset, f, path: teardown_exdir(f, path), 223 | iterations=200 224 | ) 225 | 226 | benchmark( 227 | "h5py_partial", 228 | lambda dataset, f, path: partial_write(dataset), 229 | create_setup_dataset(setup_h5py), 230 | lambda dataset, f, path: teardown_h5py(f, path), 231 | iterations=200 232 | ) 233 | -------------------------------------------------------------------------------- /tests/benchmarks/profiling.py: -------------------------------------------------------------------------------- 1 | import exdir 2 | import exdir.core.validation 3 | import pytest 4 | import os 5 | import shutil 6 | import time 7 | import numpy as np 8 | import h5py 9 | 10 | def setup_exdir(): 11 | testpath = "/tmp/ramdisk/test.exdir" 12 | # testpath = tmpdir / "test.exdir" 13 | if os.path.exists(testpath): 14 | shutil.rmtree(testpath) 15 | f = exdir.File(testpath, name_validation=exdir.validation.none) 16 | return f, testpath 17 | 18 | obj = setup_exdir()[0] 19 | 20 | for i in range(5000): 21 | group = obj.create_group(f"group{i}") 22 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import shutil 3 | import os 4 | import h5py 5 | import pathlib 6 | import time 7 | 8 | import exdir 9 | 10 | 11 | def remove(name): 12 | if name.exists(): 13 | shutil.rmtree(str(name)) 14 | assert not name.exists() 15 | 16 | 17 | @pytest.fixture 18 | def setup_teardown_folder(tmpdir): 19 | testpath = pathlib.Path(tmpdir.strpath) 20 | testdir = testpath / "exdir_dir" 21 | testfile = testpath / "test.exdir" 22 | 23 | remove(testpath) 24 | 25 | testpath.mkdir(parents=True) 26 | 27 | yield testpath, testfile, testdir 28 | 29 | remove(testpath) 30 | 31 | 32 | @pytest.fixture 33 | def setup_teardown_file(tmpdir): 34 | testpath = pathlib.Path(tmpdir.strpath) 35 | testdir = testpath / "exdir_dir" 36 | testfile = testpath / "test.exdir" 37 | 38 | remove(testpath) 39 | 40 | testpath.mkdir(parents=True) 41 | 42 | f = exdir.File(testfile, mode="w") 43 | 44 | yield testpath, testfile, testdir, f 45 | 46 | f.close() 47 | 48 | remove(testpath) 49 | 50 | @pytest.fixture 51 | def exdir_tmpfile(tmpdir): 52 | testpath = pathlib.Path(tmpdir.strpath) / "test.exdir" 53 | f = exdir.File(testpath, mode="w") 54 | yield f 55 | f.close() 56 | remove(testpath) 57 | 58 | 59 | @pytest.fixture 60 | def h5py_tmpfile(tmpdir): 61 | testpath = pathlib.Path(tmpdir.strpath) / "test.h5" 62 | f = h5py.File(testpath, mode="w") 63 | yield f 64 | f.close() 65 | os.remove(str(testpath)) 66 | 67 | 68 | @pytest.fixture 69 | def quantities_tmpfile(tmpdir): 70 | testpath = pathlib.Path(tmpdir.strpath) / "test.exdir" 71 | f = exdir.File(testpath, mode="w", plugins=exdir.plugins.quantities) 72 | yield f 73 | f.close() 74 | remove(testpath) 75 | 76 | @pytest.fixture 77 | def exdir_benchmark(tmpdir): 78 | def benchmark(name, target, setup=None, teardown=None, iterations=1): 79 | total_time = 0 80 | for i in range(iterations): 81 | data = tuple() 82 | if setup is not None: 83 | data = setup() 84 | start_time = time.time() 85 | target(*data) 86 | end_time
= time.time() 87 | total_time += end_time - start_time 88 | if teardown is not None: 89 | teardown(*data) 90 | print("--------------------") 91 | print("Result for:", name, sep="\n") 92 | print("Iterations:", iterations, sep="\n") 93 | print("Mean:", total_time / iterations, sep="\n") 94 | print("--------------------") 95 | 96 | yield benchmark 97 | -------------------------------------------------------------------------------- /tests/test_attr.py: -------------------------------------------------------------------------------- 1 | # This file is part of Exdir, the Experimental Directory Structure. 2 | # 3 | # Copyright 2017 Simen Tennøe 4 | # 5 | # License: MIT, see "LICENSE" file for the full license terms. 6 | # 7 | # This file contains code from h5py, a Python interface to the HDF5 library, 8 | # licensed under a standard 3-clause BSD license 9 | # with copyright Andrew Collette and contributors. 10 | # See http://www.h5py.org and the "3rdparty/h5py-LICENSE" file for details. 11 | 12 | 13 | import pytest 14 | import numpy as np 15 | try: 16 | import ruamel_yaml as yaml 17 | except ImportError: 18 | import ruamel.yaml as yaml 19 | 20 | from exdir.core import Attribute, File 21 | 22 | 23 | def test_attr_init(): 24 | attribute = Attribute("parent", "mode", "file") 25 | 26 | assert attribute.parent == "parent" 27 | assert attribute.mode == "mode" 28 | assert attribute.file == "file" 29 | assert attribute.path == [] 30 | 31 | # Attribute creation/retrieval via special methods 32 | def test_create(setup_teardown_file): 33 | """Attribute creation by direct assignment.""" 34 | f = setup_teardown_file[3] 35 | f.attrs["a"] = 4.0 36 | assert list(f.attrs.keys()) == ["a"] 37 | assert f.attrs["a"] == 4.0 38 | 39 | 40 | def test_create_dict(setup_teardown_file): 41 | f = setup_teardown_file[3] 42 | 43 | dictionary = {"a": 1.0, "b": 2.0, "c": 3.0} 44 | f.attrs["d"] = dictionary 45 | 46 | out = list(f.attrs["d"].items()) 47 | out.sort() 48 | assert out == [("a", 1.0), ("b", 2.0), ("c", 3.0)] 49 | 50 | 51 | def test_to_dict(setup_teardown_file): 52 | f = setup_teardown_file[3] 53 | 54 | dictionary = {"a": 1.0, "b": 2.0, "c": 3.0} 55 | f.attrs["d"] = dictionary 56 | out = f.attrs["d"].to_dict() 57 | assert out == dictionary 58 | 59 | 60 | def test_number(setup_teardown_file): 61 | f = setup_teardown_file[3] 62 | f.attrs[2] = 2 63 | assert f.attrs[2] == 2 64 | 65 | 66 | def test_overwrite(setup_teardown_file): 67 | """Attributes are silently overwritten.""" 68 | f = setup_teardown_file[3] 69 | f.attrs["a"] = 4.0 70 | f.attrs["a"] = 5.0 71 | assert f.attrs["a"] == 5.0 72 | 73 | 74 | def test_rank(setup_teardown_file): 75 | """Attribute rank is preserved.""" 76 | f = setup_teardown_file[3] 77 | f.attrs["a"] = (4.0, 5.0) 78 | assert type(f.attrs["a"]) == list 79 | assert f.attrs["a"] == [4.0, 5.0] 80 | 81 | 82 | def test_single(setup_teardown_file): 83 | """Single-element NumPy arrays as attributes give errors.""" 84 | f = setup_teardown_file[3] 85 | 86 | with pytest.raises(yaml.representer.RepresenterError): 87 | f.attrs["a"] = np.ones((1,)) 88 | 89 | def test_array(setup_teardown_file): 90 | """Multidimensional NumPy arrays as attributes give errors.""" 91 | f = setup_teardown_file[3] 92 | 93 | with pytest.raises(yaml.representer.RepresenterError): 94 | f.attrs["a"] = np.ones((2, 2)) 95 | 96 | 97 | 98 | 99 | def test_access_exc(setup_teardown_file): 100 | """Attempt to access missing item raises KeyError.""" 101 | f = setup_teardown_file[3] 102 | 103 | with pytest.raises(KeyError): 104 | f.attrs["a"] 105 | 106 | 107 | def
test_in(setup_teardown_file): 108 | """Test that in (__contains__) works.""" 109 | f = setup_teardown_file[3] 110 | 111 | f.attrs["a"] = 4.0 112 | f.attrs["b"] = 4.0 113 | f.attrs["c"] = 4.0 114 | 115 | assert "a" in f.attrs 116 | assert "b" in f.attrs 117 | assert "c" in f.attrs 118 | assert "d" not in f.attrs 119 | 120 | def test_keys(setup_teardown_file): 121 | """Test that keys() returns all attribute keys.""" 122 | f = setup_teardown_file[3] 123 | 124 | f.attrs["a"] = 1.0 125 | f.attrs["b"] = 2.0 126 | f.attrs["c"] = 3.0 127 | out = list(f.attrs.keys()) 128 | out.sort() 129 | assert out == ["a", "b", "c"] 130 | 131 | 132 | def test_values(setup_teardown_file): 133 | """Test that values() returns all attribute values.""" 134 | f = setup_teardown_file[3] 135 | 136 | f.attrs["a"] = 1.0 137 | f.attrs["b"] = 2.0 138 | f.attrs["c"] = 3.0 139 | out = list(f.attrs.values()) 140 | out.sort() 141 | assert out == [1.0, 2.0, 3.0] 142 | 143 | 144 | def test_items(setup_teardown_file): 145 | """Test that items() returns all attribute items.""" 146 | f = setup_teardown_file[3] 147 | 148 | f.attrs["a"] = 1.0 149 | f.attrs["b"] = 2.0 150 | f.attrs["c"] = 3.0 151 | out = list(f.attrs.items()) 152 | out.sort() 153 | assert out == [("a", 1.0), ("b", 2.0), ("c", 3.0)] 154 | 155 | 156 | def test_iter(setup_teardown_file): 157 | """Test that iteration over attributes works.""" 158 | f = setup_teardown_file[3] 159 | 160 | f.attrs["a"] = 1.0 161 | f.attrs["b"] = 2.0 162 | f.attrs["c"] = 3.0 163 | 164 | for i in f.attrs: 165 | assert i in ["a", "b", "c"] 166 | 167 | 168 | # TODO uncomment as soon as __del__ is implemented 169 | # Deletion of attributes using __delitem__ 170 | # def test_delete(setup_teardown_file): 171 | # """Deletion via "del".""" 172 | # f = setup_teardown_file[3] 173 | # 174 | # f.attrs["a"] = 4.0 175 | # assert "a" in f.attrs 176 | # del f.attrs["a"] 177 | # assert "a" not in f.attrs 178 | # 179 | # 180 | # def test_delete_exc(setup_teardown_file): 181 | # """Attempt to delete missing item raises KeyError.""" 182 | # f = setup_teardown_file[3] 183 | # with pytest.raises(KeyError): 184 | # del f.attrs["a"] 185 | 186 | 187 | 188 | 189 | # Attributes can be accessed via Unicode or byte strings 190 | 191 | def test_ascii(setup_teardown_file): 192 | """Access via pure-ASCII byte string.""" 193 | f = setup_teardown_file[3] 194 | 195 | f.attrs[b"ascii"] = 42 196 | out = f.attrs[b"ascii"] 197 | assert out == 42 198 | 199 | # TODO verify that we don't want to support non-ASCII byte strings 200 | def test_raw(setup_teardown_file): 201 | """Access via non-ASCII byte string.""" 202 | f = setup_teardown_file[3] 203 | 204 | name = b"non-ascii\xfe" 205 | f.attrs[name] = 42 206 | out = f.attrs[name] 207 | assert out == 42 208 | 209 | 210 | def test_unicode(setup_teardown_file): 211 | """Access via Unicode string with non-ASCII characters.""" 212 | f = setup_teardown_file[3] 213 | 214 | name = "Omega" + chr(0x03A9) 215 | f.attrs[name] = 42 216 | out = f.attrs[name] 217 | assert out == 42 218 | 219 | 220 | 221 | def test_validity(): 222 | """ 223 | Test that the required functions are implemented.
224 | """ 225 | Attribute.__len__ 226 | Attribute.__getitem__ 227 | Attribute.__setitem__ 228 | Attribute.__iter__ 229 | # TODO uncomment as soon as __del__ is implemented 230 | # Attribute.__delitem__ 231 | 232 | 233 | # All supported types can be stored in attributes 234 | 235 | 236 | def test_string_scalar(setup_teardown_file): 237 | """Storage of variable-length byte string scalars (auto-creation).""" 238 | f = setup_teardown_file[3] 239 | 240 | f.attrs["x"] = b"Hello" 241 | out = f.attrs["x"] 242 | 243 | assert out == b"Hello" 244 | assert type(out) == bytes 245 | 246 | 247 | 248 | def test_unicode_scalar(setup_teardown_file): 249 | """Storage of variable-length unicode strings (auto-creation).""" 250 | f = setup_teardown_file[3] 251 | 252 | f.attrs["x"] = "Hello" + chr(0x2340) + "!!" 253 | out = f.attrs["x"] 254 | assert isinstance(out, str) 255 | assert out == "Hello" + chr(0x2340) + "!!" 256 | 257 | 258 | def test_attrs(setup_teardown_file): 259 | f = setup_teardown_file[3] 260 | 261 | f.attrs["temperature"] = 99.0 262 | assert f.attrs["temperature"] == 99.0 263 | 264 | attrs = f.attrs 265 | assert type(attrs) is Attribute 266 | 267 | attrs["test"] = { 268 | "name": "temp", 269 | "value": 19 270 | } 271 | assert "test" in f.attrs 272 | assert type(f.attrs["test"]) is Attribute 273 | assert dict(f.attrs["test"]) == {"name": "temp", "value": 19} 274 | 275 | 276 | 277 | 278 | # TODO uncomment and use these tests if we allows for all attribute information 279 | # to be saved 280 | # # Feature: Scalar types map correctly to array scalars 281 | 282 | # def test_int(setup_teardown_file): 283 | # """Integers are read as correct NumPy type.""" 284 | # f = setup_teardown_file[3] 285 | 286 | # f.attrs['x'] = np.array(1, dtype=np.int8) 287 | # out = f.attrs['x'] 288 | # print (out) 289 | # assert isinstance(out, np.int8) 290 | 291 | # def test_compound(setup_teardown_file): 292 | # """Compound scalars are read as numpy.void.""" 293 | # f = setup_teardown_file[3] 294 | 295 | # dt = np.dtype([('a', 'i'), ('b', 'f')]) 296 | # data = np.array((1, 4.2), dtype=dt) 297 | # f.attrs['x'] = data 298 | # out = f.attrs['x'] 299 | # assert isinstance(out, np.void) 300 | # assert out == data 301 | # assert out['b'] == data['b'] 302 | 303 | # # Feature: Non-scalar types are correctly retrieved as ndarrays 304 | 305 | # def test_single_array(setup_teardown_file): 306 | # """Single-element arrays are correctly recovered.""" 307 | # f = setup_teardown_file[3] 308 | 309 | # data = np.ndarray((1,), dtype='f') 310 | # f.attrs['x'] = data 311 | # out = f.attrs['x'] 312 | # assert isinstance(out, np.ndarray) 313 | # assert out.shape == (1,) 314 | 315 | # def test_multi_array(setup_teardown_file): 316 | # """Rank-1 arrays are correctly recovered.""" 317 | # f = setup_teardown_file[3] 318 | 319 | # data = np.ndarray((42,), dtype='f') 320 | # data[:] = 42.0 321 | # data[10:35] = -47.0 322 | # f.attrs['x'] = data 323 | # out = f.attrs['x'] 324 | # assert isinstance(out, np.ndarray) 325 | # assert out.shape == (42,) 326 | # assert np.array_equal(out, data) 327 | 328 | 329 | # # Feature: All supported types can be stored in attributes 330 | 331 | # def test_int_all(setup_teardown_file): 332 | # """Storage of integer types.""" 333 | # f = setup_teardown_file[3] 334 | 335 | # dtypes = (np.int8, np.int16, np.int32, np.int64, 336 | # np.uint8, np.uint16, np.uint32, np.uint64) 337 | # for dt in dtypes: 338 | # data = np.ndarray((1,), dtype=dt) 339 | # data[...] 
= 42 340 | # f.attrs['x'] = data 341 | # out = f.attrs['x'] 342 | # assert out.dtype == dt 343 | # assert np.array_equal(out, data) 344 | 345 | # def test_float(setup_teardown_file): 346 | # """Storage of floating point types.""" 347 | # f = setup_teardown_file[3] 348 | 349 | # dtypes = tuple(np.dtype(x) for x in ('f4', 'f8')) 350 | 351 | # for dt in dtypes: 352 | # data = np.ndarray((1,), dtype=dt) 353 | # data[...] = 42.3 354 | # f.attrs['x'] = data 355 | # out = f.attrs['x'] 356 | # assert out.dtype == dt 357 | # assert np.array_equal(out, data) 358 | 359 | # def test_complex(setup_teardown_file): 360 | # """Storage of complex types.""" 361 | # f = setup_teardown_file[3] 362 | 363 | # dtypes = tuple(np.dtype(x) for x in ('c8', 'c16')) 364 | 365 | # for dt in dtypes: 366 | # data = np.ndarray((1,), dtype=dt) 367 | # data[...] = -4.2j+35.9 368 | # f.attrs['x'] = data 369 | # out = f.attrs['x'] 370 | # assert out.dtype == dt 371 | # assert np.array_equal(out, data) 372 | 373 | # def test_string(setup_teardown_file): 374 | # """Storage of fixed-length strings.""" 375 | # f = setup_teardown_file[3] 376 | 377 | # dtypes = tuple(np.dtype(x) for x in ('|S1', '|S10')) 378 | 379 | # for dt in dtypes: 380 | # data = np.ndarray((1,), dtype=dt) 381 | # data[...] = 'h' 382 | # f.attrs['x'] = data 383 | # out = f.attrs['x'] 384 | # assert out.dtype == dt 385 | # assert out[0] == data[0] 386 | 387 | # def test_bool(setup_teardown_file): 388 | # """Storage of NumPy booleans.""" 389 | # f = setup_teardown_file[3] 390 | 391 | # data = np.ndarray((2,), dtype=np.bool_) 392 | # data[...] = True, False 393 | # f.attrs['x'] = data 394 | # out = f.attrs['x'] 395 | # assert out.dtype == data.dtype 396 | # assert out[0] == data[0] 397 | # assert out[1] == data[1] 398 | -------------------------------------------------------------------------------- /tests/test_file.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of Exdir, the Experimental Directory Structure. 3 | # 4 | # Copyright 2017 Simen Tennøe 5 | # 6 | # License: MIT, see "LICENSE" file for the full license terms. 7 | # 8 | # This file contains code from h5py, a Python interface to the HDF5 library, 9 | # licensed under a standard 3-clause BSD license 10 | # with copyright Andrew Collette and contributors. 11 | # See http://www.h5py.org and the "3rdparty/h5py-LICENSE" file for details. 
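# A minimal usage sketch of the File open modes exercised by the tests below;
# the path "example.exdir" and the attribute values are illustrative only and
# are not used anywhere in the suite:
#
#     import exdir
#     f = exdir.File("example.exdir", "w")   # "w" creates, replacing any existing file
#     f.attrs["description"] = "created in w mode"
#     f.close()
#     f = exdir.File("example.exdir", "r")   # "r" and "r+" require an existing file
#     f.close()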
12 | 13 | 14 | import pytest 15 | import pathlib 16 | 17 | from exdir.core import File, Group 18 | from exdir.core.exdir_object import _create_object_directory, is_nonraw_object_directory, DATASET_TYPENAME, FILE_TYPENAME 19 | import exdir.core.exdir_object as exob 20 | from exdir import validation as fv 21 | 22 | import numpy as np 23 | 24 | from conftest import remove 25 | 26 | 27 | def test_file_init(setup_teardown_folder): 28 | no_exdir = setup_teardown_folder[0] / "no_exdir" 29 | 30 | f = File(no_exdir, mode="w") 31 | f.close() 32 | assert is_nonraw_object_directory(no_exdir.with_suffix(".exdir")) 33 | remove(setup_teardown_folder[1]) 34 | 35 | f = File(setup_teardown_folder[1], mode="w") 36 | f.close() 37 | assert is_nonraw_object_directory(setup_teardown_folder[1]) 38 | remove(setup_teardown_folder[1]) 39 | 40 | f = File(setup_teardown_folder[1], mode="a") 41 | f.close() 42 | assert is_nonraw_object_directory(setup_teardown_folder[1]) 43 | remove(setup_teardown_folder[1]) 44 | 45 | f = File(setup_teardown_folder[1], mode="a") 46 | f.close() 47 | assert is_nonraw_object_directory(setup_teardown_folder[1]) 48 | remove(setup_teardown_folder[1]) 49 | 50 | setup_teardown_folder[1].mkdir(parents=True) 51 | with pytest.raises(RuntimeError): 52 | f = File(setup_teardown_folder[1], mode="w") 53 | 54 | remove(setup_teardown_folder[1]) 55 | 56 | _create_object_directory(pathlib.Path(setup_teardown_folder[1]), exob._default_metadata(DATASET_TYPENAME)) 57 | with pytest.raises(RuntimeError): 58 | f = File(setup_teardown_folder[1], mode="w") 59 | 60 | remove(setup_teardown_folder[1]) 61 | 62 | with pytest.raises(RuntimeError): 63 | f = File(setup_teardown_folder[1], mode="r") 64 | with pytest.raises(RuntimeError): 65 | f = File(setup_teardown_folder[1], mode="r+") 66 | 67 | _create_object_directory(pathlib.Path(setup_teardown_folder[1]), exob._default_metadata(FILE_TYPENAME)) 68 | 69 | with pytest.raises(RuntimeError): 70 | f = File(setup_teardown_folder[1], mode="w") 71 | 72 | remove(setup_teardown_folder[1]) 73 | 74 | _create_object_directory(pathlib.Path(setup_teardown_folder[1]), exob._default_metadata(FILE_TYPENAME)) 75 | f = File(setup_teardown_folder[1], mode="w", allow_remove=True) 76 | remove(setup_teardown_folder[1]) 77 | 78 | _create_object_directory(pathlib.Path(setup_teardown_folder[1]), exob._default_metadata(FILE_TYPENAME)) 79 | 80 | with pytest.raises(RuntimeError): 81 | f = File(setup_teardown_folder[1], mode="w-") 82 | 83 | with pytest.raises(RuntimeError): 84 | f = File(setup_teardown_folder[1], mode="x") 85 | 86 | with pytest.raises(ValueError): 87 | f = File(setup_teardown_folder[1], mode="not existing") 88 | 89 | 90 | def test_create(setup_teardown_folder): 91 | """Mode 'w' opens file in overwrite mode.""" 92 | f = File(setup_teardown_folder[1], 'w') 93 | assert isinstance(f, File) 94 | f.create_group('foo') 95 | f.close() 96 | 97 | f = File(setup_teardown_folder[1], 'w', allow_remove=True) 98 | assert 'foo' not in f 99 | f.close() 100 | with pytest.raises(RuntimeError): 101 | f = File(setup_teardown_folder[1], 'w') 102 | 103 | 104 | def test_create_exclusive(setup_teardown_folder): 105 | """Mode 'w-' opens file in exclusive mode.""" 106 | 107 | f = File(setup_teardown_folder[1], 'w-') 108 | assert isinstance(f, File) 109 | f.close() 110 | with pytest.raises(RuntimeError): 111 | File(setup_teardown_folder[1], 'w-') 112 | 113 | 114 | def test_append(setup_teardown_folder): 115 | """Mode 'a' opens file in append/readwrite mode, creating if necessary.""" 116 | 117 | f = 
File(setup_teardown_folder[1], 'a') 118 | assert isinstance(f, File) 119 | f.create_group('foo') 120 | assert 'foo' in f 121 | 122 | f = File(setup_teardown_folder[1], 'a') 123 | assert 'foo' in f 124 | f.create_group('bar') 125 | assert 'bar' in f 126 | 127 | 128 | def test_readonly(setup_teardown_folder): 129 | """Mode 'r' opens file in readonly mode.""" 130 | 131 | f = File(setup_teardown_folder[1], 'w') 132 | f.close() 133 | assert not f 134 | f = File(setup_teardown_folder[1], 'r') 135 | assert isinstance(f, File) 136 | with pytest.raises(IOError): 137 | f.create_group('foo') 138 | f.create_dataset("bar", (2)) 139 | f.close() 140 | 141 | 142 | def test_readwrite(setup_teardown_folder): 143 | """Mode 'r+' opens existing file in readwrite mode.""" 144 | 145 | f = File(setup_teardown_folder[1], 'w') 146 | f.create_group('foo') 147 | f.close() 148 | f = File(setup_teardown_folder[1], 'r+') 149 | assert 'foo' in f 150 | f.create_group('bar') 151 | assert 'bar' in f 152 | f.close() 153 | 154 | 155 | def test_nonexistent_file(setup_teardown_folder): 156 | """Modes 'r' and 'r+' do not create files.""" 157 | 158 | with pytest.raises(RuntimeError): 159 | File(setup_teardown_folder[1], 'r') 160 | with pytest.raises(RuntimeError): 161 | File(setup_teardown_folder[1], 'r+') 162 | 163 | 164 | def test_invalid_mode(setup_teardown_folder): 165 | """Invalid modes raise ValueError.""" 166 | with pytest.raises(ValueError): 167 | File(setup_teardown_folder[1], 'Error mode') 168 | 169 | 170 | def test_file_close(setup_teardown_folder): 171 | """Closing a file.""" 172 | f = File(setup_teardown_folder[1], mode="w") 173 | f.close() 174 | 175 | 176 | def test_validate_name_thorough(setup_teardown_folder): 177 | """Test naming rule thorough.""" 178 | f = File(setup_teardown_folder[0] / "test.exdir", name_validation=fv.thorough) 179 | f.close() 180 | 181 | with pytest.raises(RuntimeError): 182 | File(setup_teardown_folder[0] / "Test.exdir", name_validation=fv.thorough) 183 | with pytest.raises(NameError): 184 | File(setup_teardown_folder[0] / "tes#.exdir", name_validation=fv.thorough) 185 | 186 | 187 | def test_validate_name_strict(setup_teardown_folder): 188 | """Test naming rule strict.""" 189 | f = File(setup_teardown_folder[1], name_validation=fv.strict) 190 | f.close() 191 | 192 | with pytest.raises(NameError): 193 | File(setup_teardown_folder[1].with_suffix(".exdirA"), name_validation=fv.strict) 194 | 195 | 196 | def test_validate_name_error(setup_teardown_folder): 197 | """Test naming rule with error.""" 198 | 199 | with pytest.raises(ValueError): 200 | File(setup_teardown_folder[1], name_validation='Error rule') 201 | 202 | 203 | def test_validate_name_none(setup_teardown_folder): 204 | """Test naming rule none.""" 205 | 206 | File(setup_teardown_folder[1].with_name("test&().exdir"), name_validation=fv.none) 207 | 208 | 209 | def test_opening_with_different_validate_name(setup_teardown_folder): 210 | """Test opening with wrong naming rule.""" 211 | 212 | f = File(setup_teardown_folder[1], "w", name_validation=fv.none) 213 | f.create_group("AAA") 214 | f.close() 215 | 216 | # TODO changing name validation should result in warning/error 217 | f = File(setup_teardown_folder[1], "a", name_validation=fv.thorough) 218 | with pytest.raises(RuntimeError): 219 | f.create_group("aaa") 220 | f.close() 221 | 222 | 223 | def test_contains(setup_teardown_file): 224 | """Root group (by itself) is contained.""" 225 | f = setup_teardown_file[3] 226 | f.create_group("test") 227 | 228 | assert "/" in f 229 | assert
"/test" in f 230 | 231 | 232 | def test_create_group(setup_teardown_file): 233 | """Root group (by itself) is contained.""" 234 | f = setup_teardown_file[3] 235 | grp = f.create_group("/test") 236 | 237 | assert isinstance(grp, Group) 238 | 239 | 240 | def test_require_group(setup_teardown_file): 241 | """Root group (by itself) is contained.""" 242 | f = setup_teardown_file[3] 243 | 244 | grp = f.require_group("/foo") 245 | assert isinstance(grp, Group) 246 | 247 | 248 | def test_open(setup_teardown_file): 249 | """thorough obj[name] opening.""" 250 | f = setup_teardown_file[3] 251 | grp = f.create_group("foo") 252 | 253 | grp2 = f["foo"] 254 | grp3 = f["/foo"] 255 | f = f["/"] 256 | 257 | assert grp == grp2 258 | assert grp2 == grp3 259 | assert f == f 260 | 261 | 262 | def test_open_mode(setup_teardown_folder): 263 | # must exist 264 | for mode in ["r+", "r"]: 265 | with pytest.raises(RuntimeError): 266 | f = File(setup_teardown_folder[1], mode) 267 | # create if not exist 268 | for mode in ["a", "w", "w-"]: 269 | remove(setup_teardown_folder[1]) 270 | f = File(setup_teardown_folder[1], mode) 271 | f.require_dataset('dset', np.arange(10)) 272 | f.attrs['can_overwrite'] = 42 273 | f.attrs['can_overwrite'] = 14 274 | f.require_group('mygroup') 275 | 276 | remove(setup_teardown_folder[1]) 277 | f = File(setup_teardown_folder[1], 'w') 278 | f.close() # dummy close 279 | # read write if exist 280 | f = File(setup_teardown_folder[1], "r+") 281 | f.require_group('mygroup') 282 | f.require_dataset('dset', np.arange(10)) 283 | f.attrs['can_overwrite'] = 42 284 | f.attrs['can_overwrite'] = 14 285 | 286 | # read only, can not write 287 | f = File(setup_teardown_folder[1], 'r') 288 | with pytest.raises(IOError): 289 | f.require_dataset('dset', np.arange(10)) 290 | f.attrs['can_not_write'] = 42 291 | f.create_group('mygroup') 292 | 293 | 294 | def test_open_two_attrs(setup_teardown_file): 295 | f = setup_teardown_file[3] 296 | 297 | f.attrs['can_overwrite'] = 42 298 | f.attrs['another_attribute'] = 14 299 | 300 | 301 | def test_exc(setup_teardown_file): 302 | """'in' on closed group returns False.""" 303 | f = setup_teardown_file[3] 304 | 305 | f.create_group("a") 306 | f.close() 307 | 308 | assert not f 309 | assert "a" not in f 310 | 311 | 312 | def test_close_group(setup_teardown_file): 313 | """'closed file is unable to handle.""" 314 | f = setup_teardown_file[3] 315 | 316 | grp = f.create_group("group") 317 | 318 | f.close() 319 | assert not f 320 | assert "group" not in f 321 | assert 'dataset' not in f 322 | 323 | # unable to create new stuff 324 | mtch = "Unable to operate on closed File instance." 325 | with pytest.raises(IOError, match=mtch): 326 | f.create_group("group") 327 | with pytest.raises(IOError, match=mtch): 328 | grp.create_group("group") 329 | with pytest.raises(IOError, match=mtch): 330 | grp.attrs = {'group': 'attrs'} 331 | 332 | 333 | def test_close_attrs(setup_teardown_file): 334 | """'closed file is unable to handle.""" 335 | f = setup_teardown_file[3] 336 | 337 | grp = f.create_group("group") 338 | dset = f.create_dataset('dataset', data=np.array([1,2,3])) 339 | raw = f.create_raw('raw') 340 | f.attrs = {'file': 'attrs'} 341 | file_attrs = f.attrs 342 | f.close() 343 | 344 | mtch = "Unable to operate on closed File instance." 
345 | with pytest.raises(IOError, match=mtch): 346 | f.attrs = {'file': 'attrs'} 347 | with pytest.raises(IOError, match=mtch): 348 | file_attrs['new'] = 'yo' 349 | 350 | # unable to retrieve stuff 351 | with pytest.raises(IOError, match=mtch): 352 | file_attrs['file'] 353 | with pytest.raises(IOError, match=mtch): 354 | f.attrs 355 | assert 'file' not in file_attrs 356 | 357 | 358 | def test_close_raw(setup_teardown_file): 359 | """Closed file cannot be operated on.""" 360 | f = setup_teardown_file[3] 361 | 362 | raw = f.create_raw('raw') 363 | f.close() 364 | 365 | assert "raw" not in f 366 | 367 | # unable to create new stuff 368 | mtch = "Unable to operate on closed File instance." 369 | with pytest.raises(IOError, match=mtch): 370 | f.create_raw('raw') 371 | 372 | # unable to retrieve 373 | with pytest.raises(IOError, match=mtch): 374 | f['raw'] 375 | 376 | 377 | def test_close_dataset(setup_teardown_file): 378 | """Closed file cannot be operated on.""" 379 | f = setup_teardown_file[3] 380 | 381 | grp = f.create_group("group") 382 | dset = f.create_dataset('dataset', data=np.array([1,2,3])) 383 | dset.attrs = {'dataset': 'attrs'} 384 | dset_attrs = dset.attrs 385 | data = dset.data 386 | f.close() 387 | 388 | assert 'dataset' not in f 389 | 390 | # unable to create new stuff 391 | mtch = "Unable to operate on closed File instance." 392 | 393 | with pytest.raises(IOError, match=mtch): 394 | f.create_dataset('dataset', data=np.array([1,2,3])) 395 | with pytest.raises(IOError, match=mtch): 396 | grp.create_dataset('dataset', data=np.array([1,2,3])) 397 | with pytest.raises(IOError, match=mtch): 398 | dset.attrs = {'dataset': 'attrs'} 399 | with pytest.raises(IOError, match=mtch): 400 | dset_attrs['new'] = 'yo' 401 | 402 | # unable to retrieve stuff 403 | with pytest.raises(IOError, match=mtch): 404 | dset.data 405 | with pytest.raises(IOError, match=mtch): 406 | dset.shape 407 | with pytest.raises(IOError, match=mtch): 408 | dset.dtype 409 | with pytest.raises(IOError, match=mtch): 410 | dset.attrs 411 | 412 | assert 'dataset' not in dset_attrs 413 | 414 | # TODO unable to close datasets: uncomment when done 415 | # assert 1 not in data 416 | # data[:] = np.array([3,2,1]) # TODO should give error 417 | # f.io_mode = 1 418 | # assert np.array_equal(dset.data, np.array([1,2,3])) 419 | 420 | 421 | # Feature: File objects can be used as context managers 422 | def test_context_manager(setup_teardown_folder): 423 | """File objects can be used in with statements.""" 424 | 425 | no_exdir = setup_teardown_folder[2] 426 | 427 | with File(no_exdir, mode="w") as f: 428 | assert isinstance(f, File) 429 | 430 | assert not f 431 | -------------------------------------------------------------------------------- /tests/test_group.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of Exdir, the Experimental Directory Structure. 3 | # 4 | # Copyright 2017 Simen Tennøe, Svenn-Arne Dragly 5 | # 6 | # License: MIT, see "LICENSE" file for the full license terms. 7 | # 8 | # This file contains code from h5py, a Python interface to the HDF5 library, 9 | # licensed under a standard 3-clause BSD license 10 | # with copyright Andrew Collette and contributors. 11 | # See http://www.h5py.org and the "3rdparty/h5py-LICENSE" file for details.
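# A minimal sketch of the group API covered by the tests below; the group
# names and the "example.exdir" path are illustrative only:
#
#     import numpy as np
#     import exdir
#     f = exdir.File("example.exdir", "w")
#     session = f.create_group("session")       # raises RuntimeError if it already exists
#     trials = session.require_group("trials")  # creates the group, or returns the existing one
#     f["session/trials/data"] = np.array([1, 2, 3])
#     f.close()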
12 | 13 | 14 | import os 15 | import pytest 16 | import pathlib 17 | import numpy as np 18 | from collections.abc import KeysView, ValuesView, ItemsView 19 | 20 | 21 | 22 | 23 | from exdir.core import Group, File, Dataset 24 | from exdir import validation as fv 25 | from conftest import remove 26 | 27 | # tests for Group class 28 | def test_group_init(setup_teardown_folder): 29 | group = Group(setup_teardown_folder[2], pathlib.PurePosixPath(""), "test_object", file=None) 30 | 31 | assert group.root_directory == setup_teardown_folder[2] 32 | assert group.object_name == "test_object" 33 | assert group.parent_path == pathlib.PurePosixPath("") 34 | assert group.file is None 35 | assert group.relative_path == pathlib.PurePosixPath("test_object") 36 | assert group.name == "/test_object" 37 | 38 | 39 | # New groups can be created via .create_group method 40 | 41 | def test_create_group(setup_teardown_file): 42 | """Simple .create_group call.""" 43 | 44 | f = setup_teardown_file[3] 45 | grp = f.create_group("test") 46 | 47 | grp2 = grp.create_group("a") 48 | 49 | assert isinstance(grp2, Group) 50 | 51 | grp3 = grp.create_group("b/") 52 | assert isinstance(grp3, Group) 53 | 54 | 55 | def test_len(setup_teardown_file): 56 | """len() returns the number of group members.""" 57 | 58 | f = setup_teardown_file[3] 59 | grp = f.create_group("test") 60 | 61 | grp2 = grp.create_group("a") 62 | 63 | grp3 = grp.create_group("b") 64 | 65 | assert len(grp) == 2 66 | assert len(grp2) == 0 67 | assert len(grp3) == 0 68 | 69 | 70 | def test_get(setup_teardown_file): 71 | """get() returns a member if it exists, otherwise None.""" 72 | 73 | f = setup_teardown_file[3] 74 | grp = f.create_group("test") 75 | 76 | grp2 = grp.create_group("a") 77 | 78 | grp2_get = grp.get('a') 79 | 80 | grp3_get = grp.get('b') 81 | 82 | assert grp2 == grp2_get 83 | assert grp3_get is None 84 | 85 | 86 | def test_create_group_absolute(setup_teardown_file): 87 | """Starting .create_group argument with /.""" 88 | 89 | f = setup_teardown_file[3] 90 | grp = f.create_group("/a") 91 | 92 | with pytest.raises(NotImplementedError): 93 | grp.create_group("/b") 94 | 95 | 96 | def test_create_existing_twice(exdir_tmpfile): 97 | exdir_tmpfile.create_group("test") 98 | with pytest.raises(RuntimeError): 99 | exdir_tmpfile.create_group("test") 100 | 101 | 102 | def test_create_intermediate(setup_teardown_file): 103 | """Intermediate groups can be created automatically.""" 104 | f = setup_teardown_file[3] 105 | grp = f.create_group("test") 106 | 107 | grp2 = grp.create_group("foo/bar/baz") 108 | 109 | assert isinstance(grp["foo/bar/baz"], Group) 110 | assert isinstance(grp2, Group) 111 | 112 | assert grp2.name == "/test/foo/bar/baz" 113 | assert "foo" in grp 114 | assert "bar" in grp.require_group("foo") 115 | assert "baz" in grp.require_group("foo").require_group("bar") 116 | assert grp.require_group("foo").require_group("bar").require_group("baz") == grp2 117 | 118 | 119 | def test_create_exception(setup_teardown_file): 120 | """Name conflict causes group creation to fail with RuntimeError.""" 121 | f = setup_teardown_file[3] 122 | grp = f.create_group("test") 123 | 124 | grp.create_group("foo") 125 | 126 | with pytest.raises(RuntimeError): 127 | grp.create_group("foo") 128 | grp.create_group("foo/") 129 | 130 | 131 | # Feature: Groups can be auto-created, or opened via .require_group 132 | def test_open_existing(setup_teardown_file): 133 | """Existing group is opened and returned.""" 134 | f = setup_teardown_file[3] 135 | grp
= f.create_group("test") 136 | 137 | grp2 = grp.create_group("foo") 138 | grp3 = grp.require_group("foo") 139 | grp4 = grp.require_group("foo/") 140 | 141 | assert grp2 == grp3 142 | assert grp2.name == grp4.name 143 | assert grp2 == grp4 144 | 145 | 146 | def test_create(setup_teardown_file): 147 | """Group is created if it doesn"t exist.""" 148 | f = setup_teardown_file[3] 149 | grp = f.create_group("test") 150 | 151 | grp2 = grp.require_group("foo") 152 | assert isinstance(grp2, Group) 153 | assert grp2.name == "/test/foo" 154 | 155 | 156 | def test_require_exception(setup_teardown_file): 157 | """Opening conflicting object results in TypeError.""" 158 | f = setup_teardown_file[3] 159 | grp = f.create_group("test") 160 | 161 | grp.create_dataset("foo", (1,)) 162 | 163 | with pytest.raises(TypeError): 164 | grp.require_group("foo") 165 | 166 | 167 | def test_set_item_intermediate(exdir_tmpfile): 168 | group1 = exdir_tmpfile.create_group("group1") 169 | group2 = group1.create_group("group2") 170 | group3 = group2.create_group("group3") 171 | exdir_tmpfile["group1/group2/group3/dataset"] = np.array([1, 2, 3]) 172 | 173 | assert np.array_equal(exdir_tmpfile["group1/group2/group3/dataset"].data, np.array([1, 2, 3])) 174 | 175 | 176 | # Feature: Objects can be unlinked via "del" operator 177 | def test_delete_group(setup_teardown_file): 178 | """Object deletion via "del".""" 179 | 180 | f = setup_teardown_file[3] 181 | grp = f.create_group("test") 182 | grp.create_group("foo") 183 | 184 | assert "foo" in grp 185 | del grp["foo"] 186 | assert "foo" not in grp 187 | 188 | 189 | def test_delete_group_from_file(setup_teardown_file): 190 | """Object deletion via "del".""" 191 | 192 | f = setup_teardown_file[3] 193 | grp = f.create_group("test") 194 | 195 | assert "test" in f 196 | del f["test"] 197 | assert "test" not in f 198 | 199 | 200 | def test_delete_raw(setup_teardown_file): 201 | """Object deletion via "del".""" 202 | 203 | f = setup_teardown_file[3] 204 | grp = f.create_group("test") 205 | grp.create_raw("foo") 206 | 207 | assert "foo" in grp 208 | del grp["foo"] 209 | assert "foo" not in grp 210 | 211 | 212 | def test_nonexisting(setup_teardown_file): 213 | """Deleting non-existent object raises KeyError.""" 214 | f = setup_teardown_file[3] 215 | grp = f.create_group("test") 216 | mtch = "No such object: 'foo' in path *" 217 | with pytest.raises(KeyError, match=mtch): 218 | del grp["foo"] 219 | 220 | 221 | def test_readonly_delete_exception(setup_teardown_file): 222 | """Deleting object in readonly file raises KeyError.""" 223 | f = setup_teardown_file[3] 224 | f.close() 225 | 226 | f = File(setup_teardown_file[1], "r") 227 | mtch = "Cannot change data on file in read only 'r' mode" 228 | with pytest.raises(IOError, match=mtch): 229 | del f["foo"] 230 | 231 | 232 | def test_delete_dataset(setup_teardown_file): 233 | """Create new dataset with no conflicts.""" 234 | f = setup_teardown_file[3] 235 | grp = f.create_group("test") 236 | 237 | foo = grp.create_dataset('foo', (10, 3), 'f') 238 | assert isinstance(grp['foo'], Dataset) 239 | assert foo.shape == (10, 3) 240 | bar = grp.require_dataset('bar', data=(3, 10)) 241 | del foo 242 | assert 'foo' in grp 243 | del grp['foo'] 244 | mtch = "No such object: 'foo' in path *" 245 | with pytest.raises(KeyError, match=mtch): 246 | grp['foo'] 247 | # the "bar" dataset is intact 248 | assert isinstance(grp['bar'], Dataset) 249 | assert np.all(bar[:] == (3, 10)) 250 | # even though the dataset is deleted on file, the memmap stays open until 251 | # 
garbage collected 252 | del grp['bar'] 253 | assert bar.shape == (2,) 254 | assert np.all(bar[:] == (3, 10)) 255 | with pytest.raises(KeyError): 256 | grp['bar'] 257 | 258 | # Feature: Objects can be opened via indexing syntax obj[name] 259 | 260 | def test_open(setup_teardown_file): 261 | """Simple obj[name] opening.""" 262 | f = setup_teardown_file[3] 263 | grp = f.create_group("test") 264 | grp2 = grp.create_group("foo") 265 | 266 | grp3 = grp["foo"] 267 | grp4 = grp["foo/"] 268 | 269 | assert grp2 == grp3 270 | assert grp2.name == grp4.name 271 | assert grp2 == grp4 272 | 273 | with pytest.raises(NotImplementedError): 274 | grp["/test"] 275 | 276 | 277 | def test_open_deep(setup_teardown_file): 278 | """Nested obj[name] opening.""" 279 | f = setup_teardown_file[3] 280 | grp = f.create_group("test") 281 | grp2 = grp.create_group("a") 282 | grp3 = grp2.create_group("b") 283 | 284 | grp4 = grp["a/b"] 285 | 286 | assert grp3 == grp4 287 | 288 | 289 | def test_nonexistent(setup_teardown_file): 290 | """Opening missing objects raises KeyError.""" 291 | f = setup_teardown_file[3] 292 | mtch = "No such object: 'foo' in path *" 293 | with pytest.raises(KeyError, match=mtch): 294 | f["foo"] 295 | 296 | 297 | # Feature: The Python "in" builtin tests for containership 298 | def test_contains(setup_teardown_file): 299 | """'in' builtin works for containership.""" 300 | f = setup_teardown_file[3] 301 | grp = f.create_group("test") 302 | 303 | grp.create_group("b") 304 | 305 | assert "b" in grp 306 | assert "c" not in grp 307 | 308 | with pytest.raises(NotImplementedError): 309 | assert "/b" in grp 310 | 311 | 312 | def test_contains_deep(setup_teardown_file): 313 | """'in' builtin works for containership.""" 314 | f = setup_teardown_file[3] 315 | grp = f.create_group("test") 316 | 317 | grp2 = grp.create_group("a") 318 | grp3 = grp2.create_group("b") 319 | 320 | assert "a/b" in grp 321 | 322 | 323 | def test_empty(setup_teardown_file): 324 | """Empty strings work properly and aren't contained.""" 325 | f = setup_teardown_file[3] 326 | grp = f.create_group("test") 327 | 328 | assert "" not in grp 329 | 330 | def test_dot(setup_teardown_file): 331 | """Current group "." is always contained.""" 332 | f = setup_teardown_file[3] 333 | 334 | assert "." in f 335 | 336 | def test_root(setup_teardown_file): 337 | """Asking whether the root group "/" is contained raises NotImplementedError.""" 338 | f = setup_teardown_file[3] 339 | grp = f.create_group("test") 340 | 341 | with pytest.raises(NotImplementedError): 342 | assert "/" in grp 343 | 344 | def test_trailing_slash(setup_teardown_file): 345 | """Trailing slashes are unconditionally ignored.""" 346 | f = setup_teardown_file[3] 347 | grp = f.create_group("test") 348 | 349 | grp.create_group("a") 350 | assert "a/" in grp 351 | assert "a//" in grp 352 | assert "a////" in grp 353 | 354 | # Feature: Standard Python 3 .keys, .values, etc.
methods are available 355 | def test_keys(setup_teardown_file): 356 | """.keys provides a key view.""" 357 | f = setup_teardown_file[3] 358 | grp = f.create_group("test") 359 | 360 | grp.create_group("a") 361 | grp.create_group("b") 362 | grp.create_group("c") 363 | grp.create_group("d") 364 | 365 | assert isinstance(grp.keys(), KeysView) 366 | assert sorted(list(grp.keys())) == ["a", "b", "c", "d"] 367 | 368 | def test_values(setup_teardown_file): 369 | """.values provides a value view.""" 370 | f = setup_teardown_file[3] 371 | grp = f.create_group("test") 372 | 373 | grpa = grp.create_group("a") 374 | grpb = grp.create_group("b") 375 | grpc = grp.create_group("c") 376 | grpd = grp.create_group("d") 377 | 378 | assert isinstance(grp.values(), ValuesView) 379 | assert list(grp.values()) == [grpa, grpb, grpc, grpd] 380 | 381 | def test_items(setup_teardown_file): 382 | """.items provides an item view.""" 383 | f = setup_teardown_file[3] 384 | grp = f.create_group("test") 385 | 386 | grpa = grp.create_group("a") 387 | grpb = grp.create_group("b") 388 | grpc = grp.create_group("c") 389 | grpd = grp.create_group("d") 390 | 391 | groups = [grpa, grpb, grpc, grpd] 392 | names = ["a", "b", "c", "d"] 393 | 394 | assert isinstance(grp.items(), ItemsView) 395 | 396 | for i, (key, value) in enumerate(grp.items()): 397 | assert key == names[i] 398 | assert value == groups[i] 399 | 400 | 401 | 402 | 403 | # Feature: You can iterate over group members via "for x in y", etc. 404 | 405 | def test_iter(setup_teardown_file): 406 | """'for x in y' iteration.""" 407 | f = setup_teardown_file[3] 408 | grp = f.create_group("test") 409 | 410 | grp.create_group("a") 411 | grp.create_group("b") 412 | grp.create_group("c") 413 | grp.create_group("d") 414 | 415 | lst = [x for x in grp] 416 | assert lst == ["a", "b", "c", "d"] 417 | 418 | def test_iter_zero(setup_teardown_file): 419 | """Iteration works properly for the case with no group members.""" 420 | f = setup_teardown_file[3] 421 | grp = f.create_group("test") 422 | 423 | lst = [x for x in grp] 424 | assert lst == [] 425 | 426 | 427 | # Feature: Equal 428 | 429 | def test_eq(setup_teardown_file): 430 | """Test equal.""" 431 | f = setup_teardown_file[3] 432 | grp = f.create_group("test") 433 | 434 | grp2 = grp.create_group("a") 435 | 436 | assert grp2 == grp2 437 | assert grp != grp2 438 | 439 | 440 | # Feature: Parent 441 | def test_eq_parent(setup_teardown_file): 442 | """Test parent.""" 443 | f = setup_teardown_file[3] 444 | grp = f.create_group("test") 445 | 446 | grp2 = grp.create_group("a") 447 | 448 | grp_parent = grp2.parent 449 | 450 | assert grp == grp_parent 451 | 452 | 453 | # Feature: Test different naming rules 454 | def test_validate_name_simple(setup_teardown_folder): 455 | """Test naming rule thorough.""" 456 | f = File(setup_teardown_folder[1], name_validation=fv.thorough) 457 | grp = f.create_group("test") 458 | 459 | grp.create_group("abcdefghijklmnopqrstuvwxyz1234567890_-") 460 | 461 | with pytest.raises(NameError): 462 | grp.create_group("()") 463 | 464 | f.close() 465 | remove(setup_teardown_folder[1]) 466 | 467 | f = File(setup_teardown_folder[1], name_validation=fv.thorough) 468 | grp = f.create_group("test") 469 | grp.create_group("aa") 470 | 471 | with pytest.raises(RuntimeError): 472 | grp.create_group("AA") 473 | 474 | 475 | def test_validate_name_strict(setup_teardown_folder): 476 | """Test naming rule strict.""" 477 | f = File(setup_teardown_folder[1], name_validation=fv.strict) 478 |
f.create_group("abcdefghijklmnopqrstuvwxyz1234567890_-") 479 | 480 | with pytest.raises(NameError): 481 | f.create_group("A") 482 | 483 | f.close() 484 | 485 | 486 | def test_validate_name_none(setup_teardown_folder): 487 | """Test naming rule none.""" 488 | f = File(setup_teardown_folder[1], name_validation=fv.none) 489 | f.create_group("abcdefghijklmnopqrstuvwxyz1234567890_-") 490 | f.create_group("ABNCUIY&z()(d()&") 491 | 492 | f.close() 493 | -------------------------------------------------------------------------------- /tests/test_help_functions.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import quantities as pq 3 | import numpy as np 4 | import pytest 5 | try: 6 | import ruamel_yaml as yaml 7 | except ImportError: 8 | import ruamel.yaml as yaml 9 | 10 | import exdir 11 | import exdir.core 12 | import exdir.core.exdir_object as exob 13 | from exdir import validation as fv 14 | 15 | from conftest import remove 16 | 17 | 18 | def test_assert_valid_name_minimal(setup_teardown_folder): 19 | f = exdir.File(setup_teardown_folder[1], name_validation=fv.minimal) 20 | exob._assert_valid_name("abcdefghijklmnopqrstuvwxyz1234567890_-", f) 21 | with pytest.raises(NameError): 22 | exob._assert_valid_name("", f) 23 | 24 | exob._assert_valid_name("A", f) 25 | 26 | with pytest.raises(NameError): 27 | exob._assert_valid_name("\n", f) 28 | 29 | exob._assert_valid_name(chr(0x4500), f) 30 | 31 | with pytest.raises(NameError): 32 | exob._assert_valid_name(exob.META_FILENAME, f) 33 | 34 | with pytest.raises(NameError): 35 | exob._assert_valid_name(exob.ATTRIBUTES_FILENAME, f) 36 | 37 | with pytest.raises(NameError): 38 | exob._assert_valid_name(exob.RAW_FOLDER_NAME, f) 39 | 40 | 41 | def test_assert_valid_name_thorough(setup_teardown_folder): 42 | f = exdir.File(setup_teardown_folder[1], name_validation=fv.thorough) 43 | exob._assert_valid_name("abcdefghijklmnopqrstuvwxyz1234567890_-", f) 44 | with pytest.raises(NameError): 45 | exob._assert_valid_name("", f) 46 | 47 | exob._assert_valid_name("A", f) 48 | 49 | with pytest.raises(NameError): 50 | exob._assert_valid_name("\n", f) 51 | 52 | with pytest.raises(NameError): 53 | exob._assert_valid_name(chr(0x4500), f) 54 | 55 | with pytest.raises(NameError): 56 | exob._assert_valid_name(exob.META_FILENAME, f) 57 | 58 | with pytest.raises(NameError): 59 | exob._assert_valid_name(exob.ATTRIBUTES_FILENAME, f) 60 | 61 | with pytest.raises(NameError): 62 | exob._assert_valid_name(exob.RAW_FOLDER_NAME, f) 63 | 64 | 65 | def test_assert_valid_name_none(setup_teardown_folder): 66 | f = exdir.File(setup_teardown_folder[1], name_validation=fv.none) 67 | valid_name = ("abcdefghijklmnopqrstuvwxyz1234567890_-") 68 | 69 | exob._assert_valid_name(valid_name, f) 70 | 71 | invalid_name = " " 72 | exob._assert_valid_name(invalid_name, f) 73 | 74 | invalid_name = "A" 75 | exob._assert_valid_name(invalid_name, f) 76 | 77 | invalid_name = "\n" 78 | exob._assert_valid_name(invalid_name, f) 79 | 80 | invalid_name = chr(0x4500) 81 | exob._assert_valid_name(invalid_name, f) 82 | 83 | exob._assert_valid_name(exob.META_FILENAME, f) 84 | 85 | exob._assert_valid_name(exob.ATTRIBUTES_FILENAME, f) 86 | 87 | exob._assert_valid_name(exob.RAW_FOLDER_NAME, f) 88 | 89 | 90 | def test_create_object_directory(setup_teardown_folder): 91 | with pytest.raises(ValueError): 92 | exob._create_object_directory(pathlib.Path(setup_teardown_folder[2]), exob._default_metadata("wrong_typename")) 93 | 94 |
exob._create_object_directory(pathlib.Path(setup_teardown_folder[2]), exob._default_metadata(exob.DATASET_TYPENAME)) 95 | 96 | assert setup_teardown_folder[2].is_dir() 97 | 98 | file_path = setup_teardown_folder[2] / exob.META_FILENAME 99 | assert file_path.is_file() 100 | 101 | compare_metadata = { 102 | exob.EXDIR_METANAME: { 103 | exob.TYPE_METANAME: exob.DATASET_TYPENAME, 104 | exob.VERSION_METANAME: 1} 105 | } 106 | 107 | with file_path.open("r", encoding="utf-8") as meta_file: 108 | metadata = yaml.YAML(typ="safe", pure=True).load(meta_file) 109 | 110 | assert metadata == compare_metadata 111 | 112 | with pytest.raises(IOError): 113 | exob._create_object_directory(pathlib.Path(setup_teardown_folder[2]), exob.DATASET_TYPENAME) 114 | 115 | 116 | def test_is_nonraw_object_directory(setup_teardown_folder): 117 | setup_teardown_folder[2].mkdir() 118 | 119 | result = exob.is_nonraw_object_directory(setup_teardown_folder[2]) 120 | assert result is False 121 | 122 | compare_metafile = setup_teardown_folder[2] / exob.META_FILENAME 123 | with compare_metafile.open("w", encoding="utf-8") as f: 124 | pass 125 | 126 | result = exob.is_nonraw_object_directory(setup_teardown_folder[2]) 127 | assert result is False 128 | 129 | remove(setup_teardown_folder[1]) 130 | with compare_metafile.open("w", encoding="utf-8") as meta_file: 131 | metadata = { 132 | exob.EXDIR_METANAME: { 133 | exob.VERSION_METANAME: 1} 134 | } 135 | yaml.YAML(typ="safe", pure=True).dump( 136 | metadata, 137 | meta_file) 138 | 139 | result = exob.is_nonraw_object_directory(setup_teardown_folder[2]) 140 | assert result is False 141 | 142 | remove(setup_teardown_folder[1]) 143 | with compare_metafile.open("w", encoding="utf-8") as meta_file: 144 | metadata = { 145 | exob.EXDIR_METANAME: { 146 | exob.TYPE_METANAME: "wrong_typename", 147 | exob.VERSION_METANAME: 1} 148 | } 149 | yaml.YAML(typ="safe", pure=True).dump( 150 | metadata, 151 | meta_file) 152 | 153 | result = exob.is_nonraw_object_directory(setup_teardown_folder[2]) 154 | assert result is False 155 | 156 | remove(setup_teardown_folder[1]) 157 | with compare_metafile.open("w", encoding="utf-8") as meta_file: 158 | metadata = { 159 | exob.EXDIR_METANAME: { 160 | exob.TYPE_METANAME: exob.DATASET_TYPENAME, 161 | exob.VERSION_METANAME: 1} 162 | } 163 | yaml.YAML(typ="safe", pure=True).dump( 164 | metadata, 165 | meta_file) 166 | 167 | result = exob.is_nonraw_object_directory(setup_teardown_folder[2]) 168 | assert result is True 169 | 170 | remove(setup_teardown_folder[2]) 171 | 172 | exob._create_object_directory(pathlib.Path(setup_teardown_folder[2]), exob._default_metadata(exob.DATASET_TYPENAME)) 173 | result = exob.is_nonraw_object_directory(setup_teardown_folder[2]) 174 | assert result is True 175 | 176 | 177 | def test_root_directory(setup_teardown_file): 178 | f = setup_teardown_file[3] 179 | grp = f.create_group("foo") 180 | grp.create_group("bar") 181 | 182 | path = setup_teardown_file[1] / "foo" / "bar" 183 | assert pathlib.Path(setup_teardown_file[1]) == exob.root_directory(path) 184 | 185 | 186 | def test_is_inside_exdir(setup_teardown_file): 187 | f = setup_teardown_file[3] 188 | 189 | grp = f.create_group("foo") 190 | grp.create_group("bar") 191 | 192 | path = setup_teardown_file[1] / "foo" / "bar" 193 | assert exob.is_inside_exdir(path) 194 | 195 | 196 | def test_assert_inside_exdir(setup_teardown_file): 197 | f = setup_teardown_file[3] 198 | 199 | grp = f.create_group("foo") 200 | grp.create_group("bar") 201 | 202 | 203 | path = setup_teardown_file[1] / "foo" / 
"bar" 204 | assert exob.assert_inside_exdir(path) is None 205 | 206 | 207 | def test_open_object(setup_teardown_file): 208 | f = setup_teardown_file[3] 209 | 210 | grp = f.create_group("foo") 211 | grp2 = grp.create_group("bar") 212 | 213 | path = setup_teardown_file[1] / "foo" / "bar" 214 | loaded_grp = exob.open_object(path) 215 | 216 | assert grp2 == loaded_grp 217 | -------------------------------------------------------------------------------- /tests/test_numpy_attributes.py: -------------------------------------------------------------------------------- 1 | import exdir 2 | 3 | import pytest 4 | import os 5 | import numpy as np 6 | import quantities as pq 7 | import exdir.plugins.quantities 8 | import exdir.plugins.numpy_attributes 9 | 10 | def test_simple(setup_teardown_folder): 11 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=[exdir.plugins.numpy_attributes]) 12 | f.attrs["array"] = np.array([1, 2, 3]) 13 | f.close() 14 | 15 | with open(str(setup_teardown_folder[1] / "attributes.yaml"), encoding="utf-8") as f: 16 | content = "array:\n- 1\n- 2\n- 3\n" 17 | assert content == f.read() 18 | 19 | 20 | 21 | def test_with_quantities(setup_teardown_folder): 22 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=[exdir.plugins.numpy_attributes, exdir.plugins.quantities]) 23 | f.attrs["array"] = np.array([1, 2, 3]) * pq.m 24 | f.close() 25 | 26 | with open(str(setup_teardown_folder[1] / "attributes.yaml"), encoding="utf-8") as f: 27 | content = 'array:\n value:\n - 1.0\n - 2.0\n - 3.0\n unit: "m"\n' 28 | 29 | # NOTE split and conversion to set is just because the order of the items is not important 30 | assert set(content.split("\n")) == set(f.read().split("\n")) 31 | 32 | 33 | def test_with_quantities_reverse_order(setup_teardown_folder): 34 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=[exdir.plugins.quantities, exdir.plugins.numpy_attributes]) 35 | f.attrs["array"] = np.array([1, 2, 3]) * pq.m 36 | f.close() 37 | 38 | with open(str(setup_teardown_folder[1] / "attributes.yaml")) as f: 39 | content = 'array:\n value:\n - 1.0\n - 2.0\n - 3.0\n unit: "m"\n' 40 | 41 | # NOTE split and conversion to set is just because the order of the items is not important 42 | assert set(content.split("\n")) == set(f.read().split("\n")) 43 | -------------------------------------------------------------------------------- /tests/test_object.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of Exdir, the Experimental Directory Structure. 3 | # 4 | # Copyright 2017 Simen Tennøe, Svenn-Arne Dragly 5 | # 6 | # License: MIT, see "LICENSE" file for the full license terms. 7 | # 8 | # This file contains code from h5py, a Python interface to the HDF5 library, 9 | # licensed under a standard 3-clause BSD license 10 | # with copyright Andrew Collette and contributors. 11 | # See http://www.h5py.org and the "3rdparty/h5py-LICENSE" file for details. 
12 | 13 | 14 | import pytest 15 | import os 16 | import pathlib 17 | import exdir 18 | try: 19 | import ruamel_yaml as yaml 20 | except ImportError: 21 | import ruamel.yaml as yaml 22 | 23 | from exdir.core import Object, Attribute 24 | # TODO Remove this import and use import <> as <> instead 25 | from exdir.core.exdir_object import DATASET_TYPENAME, GROUP_TYPENAME, ATTRIBUTES_FILENAME, META_FILENAME, _create_object_directory, is_nonraw_object_directory 26 | import exdir.core.exdir_object as exob 27 | 28 | 29 | # tests for Object class 30 | 31 | def test_object_init(setup_teardown_folder): 32 | obj = Object(setup_teardown_folder[2], pathlib.PurePosixPath(""), "test_object", file=None) 33 | 34 | assert obj.root_directory == setup_teardown_folder[2] 35 | assert obj.object_name == "test_object" 36 | assert obj.parent_path == pathlib.PurePosixPath("") 37 | assert obj.file is None 38 | assert obj.relative_path == pathlib.PurePosixPath("test_object") 39 | assert obj.name == "/test_object" 40 | 41 | 42 | def test_open_object(exdir_tmpfile): 43 | grp = exdir_tmpfile.create_group("test") 44 | grp2 = grp.create_group("test2") 45 | exob.open_object(grp2.directory) 46 | 47 | 48 | def test_object_attrs(setup_teardown_file): 49 | obj = setup_teardown_file[3].create_dataset("test_object", shape=(1,), dtype=float) 50 | 51 | assert isinstance(obj.attrs, Attribute) 52 | assert obj.attrs.mode.value == 1 53 | obj.attrs = "test value" 54 | 55 | with (setup_teardown_file[1] / "test_object" / ATTRIBUTES_FILENAME).open("r", encoding="utf-8") as meta_file: 56 | meta_data = yaml.YAML(typ="safe", pure=True).load(meta_file) 57 | assert meta_data == "test value" 58 | 59 | 60 | def test_object_meta(setup_teardown_file): 61 | obj = setup_teardown_file[3].create_dataset("test_object", shape=(1,), dtype=float) 62 | 63 | assert isinstance(obj.meta, Attribute) 64 | assert obj.meta.mode == exdir.core.Attribute._Mode.METADATA 65 | with pytest.raises(AttributeError): 66 | obj.meta = "test value" 67 | 68 | 69 | def test_object_directory(setup_teardown_file): 70 | obj = setup_teardown_file[3].create_dataset("test_object", shape=(1,), dtype=float) 71 | 72 | assert obj.directory == pathlib.Path(setup_teardown_file[1]) / "test_object" 73 | assert obj.attributes_filename == setup_teardown_file[1] / "test_object" / ATTRIBUTES_FILENAME 74 | assert obj.meta_filename == setup_teardown_file[1] / "test_object" / META_FILENAME 75 | 76 | 77 | def test_object_create_raw(setup_teardown_file): 78 | obj = setup_teardown_file[3].create_dataset("test_object", shape=(1,), dtype=float) 79 | obj.create_raw("test_raw") 80 | assert (setup_teardown_file[1] / "test_object" / "test_raw").is_dir() 81 | 82 | with pytest.raises(RuntimeError): 83 | obj.create_raw("test_raw") 84 | -------------------------------------------------------------------------------- /tests/test_plugins.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of Exdir, the Experimental Directory Structure. 3 | # 4 | # Copyright 2017 Svenn-Arne Dragly 5 | # Copyright 2017 Simen Tennøe 6 | # 7 | # License: MIT, see "LICENSE" file for the full license terms. 8 | # 9 | # This file contains code from h5py, a Python interface to the HDF5 library, 10 | # licensed under a standard 3-clause BSD license 11 | # with copyright Andrew Collette and contributors. 12 | # See http://www.h5py.org and the "3rdparty/h5py-LICENSE" file for details. 
13 | 14 | 15 | import pytest 16 | import numpy as np 17 | import os 18 | import quantities as pq 19 | 20 | import exdir 21 | import exdir.core 22 | 23 | 24 | def test_plugin_order(): 25 | class DatasetPlugin(exdir.plugin_interface.Dataset): 26 | def prepare_read(self, dataset_data): 27 | return dataset_data 28 | 29 | def prepare_write(self, dataset_data): 30 | return dataset_data 31 | 32 | first = exdir.plugin_interface.Plugin( 33 | "first", 34 | write_before=["third"], 35 | read_before=["second"], 36 | dataset_plugins=[DatasetPlugin()] 37 | ) 38 | 39 | second = exdir.plugin_interface.Plugin( 40 | "second", 41 | write_after=["first", "dummy"], 42 | read_after=["first", "none"], 43 | read_before=["third", "dummy"], 44 | dataset_plugins=[DatasetPlugin()] 45 | ) 46 | 47 | third = exdir.plugin_interface.Plugin( 48 | "third", 49 | write_after=["second", "test"], 50 | write_before=["fourth", "test"], 51 | read_after=["first", "test"], 52 | read_before=["fourth", "test"], 53 | dataset_plugins=[DatasetPlugin()] 54 | ) 55 | 56 | fourth = exdir.plugin_interface.Plugin( 57 | "fourth", 58 | write_before=["fifth", "test"], 59 | read_before=["fifth", "something"], 60 | dataset_plugins=[DatasetPlugin()] 61 | ) 62 | 63 | fifth = exdir.plugin_interface.Plugin( 64 | "fifth", 65 | write_after=["first", "second", "third"], 66 | read_after=["third", "dummy"], 67 | dataset_plugins=[DatasetPlugin()] 68 | ) 69 | 70 | manager = exdir.plugin_interface.plugin_interface.Manager([first, second, third, fourth, fifth]) 71 | 72 | names = [plugin._plugin_module.name for plugin in manager.dataset_plugins.write_order] 73 | assert names == ["first", "second", "third", "fourth", "fifth"] 74 | names = [plugin._plugin_module.name for plugin in manager.dataset_plugins.read_order] 75 | assert names == ["first", "second", "third", "fourth", "fifth"] 76 | 77 | 78 | def test_noop(setup_teardown_folder): 79 | class DatasetPlugin(exdir.plugin_interface.Dataset): 80 | def prepare_read(self, dataset_data): 81 | return dataset_data 82 | 83 | def prepare_write(self, dataset_data): 84 | return dataset_data 85 | 86 | noop = exdir.plugin_interface.Plugin( 87 | "noop", 88 | dataset_plugins=[DatasetPlugin()] 89 | ) 90 | 91 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=noop) 92 | d = f.create_dataset("foo", data=np.array([1, 2, 3])) 93 | assert all(d.data == np.array([1, 2, 3])) 94 | f.close() 95 | 96 | 97 | def test_fail_reading_without_required(setup_teardown_folder): 98 | class DatasetPlugin(exdir.plugin_interface.Dataset): 99 | def prepare_read(self, dataset_data): 100 | return dataset_data 101 | 102 | def prepare_write(self, dataset_data): 103 | if "plugins" not in dataset_data.meta: 104 | dataset_data.meta["plugins"] = {} 105 | if "required" not in dataset_data.meta["plugins"]: 106 | dataset_data.meta["plugins"]["required"] = {"required": True} 107 | return dataset_data 108 | 109 | required = exdir.plugin_interface.Plugin( 110 | "required", 111 | dataset_plugins=[DatasetPlugin()] 112 | ) 113 | 114 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=required) 115 | assert isinstance(f, exdir.File) 116 | d = f.create_dataset("foo", data=np.array([1, 2, 3])) 117 | assert all(d.data == np.array([1, 2, 3])) 118 | f.close() 119 | 120 | f = exdir.File(setup_teardown_folder[1], 'r+') 121 | assert isinstance(f, exdir.File) 122 | d = f["foo"] 123 | with pytest.raises(Exception): 124 | print(d.data) 125 | f.close() 126 | 127 | 128 | def test_one_way_scaling(setup_teardown_folder): 129 | class DatasetPlugin(exdir.plugin_interface.Dataset):
130 | def prepare_read(self, dataset_data): 131 | return dataset_data 132 | 133 | def prepare_write(self, dataset_data): 134 | if "plugins" not in dataset_data.meta: 135 | dataset_data.meta["plugins"] = {} 136 | if "scaling" not in dataset_data.meta["plugins"]: 137 | dataset_data.meta["plugins"]["scaling"] = {"required": True} 138 | dataset_data.data *= 2 139 | return dataset_data 140 | 141 | one_way_scaling = exdir.plugin_interface.Plugin( 142 | "scaling", 143 | dataset_plugins=[DatasetPlugin()] 144 | ) 145 | 146 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=[one_way_scaling]) 147 | assert isinstance(f, exdir.File) 148 | d = f.create_dataset("scaling", data=np.array([1, 2, 3])) 149 | assert all(d.data == np.array([2, 4, 6])) 150 | f.close() 151 | 152 | 153 | def test_scaling(setup_teardown_folder): 154 | 155 | class DatasetPlugin(exdir.plugin_interface.Dataset): 156 | def prepare_read(self, dataset_data): 157 | # undo the doubling applied in prepare_write 158 | dataset_data.data = dataset_data.data / 2 159 | return dataset_data 160 | 161 | def prepare_write(self, dataset_data): 162 | dataset_data.data *= 2 163 | if "plugins" not in dataset_data.meta: 164 | dataset_data.meta["plugins"] = {} 165 | if "scaling" not in dataset_data.meta["plugins"]: 166 | dataset_data.meta["plugins"]["scaling"] = {"required": True} 167 | 168 | return dataset_data 169 | 170 | scaling = exdir.plugin_interface.Plugin( 171 | "scaling", 172 | dataset_plugins=[DatasetPlugin()] 173 | ) 174 | 175 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=[scaling]) 176 | assert isinstance(f, exdir.File) 177 | d = f.create_dataset("scaling", data=np.array([1, 2, 3])) 178 | assert all(d.data == np.array([1, 2, 3])) 179 | f.close() 180 | 181 | 182 | def test_attribute_plugin(setup_teardown_folder): 183 | class AttributePlugin(exdir.plugin_interface.Attribute): 184 | def prepare_read(self, attribute_data): 185 | attribute_data.attrs["value"] = attribute_data.attrs["value"]["value"] 186 | return attribute_data 187 | 188 | def prepare_write(self, attribute_data): 189 | meta = attribute_data.meta 190 | if "plugins" not in meta: 191 | meta["plugins"] = {} 192 | if "scaling" not in meta["plugins"]: 193 | meta["plugins"]["scaling"] = {"required": True} 194 | old_value = attribute_data.attrs["value"] 195 | attribute_data.attrs["value"] = { 196 | "unit": "m", 197 | "value": old_value * 2 198 | } 199 | return attribute_data 200 | 201 | scaling_unit = exdir.plugin_interface.Plugin( 202 | "scaling", 203 | attribute_plugins=[AttributePlugin()] 204 | ) 205 | 206 | f = exdir.File(setup_teardown_folder[1], "w", plugins=[scaling_unit]) 207 | assert isinstance(f, exdir.File) 208 | d = f.create_dataset("foo", data=np.array([1, 2, 3])) 209 | d.attrs["value"] = 42 210 | assert d.attrs["value"] == 84 211 | f.close() 212 | 213 | def test_reading_in_order(setup_teardown_folder): 214 | class DatasetPlugin1(exdir.plugin_interface.Dataset): 215 | def prepare_read(self, dataset_data): 216 | dataset_data.data = dataset_data.data * 2 217 | return dataset_data 218 | 219 | class DatasetPlugin2(exdir.plugin_interface.Dataset): 220 | def prepare_read(self, dataset_data): 221 | dataset_data.data = dataset_data.data * 3 222 | return dataset_data 223 | 224 | plugin1 = exdir.plugin_interface.Plugin( 225 | "plugin1", 226 | dataset_plugins=[DatasetPlugin1()] 227 | ) 228 | plugin2 = exdir.plugin_interface.Plugin( 229 | "plugin2", 230 | dataset_plugins=[DatasetPlugin2()] 231 | ) 232 | 233 | f = exdir.File(setup_teardown_folder[1], "w", plugins=[plugin1,
plugin2]) 234 | assert isinstance(f, exdir.File) 235 | d = f.create_dataset("foo", data=np.array([1, 2, 3])) 236 | assert all(d.data == np.array([6, 12, 18])) 237 | f.close() 238 | -------------------------------------------------------------------------------- /tests/test_quantities.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is part of Exdir, the Experimental Directory Structure. 3 | # 4 | # Copyright 2017 Simen Tennøe 5 | # 6 | # License: MIT, see "LICENSE" file for the full license terms. 7 | # 8 | # This file contains code from h5py, a Python interface to the HDF5 library, 9 | # licensed under a standard 3-clause BSD license 10 | # with copyright Andrew Collette and contributors. 11 | # See http://www.h5py.org and the "3rdparty/h5py-LICENSE" file for details. 12 | 13 | 14 | import pytest 15 | import numpy as np 16 | import os 17 | import quantities as pq 18 | import exdir 19 | 20 | from exdir.core import Attribute, File, Dataset 21 | 22 | from exdir.plugins.quantities import convert_quantities, convert_back_quantities 23 | 24 | 25 | def test_create_quantities_file(setup_teardown_folder): 26 | f = exdir.File(setup_teardown_folder[1], 'w', plugins=exdir.plugins.quantities) 27 | d = f.create_dataset("foo", data=np.array([1, 2, 3]) * pq.m) 28 | assert all(d.data.magnitude == np.array([1, 2, 3])) 29 | assert d.data.units == pq.m 30 | f.close() 31 | 32 | 33 | def test_quantities_attributes(quantities_tmpfile): 34 | """ 35 | Test if quantities is saved 36 | """ 37 | f = quantities_tmpfile 38 | 39 | f.attrs["temperature"] = 99.0 40 | assert f.attrs["temperature"] == 99.0 41 | f.attrs["temperature"] = 99.0 * pq.deg 42 | assert f.attrs["temperature"] == 99.0 * pq.deg 43 | 44 | attrs = f.attrs 45 | assert type(attrs) is Attribute 46 | 47 | 48 | def test_create_quantities_data(quantities_tmpfile): 49 | f = quantities_tmpfile 50 | grp = f.create_group("test") 51 | 52 | testdata = np.array([1, 2, 3]) * pq.J 53 | dset = grp.create_dataset('data', data=testdata) 54 | 55 | outdata = dset[()] 56 | 57 | assert isinstance(outdata, pq.Quantity) 58 | assert np.all(outdata == testdata) 59 | assert outdata.dtype == testdata.dtype 60 | 61 | outdata = dset[0] 62 | 63 | assert isinstance(outdata, pq.Quantity) 64 | assert np.all(outdata == testdata[0]) 65 | assert outdata.dtype == testdata.dtype 66 | 67 | 68 | 69 | def test_assign_quantities(quantities_tmpfile): 70 | f = quantities_tmpfile 71 | grp = f.create_group("test") 72 | 73 | testdata = np.array([1,2,3]) * pq.J 74 | dset = grp.create_dataset('data', data=testdata) 75 | 76 | outdata = f['test']["data"][()] 77 | 78 | assert isinstance(outdata, pq.Quantity) 79 | assert np.all(outdata == testdata) 80 | assert outdata.dtype == testdata.dtype 81 | 82 | 83 | def test_set_quantities(quantities_tmpfile): 84 | f = quantities_tmpfile 85 | grp = f.create_group("test") 86 | 87 | dset = grp.create_dataset('data', data=np.array([1])) 88 | 89 | testdata = np.array([1.1, 2, 3]) * pq.J 90 | dset.value = testdata 91 | outdata = f['test']["data"][()] 92 | 93 | assert isinstance(outdata, pq.Quantity) 94 | assert np.all(outdata == testdata) 95 | assert outdata.dtype == testdata.dtype 96 | 97 | 98 | 99 | def test_mmap_quantities(setup_teardown_file): 100 | f = setup_teardown_file[3] 101 | grp = f.create_group("test") 102 | 103 | testdata = np.array([1, 2, 3]) * pq.J 104 | dset = grp.create_dataset('data', data=testdata) 105 | 106 | dset[1] = 100 107 | 108 | tmp_file = np.load(str(setup_teardown_file[1] / "test" / 
"data" / "data.npy")) 109 | 110 | assert dset.data[1] == 100 111 | assert tmp_file[1] == 100 112 | 113 | 114 | def test_require_quantities(quantities_tmpfile): 115 | f = quantities_tmpfile 116 | grp = f.create_group("test") 117 | 118 | testdata = np.array([1, 2, 3]) * pq.J 119 | dset = grp.create_dataset('data', data=testdata) 120 | 121 | dset2 = grp.require_dataset('data', data=testdata) 122 | 123 | assert dset == dset2 124 | assert np.all(dset[:] == testdata) 125 | assert np.all(dset2[:] == testdata) 126 | assert isinstance(dset[:], pq.Quantity) 127 | 128 | 129 | 130 | 131 | # 132 | def test_convert_quantities(): 133 | pq_value = pq.Quantity(1, "m") 134 | result = convert_quantities(pq_value) 135 | assert result == {"value": 1, "unit": "m"} 136 | 137 | pq_value = pq.Quantity([1, 2, 3], "m") 138 | result = convert_quantities(pq_value) 139 | assert result == {"value": [1, 2, 3], "unit": "m"} 140 | 141 | result = convert_quantities(np.array([1, 2, 3])) 142 | assert result == [1, 2, 3] 143 | 144 | result = convert_quantities(1) 145 | assert result == 1 146 | 147 | result = convert_quantities(2.3) 148 | assert result == 2.3 149 | 150 | pq_value = pq.UncertainQuantity([1, 2], "m", [3, 4]) 151 | result = convert_quantities(pq_value) 152 | assert result == {"unit": "m", "uncertainty": [3, 4], "value": [1.0, 2.0]} 153 | 154 | pq_values = {"quantity": pq.Quantity(1, "m"), 155 | "uq_quantity": pq.UncertainQuantity([1, 2], "m", [3, 4])} 156 | result = convert_quantities(pq_values) 157 | assert(result == {"quantity": {"unit": "m", "value": 1}, 158 | "uq_quantity": {"unit": "m", "uncertainty": [3, 4], "value": [1.0, 2.0]}}) 159 | 160 | pq_values = {"list": [1, 2, 3], "quantity": pq.Quantity(1, "m")} 161 | pq_dict = {"list": [1, 2, 3], "quantity": {"unit": "m", "value": 1}} 162 | result = convert_quantities(pq_values) 163 | assert result == pq_dict 164 | 165 | 166 | def test_convert_back_quantities(): 167 | pq_dict = {"value": 1, "unit": "m"} 168 | result = convert_back_quantities(pq_dict) 169 | assert result == pq.Quantity(1, "m") 170 | 171 | pq_dict = {"value": [1, 2, 3], "unit": "m"} 172 | result = convert_back_quantities(pq_dict) 173 | assert np.array_equal(result, pq.Quantity([1, 2, 3], "m")) 174 | 175 | pq_dict = {"value": [1, 2, 3]} 176 | result = convert_back_quantities(pq_dict) 177 | assert result == pq_dict 178 | 179 | result = convert_back_quantities(1) 180 | assert result == 1 181 | 182 | result = convert_back_quantities(2.3) 183 | assert result == 2.3 184 | 185 | pq_dict = {"unit": "m", "uncertainty": [3, 4], "value": [1.0, 2.0]} 186 | result = convert_back_quantities(pq_dict) 187 | pq_value = pq.UncertainQuantity([1, 2], "m", [3, 4]) 188 | 189 | assert isinstance(result, pq.UncertainQuantity) 190 | assert result.magnitude.tolist() == pq_value.magnitude.tolist() 191 | assert result.dimensionality.string == pq_value.dimensionality.string 192 | assert result.uncertainty.magnitude.tolist() == pq_value.uncertainty.magnitude.tolist() 193 | 194 | pq_dict = {"quantity": {"unit": "m", "value": 1}, 195 | "uq_quantity": {"unit": "m", "uncertainty": [3, 4], "value": [1.0, 2.0]}} 196 | pq_values = {"quantity": pq.Quantity(1, "m"), 197 | "uq_quantity": pq.UncertainQuantity([1, 2], "m", [3, 4])} 198 | result = convert_back_quantities(pq_values) 199 | assert result == pq_values 200 | 201 | pq_values = {"list": [1, 2, 3], "quantity": {"unit": "m", "value": 1}} 202 | result = convert_back_quantities(pq_values) 203 | assert result == {"list": [1, 2, 3], "quantity": pq.Quantity(1, "m")} 204 | 
-------------------------------------------------------------------------------- /tests/test_raw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import pathlib 4 | from exdir.core import Raw 5 | 6 | def test_raw_init(setup_teardown_folder): 7 | raw = Raw(setup_teardown_folder[2], pathlib.PurePosixPath(""), "test_object", file=None) 8 | 9 | assert raw.root_directory == setup_teardown_folder[2] 10 | assert raw.object_name == "test_object" 11 | assert raw.parent_path == pathlib.PurePosixPath("") 12 | assert raw.file is None 13 | assert raw.relative_path == pathlib.PurePosixPath("test_object") 14 | assert raw.name == "/test_object" 15 | 16 | 17 | def test_create_raw(setup_teardown_file): 18 | """Simple .create_raw call.""" 19 | 20 | f = setup_teardown_file[3] 21 | raw = f.create_raw("test") 22 | 23 | raw2 = f["test"] 24 | 25 | assert (f.root_directory / "test").exists() 26 | 27 | assert raw == raw2 28 | 29 | 30 | def test_require_raw(setup_teardown_file): 31 | """Raw is created if it doesn't exist.""" 32 | 33 | f = setup_teardown_file[3] 34 | grp = f.create_group("test") 35 | 36 | raw = grp.require_raw("foo") 37 | raw2 = grp.require_raw("foo") 38 | 39 | raw3 = grp["foo"] 40 | 41 | assert (f.root_directory / "test" / "foo").exists() 42 | 43 | assert raw == raw2 44 | assert raw == raw3 45 | 46 | 47 | def test_create_raw_twice(exdir_tmpfile): 48 | exdir_tmpfile.create_raw("test") 49 | with pytest.raises(RuntimeError): 50 | exdir_tmpfile.create_raw("test") 51 | 52 | 53 | def test_create_dataset_raw(exdir_tmpfile): 54 | group = exdir_tmpfile.create_group("group") 55 | dataset = group.create_dataset("dataset", shape=(1, 1), dtype='float32') 56 | raw = dataset.create_raw("raw") 57 | assert (exdir_tmpfile.directory / "group" / "dataset" / "raw").exists() 58 | --------------------------------------------------------------------------------