├── requirements.txt ├── requirements_tests.txt ├── examples └── example_hdf5storage_marshaller_plugin │ ├── COPYING.txt │ ├── setup.cfg │ ├── pyproject.toml │ ├── README.rst │ ├── setup.py │ └── example_hdf5storage_marshaller_plugin.py ├── requirements_doc.txt ├── doc ├── source │ ├── thanks.rst │ ├── information.rst │ ├── api.rst │ ├── index.rst │ ├── hdf5storage.exceptions.rst │ ├── hdf5storage.rst │ ├── paths.rst │ ├── hdf5storage.utilities.rst │ ├── development.rst │ ├── hdf5storage.Marshallers.rst │ ├── compression.rst │ ├── conf.py │ └── introduction.rst ├── make.bat └── Makefile ├── setup.cfg ├── pyproject.toml ├── MANIFEST.in ├── .gitattributes ├── THANKS.rst ├── .gitignore ├── .travis.yml ├── COPYING.txt ├── tests ├── julia_read_mat.jl ├── read_write_mat.m ├── test_string_utf16_conversion.py ├── test_str_conv_utils.py ├── test_ndarray_O_field.py ├── test_marshaller_plugins.py ├── test_matlab_compatibility.py ├── test_multi_io.py ├── make_mat_with_all_types.m ├── test_marshaller_collection_priority.py ├── test_julia_mat_compatibility.py ├── test_marshallers_requiring_modules.py ├── test_path_escaping.py ├── test_dict_like_storage_methods.py ├── test_hdf5_filters.py ├── make_randoms.py └── asserts.py ├── hdf5storage └── exceptions.py └── setup.py /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | numpy 3 | h5py>=2.3 4 | -------------------------------------------------------------------------------- /requirements_tests.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | nose>=1.0 3 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/COPYING.txt: -------------------------------------------------------------------------------- 1 | ../../COPYING.txt -------------------------------------------------------------------------------- /requirements_doc.txt: 
-------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | sphinx>=1.7 3 | sphinx_rtd_theme 4 | -------------------------------------------------------------------------------- /doc/source/thanks.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | THANKS 3 | ====== 4 | 5 | .. include:: ../../THANKS.rst 6 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /doc/source/information.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | hdf5storage 3 | =========== 4 | 5 | .. include:: ../../README.rst 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [build_sphinx] 5 | all-files=1 6 | build-dir=doc/build 7 | source-dir=doc/source -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = ["setuptools"] # PEP 508 specifications. 4 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = ["setuptools"] # PEP 508 specifications. 
4 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | hdf5storage 8 | hdf5storage.exceptions 9 | hdf5storage.Marshallers 10 | hdf5storage.utilities 11 | 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.cfg 2 | include *.toml 3 | include *.txt 4 | include *.rst 5 | include *.py 6 | recursive-include tests *.py *.m *.jl 7 | recursive-include examples *.py *.txt *.rst *.cfg 8 | recursive-include doc * 9 | prune doc/build 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set default behaviour, in case users don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files we want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.py text 7 | *.txt text eol=crlf 8 | *.cfg text 9 | *.toml text -------------------------------------------------------------------------------- /THANKS.rst: -------------------------------------------------------------------------------- 1 | The following people helped contributed code to fix bugs, add features, etc. 
2 | 3 | * `Steven Dee `_ 4 | * `WANG Longqi `_ 5 | * `Jakub Urban `_ 6 | * `Ghislain Antony Vaillant `_ 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # autosaves 39 | *.py~ 40 | *.yml~ 41 | *.rst~ 42 | *.txt~ 43 | *.toml~ -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. hdf5storage documentation master file, created by 2 | sphinx-quickstart on Sun Dec 22 00:05:54 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to hdf5storage's documentation! 7 | ======================================= 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | information 15 | introduction 16 | paths 17 | compression 18 | storage_format 19 | development 20 | thanks 21 | api 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | 30 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.exceptions.rst: -------------------------------------------------------------------------------- 1 | hdf5storage.exceptions 2 | ====================== 3 | 4 | .. currentmodule:: hdf5storage.exceptions 5 | 6 | .. 
automodule:: hdf5storage.exceptions 7 | 8 | .. autosummary:: 9 | 10 | Hdf5storageError 11 | CantReadError 12 | TypeNotMatlabCompatibleError 13 | 14 | 15 | Hdf5storageError 16 | ---------------- 17 | 18 | .. autoexception:: Hdf5storageError 19 | :show-inheritance: 20 | 21 | 22 | CantReadError 23 | ------------- 24 | 25 | .. autoexception:: CantReadError 26 | :show-inheritance: 27 | 28 | 29 | TypeNotMatlabCompatibleError 30 | ---------------------------- 31 | 32 | .. autoexception:: TypeNotMatlabCompatibleError 33 | :show-inheritance: 34 | 35 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/README.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | This is an example plugin package for providing Marshallers for the 5 | `hdf5storage `_ package (this 6 | example package is included in it). 7 | 8 | The base package's documentation is found at 9 | http://pythonhosted.org/hdf5storage/ 10 | 11 | The base package's source code is found at 12 | https://github.com/frejanordsiek/hdf5storage 13 | with this example package's source code being at 14 | https://github.com/frejanordsiek/hdf5storage/tests/example_hdf5storage_marshaller_plugin 15 | 16 | The package is licensed under a 2-clause BSD license 17 | (https://github.com/frejanordsiek/hdf5storage/blob/master/COPYING.txt). 
18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | sudo: required 3 | 4 | language: python 5 | cache: pip 6 | 7 | notifications: 8 | email: false 9 | 10 | env: 11 | - H5PY_VERSION="==2.3" 12 | - H5PY_VERSION="==2.4" 13 | - H5PY_VERSION="==2.5" 14 | - H5PY_VERSION="==2.6" 15 | - H5PY_VERSION="==2.7" 16 | - H5PY_VERSION="==2.8" 17 | - H5PY_VERSION="==2.9" 18 | - H5PY_VERSION="" 19 | 20 | python: 21 | - "3.5" 22 | - "3.6" 23 | - "3.7" 24 | - "3.8" 25 | - "3.9-dev" 26 | 27 | before_install: 28 | - sudo apt-get -qq update 29 | - sudo apt-get install -y gcc libhdf5-serial-dev libblas-dev liblapack-dev libatlas-dev libatlas-base-dev libquadmath0 30 | 31 | # command to install dependencies 32 | # 33 | # Seem to need newer Numpy as well. 34 | install: 35 | - pip install -U numpy 36 | - pip install h5py$H5PY_VERSION 37 | - pip install -r requirements_tests.txt 38 | - pip install -e examples/example_hdf5storage_marshaller_plugin 39 | 40 | # command to run tests 41 | script: nosetests 42 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.rst: -------------------------------------------------------------------------------- 1 | hdf5storage 2 | =========== 3 | 4 | .. currentmodule:: hdf5storage 5 | 6 | .. automodule:: hdf5storage 7 | 8 | .. autosummary:: 9 | 10 | write 11 | writes 12 | read 13 | reads 14 | savemat 15 | loadmat 16 | get_default_MarshallerCollection 17 | make_new_default_MarshallerCollection 18 | Options 19 | MarshallerCollection 20 | 21 | 22 | write 23 | ----- 24 | 25 | .. autofunction:: write 26 | 27 | 28 | writes 29 | ------ 30 | 31 | .. autofunction:: writes 32 | 33 | 34 | read 35 | ----- 36 | 37 | .. autofunction:: read 38 | 39 | 40 | reads 41 | ----- 42 | 43 | .. autofunction:: reads 44 | 45 | 46 | savemat 47 | ------- 48 | 49 | .. 
autofunction:: savemat 50 | 51 | 52 | loadmat 53 | ------- 54 | 55 | .. autofunction:: loadmat 56 | 57 | 58 | get_default_MarshallerCollection 59 | -------------------------------- 60 | 61 | .. autofunction:: get_default_MarshallerCollection 62 | 63 | 64 | make_new_default_MarshallerCollection 65 | ------------------------------------- 66 | 67 | .. autofunction:: make_new_default_MarshallerCollection 68 | 69 | 70 | Options 71 | ------- 72 | 73 | .. autoclass:: Options 74 | :members: 75 | :show-inheritance: 76 | 77 | 78 | MarshallerCollection 79 | -------------------- 80 | 81 | .. autoclass:: MarshallerCollection 82 | :members: 83 | :show-inheritance: 84 | 85 | -------------------------------------------------------------------------------- /COPYING.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2016, Freja Nordsiek 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /tests/julia_read_mat.jl: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | using MAT 28 | 29 | a = matread(ARGS[1]) 30 | matwrite(ARGS[2], a) 31 | 32 | quit() 33 | -------------------------------------------------------------------------------- /tests/read_write_mat.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) 2013-2016, Freja Nordsiek 2 | % All rights reserved. 3 | % 4 | % Redistribution and use in source and binary forms, with or without 5 | % modification, are permitted provided that the following conditions are 6 | % met: 7 | % 8 | % 1. Redistributions of source code must retain the above copyright 9 | % notice, this list of conditions and the following disclaimer. 10 | % 11 | % 2. Redistributions in binary form must reproduce the above copyright 12 | % notice, this list of conditions and the following disclaimer in the 13 | % documentation and/or other materials provided with the distribution. 14 | % 15 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | % "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | % LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | % A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | % HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | % SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | % LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | % DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | % THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | % (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | % OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | a = load('python_v7p3.mat'); 28 | save('python_v7.mat','-struct','a','-v7'); 29 | 30 | exit; 31 | -------------------------------------------------------------------------------- /hdf5storage/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | """ Module of Exceptions. """ 28 | 29 | 30 | class Hdf5storageError(IOError): 31 | """ Base class of hdf5storage package exceptions.""" 32 | pass 33 | 34 | 35 | class CantReadError(Hdf5storageError): 36 | """ Exception for a failure to read the desired data.""" 37 | pass 38 | 39 | 40 | class TypeNotMatlabCompatibleError(Hdf5storageError): 41 | """ Exception for trying to write non-MATLAB compatible data. 42 | 43 | In the event that MATLAB compatibility is being done 44 | (``Options.matlab_compatible``) and a Python type is not importable 45 | by MATLAB, the data is either not written or this exception is 46 | thrown depending on the value of 47 | ``Options.action_for_matlab_incompatible``. 48 | 49 | See Also 50 | -------- 51 | hdf5storage.Options.matlab_compatible 52 | hdf5storage.Options.action_for_matlab_incompatible 53 | 54 | """ 55 | pass 56 | -------------------------------------------------------------------------------- /tests/test_string_utf16_conversion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import tempfile 30 | 31 | import numpy as np 32 | import h5py 33 | 34 | from nose.tools import assert_equal as assert_equal_nose 35 | 36 | import hdf5storage 37 | 38 | 39 | # A test to make sure that the following are written as UTF-16 40 | # (uint16) if they don't contain doublets and the 41 | # convert_numpy_str_to_utf16 option is set. 
42 | # 43 | # * str 44 | # * numpy.unicode_ scalars 45 | 46 | def check_conv_utf16(tp): 47 | name = '/a' 48 | data = tp('abcdefghijklmnopqrstuvwxyz') 49 | fld = None 50 | try: 51 | fld = tempfile.mkstemp() 52 | os.close(fld[0]) 53 | filename = fld[1] 54 | hdf5storage.write(data, path=name, filename=filename, 55 | matlab_compatible=False, 56 | store_python_metadata=False, 57 | convert_numpy_str_to_utf16=True) 58 | with h5py.File(filename, mode='r') as f: 59 | assert_equal_nose(f[name].dtype.type, np.uint16) 60 | except: 61 | raise 62 | finally: 63 | if fld is not None: 64 | os.remove(fld[1]) 65 | 66 | 67 | def test_conv_utf16(): 68 | tps = (str, np.unicode_) 69 | for tp in tps: 70 | yield check_conv_utf16, tp 71 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | from setuptools import setup 28 | 29 | with open('README.rst') as file: 30 | long_description = file.read() 31 | 32 | setup(name='example_hdf5storage_marshaller_plugin', 33 | version='0.2', 34 | description='Example marshaller plugin for hdf5storage package.', 35 | long_description=long_description, 36 | author='Freja Nordsiek', 37 | author_email='fnordsie at gmail dt com', 38 | url='https://github.com/frejanordsiek/hdf5storage/tests/example_hdf5storage_marshaller_plugin', 39 | py_modules=['example_hdf5storage_marshaller_plugin'], 40 | entry_points={'hdf5storage.marshallers.plugins': 41 | '1.0 = example_hdf5storage_marshaller_plugin:get_marshallers_1p0'}, 42 | license='BSD', 43 | keywords='hdf5storage', 44 | zip_safe=True, 45 | classifiers=[ 46 | "Programming Language :: Python :: 3 :: Only", 47 | "Development Status :: 3 - Alpha", 48 | "License :: OSI Approved :: BSD License", 49 | "Operating System :: OS Independent", 50 | "Intended Audience :: Developers", 51 | "Intended Audience :: Information Technology", 52 | "Intended Audience :: Science/Research", 53 | "Topic :: Scientific/Engineering", 54 | "Topic :: Database", 55 | "Topic :: Software Development :: Libraries :: Python Modules" 56 | ] 57 | ) 58 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import sys 28 | from setuptools import setup 29 | 30 | if sys.hexversion < 0x3050000: 31 | raise NotImplementedError('Python < 3.5 not supported.') 32 | 33 | with open('README.rst') as file: 34 | long_description = file.read() 35 | 36 | setup(name='hdf5storage', 37 | version='0.2', 38 | description='Utilities to read/write Python types to/from HDF5 files, including MATLAB v7.3 MAT files.', 39 | long_description=long_description, 40 | author='Freja Nordsiek', 41 | author_email='fnordsie@gmail.com', 42 | url='https://github.com/frejanordsiek/hdf5storage', 43 | packages=['hdf5storage'], 44 | install_requires=["setuptools", "numpy", "h5py>=2.3"], 45 | tests_require=['nose>=1.0'], 46 | test_suite='nose.collector', 47 | license='BSD', 48 | keywords='hdf5 matlab', 49 | zip_safe=True, 50 | classifiers=[ 51 | "Programming Language :: Python :: 3 :: Only", 52 | "Development Status :: 3 - Alpha", 53 | "License :: OSI Approved :: BSD License", 54 | "Operating System :: OS Independent", 55 | "Intended Audience :: Developers", 56 | "Intended Audience :: Information Technology", 57 | "Intended Audience :: Science/Research", 58 | "Topic :: Scientific/Engineering", 59 | "Topic :: Database", 60 | "Topic :: Software Development :: Libraries :: Python Modules" 61 | ] 62 | ) 63 | -------------------------------------------------------------------------------- /tests/test_str_conv_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import string 28 | 29 | import numpy as np 30 | 31 | import hdf5storage.utilities as utils 32 | 33 | from nose.tools import assert_equal as assert_equal_nose 34 | 35 | from asserts import assert_equal 36 | 37 | 38 | # Make two strings, one with the main ascii characters and another with 39 | # the same characters plus a lot of unicode characters. 
40 | str_ascii = string.ascii_letters + string.digits 41 | str_unicode = str_ascii + ''.join([chr(500 + i) 42 | for i in range(1000)]) 43 | 44 | 45 | def test_numpy_str_ascii_to_uint16_back(): 46 | for i in range(100): 47 | data = np.unicode_(str_ascii) 48 | intermed = utils.convert_numpy_str_to_uint16(data) 49 | out = utils.convert_to_numpy_str(intermed)[0] 50 | assert_equal_nose(out.tostring(), data.tostring()) 51 | assert_equal(out, data) 52 | 53 | 54 | def test_numpy_str_someunicode_to_uint16_back(): 55 | for i in range(100): 56 | data = np.unicode_(str_unicode) 57 | intermed = utils.convert_numpy_str_to_uint16(data) 58 | out = utils.convert_to_numpy_str(intermed)[0] 59 | assert_equal_nose(out.tostring(), data.tostring()) 60 | assert_equal(out, data) 61 | 62 | 63 | def test_numpy_str_ascii_to_uint32_back(): 64 | for i in range(100): 65 | data = np.unicode_(str_ascii) 66 | intermed = utils.convert_numpy_str_to_uint32(data) 67 | out = utils.convert_to_numpy_str(intermed)[0] 68 | assert_equal_nose(intermed.tostring(), data.tostring()) 69 | assert_equal_nose(out.tostring(), data.tostring()) 70 | assert_equal(out, data) 71 | 72 | 73 | def test_numpy_str_someunicode_to_uint32_back(): 74 | for i in range(100): 75 | data = np.unicode_(str_unicode) 76 | intermed = utils.convert_numpy_str_to_uint32(data) 77 | out = utils.convert_to_numpy_str(intermed)[0] 78 | assert_equal_nose(intermed.tostring(), data.tostring()) 79 | assert_equal_nose(out.tostring(), data.tostring()) 80 | assert_equal(out, data) 81 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/example_hdf5storage_marshaller_plugin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Freja Nordsiek 2 | # All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | """ 27 | This is an example package for providing hdf5storage plugins. 28 | 29 | """ 30 | 31 | __version__ = '0.2' 32 | 33 | import hdf5storage.Marshallers as hm 34 | 35 | 36 | # Going to make a class that subclasses lists but doesn't really do 37 | # anything else, but it will be enought to need a new marshaller. 38 | 39 | class SubList(list): 40 | def __init__(self, *args, **keywords): 41 | list.__init__(self, *args, **keywords) 42 | 43 | 44 | # The marshaller for this will be rather trivial. It inherits from 45 | # PythonTupleSetDequeMarshaller which inherits from 46 | # PythonListMarshaller. 
The only thing that requires any work is
Unlike a Windows path, forward slashes (``'/'``) are 15 | used as directory separators instead of backward slashes (``'\\'``) and 16 | the base of the file system is just ``'/'`` instead of something like 17 | ``'C:\\'``. In the language of HDF5, what we call directories and files 18 | in filesystems are called groups and datasets. 19 | 20 | Limitations of HDF5 Paths 21 | ========================= 22 | 23 | The HDF5 format and library do not support having Dataset or Group names 24 | containing nulls (``'\x00'``), containing forward slashes (``'/'``), or 25 | starting out with one or more periods (``'.'``). 26 | 27 | Solution - Escaping 28 | =================== 29 | 30 | .. versionadded:: 0.2 31 | 32 | Ability to escape characters not allowed in Group or Dataset names. 33 | 34 | .. warning:: 35 | 36 | Before version 0.2, no escaping is supported and errors are thrown 37 | when a workaround cannot be found. 38 | 39 | In order to work around these limitations in HDF5 Dataset and Group 40 | names, the ability to escape these characters is provided. They are 41 | escaped as hexidecimal specifications or as doubling, which is fairly 42 | standard. The conversions are 43 | 44 | ============== ========== =========== 45 | Name Character Escaped 46 | ============== ========== =========== 47 | null ``'\x00'`` ``'\\x00'`` 48 | forward slash ``'/'`` ``'\\x2f'`` 49 | backward slash ``'\\'`` ``'\\\\'`` 50 | ============== ========== =========== 51 | 52 | The backward slash has to be escaped or else it will be impossible to 53 | accurately unescape. 54 | 55 | When unescaping, all the hex and unicode escapes allowed in python 56 | strings as well as how backward slashes are entered are used. 
They are 57 | 58 | ================= ================ ========== 59 | Escape Kind Conversion 60 | ================= ================ ========== 61 | ``'\\\\'`` double backslash ``'\\'`` 62 | ``'\\xYY'`` hex ``chr(N)`` 63 | ``'\\uYYYY'`` unicode ``chr(N)`` 64 | ``'\\UYYYYYYYY'`` unicode ``chr(N)`` 65 | ================= ================ ========== 66 | 67 | Where the Y are hexidecimal digits and N is the value of the hexidecimal 68 | number (the unicode character codepoint). 69 | 70 | Supported Paths 71 | =============== 72 | 73 | Paths can be given in a number of ways. 74 | 75 | No Escaping 76 | ----------- 77 | 78 | The path is given as a ``str`` or ``bytes``. It is the responsibility of 79 | the caller to make sure all escaping has been done. Forward slashes are 80 | interpreted as path separators. 81 | 82 | Escaping 83 | -------- 84 | 85 | The path is given as an iterable (e.g. ``list``, ``tuple``, etc.) of 86 | separated parts of the path (split at the separators) which must each be 87 | ``str`` and ``bytes``. These parts will each be escaped before being 88 | joined. 89 | 90 | Escaping/Unescaping Functions 91 | ============================= 92 | 93 | .. versionadded:: 0.2 94 | 95 | The functions described here. 96 | 97 | :py:func:`utilities.escape_path` is the function to escape an individual 98 | part of a path with. 99 | 100 | :py:func:`utilities.unescape_path` is the function to unescape a path. 101 | 102 | :py:func:`utilities.process_path` is a function that will take a path of 103 | any form, escape it if it is meant to be escaped, and get the Group that 104 | the target of the path is in as well as the name of the target inside 105 | that Group the path is pointing at. 106 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.utilities.rst: -------------------------------------------------------------------------------- 1 | hdf5storage.utilities 2 | ===================== 3 | 4 | .. 
currentmodule:: hdf5storage.utilities 5 | 6 | .. automodule:: hdf5storage.utilities 7 | 8 | .. autosummary:: 9 | 10 | escape_path 11 | unescape_path 12 | process_path 13 | does_dtype_have_a_zero_shape 14 | write_data 15 | read_data 16 | write_object_array 17 | read_object_array 18 | next_unused_name_in_group 19 | convert_numpy_str_to_uint16 20 | convert_numpy_str_to_uint32 21 | convert_to_str 22 | convert_to_numpy_str 23 | convert_to_numpy_bytes 24 | decode_complex 25 | encode_complex 26 | get_attribute 27 | convert_attribute_to_string 28 | get_attribute_string 29 | convert_attribute_to_string_array 30 | get_attribute_string_array 31 | set_attribute 32 | set_attribute_string 33 | set_attribute_string_array 34 | set_attributes_all 35 | del_attribute 36 | 37 | 38 | escape_path 39 | ----------- 40 | 41 | .. autofunction:: escape_path 42 | 43 | 44 | unescape_path 45 | ------------- 46 | 47 | .. autofunction:: unescape_path 48 | 49 | 50 | process_path 51 | ------------ 52 | 53 | .. autofunction:: process_path 54 | 55 | 56 | does_dtype_have_a_zero_shape 57 | ---------------------------- 58 | 59 | .. autofunction:: does_dtype_have_a_zero_shape 60 | 61 | 62 | write_data 63 | ---------- 64 | 65 | .. autofunction:: write_data 66 | 67 | 68 | read_data 69 | --------- 70 | 71 | .. autofunction:: read_data 72 | 73 | 74 | write_object_array 75 | ------------------ 76 | 77 | .. autofunction:: write_object_array 78 | 79 | 80 | read_object_array 81 | ------------------ 82 | 83 | .. autofunction:: read_object_array 84 | 85 | 86 | next_unused_name_in_group 87 | ------------------------- 88 | 89 | .. autofunction:: next_unused_name_in_group 90 | 91 | 92 | convert_numpy_str_to_uint16 93 | --------------------------- 94 | 95 | .. autofunction:: convert_numpy_str_to_uint16 96 | 97 | 98 | convert_numpy_str_to_uint32 99 | --------------------------- 100 | 101 | .. autofunction:: convert_numpy_str_to_uint32 102 | 103 | 104 | convert_to_str 105 | -------------- 106 | 107 | .. 
autofunction:: convert_to_str 108 | 109 | 110 | convert_to_numpy_str 111 | -------------------- 112 | 113 | .. autofunction:: convert_to_numpy_str 114 | 115 | 116 | convert_to_numpy_bytes 117 | ---------------------- 118 | 119 | .. autofunction:: convert_to_numpy_bytes 120 | 121 | 122 | decode_complex 123 | -------------- 124 | 125 | .. autofunction:: decode_complex 126 | 127 | 128 | encode_complex 129 | -------------- 130 | 131 | .. autofunction:: encode_complex 132 | 133 | 134 | get_attribute 135 | ------------- 136 | 137 | .. autofunction:: get_attribute 138 | 139 | 140 | 141 | convert_attribute_to_string 142 | --------------------------- 143 | 144 | .. autofunction:: convert_attribute_to_string 145 | 146 | 147 | get_attribute_string 148 | -------------------- 149 | 150 | .. autofunction:: get_attribute_string 151 | 152 | 153 | convert_attribute_to_string_array 154 | --------------------------------- 155 | 156 | .. autofunction:: convert_attribute_to_string_array 157 | 158 | 159 | get_attribute_string_array 160 | -------------------------- 161 | 162 | .. autofunction:: get_attribute_string_array 163 | 164 | 165 | set_attribute 166 | ------------- 167 | 168 | .. autofunction:: set_attribute 169 | 170 | 171 | set_attribute_string 172 | -------------------- 173 | 174 | .. autofunction:: set_attribute_string 175 | 176 | 177 | set_attribute_string_array 178 | -------------------------- 179 | 180 | .. autofunction:: set_attribute_string_array 181 | 182 | 183 | set_attributes_all 184 | ------------------ 185 | 186 | .. autofunction:: set_attributes_all 187 | 188 | 189 | del_attribute 190 | ------------- 191 | 192 | .. autofunction:: del_attribute 193 | 194 | -------------------------------------------------------------------------------- /tests/test_ndarray_O_field.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import tempfile 30 | 31 | import numpy as np 32 | import h5py 33 | 34 | import hdf5storage 35 | 36 | 37 | # A series of tests to make sure that structured ndarrays with a field 38 | # that has an object dtype are written like structs (are HDF5 Groups) 39 | # but are written as an HDF5 COMPOUND Dataset otherwise (even in the 40 | # case that a field's name is 'O'). 
41 | 42 | 43 | def test_O_field_compound(): 44 | name = '/a' 45 | data = np.empty(shape=(1, ), dtype=[('O', 'int8'), ('a', 'uint16')]) 46 | fld = None 47 | try: 48 | fld = tempfile.mkstemp() 49 | os.close(fld[0]) 50 | filename = fld[1] 51 | hdf5storage.write(data, path=name, filename=filename, 52 | matlab_compatible=False, 53 | structured_numpy_ndarray_as_struct=False) 54 | with h5py.File(filename, mode='r') as f: 55 | assert isinstance(f[name], h5py.Dataset) 56 | except: 57 | raise 58 | finally: 59 | if fld is not None: 60 | os.remove(fld[1]) 61 | 62 | 63 | def test_object_field_group(): 64 | name = '/a' 65 | data = np.empty(shape=(1, ), dtype=[('a', 'O'), ('b', 'uint16')]) 66 | data['a'][0] = [1, 2] 67 | fld = None 68 | try: 69 | fld = tempfile.mkstemp() 70 | os.close(fld[0]) 71 | filename = fld[1] 72 | hdf5storage.write(data, path=name, filename=filename, 73 | matlab_compatible=False, 74 | structured_numpy_ndarray_as_struct=False) 75 | with h5py.File(filename, mode='r') as f: 76 | assert isinstance(f[name], h5py.Group) 77 | except: 78 | raise 79 | finally: 80 | if fld is not None: 81 | os.remove(fld[1]) 82 | 83 | 84 | def test_O_and_object_field_group(): 85 | name = '/a' 86 | data = np.empty(shape=(1, ), dtype=[('a', 'O'), ('O', 'uint16')]) 87 | data['a'][0] = [1, 2] 88 | fld = None 89 | try: 90 | fld = tempfile.mkstemp() 91 | os.close(fld[0]) 92 | filename = fld[1] 93 | hdf5storage.write(data, path=name, filename=filename, 94 | matlab_compatible=False, 95 | structured_numpy_ndarray_as_struct=False) 96 | with h5py.File(filename, mode='r') as f: 97 | assert isinstance(f[name], h5py.Group) 98 | except: 99 | raise 100 | finally: 101 | if fld is not None: 102 | os.remove(fld[1]) 103 | -------------------------------------------------------------------------------- /tests/test_marshaller_plugins.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import tempfile 30 | 31 | import pkg_resources 32 | 33 | from nose.tools import assert_equal as assert_equal_nose 34 | 35 | import unittest 36 | 37 | import hdf5storage 38 | 39 | # Check if the example package is installed because some tests will 40 | # depend on it. 
41 | try: 42 | import example_hdf5storage_marshaller_plugin 43 | has_example_hdf5storage_marshaller_plugin = True 44 | except: 45 | has_example_hdf5storage_marshaller_plugin = False 46 | 47 | 48 | def test_marshaller_api_versions(): 49 | assert_equal_nose(('1.0', ), 50 | hdf5storage.supported_marshaller_api_versions()) 51 | 52 | 53 | def test_find_thirdparty_marshaller_plugins(): 54 | found_example = False 55 | apivs = hdf5storage.supported_marshaller_api_versions() 56 | plugins = hdf5storage.find_thirdparty_marshaller_plugins() 57 | assert isinstance(plugins, dict) 58 | assert_equal_nose(set(apivs), set(plugins)) 59 | for k, v in plugins.items(): 60 | assert isinstance(k, str) 61 | assert isinstance(v, dict) 62 | for k2, v2 in v.items(): 63 | assert isinstance(k2, str) 64 | assert isinstance(v2, pkg_resources.EntryPoint) 65 | if k2 == 'example_hdf5storage_marshaller_plugin': 66 | found_example = True 67 | assert_equal_nose(has_example_hdf5storage_marshaller_plugin, 68 | found_example) 69 | 70 | 71 | @unittest.skipUnless(has_example_hdf5storage_marshaller_plugin, 72 | 'requires example_hdf5storage_marshaller_plugin') 73 | def test_plugin_marshaller_SubList(): 74 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 75 | lazy_loading=True) 76 | options = hdf5storage.Options(store_python_metadata=True, 77 | matlab_compatible=False, 78 | marshaller_collection=mc) 79 | ell = [1, 2, 'b1', b'3991', True, None] 80 | data = example_hdf5storage_marshaller_plugin.SubList(ell) 81 | f = None 82 | name = '/a' 83 | try: 84 | f = tempfile.mkstemp() 85 | os.close(f[0]) 86 | filename = f[1] 87 | hdf5storage.write(data, path=name, filename=filename, 88 | options=options) 89 | out = hdf5storage.read(path=name, filename=filename, 90 | options=options) 91 | except: 92 | raise 93 | finally: 94 | if f is not None: 95 | os.remove(f[1]) 96 | assert_equal_nose(ell, list(out)) 97 | assert_equal_nose(type(out), 98 | example_hdf5storage_marshaller_plugin.SubList) 99 | 
-------------------------------------------------------------------------------- /tests/test_matlab_compatibility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import os 28 | import os.path 29 | import subprocess 30 | 31 | from nose.plugins.skip import SkipTest 32 | 33 | import hdf5storage 34 | 35 | from asserts import assert_equal_from_matlab 36 | 37 | mat_files = ['types_v7p3.mat', 'types_v7.mat', 38 | 'python_v7p3.mat', 'python_v7.mat'] 39 | for i in range(0, len(mat_files)): 40 | mat_files[i] = os.path.join(os.path.dirname(__file__), mat_files[i]) 41 | 42 | script_names = ['make_mat_with_all_types.m', 'read_write_mat.m'] 43 | for i in range(0, len(script_names)): 44 | script_names[i] = os.path.join(os.path.dirname(__file__), 45 | script_names[i]) 46 | 47 | types_v7 = dict() 48 | types_v7p3 = dict() 49 | python_v7 = dict() 50 | python_v7p3 = dict() 51 | 52 | 53 | # Have a flag for whether matlab was found and run successfully or not, 54 | # so tests can be skipped if not. 55 | ran_matlab_successful = [False] 56 | 57 | 58 | def setup_module(): 59 | teardown_module() 60 | try: 61 | import scipy.io 62 | matlab_command = "run('" + script_names[0] + "')" 63 | subprocess.check_call(['matlab', '-nosplash', '-nodesktop', 64 | '-nojvm', '-r', matlab_command]) 65 | scipy.io.loadmat(file_name=mat_files[1], mdict=types_v7) 66 | hdf5storage.loadmat(file_name=mat_files[0], mdict=types_v7p3) 67 | 68 | hdf5storage.savemat(file_name=mat_files[2], mdict=types_v7p3) 69 | matlab_command = "run('" + script_names[1] + "')" 70 | subprocess.check_call(['matlab', '-nosplash', '-nodesktop', 71 | '-nojvm', '-r', matlab_command]) 72 | scipy.io.loadmat(file_name=mat_files[3], mdict=python_v7) 73 | hdf5storage.loadmat(file_name=mat_files[2], mdict=python_v7p3) 74 | except: 75 | pass 76 | else: 77 | ran_matlab_successful[0] = True 78 | 79 | 80 | def teardown_module(): 81 | for name in mat_files: 82 | if os.path.exists(name): 83 | os.remove(name) 84 | 85 | 86 | def test_read_from_matlab(): 87 | if not ran_matlab_successful[0]: 88 | raise SkipTest 89 | for k in (set(types_v7.keys()) - set(['__version__', '__header__', \ 90 | 
'__globals__'])): 91 | yield check_variable_from_matlab, k 92 | 93 | 94 | def test_to_matlab_back(): 95 | if not ran_matlab_successful[0]: 96 | raise SkipTest 97 | for k in set(types_v7p3.keys()): 98 | yield check_variable_to_matlab_back, k 99 | 100 | 101 | def check_variable_from_matlab(name): 102 | assert_equal_from_matlab(types_v7p3[name], types_v7[name]) 103 | 104 | 105 | def check_variable_to_matlab_back(name): 106 | assert_equal_from_matlab(python_v7p3[name], types_v7[name]) 107 | -------------------------------------------------------------------------------- /tests/test_multi_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | import os 29 | import os.path 30 | import random 31 | import tempfile 32 | 33 | import hdf5storage 34 | 35 | from asserts import assert_equal 36 | from make_randoms import min_dict_keys, max_dict_keys, random_name, \ 37 | random_numpy, random_numpy_shape, dict_value_subarray_dimensions, \ 38 | max_dict_value_subarray_axis_length, dtypes 39 | 40 | 41 | random.seed() 42 | 43 | 44 | # A series of tests to make sure that more than one data item can be 45 | # written or read at a time using the writes and reads functions. 46 | 47 | def test_multi_write(): 48 | # Makes a random dict of random paths and variables (random number 49 | # of randomized paths with random numpy arrays as values). 50 | data = dict() 51 | for i in range(0, random.randint(min_dict_keys, \ 52 | max_dict_keys)): 53 | name = random_name() 54 | data[name] = \ 55 | random_numpy(random_numpy_shape( \ 56 | dict_value_subarray_dimensions, \ 57 | max_dict_value_subarray_axis_length), \ 58 | dtype=random.choice(dtypes)) 59 | 60 | # Write it and then read it back item by item. 
61 | fld = None 62 | try: 63 | fld = tempfile.mkstemp() 64 | os.close(fld[0]) 65 | filename = fld[1] 66 | hdf5storage.writes(mdict=data, filename=filename) 67 | out = dict() 68 | for p in data: 69 | out[p] = hdf5storage.read(path=p, filename=filename) 70 | except: 71 | raise 72 | finally: 73 | if fld is not None: 74 | os.remove(fld[1]) 75 | 76 | # Compare data and out. 77 | assert_equal(out, data) 78 | 79 | 80 | def test_multi_read(): 81 | # Makes a random dict of random paths and variables (random number 82 | # of randomized paths with random numpy arrays as values). 83 | data = dict() 84 | for i in range(0, random.randint(min_dict_keys, \ 85 | max_dict_keys)): 86 | name = random_name() 87 | data[name] = \ 88 | random_numpy(random_numpy_shape( \ 89 | dict_value_subarray_dimensions, \ 90 | max_dict_value_subarray_axis_length), \ 91 | dtype=random.choice(dtypes)) 92 | 93 | paths = data.keys() 94 | # Write it item by item and then read it back in one unit. 95 | fld = None 96 | try: 97 | fld = tempfile.mkstemp() 98 | os.close(fld[0]) 99 | filename = fld[1] 100 | for p in paths: 101 | hdf5storage.write(data=data[p], path=p, filename=filename) 102 | out = hdf5storage.reads(paths=list(data.keys()), 103 | filename=filename) 104 | except: 105 | raise 106 | finally: 107 | if fld is not None: 108 | os.remove(fld[1]) 109 | 110 | # Compare data and out. 111 | for i, p in enumerate(paths): 112 | assert_equal(out[i], data[p]) 113 | -------------------------------------------------------------------------------- /tests/make_mat_with_all_types.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) 2013-2016, Freja Nordsiek 2 | % All rights reserved. 3 | % 4 | % Redistribution and use in source and binary forms, with or without 5 | % modification, are permitted provided that the following conditions are 6 | % met: 7 | % 8 | % 1. 
Redistributions of source code must retain the above copyright 9 | % notice, this list of conditions and the following disclaimer. 10 | % 11 | % 2. Redistributions in binary form must reproduce the above copyright 12 | % notice, this list of conditions and the following disclaimer in the 13 | % documentation and/or other materials provided with the distribution. 14 | % 15 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | % "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | % LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | % A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | % HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | % SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | % LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | % DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | % THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | % (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | % OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | clear a 29 | 30 | % Main types as scalars and arrays. 
31 | 32 | a.logical = true; 33 | 34 | a.uint8 = uint8(2); 35 | a.uint16 = uint16(28); 36 | a.uint32 = uint32(28347394); 37 | a.uint64 = uint64(234392); 38 | 39 | a.int8 = int8(-32); 40 | a.int16 = int16(284); 41 | a.int32 = int32(-7394); 42 | a.int64 = int64(2334322); 43 | 44 | a.single = single(4.2134e-2); 45 | a.single_complex = single(33.4 + 3i); 46 | a.single_nan = single(NaN); 47 | a.single_inf = single(inf); 48 | 49 | a.double = 14.2134e200; 50 | a.double_complex = 8e-30 - 3.2e40i; 51 | a.double_nan = NaN; 52 | a.double_inf = -inf; 53 | 54 | a.char = 'p'; 55 | 56 | a.logical_array = logical([1 0 0 0; 0 1 1 0]); 57 | 58 | a.uint8_array = uint8([0 1 3 4; 92 3 2 8]); 59 | a.uint16_array = uint16([0 1; 3 4; 92 3; 2 8]); 60 | a.uint32_array = uint32([0 1 3 4 92 3 2 8]); 61 | a.uint64_array = uint64([0; 1; 3; 4; 92; 3; 2; 8]); 62 | 63 | a.int8_array = int8([0 1 3 4; 92 3 2 8]); 64 | a.int16_array = int16([0 1; 3 4; 92 3; 2 8]); 65 | a.int32_array = int32([0 1 3 4 92 3 2 8]); 66 | a.int64_array = int64([0; 1; 3; 4; 92; 3; 2; 8]); 67 | 68 | a.single_array = single(rand(4, 9)); 69 | a.single_array_complex = single(rand(2,7) + 1i*rand(2,7)); 70 | 71 | a.double_array = rand(3, 2); 72 | a.double_array_complex = rand(5,2) + 1i*rand(5,2); 73 | 74 | a.char_array = ['ivkea'; 'avvai']; 75 | a.char_cell_array = {'v83nv', 'aADvai98v3'}; 76 | 77 | % Empties of main types. 78 | 79 | a.logical_empty = logical([]); 80 | a.uint8_empty = uint8([]); 81 | a.uint16_empty = uint16([]); 82 | a.uint32_empty = uint32([]); 83 | a.uint64_empty = uint64([]); 84 | a.int8_empty = int8([]); 85 | a.int16_empty = int16([]); 86 | a.int32_empty = int32([]); 87 | a.int64_empty = int64([]); 88 | a.single_empty = single([]); 89 | a.double_empty = []; 90 | 91 | % Main container types. 
92 | 93 | a.cell = {5.34+9i}; 94 | a.cell_array = {1, [2 3]; 8.3, -[3; 3]; [], 20}; 95 | a.cell_empty = {}; 96 | 97 | a.struct = struct('a', {3.3}, 'bc', {[1 4 5]}); 98 | a.struct_empty = struct('vea', {}, 'b', {}); 99 | a.struct_array = struct('a', {3.3; 3}, 'avav_Ab', {[1 4 5]; []}); 100 | 101 | % % Function handles. 102 | % 103 | % ab = 1:6; 104 | % a.fhandle = @sin; 105 | % a.fhandle_args = @(x, y) x .* cos(y); 106 | % a.fhandle_args_environment = @(m, n) m*(b.*rand(size(b))) + n; 107 | % 108 | % % Map type. 109 | % 110 | % a.map_char = containers.Map({'4v', 'u', '2vn'}, {4, uint8(9), 'bafd'}); 111 | % a.map_single = containers.Map({single(3), single(38.3), single(2e-3)}, {4, uint8(9), 'bafd'}); 112 | % a.map_empty = containers.Map; 113 | % 114 | % % The categorical type. 115 | % 116 | % b = {'small', 'medium', 'small', 'medium', 'medium', 'large', 'medium'}; 117 | % c = {'small', 'medium', 'large'}; 118 | % d = round(2*rand(10,3)); 119 | % 120 | % a.categorical = categorical(b); 121 | % a.categorical_ordinal = categorical(b, c, 'Ordinal', true); 122 | % a.categorical_ordinal_int = categorical(d, 0:2, c, 'Ordinal', true); 123 | % 124 | % a.categorical_empty = categorical({}); 125 | % a.categorical_ordinal_empty = categorical({}, c, 'Ordinal', true); 126 | % a.categorical_ordinal_int_empty = categorical([], 0:2, c, 'Ordinal', true); 127 | % 128 | % % Tables. 129 | % 130 | % a.table = readtable('patients.dat'); 131 | % a.table_oneentry = a.table(1,:); 132 | % a.table_empty = a.table([], :); 133 | % 134 | % % Not doing time series yet. 135 | 136 | save('types_v7p3.mat','-struct','a','-v7.3') 137 | save('types_v7.mat','-struct','a','-v7') 138 | 139 | exit 140 | -------------------------------------------------------------------------------- /doc/source/development.rst: -------------------------------------------------------------------------------- 1 | .. 
currentmodule:: hdf5storage 2 | 3 | ======================= 4 | Development Information 5 | ======================= 6 | 7 | The source code can be found on Github at 8 | https://github.com/frejanordsiek/hdf5storage 9 | 10 | Package Overview 11 | ================ 12 | 13 | The package is currently a pure Python package; using no Cython, C/C++, 14 | or other languages. 15 | 16 | Also, pickling is not used at all and should not be added. It is a 17 | security risk since pickled data is read through the interpreter 18 | allowing arbitrary code (which could be malicious) to be executed in the 19 | interpreter. One wants to be able to read possibly HDF5 and MAT files 20 | from untrusted sources, so pickling is avoided in this package. 21 | 22 | The :py:mod:`hdf5storage` module contains the high level reading and 23 | writing functions, as well as the :py:class:`Options` class for 24 | encapsulating all the various options governing how data is read and 25 | written. The high level reading and writing functions can either be 26 | given an :py:class:`Options` object, or be given the keyword arguments 27 | that its constructur takes (they will make one from those 28 | arguments). There is also the :py:class:`MarshallerCollection` which 29 | holds all the Marshallers (more below) and provides functions to find 30 | the appropriate Marshaller given the ``type`` of a Python object, the 31 | type string used for the 'Python.Type' Attribute, or the MATLAB class 32 | string (contained in the 'MATLAB_class' Attribute). One can give the 33 | collection additional user provided Marshallers. 34 | 35 | The :py:mod:`hdf5storage.exceptions` module contains the special 36 | exceptions/errors required for this package not covered by existing 37 | Python exceptions/errors or those from the h5py package. 38 | 39 | :py:mod:`hdf5storage.Marshallers` contains all the Marshallers for the 40 | different Python data types that can be read from or written to an HDF5 41 | file. 
They are all automatically added to any
:py:class:`MarshallerCollection` which inspects this module and grabs
all classes within it (if a class other than a Marshaller is added to
this module, :py:class:`MarshallerCollection` will need to be
modified).
Then there are functions
to convert between different string representations, as well as encode
for writing and decode after reading complex types.
108 | 109 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\hdf5storage.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\hdf5storage.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 
113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 
178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " 
texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/hdf5storage.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/hdf5storage.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/hdf5storage" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/hdf5storage" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /tests/test_marshaller_collection_priority.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import random 28 | 29 | from nose.tools import raises 30 | from nose.tools import assert_equal as assert_equal_nose 31 | 32 | import hdf5storage 33 | import hdf5storage.Marshallers 34 | 35 | random.seed() 36 | 37 | # Check if the example package is installed because some tests will 38 | # depend on it. 39 | try: 40 | from example_hdf5storage_marshaller_plugin import SubListMarshaller 41 | has_example_hdf5storage_marshaller_plugin = True 42 | except: 43 | has_example_hdf5storage_marshaller_plugin = False 44 | 45 | 46 | # Need a new marshaller that does nothing. 
47 | class JunkMarshaller(hdf5storage.Marshallers.TypeMarshaller): 48 | pass 49 | 50 | 51 | @raises(TypeError) 52 | def check_error_non_tuplelist(obj): 53 | hdf5storage.MarshallerCollection(priority=obj) 54 | 55 | 56 | def test_error_non_tuplelist(): 57 | for v in (None, True, 1, 2.3, '39va', b'391', set(), dict()): 58 | yield check_error_non_tuplelist, v 59 | 60 | 61 | @raises(ValueError) 62 | def test_error_missing_element(): 63 | need = ('builtin', 'user', 'plugin') 64 | hdf5storage.MarshallerCollection(priority=[random.choice(need) 65 | for i in range(2)]) 66 | 67 | 68 | @raises(ValueError) 69 | def test_error_extra_element(): 70 | hdf5storage.MarshallerCollection(priority=('builtin', 'user', 71 | 'plugin', 'extra')) 72 | 73 | 74 | def test_builtin_plugin_user(): 75 | m = JunkMarshaller() 76 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 77 | priority=('builtin', 'plugin', 78 | 'user'), 79 | marshallers=(m, )) 80 | assert_equal_nose(m, mc._marshallers[-1]) 81 | if has_example_hdf5storage_marshaller_plugin: 82 | assert isinstance(mc._marshallers[-2], 83 | SubListMarshaller) 84 | 85 | 86 | def test_builtin_user_plugin(): 87 | m = JunkMarshaller() 88 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 89 | priority=('builtin', 'user', 90 | 'plugin'), 91 | marshallers=(m, )) 92 | if has_example_hdf5storage_marshaller_plugin: 93 | assert isinstance(mc._marshallers[-1], 94 | SubListMarshaller) 95 | assert_equal_nose(m, mc._marshallers[-2]) 96 | else: 97 | assert_equal_nose(m, mc._marshallers[-1]) 98 | 99 | 100 | def test_plugin_builtin_user(): 101 | m = JunkMarshaller() 102 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 103 | priority=('plugin', 'builtin', 104 | 'user'), 105 | marshallers=(m, )) 106 | assert_equal_nose(m, mc._marshallers[-1]) 107 | if has_example_hdf5storage_marshaller_plugin: 108 | assert isinstance(mc._marshallers[0], 109 | SubListMarshaller) 110 | 111 | 112 | def test_plugin_user_builtin(): 113 | m = JunkMarshaller() 
114 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 115 | priority=('plugin', 'user', 116 | 'builtin'), 117 | marshallers=(m, )) 118 | if has_example_hdf5storage_marshaller_plugin: 119 | assert isinstance(mc._marshallers[0], 120 | SubListMarshaller) 121 | assert_equal_nose(m, mc._marshallers[1]) 122 | else: 123 | assert_equal_nose(m, mc._marshallers[0]) 124 | 125 | 126 | def test_user_builtin_plugin(): 127 | m = JunkMarshaller() 128 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 129 | priority=('user', 'builtin', 130 | 'plugin'), 131 | marshallers=(m, )) 132 | assert_equal_nose(m, mc._marshallers[0]) 133 | if has_example_hdf5storage_marshaller_plugin: 134 | assert isinstance(mc._marshallers[-1], 135 | SubListMarshaller) 136 | 137 | 138 | def test_user_plugin_builtin(): 139 | m = JunkMarshaller() 140 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 141 | priority=('user', 'plugin', 142 | 'builtin'), 143 | marshallers=(m, )) 144 | assert_equal_nose(m, mc._marshallers[0]) 145 | if has_example_hdf5storage_marshaller_plugin: 146 | assert isinstance(mc._marshallers[1], 147 | SubListMarshaller) 148 | -------------------------------------------------------------------------------- /tests/test_julia_mat_compatibility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 
14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import subprocess 30 | import tempfile 31 | 32 | import numpy as np 33 | 34 | from nose.plugins.skip import SkipTest 35 | 36 | import hdf5storage 37 | 38 | from asserts import assert_equal_from_matlab 39 | from make_randoms import dtypes, random_numpy_scalar, random_numpy, \ 40 | random_numpy_shape, random_structured_numpy_array 41 | 42 | # Have a flag for whether julia was found and run successfully or not, 43 | # so tests can be skipped if not. 44 | ran_julia_successful = [False] 45 | 46 | mat_files = ['to_julia_v7.mat', 'to_julia_v7p3.mat', 47 | 'julia_v7_to_v7p3.mat', 'julia_v7p3_to_v7p3.mat'] 48 | 49 | script_names = ['julia_read_mat.jl'] 50 | for i in range(0, len(script_names)): 51 | script_names[i] = os.path.join(os.path.dirname(__file__), 52 | script_names[i]) 53 | 54 | to_julia = dict() 55 | 56 | 57 | # Julia MAT tends to squeeze extra singleton dimensions beyond 2, 58 | # meaning a (1, 1, 1) goes to (1, 1). In addition, string conversions go 59 | # on when going back and forth. Thus, string types will be excluded and 60 | # the minimum length along each dimension will be 2. 
61 | 62 | dtypes_exclude = set(('S', 'U')) 63 | dtypes_to_do = tuple(set(dtypes).difference(dtypes_exclude)) 64 | 65 | for dt in dtypes_to_do: 66 | to_julia[dt] = random_numpy_scalar(dt) 67 | for dm in (2, 3): 68 | for dt in dtypes_to_do: 69 | to_julia[dt + '_array_' + str(dm)] = \ 70 | random_numpy(random_numpy_shape(dm, 6, min_length=2), dt) 71 | for dt in dtypes_to_do: 72 | if dt in ('S', 'U'): 73 | to_julia[dt + '_empty'] = np.array([], dtype=dt + str(6)) 74 | else: 75 | to_julia[dt + '_empty'] = np.array([], dtype=dt) 76 | 77 | to_julia['float32_nan'] = np.float32(np.NaN) 78 | to_julia['float32_inf'] = np.float32(np.inf) 79 | to_julia['float64_nan'] = np.float64(np.NaN) 80 | to_julia['float64_inf'] = np.float64(-np.inf) 81 | 82 | to_julia['object'] = random_numpy_scalar('object', \ 83 | object_element_dtypes=dtypes_to_do) 84 | to_julia['object_array_2'] = random_numpy( \ 85 | random_numpy_shape(2, 6, min_length=2), \ 86 | 'object', object_element_dtypes=dtypes_to_do) 87 | to_julia['object_array_3'] = random_numpy( \ 88 | random_numpy_shape(3, 6, min_length=2), \ 89 | 'object', object_element_dtypes=dtypes_to_do) 90 | 91 | 92 | # Julia MAT doesn't seem to read and then write back empty object 93 | # types. 94 | 95 | #to_julia['object_empty'] = np.array([], dtype='object') 96 | 97 | to_julia['struct'] = random_structured_numpy_array((1,), \ 98 | nondigits_fields=True) 99 | to_julia['struct_empty'] = random_structured_numpy_array(tuple(), \ 100 | nondigits_fields=True) 101 | 102 | # Something goes wrong with 2 dimensional structure arrays that warrants 103 | # further investigation. 
104 | 105 | #to_julia['struct_array_2'] = random_structured_numpy_array((3, 5), \ 106 | # nondigits_fields=True) 107 | 108 | 109 | from_julia_v7_to_v7p3 = dict() 110 | from_julia_v7p3_to_v7p3 = dict() 111 | 112 | 113 | 114 | def julia_command(julia_file, fin, fout): 115 | subprocess.check_call(['julia', julia_file, 116 | fin, fout]) 117 | 118 | 119 | def setup_module(): 120 | temp_dir = None 121 | try: 122 | import scipy.io 123 | temp_dir = tempfile.mkdtemp() 124 | for i in range(0, len(mat_files)): 125 | mat_files[i] = os.path.join(temp_dir, mat_files[i]) 126 | scipy.io.savemat(file_name=mat_files[0], mdict=to_julia) 127 | hdf5storage.savemat(file_name=mat_files[1], mdict=to_julia) 128 | 129 | #julia_command(script_names[0], mat_files[0], mat_files[2]) 130 | julia_command(script_names[0], mat_files[1], mat_files[3]) 131 | 132 | #hdf5storage.loadmat(file_name=mat_files[2], 133 | # mdict=from_julia_v7_to_v7p3) 134 | hdf5storage.loadmat(file_name=mat_files[3], 135 | mdict=from_julia_v7p3_to_v7p3) 136 | except: 137 | pass 138 | else: 139 | ran_julia_successful[0] = True 140 | finally: 141 | for name in mat_files: 142 | if os.path.exists(name): 143 | os.remove(name) 144 | if temp_dir is not None and os.path.exists(temp_dir): 145 | os.rmdir(temp_dir) 146 | 147 | 148 | def teardown_module(): 149 | pass 150 | 151 | 152 | #def test_julia_v7_to_v7p3(): 153 | # for k in to_julia.keys(): 154 | # yield check_variable_julia_v7_to_v7p3, k 155 | 156 | 157 | def test_julia_v7p3_to_v7p3(): 158 | if not ran_julia_successful[0]: 159 | raise SkipTest 160 | for k in to_julia.keys(): 161 | yield check_variable_julia_v7p3_to_v7p3, k 162 | 163 | 164 | def check_variable_julia_v7_to_v7p3(name): 165 | assert name in from_julia_v7_to_v7p3 166 | assert_equal_from_matlab(from_julia_v7_to_v7p3[name], 167 | to_julia[name]) 168 | 169 | 170 | def check_variable_julia_v7p3_to_v7p3(name): 171 | assert name in from_julia_v7p3_to_v7p3 172 | assert_equal_from_matlab(from_julia_v7p3_to_v7p3[name], 173 | 
to_julia[name]) 174 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.Marshallers.rst: -------------------------------------------------------------------------------- 1 | hdf5storage.Marshallers 2 | ======================= 3 | 4 | .. currentmodule:: hdf5storage.Marshallers 5 | 6 | .. automodule:: hdf5storage.Marshallers 7 | 8 | .. autosummary:: 9 | 10 | TypeMarshaller 11 | NumpyScalarArrayMarshaller 12 | PythonScalarMarshaller 13 | PythonStringMarshaller 14 | PythonNoneMarshaller 15 | PythonDictMarshaller 16 | PythonListMarshaller 17 | PythonTupleSetDequeMarshaller 18 | 19 | 20 | TypeMarshaller 21 | -------------- 22 | 23 | .. autoclass:: TypeMarshaller 24 | :members: update_type_lookups, get_type_string, read, read_approximate, write, write_metadata 25 | :show-inheritance: 26 | 27 | .. autoinstanceattribute:: TypeMarshaller.required_parent_modules 28 | :annotation: = () 29 | 30 | .. autoinstanceattribute:: TypeMarshaller.required_modules 31 | :annotation: = () 32 | 33 | .. autoinstanceattribute:: TypeMarshaller.python_attributes 34 | :annotation: = {'Python.Type'} 35 | 36 | .. autoinstanceattribute:: TypeMarshaller.matlab_attributes 37 | :annotation: = {'H5PATH'} 38 | 39 | .. autoinstanceattribute:: TypeMarshaller.types 40 | :annotation: = () 41 | 42 | .. autoinstanceattribute:: TypeMarshaller.python_type_strings 43 | :annotation: = () 44 | 45 | .. autoinstanceattribute:: TypeMarshaller.matlab_classes 46 | :annotation: = () 47 | 48 | .. autoinstanceattribute:: TypeMarshaller.type_to_typestring 49 | :annotation: = dict() 50 | 51 | .. autoinstanceattribute:: TypeMarshaller.typestring_to_type 52 | :annotation: = dict() 53 | 54 | 55 | NumpyScalarArrayMarshaller 56 | -------------------------- 57 | 58 | .. 
autoclass:: NumpyScalarArrayMarshaller 59 | :members: read, write, write_metadata 60 | :show-inheritance: 61 | 62 | Handles the following :: 63 | 64 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 65 | 'Python.numpy.UnderlyingType', 66 | 'Python.numpy.Container', 'Python.Fields'} 67 | 68 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 69 | 'MATLAB_int_decode', 'MATLAB_fields'} 70 | 71 | types = (np.ndarray, np.matrix, 72 | np.chararray, np.core.records.recarray, 73 | np.bool_, np.void, 74 | np.uint8, np.uint16, np.uint32, np.uint64, 75 | np.int8, np.int16, np.int32, np.int64, 76 | np.float16, np.float32, np.float64, 77 | np.complex64, np.complex128, 78 | np.bytes_, np.str_, np.object_) 79 | 80 | python_type_strings = ('numpy.ndarray', 'numpy.matrix', 81 | 'numpy.chararray', 'numpy.recarray', 82 | 'numpy.bool_', 'numpy.void', 83 | 'numpy.uint8', 'numpy.uint16', 84 | 'numpy.uint32', 'numpy.uint64', 'numpy.int8', 85 | 'numpy.int16', 'numpy.int32', 'numpy.int64', 86 | 'numpy.float16', 'numpy.float32', 'numpy.float64', 87 | 'numpy.complex64', 'numpy.complex128', 88 | 'numpy.bytes_', 'numpy.str_', 'numpy.object_') 89 | 90 | matlab_classes = ('logical', 'char', 'single', 'double', 'uint8', 91 | 'uint16', 'uint32', 'uint64', 'int8', 'int16', 92 | 'int32', 'int64', 'cell', 'canonical empty') 93 | 94 | 95 | PythonScalarMarshaller 96 | ---------------------- 97 | 98 | .. 
autoclass:: PythonScalarMarshaller 99 | :members: read, write 100 | :show-inheritance: 101 | 102 | Handles the following :: 103 | 104 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 105 | 'Python.numpy.UnderlyingType', 106 | 'Python.numpy.Container', 'Python.Fields'} 107 | 108 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 109 | 'MATLAB_int_decode'} 110 | 111 | types = (bool, int, float, complex) 112 | 113 | python_type_strings = ('bool', 'int', 'float', 'complex') 114 | 115 | matlab_classes = () 116 | 117 | 118 | PythonStringMarshaller 119 | ---------------------- 120 | 121 | .. autoclass:: PythonStringMarshaller 122 | :members: read, write 123 | :show-inheritance: 124 | 125 | Handles the following :: 126 | 127 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 128 | 'Python.numpy.UnderlyingType', 129 | 'Python.numpy.Container', 'Python.Fields'} 130 | 131 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 132 | 'MATLAB_int_decode'} 133 | 134 | types = (str, bytes, bytearray) 135 | 136 | python_type_strings = ('str', 'bytes', 'bytearray') 137 | 138 | matlab_classes = () 139 | 140 | 141 | PythonNoneMarshaller 142 | -------------------- 143 | 144 | .. autoclass:: PythonNoneMarshaller 145 | :members: read, write 146 | :show-inheritance: 147 | 148 | Handles the following :: 149 | 150 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 151 | 'Python.numpy.UnderlyingType', 152 | 'Python.numpy.Container', 'Python.Fields'} 153 | 154 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 155 | 'MATLAB_int_decode'} 156 | 157 | types = (builtins.NoneType, ) 158 | 159 | python_type_strings = ('builtins.NoneType', ) 160 | 161 | matlab_classes = () 162 | 163 | 164 | PythonDictMarshaller 165 | -------------------- 166 | 167 | .. 
autoclass:: PythonDictMarshaller 168 | :members: read, write, write_metadata 169 | :show-inheritance: 170 | 171 | Handles the following :: 172 | 173 | python_attributes = {'Python.Type', 'Python.Fields'} 174 | 175 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_fields'} 176 | 177 | types = (dict, collections.OrderedDict) 178 | 179 | python_type_strings = ('dict', 'collections.OrderedDict') 180 | 181 | matlab_classes = () 182 | 183 | 184 | PythonListMarshaller 185 | -------------------- 186 | 187 | .. autoclass:: PythonListMarshaller 188 | :members: read, write 189 | :show-inheritance: 190 | 191 | Handles the following :: 192 | 193 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 194 | 'Python.numpy.UnderlyingType', 195 | 'Python.numpy.Container', 'Python.Fields'} 196 | 197 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 198 | 'MATLAB_int_decode'} 199 | 200 | types = (list, ) 201 | 202 | python_type_strings = ('list', ) 203 | 204 | matlab_classes = () 205 | 206 | 207 | PythonTupleSetDequeMarshaller 208 | ----------------------------- 209 | 210 | .. autoclass:: PythonTupleSetDequeMarshaller 211 | :members: read, write 212 | :show-inheritance: 213 | 214 | Handles the following :: 215 | 216 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 217 | 'Python.numpy.UnderlyingType', 218 | 'Python.numpy.Container', 'Python.Fields'} 219 | 220 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 221 | 'MATLAB_int_decode'} 222 | 223 | types = (tuple, set, frozenset, collections.deque) 224 | 225 | python_type_strings = ('tuple', 'set', 'frozenset', 'collections.deque') 226 | 227 | matlab_classes = () 228 | 229 | -------------------------------------------------------------------------------- /doc/source/compression.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: hdf5storage 2 | 3 | .. 
_Compression:

===========
Compression
===========

The HDF5 libraries and the :py:mod:`h5py` module support transparent
compression of data in HDF5 files.

The use of compression can sometimes drastically reduce file size, often
makes it faster to read the data from the file, and sometimes makes it
faster to write the data. However, not all data compresses very well and
can occasionally end up larger after compression than it was
uncompressed. Compression does cost CPU time both when compressing the
data and when decompressing it. The reason this can sometimes lead to
faster read and write times is because disks are very slow and the space
savings can save enough disk access time to make up for the CPU time.

All versions of this package can read compressed data, but not all
versions can write compressed data.

.. versionadded:: 0.1.9

   HDF5 write compression features added along with several options to
   control them in :py:class:`Options`.


.. versionadded:: 0.1.7

   :py:class:`Options` takes the compression options but ignores
   them.


.. warning::

   Passing the compression options for versions earlier than ``0.1.7``
   will result in an error.


Enabling Compression
====================

Compression, which is enabled by default, is controlled by setting
:py:attr:`Options.compress` to ``True`` or passing ``compress=X`` to
:py:func:`write` and :py:func:`savemat` where ``X`` is ``True`` or
``False``.


.. note::

   Not all python objects written to the HDF5 file will be compressed,
   or even support compression. For one, :py:mod:`numpy` scalars or any
   type that is stored as one do not support compression due to
   limitations of the HDF5 library, though compressing them would be a
   waste (hence the lack of support).
58 | 59 | 60 | Setting The Minimum Data Size for Compression 61 | ============================================= 62 | 63 | Compressing small pieces of data often wastes space (compressed size is 64 | larger than uncompressed size) and CPU time. Due to this, python objects 65 | have to be larger than a particular size before this package will 66 | compress them. The threshold, in bytes, is controlled by setting 67 | :py:attr:`Options.compress_size_threshold` or passing 68 | ``compress_size_threshold=X`` to :py:func:`write` and 69 | :py:func:`savemat` where ``X`` is a non-negative integer. The default 70 | value is 16 KB. 71 | 72 | 73 | Controlling The Compression Algorithm And Level 74 | =============================================== 75 | 76 | Many compression algorithms can be used with HDF5 files, though only 77 | three are common. The Deflate algorithm (sometimes known as the GZIP 78 | algorithm), LZF algorithm, and SZIP algorithms are the algorithms that 79 | the HDF5 library is explicitly setup to support. The library has a 80 | mechanism for adding additional algorithms. Popular ones include the 81 | BZIP2 and BLOSC algorithms. 82 | 83 | The compression algorithm used is controlled by setting 84 | :py:attr:`Options.compression_algorithm` or passing 85 | ``compression_algorithm=X`` to :py:func:`write` and :py:func:`savemat`. 86 | ``X`` is the ``str`` name of the algorithm. The default is ``'gzip'`` 87 | corresponding to the Deflate/GZIP algorithm. 88 | 89 | .. note:: 90 | 91 | As of version ``0.2``, only the Deflate (``X = 'gzip'``), LZF 92 | (``X = 'lzf'``), and SZIP (``X = 'szip'``) algorithms are supported. 93 | 94 | 95 | .. note:: 96 | 97 | If doing MATLAB compatibility (:py:attr:`Options.matlab_compatible` 98 | is ``True``), only the Deflate algorithm is supported. 
99 | 100 | 101 | The algorithms, in more detail 102 | 103 | GZIP / Deflate (``'gzip'``) 104 | The common Deflate algorithm seen in the Unix and Linux ``gzip`` 105 | utility and the most common compression algorithm used in ZIP files. 106 | It is the most compatible algorithm. It achieves good compression and 107 | is reasonably fast. It has no patent or license restrictions. 108 | 109 | LZF (``'lzf'``) 110 | A very fast algorithm but with inferior compression to GZIP/Deflate. 111 | It is less commonly used than GZIP/Deflate, but similarly has no 112 | patent or license restrictions. 113 | 114 | SZIP (``'szip'``) 115 | This compression algorithm isn't always available and has patent 116 | and license restrictions. See 117 | `SZIP License `_. 118 | 119 | 120 | If GZIP/Deflate compression is being used, the compression level can be 121 | adjusted by setting :py:attr:`Options.gzip_compression_level` or passing 122 | ``gzip_compression_level=X`` to :py:func:`write` and :py:func:`savemat` 123 | where ``X`` is an integer between ``0`` and ``9`` inclusive. ``0`` is 124 | the lowest compression, but is the fastest. ``9`` gives the best 125 | compression, but is the slowest. The default is ``7``. 126 | 127 | For all compression algorithms, there is an additional filter which can 128 | help achieve better compression at relatively low cost in CPU time. It 129 | is the shuffle filter. It is controlled by setting 130 | :py:attr:`Options.shuffle_filter` or passing ``shuffle_filter=X`` to 131 | :py:func:`write` and :py:func:`savemat` where ``X`` is ``True`` or 132 | ``False``. The default is ``True``. 133 | 134 | 135 | Using Checksums 136 | =============== 137 | 138 | Fletcher32 checksums can be calculated and stored for most types of 139 | stored data in an HDF5 file. These are then checked when the data is 140 | read to catch file corruption, which will cause an error when reading 141 | the data informing the user that there is data corruption. 
The filter 142 | can be enabled or disabled separately for data that is compressed and 143 | data that is not compressed (e.g. compression is disabled, the python 144 | object can't be compressed, or the python object's data size is smaller 145 | than the compression threshold). 146 | 147 | For compressed data, it is controlled by setting 148 | :py:attr:`Options.compressed_fletcher32_filter` or passing 149 | ``compressed_fletcher32_filter=X`` to :py:func:`write` and 150 | :py:func:`savemat` where ``X`` is ``True`` or ``False``. The default is 151 | ``True``. 152 | 153 | For uncompressed data, it is controlled by setting 154 | :py:attr:`Options.uncompressed_fletcher32_filter` or passing 155 | ``uncompressed_fletcher32_filter=X`` to :py:func:`write` and 156 | :py:func:`savemat` where ``X`` is ``True`` or ``False``. The default is 157 | ``False``. 158 | 159 | 160 | .. note:: 161 | 162 | Fletcher32 checksums are not computed for anything that is stored 163 | as a :py:mod:`numpy` scalar. 164 | 165 | 166 | Chunking 167 | ======== 168 | 169 | When no filters are used (compression and Fletcher32), this package 170 | stores data in HDF5 files in a contiguous manner. The use of any filter 171 | requires that the data use chunked storage. Chunk sizes are determined 172 | automatically using the autochunk feature of :py:mod:`h5py`. The HDF5 173 | libraries make reading contiguous and chunked data transparent, though 174 | access speeds can differ and the chunk size affects the compression 175 | ratio. 176 | 177 | 178 | Further Reading 179 | =============== 180 | 181 | .. seealso:: 182 | 183 | `HDF5 Datasets Filter pipeline `_ 184 | Description of the Dataset filter pipeline in the :py:mod:`h5py` 185 | 186 | `Using Compression in HDF5 `_ 187 | FAQ on compression from the HDF Group. 
188 | 189 | `HDF5 Tutorial: Learning The Basics: Dataset Storage Layout `_ 190 | Information on Dataset storage format from the HDF Group 191 | 192 | `SZIP License `_ 193 | The license for using the SZIP compression algorithm. 194 | 195 | `SZIP COMPRESSION IN HDF PRODUCTS `_ 196 | Information on using SZIP compression from the HDF Group. 197 | 198 | `3rd Party Compression Algorithms for HDF5 `_ 199 | List of common additional compression algorithms. 200 | 201 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # hdf5storage documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Dec 22 00:05:54 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys, os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | #sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | needs_sphinx = '1.7' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
# Sphinx extensions used to build this documentation: automatic API
# documentation, cross-project reference linking, highlighted source
# viewing, summary tables, and NumPy/Google-style docstring support.
extensions = ['sphinx.ext.autodoc',
              'sphinx.ext.intersphinx',
              'sphinx.ext.viewcode',
              'sphinx.ext.autosummary',
              'sphinx.ext.napoleon']

# Directories (relative to this file) searched for page templates.
templates_path = ['_templates']

# Filename extension of the reST source files.
source_suffix = '.rst'

# Encoding of the source files (defaults to 'utf-8-sig').
#source_encoding = 'utf-8-sig'

# Document holding the root of the toctree.
master_doc = 'index'

# Project name and copyright notice shown throughout the built docs.
project = 'hdf5storage'
copyright = '2013-2020, Freja Nordsiek'

# Version information substituted for |version| (short X.Y form) and
# |release| (full version string including any alpha/beta/rc tags).
version = '0.2'
release = '0.2'

# Language for content autogenerated by Sphinx (defaults to English).
#language = None

# Override for the |today| substitution, or a strftime format used to
# generate it.
#today = ''
#today_fmt = '%B %d, %Y'

# Patterns (relative to the source directory) of files and directories
# to skip when looking for source files.
exclude_patterns = []

# Default role applied to `text` markup (defaults to no role).
#default_role = None

# Whether '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# Whether description unit titles (such as .. function::) are prefixed
# with the current module name.
#add_module_names = True

# Whether sectionauthor and moduleauthor directives produce output
# (they are ignored by default).
#show_authors = False

# Pygments style used for syntax highlighting.
pygments_style = 'sphinx'

# Prefixes ignored when sorting the module index.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# Theme for the HTML and HTML Help pages (see the Sphinx documentation
# for the builtin alternatives).
html_theme = 'sphinx_rtd_theme'

# Theme-specific look-and-feel overrides (see the theme documentation).
#html_theme_options = {}

# Directories containing custom themes, relative to this file.
#html_theme_path = []

# Title of this documentation set; defaults to
# "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar; defaults to html_title.
#html_short_title = None

# Image file (relative to this directory) placed at the top of the
# sidebar.
#html_logo = None

# Favicon for the docs; must be a Windows icon file (.ico) that is
# 16x16 or 32x32 pixels.
#html_favicon = None

# Directories of custom static files (such as style sheets). They are
# copied after the builtin static files, so a file named "default.css"
# overrides the builtin "default.css".
html_static_path = ['_static']

# strftime format for a 'Last updated on:' timestamp inserted at the
# bottom of every page ('' disables the timestamp).
#html_last_updated_fmt = '%b %d, %Y'

# Whether SmartyPants converts quotes and dashes to typographically
# correct entities.
#html_use_smartypants = True

# Custom sidebar templates (maps document names to template names).
#html_sidebars = {}

# Extra templates rendered to pages (maps page names to template
# names).
#html_additional_pages = {}

# Whether the module index is generated.
#html_domain_indices = True

# Whether the general index is generated.
#html_use_index = True

# Whether the index is split into one page per letter.
#html_split_index = False

# Link from the generated pages back to the reST sources.
html_show_sourcelink = True

# Whether "Created using Sphinx" appears in the HTML footer.
#html_show_sphinx = True

# Whether "(C) Copyright ..." appears in the HTML footer.
#html_show_copyright = True

# Base URL from which the finished HTML is served; if set, an
# OpenSearch description file is output and every page gets a <link>
# tag referring to it.
#html_use_opensearch = ''

# Filename suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Basename of the HTML help builder's output file.
htmlhelp_basename = 'hdf5storagedoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # Paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # Font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Extra content for the LaTeX preamble.
    #'preamble': '',
}

# How the document tree is grouped into LaTeX files: tuples of (source
# start file, target name, title, author, documentclass
# [howto/manual]).
latex_documents = [
    ('index', 'hdf5storage.tex', 'hdf5storage Documentation',
     'Freja Nordsiek', 'manual'),
]

# Image file (relative to this directory) placed at the top of the
# title page.
#latex_logo = None

# For "manual" documents, whether toplevel headings become parts
# instead of chapters.
#latex_use_parts = False

# Whether page references are shown after internal links.
#latex_show_pagerefs = False

# Whether URL addresses are shown after external links.
#latex_show_urls = False

# Documents appended as an appendix to all manuals.
#latex_appendices = []

# Whether the module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page: tuples of (source start file, name,
# description, authors, manual section).
man_pages = [
    ('index', 'hdf5storage', 'hdf5storage Documentation',
     ['Freja Nordsiek'], 1)
]

# Whether URL addresses are shown after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# How the document tree is grouped into Texinfo files: tuples of
# (source start file, target name, title, author, dir menu entry,
# description, category).
texinfo_documents = [
    ('index', 'hdf5storage', 'hdf5storage Documentation',
     'Freja Nordsiek', 'hdf5storage', 'One line description of project.',
     'Miscellaneous'),
]

# Documents appended as an appendix to all manuals.
#texinfo_appendices = []

# Whether the module index is generated.
244 | #texinfo_domain_indices = True 245 | 246 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 247 | #texinfo_show_urls = 'footnote' 248 | 249 | 250 | # Example configuration for intersphinx: refer to the Python standard library. 251 | 252 | intersphinx_mapping = {'python': ('http://docs.python.org/3', None), 253 | 'numpy': ('http://docs.scipy.org/doc/numpy', None), 254 | 'scipy': ('http://docs.scipy.org/doc/scipy/reference', None), 255 | 'h5py': ('http://docs.h5py.org/en/latest/', None)} 256 | 257 | # -- Options for Autosummary --------------------------------------------------- 258 | 259 | autosummary_generate = True 260 | 261 | # -- Options for Napoleon ------------------------------------------------------ 262 | 263 | napoleon_use_ivar = True 264 | napoleon_use_param = True 265 | napoleon_use_rtype = True 266 | -------------------------------------------------------------------------------- /tests/test_marshallers_requiring_modules.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import sys 29 | import tempfile 30 | 31 | import numpy as np 32 | import h5py 33 | 34 | import hdf5storage 35 | import hdf5storage.utilities 36 | import hdf5storage.Marshallers 37 | 38 | from nose.tools import assert_is_not_none, assert_is_none, \ 39 | assert_false, assert_equal, assert_not_in, assert_in 40 | 41 | 42 | class Tmarshaller(hdf5storage.Marshallers.TypeMarshaller): 43 | def read(self, f, dsetgrp, attributes, options): 44 | return 'read' 45 | 46 | def read_approximate(self, f, dsetgrp, attributes, options): 47 | return 'read_approximate' 48 | 49 | 50 | def test_missing_required_parent(): 51 | m = hdf5storage.Marshallers.TypeMarshaller() 52 | m.required_parent_modules = ['ainivieanvueaq'] 53 | m.python_type_strings = ['vi8vaeaniea'] 54 | m.types = [s for s in m.python_type_strings] 55 | m.update_type_lookups() 56 | mc = hdf5storage.MarshallerCollection(marshallers=[m]) 57 | assert_false(mc._has_required_modules[-1]) 58 | assert_false(mc._imported_required_modules[-1]) 59 | mback, has_modules = mc.get_marshaller_for_type_string( \ 60 | m.python_type_strings[0]) 61 | assert_is_not_none(mback) 62 | assert_false(has_modules) 63 | assert_false(mc._has_required_modules[-1]) 64 | assert_false(mc._imported_required_modules[-1]) 65 | for name in m.required_parent_modules: 66 | assert_not_in(name, sys.modules) 67 | 68 | 69 | def test_missing_required_lazy(): 70 | m = 
hdf5storage.Marshallers.TypeMarshaller() 71 | m.required_parent_modules = ['numpy'] 72 | m.required_modules = ['ainivieanvueaq'] 73 | m.python_type_strings = ['vi8vaeaniea'] 74 | m.types = [s for s in m.python_type_strings] 75 | m.update_type_lookups() 76 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 77 | marshallers=[m]) 78 | assert mc._has_required_modules[-1] 79 | assert_false(mc._imported_required_modules[-1]) 80 | mback, has_modules = mc.get_marshaller_for_type_string( \ 81 | m.python_type_strings[0]) 82 | assert_is_not_none(mback) 83 | assert_false(has_modules) 84 | assert_false(mc._has_required_modules[-1]) 85 | assert_false(mc._imported_required_modules[-1]) 86 | for name in m.required_modules: 87 | assert_not_in(name, sys.modules) 88 | 89 | 90 | def test_missing_required_non_lazy(): 91 | m = hdf5storage.Marshallers.TypeMarshaller() 92 | m.required_parent_modules = ['numpy'] 93 | m.required_modules = ['ainivieanvueaq'] 94 | m.python_type_strings = ['vi8vaeaniea'] 95 | m.types = [s for s in m.python_type_strings] 96 | m.update_type_lookups() 97 | mc = hdf5storage.MarshallerCollection(lazy_loading=False, 98 | marshallers=[m]) 99 | assert_false(mc._has_required_modules[-1]) 100 | assert_false(mc._imported_required_modules[-1]) 101 | mback, has_modules = mc.get_marshaller_for_type_string( \ 102 | m.python_type_strings[0]) 103 | assert_is_not_none(mback) 104 | assert_false(has_modules) 105 | assert_false(mc._has_required_modules[-1]) 106 | assert_false(mc._imported_required_modules[-1]) 107 | for name in m.required_modules: 108 | assert_not_in(name, sys.modules) 109 | 110 | 111 | def test_has_required_lazy(): 112 | m = hdf5storage.Marshallers.TypeMarshaller() 113 | m.required_parent_modules = ['json'] 114 | m.required_modules = ['json'] 115 | m.python_type_strings = ['ellipsis'] 116 | m.types = ['builtins.ellipsis'] 117 | m.update_type_lookups() 118 | for name in m.required_modules: 119 | assert_not_in(name, sys.modules) 120 | mc = 
hdf5storage.MarshallerCollection(lazy_loading=True, 121 | marshallers=[m]) 122 | for name in m.required_modules: 123 | assert_not_in(name, sys.modules) 124 | assert mc._has_required_modules[-1] 125 | assert_false(mc._imported_required_modules[-1]) 126 | mback, has_modules = mc.get_marshaller_for_type_string( \ 127 | m.python_type_strings[0]) 128 | assert_is_not_none(mback) 129 | assert has_modules 130 | assert mc._has_required_modules[-1] 131 | assert mc._imported_required_modules[-1] 132 | for name in m.required_modules: 133 | assert_in(name, sys.modules) 134 | 135 | # Do it again, but this time the modules are already loaded so that 136 | # flag should be set. 137 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 138 | marshallers=[m]) 139 | assert mc._has_required_modules[-1] 140 | assert mc._imported_required_modules[-1] 141 | mback, has_modules = mc.get_marshaller_for_type_string( \ 142 | m.python_type_strings[0]) 143 | assert_is_not_none(mback) 144 | assert has_modules 145 | assert mc._has_required_modules[-1] 146 | assert mc._imported_required_modules[-1] 147 | 148 | 149 | def test_has_required_non_lazy(): 150 | m = hdf5storage.Marshallers.TypeMarshaller() 151 | m.required_parent_modules = ['csv'] 152 | m.required_modules = ['csv'] 153 | m.python_type_strings = ['ellipsis'] 154 | m.types = ['builtins.ellipsis'] 155 | m.update_type_lookups() 156 | for name in m.required_modules: 157 | assert_not_in(name, sys.modules) 158 | mc = hdf5storage.MarshallerCollection(lazy_loading=False, 159 | marshallers=[m]) 160 | for name in m.required_modules: 161 | assert_in(name, sys.modules) 162 | assert mc._has_required_modules[-1] 163 | assert mc._imported_required_modules[-1] 164 | mback, has_modules = mc.get_marshaller_for_type_string( \ 165 | m.python_type_strings[0]) 166 | assert_is_not_none(mback) 167 | assert has_modules 168 | assert mc._has_required_modules[-1] 169 | assert mc._imported_required_modules[-1] 170 | 171 | 172 | def test_marshaller_read(): 173 | 
m = Tmarshaller() 174 | m.required_parent_modules = ['json'] 175 | m.required_modules = ['json'] 176 | m.python_type_strings = ['ellipsis'] 177 | m.types = ['builtins.ellipsis'] 178 | m.update_type_lookups() 179 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 180 | marshallers=[m]) 181 | options = hdf5storage.Options(marshaller_collection=mc) 182 | 183 | fld = None 184 | name = '/the' 185 | try: 186 | fld = tempfile.mkstemp() 187 | os.close(fld[0]) 188 | filename = fld[1] 189 | with h5py.File(filename, mode='w') as f: 190 | f.create_dataset(name, data=np.int64([1])) 191 | f[name].attrs.create('Python.Type', 192 | b'ellipsis') 193 | out = hdf5storage.utilities.read_data(f, f, name, options) 194 | except: 195 | raise 196 | finally: 197 | if fld is not None: 198 | os.remove(fld[1]) 199 | 200 | assert_equal(out, 'read') 201 | 202 | 203 | def test_marshaller_read_approximate_missing_parent(): 204 | m = Tmarshaller() 205 | m.required_parent_modules = ['aiveneiavie'] 206 | m.required_modules = ['json'] 207 | m.python_type_strings = ['ellipsis'] 208 | m.types = ['builtins.ellipsis'] 209 | m.update_type_lookups() 210 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 211 | marshallers=[m]) 212 | options = hdf5storage.Options(marshaller_collection=mc) 213 | 214 | fld = None 215 | name = '/the' 216 | try: 217 | fld = tempfile.mkstemp() 218 | os.close(fld[0]) 219 | filename = fld[1] 220 | with h5py.File(filename, mode='w') as f: 221 | f.create_dataset(name, data=np.int64([1])) 222 | f[name].attrs.create('Python.Type', 223 | b'ellipsis') 224 | out = hdf5storage.utilities.read_data(f, f, name, options) 225 | except: 226 | raise 227 | finally: 228 | if fld is not None: 229 | os.remove(fld[1]) 230 | 231 | assert_equal(out, 'read_approximate') 232 | 233 | 234 | def test_marshaller_read_approximate_missing_import(): 235 | m = Tmarshaller() 236 | m.required_parent_modules = ['json'] 237 | m.required_modules = ['aiveneiavie'] 238 | m.python_type_strings = 
['ellipsis'] 239 | m.types = ['builtins.ellipsis'] 240 | m.update_type_lookups() 241 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 242 | marshallers=[m]) 243 | options = hdf5storage.Options(marshaller_collection=mc) 244 | 245 | fld = None 246 | name = '/the' 247 | try: 248 | fld = tempfile.mkstemp() 249 | os.close(fld[0]) 250 | filename = fld[1] 251 | with h5py.File(filename, mode='w') as f: 252 | f.create_dataset(name, data=np.int64([1])) 253 | f[name].attrs.create('Python.Type', 254 | b'ellipsis') 255 | out = hdf5storage.utilities.read_data(f, f, name, options) 256 | except: 257 | raise 258 | finally: 259 | if fld is not None: 260 | os.remove(fld[1]) 261 | 262 | assert_equal(out, 'read_approximate') 263 | -------------------------------------------------------------------------------- /tests/test_path_escaping.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import posixpath 28 | import random 29 | 30 | from hdf5storage.utilities import escape_path, unescape_path, process_path 31 | 32 | from make_randoms import random_str_ascii, random_str_some_unicode 33 | 34 | from nose.tools import assert_equal 35 | 36 | random.seed() 37 | 38 | 39 | # Get the characters that have to be escaped and make sure they are str 40 | # instead of bytes. 41 | chars_to_escape = ['\\', '/', '\x00'] 42 | substitutions = ['\\\\', '\\x2f', '\\x00'] 43 | period = '.' 
44 | period_substitute = '\\x2e' 45 | if isinstance(chars_to_escape[0], bytes): 46 | chars_to_escape = [c.decode('utf-8') for c in chars_to_escape] 47 | substitutions = [c.decode('utf-8') for c in substitutions] 48 | period = period.decode('utf-8') 49 | period_substitute = period_substitute.decode('utf-8') 50 | 51 | 52 | def make_str_for_esc(include_escapes=None, 53 | include_leading_periods=False, 54 | no_unicode=False, 55 | pack_digits=True): 56 | sl = list(random_str_ascii(10)) 57 | if not no_unicode: 58 | sl += list(random_str_some_unicode(10)) 59 | if pack_digits: 60 | chars = b'0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F' 61 | sl += chars.decode('ascii').split(b' '.decode('ascii')) * 10 62 | sl += [period] * 10 63 | if include_escapes is not None: 64 | for c in include_escapes: 65 | sl += [c] * 3 66 | random.shuffle(sl) 67 | s = b''.decode('ascii').join(sl).lstrip(period) 68 | if include_leading_periods: 69 | s = period * random.randint(1, 10) + s 70 | return s 71 | 72 | 73 | def test_escaping(): 74 | for i in range(20): 75 | s = make_str_for_esc(include_escapes=chars_to_escape, 76 | include_leading_periods=True) 77 | s_e = s 78 | for j, c in enumerate(chars_to_escape): 79 | s_e = s_e.replace(c, substitutions[j]) 80 | length = len(s_e) 81 | s_e = s_e.lstrip(period) 82 | s_e = period_substitute * (length - len(s_e)) + s_e 83 | assert_equal(s_e, escape_path(s)) 84 | 85 | 86 | def test_unescaping_x(): 87 | fmts = [b'{0:02x}'.decode('ascii'), b'{0:02X}'.decode('ascii')] 88 | prefix = b'\\x'.decode('ascii') 89 | for i in range(20): 90 | s = make_str_for_esc(no_unicode=True, 91 | pack_digits=True) 92 | index = random.randrange(1, len(s) - 1) 93 | c = s[index] 94 | n = ord(c) 95 | c_e = prefix + random.choice(fmts).format(n) 96 | s_e = s[:index] + c_e + s[(index + 1):] 97 | assert_equal(s, unescape_path(s_e)) 98 | 99 | 100 | def test_unescaping_u(): 101 | fmts = [b'{0:04x}'.decode('ascii'), b'{0:04X}'.decode('ascii')] 102 | prefix = b'\\u'.decode('ascii') 103 | for 
i in range(20): 104 | s = make_str_for_esc(pack_digits=True) 105 | index = random.randrange(1, len(s) - 1) 106 | c = s[index] 107 | n = ord(c) 108 | c_e = prefix + random.choice(fmts).format(n) 109 | s_e = s[:index] + c_e + s[(index + 1):] 110 | assert_equal(s, unescape_path(s_e)) 111 | 112 | 113 | def test_unescaping_U(): 114 | fmts = [b'{0:08x}'.decode('ascii'), b'{0:08X}'.decode('ascii')] 115 | prefix = b'\\U'.decode('ascii') 116 | for i in range(20): 117 | s = make_str_for_esc(pack_digits=True) 118 | index = random.randrange(1, len(s) - 1) 119 | c = s[index] 120 | n = ord(c) 121 | c_e = prefix + random.choice(fmts).format(n) 122 | s_e = s[:index] + c_e + s[(index + 1):] 123 | assert_equal(s, unescape_path(s_e)) 124 | 125 | 126 | def test_escape_reversibility_no_escapes(): 127 | for i in range(20): 128 | s = make_str_for_esc() 129 | s_e = escape_path(s) 130 | s_e_u = unescape_path(s_e) 131 | assert_equal(s, s_e) 132 | assert_equal(s, s_e_u) 133 | 134 | 135 | def test_escape_reversibility_no_escapes_bytes(): 136 | for i in range(20): 137 | s = make_str_for_esc() 138 | s = s.encode('utf-8') 139 | s_e = escape_path(s) 140 | s_e_u = unescape_path(s_e) 141 | assert_equal(s, s_e.encode('utf-8')) 142 | assert_equal(s, s_e_u.encode('utf-8')) 143 | 144 | 145 | def test_escape_reversibility_escapes(): 146 | for i in range(20): 147 | s = make_str_for_esc(include_escapes=chars_to_escape) 148 | s_e = escape_path(s) 149 | s_e_u = unescape_path(s_e) 150 | assert_equal(s, s_e_u) 151 | 152 | 153 | def test_escape_reversibility_escapes_bytes(): 154 | for i in range(20): 155 | s = make_str_for_esc(include_escapes=chars_to_escape) 156 | s = s.encode('utf-8') 157 | s_e = escape_path(s) 158 | s_e_u = unescape_path(s_e) 159 | assert_equal(s, s_e_u.encode('utf-8')) 160 | 161 | 162 | def test_escape_reversibility_leading_periods(): 163 | for i in range(20): 164 | s = make_str_for_esc(include_leading_periods=True) 165 | s_e = escape_path(s) 166 | s_e_u = unescape_path(s_e) 167 | 
assert_equal(s, s_e_u) 168 | 169 | 170 | def test_escape_reversibility_leading_periods_bytes(): 171 | for i in range(20): 172 | s = make_str_for_esc(include_leading_periods=True) 173 | s = s.encode('utf-8') 174 | s_e = escape_path(s) 175 | s_e_u = unescape_path(s_e) 176 | assert_equal(s, s_e_u.encode('utf-8')) 177 | 178 | 179 | def test_escape_reversibility_escapes_leading_periods(): 180 | for i in range(20): 181 | s = make_str_for_esc(include_escapes=chars_to_escape, 182 | include_leading_periods=True) 183 | s_e = escape_path(s) 184 | s_e_u = unescape_path(s_e) 185 | assert_equal(s, s_e_u) 186 | 187 | 188 | def test_escape_reversibility_escapes_leading_periods_bytes(): 189 | for i in range(20): 190 | s = make_str_for_esc(include_escapes=chars_to_escape, 191 | include_leading_periods=True) 192 | s = s.encode('utf-8') 193 | s_e = escape_path(s) 194 | s_e_u = unescape_path(s_e) 195 | assert_equal(s, s_e_u.encode('utf-8')) 196 | 197 | 198 | def test_process_path_no_escapes(): 199 | for i in range(10): 200 | pth = [make_str_for_esc() for j in range(10)] 201 | beginning = tuple(pth[:-1]) 202 | gs = posixpath.join(*beginning) 203 | ts = pth[-1] 204 | gname, tname = process_path(pth) 205 | assert_equal(gs, gname) 206 | assert_equal(ts, tname) 207 | 208 | 209 | def test_process_path_no_escapes_bytes(): 210 | for i in range(10): 211 | pth = [make_str_for_esc().encode('utf-8') for j in range(10)] 212 | beginning = tuple(pth[:-1]) 213 | gs = posixpath.join(*beginning).decode('utf-8') 214 | ts = pth[-1].decode('utf-8') 215 | gname, tname = process_path(pth) 216 | assert_equal(gs, gname) 217 | assert_equal(ts, tname) 218 | 219 | 220 | def test_process_path_escapes(): 221 | for i in range(10): 222 | pth = [make_str_for_esc(include_escapes=chars_to_escape) 223 | for j in range(10)] 224 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 225 | gs = posixpath.join(*beginning) 226 | ts = escape_path(pth[-1]) 227 | gname, tname = process_path(pth) 228 | assert_equal(gs, gname) 
229 | assert_equal(ts, tname) 230 | 231 | 232 | def test_process_path_escapes_bytes(): 233 | for i in range(10): 234 | pth = [make_str_for_esc( 235 | include_escapes=chars_to_escape).encode('utf-8') 236 | for j in range(10)] 237 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 238 | gs = posixpath.join(*beginning) 239 | ts = escape_path(pth[-1]) 240 | gname, tname = process_path(pth) 241 | assert_equal(gs, gname) 242 | assert_equal(ts, tname) 243 | 244 | 245 | def test_process_path_leading_periods(): 246 | for i in range(10): 247 | pth = [make_str_for_esc(include_leading_periods=True) 248 | for j in range(10)] 249 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 250 | gs = posixpath.join(*beginning) 251 | ts = escape_path(pth[-1]) 252 | gname, tname = process_path(pth) 253 | assert_equal(gs, gname) 254 | assert_equal(ts, tname) 255 | 256 | 257 | def test_process_path_leading_periods_bytes(): 258 | for i in range(10): 259 | pth = [make_str_for_esc( 260 | include_leading_periods=True).encode('utf-8') 261 | for j in range(10)] 262 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 263 | gs = posixpath.join(*beginning) 264 | ts = escape_path(pth[-1]) 265 | gname, tname = process_path(pth) 266 | assert_equal(gs, gname) 267 | assert_equal(ts, tname) 268 | 269 | 270 | def test_process_path_escapes_leading_periods(): 271 | for i in range(10): 272 | pth = [make_str_for_esc(include_escapes=chars_to_escape, 273 | include_leading_periods=True) 274 | for j in range(10)] 275 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 276 | gs = posixpath.join(*beginning) 277 | ts = escape_path(pth[-1]) 278 | gname, tname = process_path(pth) 279 | assert_equal(gs, gname) 280 | assert_equal(ts, tname) 281 | 282 | 283 | def test_process_path_escapes_leading_periods_bytes(): 284 | for i in range(10): 285 | pth = [make_str_for_esc( 286 | include_escapes=chars_to_escape, 287 | include_leading_periods=True).encode('utf-8') 288 | for j in range(10)] 289 | beginning = 
tuple([escape_path(s) for s in pth[:-1]]) 290 | gs = posixpath.join(*beginning) 291 | ts = escape_path(pth[-1]) 292 | gname, tname = process_path(pth) 293 | assert_equal(gs, gname) 294 | assert_equal(ts, tname) 295 | -------------------------------------------------------------------------------- /tests/test_dict_like_storage_methods.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import os 28 | import os.path 29 | import random 30 | import tempfile 31 | 32 | import numpy as np 33 | 34 | import h5py 35 | 36 | import hdf5storage 37 | from hdf5storage.utilities import escape_path 38 | 39 | from nose.tools import assert_equal as assert_equal_nose 40 | 41 | from make_randoms import random_name, random_dict, random_int, \ 42 | random_str_ascii, random_str_some_unicode, max_dict_key_length 43 | 44 | random.seed() 45 | 46 | 47 | 48 | # Need a list of dict-like types, which will depend on Python 49 | # version. 50 | dict_like = ['dict', 'OrderedDict'] 51 | 52 | # Need a list of previously invalid characters. 53 | invalid_characters = ('\x00', '/') 54 | 55 | 56 | def check_all_valid_str_keys(tp, option_keywords): 57 | options = hdf5storage.Options(**option_keywords) 58 | key_value_names = (options.dict_like_keys_name, 59 | options.dict_like_values_name) 60 | 61 | data = random_dict(tp) 62 | for k in key_value_names: 63 | if k in data: 64 | del data[k] 65 | 66 | # Make a random name. 67 | name = random_name() 68 | 69 | # Write the data to the proper file with the given name with the 70 | # provided options. The file needs to be deleted after to keep junk 71 | # from building up. 
72 | fld = None 73 | try: 74 | fld = tempfile.mkstemp() 75 | os.close(fld[0]) 76 | filename = fld[1] 77 | hdf5storage.write(data, path=name, filename=filename, 78 | options=options) 79 | 80 | with h5py.File(filename, mode='r') as f: 81 | for k in key_value_names: 82 | assert escape_path(k) not in f[name] 83 | for k in data: 84 | assert escape_path(k) in f[name] 85 | except: 86 | raise 87 | finally: 88 | if fld is not None: 89 | os.remove(fld[1]) 90 | 91 | 92 | def check_str_key_previously_invalid_char(tp, ch, option_keywords): 93 | options = hdf5storage.Options(**option_keywords) 94 | key_value_names = (options.dict_like_keys_name, 95 | options.dict_like_values_name) 96 | 97 | data = random_dict(tp) 98 | for k in key_value_names: 99 | if k in data: 100 | del data[k] 101 | 102 | # Add a random invalid str key using the provided character 103 | key = key_value_names[0] 104 | while key in key_value_names: 105 | key = ch.join([random_str_ascii(max_dict_key_length) 106 | for i in range(2)]) 107 | data[key] = random_int() 108 | 109 | # Make a random name. 110 | name = random_name() 111 | 112 | # Write the data to the proper file with the given name with the 113 | # provided options. The file needs to be deleted after to keep junk 114 | # from building up. 
115 | fld = None 116 | try: 117 | fld = tempfile.mkstemp() 118 | os.close(fld[0]) 119 | filename = fld[1] 120 | hdf5storage.write(data, path=name, filename=filename, 121 | options=options) 122 | 123 | with h5py.File(filename, mode='r') as f: 124 | for k in key_value_names: 125 | assert escape_path(k) not in f[name] 126 | for k in data: 127 | assert escape_path(k) in f[name] 128 | except: 129 | raise 130 | finally: 131 | if fld is not None: 132 | os.remove(fld[1]) 133 | 134 | 135 | def check_string_type_non_str_key(tp, other_tp, option_keywords): 136 | options = hdf5storage.Options(**option_keywords) 137 | key_value_names = (options.dict_like_keys_name, 138 | options.dict_like_values_name) 139 | 140 | data = random_dict(tp) 141 | for k in key_value_names: 142 | if k in data: 143 | del data[k] 144 | keys = list(data.keys()) 145 | 146 | key_gen = random_str_some_unicode(max_dict_key_length) 147 | if other_tp == 'numpy.bytes_': 148 | key = np.bytes_(key_gen.encode('UTF-8')) 149 | elif other_tp == 'numpy.unicode_': 150 | key = np.unicode_(key_gen) 151 | elif other_tp == 'bytes': 152 | key = key_gen.encode('UTF-8') 153 | data[key] = random_int() 154 | keys.append(key_gen) 155 | 156 | # Make a random name. 157 | name = random_name() 158 | 159 | # Write the data to the proper file with the given name with the 160 | # provided options. The file needs to be deleted after to keep junk 161 | # from building up. 
162 | fld = None 163 | try: 164 | fld = tempfile.mkstemp() 165 | os.close(fld[0]) 166 | filename = fld[1] 167 | hdf5storage.write(data, path=name, filename=filename, 168 | options=options) 169 | 170 | with h5py.File(filename, mode='r') as f: 171 | assert_equal_nose(set(keys), set(f[name].keys())) 172 | 173 | except: 174 | raise 175 | finally: 176 | if fld is not None: 177 | os.remove(fld[1]) 178 | 179 | 180 | def check_int_key(tp, option_keywords): 181 | options = hdf5storage.Options(**option_keywords) 182 | key_value_names = (options.dict_like_keys_name, 183 | options.dict_like_values_name) 184 | 185 | data = random_dict(tp) 186 | for k in key_value_names: 187 | if k in data: 188 | del data[k] 189 | 190 | key = random_int() 191 | data[key] = random_int() 192 | 193 | # Make a random name. 194 | name = random_name() 195 | 196 | # Write the data to the proper file with the given name with the 197 | # provided options. The file needs to be deleted after to keep junk 198 | # from building up. 
199 | fld = None 200 | try: 201 | fld = tempfile.mkstemp() 202 | os.close(fld[0]) 203 | filename = fld[1] 204 | hdf5storage.write(data, path=name, filename=filename, 205 | options=options) 206 | 207 | with h5py.File(filename, mode='r') as f: 208 | assert_equal_nose(set(key_value_names), set(f[name].keys())) 209 | except: 210 | raise 211 | finally: 212 | if fld is not None: 213 | os.remove(fld[1]) 214 | 215 | 216 | def test_all_valid_str_keys(): 217 | # generate some random keys_values_names 218 | keys_values_names = [('keys', 'values')] 219 | for i in range(3): 220 | names = ('a', 'a') 221 | while names[0] == names[1]: 222 | names = [random_str_ascii(8) for i in range(2)] 223 | keys_values_names.append(names) 224 | for pyth_meta in (True, False): 225 | for mat_meta in (True, False): 226 | for tp in dict_like: 227 | for names in keys_values_names: 228 | options_keywords = { \ 229 | 'store_python_metadata': pyth_meta, \ 230 | 'matlab_compatible': mat_meta, \ 231 | 'dict_like_keys_name': names[0], \ 232 | 'dict_like_values_name': names[1]} 233 | yield check_all_valid_str_keys, tp, options_keywords 234 | 235 | 236 | def test_str_key_previously_invalid_char(): 237 | # generate some random keys_values_names 238 | keys_values_names = [('keys', 'values')] 239 | for i in range(3): 240 | names = ('a', 'a') 241 | while names[0] == names[1]: 242 | names = [random_str_ascii(8) for i in range(2)] 243 | keys_values_names.append(names) 244 | for pyth_meta in (True, False): 245 | for mat_meta in (True, False): 246 | for tp in dict_like: 247 | for c in invalid_characters: 248 | for names in keys_values_names: 249 | options_keywords = { \ 250 | 'store_python_metadata': pyth_meta, \ 251 | 'matlab_compatible': mat_meta, \ 252 | 'dict_like_keys_name': names[0], \ 253 | 'dict_like_values_name': names[1]} 254 | yield check_str_key_previously_invalid_char, tp, c, options_keywords 255 | 256 | 257 | def test_string_type_non_str_key(): 258 | # Set the other key types. 
259 | other_tps = ['bytes', 'numpy.bytes_', 'numpy.unicode_'] 260 | # generate some random keys_values_names 261 | keys_values_names = [('keys', 'values')] 262 | for i in range(1): 263 | names = ('a', 'a') 264 | while names[0] == names[1]: 265 | names = [random_str_ascii(8) for i in range(2)] 266 | keys_values_names.append(names) 267 | for pyth_meta in (True, False): 268 | for mat_meta in (True, False): 269 | for tp in dict_like: 270 | for other_tp in other_tps: 271 | for names in keys_values_names: 272 | options_keywords = { \ 273 | 'store_python_metadata': pyth_meta, \ 274 | 'matlab_compatible': mat_meta, \ 275 | 'dict_like_keys_name': names[0], \ 276 | 'dict_like_values_name': names[1]} 277 | yield check_string_type_non_str_key, tp, other_tp, options_keywords 278 | 279 | 280 | def test_int_key(): 281 | # generate some random keys_values_names 282 | keys_values_names = [('keys', 'values')] 283 | for i in range(3): 284 | names = ('a', 'a') 285 | while names[0] == names[1]: 286 | names = [random_str_ascii(8) for i in range(2)] 287 | keys_values_names.append(names) 288 | for pyth_meta in (True, False): 289 | for mat_meta in (True, False): 290 | for tp in dict_like: 291 | for names in keys_values_names: 292 | options_keywords = { \ 293 | 'store_python_metadata': pyth_meta, \ 294 | 'matlab_compatible': mat_meta, \ 295 | 'dict_like_keys_name': names[0], \ 296 | 'dict_like_values_name': names[1]} 297 | yield check_int_key, tp, options_keywords 298 | -------------------------------------------------------------------------------- /tests/test_hdf5_filters.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import random 30 | import tempfile 31 | 32 | import h5py 33 | 34 | import hdf5storage 35 | 36 | from nose.tools import assert_equal as assert_equal_nose 37 | 38 | from asserts import assert_equal 39 | from make_randoms import random_numpy, random_numpy_shape, \ 40 | max_array_axis_length, dtypes, random_name 41 | 42 | random.seed() 43 | 44 | 45 | 46 | def check_read_filters(filters): 47 | # Read out the filter arguments. 48 | filts = {'compression': 'gzip', 49 | 'shuffle': True, 50 | 'fletcher32': True, 51 | 'gzip_level': 7} 52 | for k, v in filters.items(): 53 | filts[k] = v 54 | if filts['compression'] == 'gzip': 55 | filts['compression_opts'] = filts['gzip_level'] 56 | del filts['gzip_level'] 57 | 58 | # Make some random data. 
59 | dims = random.randint(1, 4) 60 | data = random_numpy(shape=random_numpy_shape(dims, 61 | max_array_axis_length), 62 | dtype=random.choice(tuple( 63 | set(dtypes) - set(['U'])))) 64 | # Make a random name. 65 | name = random_name() 66 | 67 | # Write the data to the proper file with the given name with the 68 | # provided filters and read it backt. The file needs to be deleted 69 | # after to keep junk from building up. 70 | fld = None 71 | try: 72 | fld = tempfile.mkstemp() 73 | os.close(fld[0]) 74 | filename = fld[1] 75 | with h5py.File(filename, mode='w') as f: 76 | f.create_dataset(name, data=data, chunks=True, **filts) 77 | out = hdf5storage.read(path=name, filename=filename, 78 | matlab_compatible=False) 79 | except: 80 | raise 81 | finally: 82 | if fld is not None: 83 | os.remove(fld[1]) 84 | 85 | # Compare 86 | assert_equal(out, data) 87 | 88 | 89 | def check_write_filters(filters): 90 | # Read out the filter arguments. 91 | filts = {'compression': 'gzip', 92 | 'shuffle': True, 93 | 'fletcher32': True, 94 | 'gzip_level': 7} 95 | for k, v in filters.items(): 96 | filts[k] = v 97 | 98 | # Make some random data. The dtype must be restricted so that it can 99 | # be read back reliably. 100 | dims = random.randint(1, 4) 101 | dts = tuple(set(dtypes) - set(['U', 'S', 'bool', 'complex64', \ 102 | 'complex128'])) 103 | 104 | data = random_numpy(shape=random_numpy_shape(dims, 105 | max_array_axis_length), 106 | dtype=random.choice(dts)) 107 | # Make a random name. 108 | name = random_name() 109 | 110 | # Write the data to the proper file with the given name with the 111 | # provided filters and read it backt. The file needs to be deleted 112 | # after to keep junk from building up. 
113 | fld = None 114 | try: 115 | fld = tempfile.mkstemp() 116 | os.close(fld[0]) 117 | filename = fld[1] 118 | hdf5storage.write(data, path=name, filename=filename, \ 119 | store_python_metadata=False, matlab_compatible=False, \ 120 | compress=True, compress_size_threshold=0, \ 121 | compression_algorithm=filts['compression'], \ 122 | gzip_compression_level=filts['gzip_level'], \ 123 | shuffle_filter=filts['shuffle'], \ 124 | compressed_fletcher32_filter=filts['fletcher32']) 125 | 126 | with h5py.File(filename, mode='r') as f: 127 | d = f[name] 128 | fletcher32 = d.fletcher32 129 | shuffle = d.shuffle 130 | compression = d.compression 131 | gzip_level = d.compression_opts 132 | out = d[...] 133 | except: 134 | raise 135 | finally: 136 | if fld is not None: 137 | os.remove(fld[1]) 138 | 139 | # Check the filters 140 | assert_equal_nose(fletcher32, filts['fletcher32']) 141 | assert_equal_nose(shuffle, filts['shuffle']) 142 | assert_equal_nose(compression, filts['compression']) 143 | if filts['compression'] == 'gzip': 144 | assert_equal_nose(gzip_level, filts['gzip_level']) 145 | 146 | # Compare 147 | assert_equal(out, data) 148 | 149 | 150 | def check_uncompressed_write_filters(method, 151 | uncompressed_fletcher32_filter, 152 | filters): 153 | # Read out the filter arguments. 154 | filts = {'compression': 'gzip', 155 | 'shuffle': True, 156 | 'fletcher32': True, 157 | 'gzip_level': 7} 158 | for k, v in filters.items(): 159 | filts[k] = v 160 | 161 | # Make some random data. The dtype must be restricted so that it can 162 | # be read back reliably. 163 | dims = random.randint(1, 4) 164 | dts = tuple(set(dtypes) - set(['U', 'S', 'bool', 'complex64', \ 165 | 'complex128'])) 166 | 167 | data = random_numpy(shape=random_numpy_shape(dims, 168 | max_array_axis_length), 169 | dtype=random.choice(dts)) 170 | # Make a random name. 
171 | name = random_name() 172 | 173 | # Make the options to disable compression by the method specified, 174 | # which is either that it is outright disabled or that the data is 175 | # smaller than the compression threshold. 176 | if method == 'compression_disabled': 177 | opts = {'compress': False, 'compress_size_threshold': 0} 178 | else: 179 | opts = {'compress': True, 180 | 'compress_size_threshold': data.nbytes + 1} 181 | 182 | # Write the data to the proper file with the given name with the 183 | # provided filters and read it backt. The file needs to be deleted 184 | # after to keep junk from building up. 185 | fld = None 186 | try: 187 | fld = tempfile.mkstemp() 188 | os.close(fld[0]) 189 | filename = fld[1] 190 | hdf5storage.write(data, path=name, filename=filename, \ 191 | store_python_metadata=False, matlab_compatible=False, \ 192 | compression_algorithm=filts['compression'], \ 193 | gzip_compression_level=filts['gzip_level'], \ 194 | shuffle_filter=filts['shuffle'], \ 195 | compressed_fletcher32_filter=filts['fletcher32'], \ 196 | uncompressed_fletcher32_filter= \ 197 | uncompressed_fletcher32_filter, \ 198 | **opts) 199 | 200 | with h5py.File(filename, mode='r') as f: 201 | d = f[name] 202 | fletcher32 = d.fletcher32 203 | shuffle = d.shuffle 204 | compression = d.compression 205 | gzip_level = d.compression_opts 206 | out = d[...] 
207 | except: 208 | raise 209 | finally: 210 | if fld is not None: 211 | os.remove(fld[1]) 212 | 213 | # Check the filters 214 | assert_equal_nose(compression, None) 215 | assert_equal_nose(shuffle, False) 216 | assert_equal_nose(fletcher32, uncompressed_fletcher32_filter) 217 | 218 | # Compare 219 | assert_equal(out, data) 220 | 221 | 222 | def test_read_filtered_data(): 223 | for compression in ('gzip', 'lzf'): 224 | for shuffle in (True, False): 225 | for fletcher32 in (True, False): 226 | if compression != 'gzip': 227 | filters = {'compression': compression, 228 | 'shuffle': shuffle, 229 | 'fletcher32': fletcher32} 230 | yield check_read_filters, filters 231 | else: 232 | for level in range(10): 233 | filters = {'compression': compression, 234 | 'shuffle': shuffle, 235 | 'fletcher32': fletcher32, 236 | 'gzip_level': level} 237 | yield check_read_filters, filters 238 | 239 | 240 | def test_write_filtered_data(): 241 | for compression in ('gzip', 'lzf'): 242 | for shuffle in (True, False): 243 | for fletcher32 in (True, False): 244 | if compression != 'gzip': 245 | filters = {'compression': compression, 246 | 'shuffle': shuffle, 247 | 'fletcher32': fletcher32} 248 | yield check_read_filters, filters 249 | else: 250 | for level in range(10): 251 | filters = {'compression': compression, 252 | 'shuffle': shuffle, 253 | 'fletcher32': fletcher32, 254 | 'gzip_level': level} 255 | yield check_write_filters, filters 256 | 257 | 258 | def test_uncompressed_write_filtered_data(): 259 | for method in ('compression_disabled', 'data_too_small'): 260 | for uncompressed_fletcher32_filter in (True, False): 261 | for compression in ('gzip', 'lzf'): 262 | for shuffle in (True, False): 263 | for fletcher32 in (True, False): 264 | if compression != 'gzip': 265 | filters = {'compression': compression, 266 | 'shuffle': shuffle, 267 | 'fletcher32': fletcher32} 268 | yield check_read_filters, filters 269 | else: 270 | for level in range(10): 271 | filters = {'compression': compression, 
272 | 'shuffle': shuffle, 273 | 'fletcher32': fletcher32, 274 | 'gzip_level': level} 275 | yield check_uncompressed_write_filters,\ 276 | method, uncompressed_fletcher32_filter,\ 277 | filters 278 | -------------------------------------------------------------------------------- /tests/make_randoms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2013-2020, Freja Nordsiek 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are 8 | # met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | import collections 30 | import posixpath 31 | import random 32 | import string 33 | import warnings 34 | 35 | import numpy as np 36 | import numpy.random 37 | 38 | 39 | random.seed() 40 | 41 | 42 | # The dtypes that can be made 43 | dtypes = ['bool', 'uint8', 'uint16', 'uint32', 'uint64', 44 | 'int8', 'int16', 'int32', 'int64', 45 | 'float32', 'float64', 'complex64', 'complex128', 46 | 'S', 'U'] 47 | 48 | # Define the sizes of random datasets to use. 49 | max_string_length = 10 50 | max_array_axis_length = 8 51 | max_list_length = 6 52 | max_posix_path_depth = 5 53 | max_posix_path_lengths = 17 54 | object_subarray_dimensions = 2 55 | max_object_subarray_axis_length = 5 56 | min_dict_keys = 4 57 | max_dict_keys = 12 58 | max_dict_key_length = 10 59 | dict_value_subarray_dimensions = 2 60 | max_dict_value_subarray_axis_length = 5 61 | min_structured_ndarray_fields = 2 62 | max_structured_ndarray_fields = 5 63 | max_structured_ndarray_field_lengths = 10 64 | max_structured_ndarray_axis_length = 2 65 | structured_ndarray_subarray_dimensions = 2 66 | max_structured_ndarray_subarray_axis_length = 4 67 | 68 | 69 | def random_str_ascii_letters(length): 70 | # Makes a random ASCII str of the specified length. 71 | ltrs = string.ascii_letters 72 | return ''.join([random.choice(ltrs) for i in 73 | range(0, length)]) 74 | 75 | 76 | def random_str_ascii(length): 77 | # Makes a random ASCII str of the specified length. 78 | ltrs = string.ascii_letters + string.digits 79 | return ''.join([random.choice(ltrs) for i in 80 | range(0, length)]) 81 | 82 | 83 | def random_str_some_unicode(length): 84 | # Makes a random ASCII+limited unicode str of the specified 85 | # length. 
86 | ltrs = random_str_ascii(10) 87 | ltrs += 'αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩς' 88 | c = '' 89 | return c.join([random.choice(ltrs) for i in range(0, length)]) 90 | 91 | 92 | def random_bytes(length): 93 | # Makes a random sequence of bytes of the specified length from 94 | # the ASCII set. 95 | ltrs = bytes(range(1, 127)) 96 | return bytes([random.choice(ltrs) for i in range(0, length)]) 97 | 98 | 99 | def random_bytes_fullrange(length): 100 | # Makes a random sequence of bytes of the specified length from 101 | # the ASCII set. 102 | ltrs = bytes(range(1, 255)) 103 | return bytes([random.choice(ltrs) for i in range(0, length)]) 104 | 105 | def random_int(): 106 | return random.randint(-(2**31 - 1), 2**31) 107 | 108 | 109 | def random_float(): 110 | return random.uniform(-1.0, 1.0) \ 111 | * 10.0**random.randint(-300, 300) 112 | 113 | 114 | def random_numpy(shape, dtype, allow_nan=True, 115 | allow_unicode=False, 116 | object_element_dtypes=None): 117 | # Makes a random numpy array of the specified shape and dtype 118 | # string. The method is slightly different depending on the 119 | # type. For 'bytes', 'str', and 'object'; an array of the 120 | # specified size is made and then each element is set to either 121 | # a numpy.bytes_, numpy.str_, or some other object of any type 122 | # (here, it is a randomly typed random numpy array). If it is 123 | # any other type, then it is just a matter of constructing the 124 | # right sized ndarray from a random sequence of bytes (all must 125 | # be forced to 0 and 1 for bool). Optionally include unicode 126 | # characters. Optionally, for object dtypes, the allowed dtypes for 127 | # their elements can be given. 
128 | if dtype == 'S': 129 | length = random.randint(1, max_string_length) 130 | data = np.zeros(shape=shape, dtype='S' + str(length)) 131 | for index, x in np.ndenumerate(data): 132 | if allow_unicode: 133 | chars = random_bytes_fullrange(length) 134 | else: 135 | chars = random_bytes(length) 136 | data[index] = np.bytes_(chars) 137 | return data 138 | elif dtype == 'U': 139 | length = random.randint(1, max_string_length) 140 | data = np.zeros(shape=shape, dtype='U' + str(length)) 141 | for index, x in np.ndenumerate(data): 142 | if allow_unicode: 143 | chars = random_str_some_unicode(length) 144 | else: 145 | chars = random_str_ascii(length) 146 | data[index] = np.unicode_(chars) 147 | return data 148 | elif dtype == 'object': 149 | if object_element_dtypes is None: 150 | object_element_dtypes = dtypes 151 | data = np.zeros(shape=shape, dtype='object') 152 | for index, x in np.ndenumerate(data): 153 | data[index] = random_numpy( \ 154 | shape=random_numpy_shape( \ 155 | object_subarray_dimensions, \ 156 | max_object_subarray_axis_length), \ 157 | dtype=random.choice(object_element_dtypes)) 158 | return data 159 | else: 160 | nbytes = np.ndarray(shape=(1,), dtype=dtype).nbytes 161 | bts = np.random.bytes(nbytes * np.prod(shape)) 162 | if dtype == 'bool': 163 | bts = b''.join([{True: b'\x01', False: b'\x00'}[ \ 164 | ch > 127] for ch in bts]) 165 | data = np.ndarray(shape=shape, dtype=dtype, buffer=bts) 166 | # If it is a floating point type and we are supposed to 167 | # remove NaN's, then turn them to zeros. Numpy will throw 168 | # RuntimeWarnings for some NaN values, so those warnings need to 169 | # be caught and ignored. 
170 | if not allow_nan and data.dtype.kind in ('f', 'c'): 171 | data = data.copy() 172 | with warnings.catch_warnings(): 173 | warnings.simplefilter('ignore', RuntimeWarning) 174 | if data.dtype.kind == 'f': 175 | data[np.isnan(data)] = 0.0 176 | else: 177 | data.real[np.isnan(data.real)] = 0.0 178 | data.imag[np.isnan(data.imag)] = 0.0 179 | return data 180 | 181 | 182 | def random_numpy_scalar(dtype, object_element_dtypes=None): 183 | # How a random scalar is made depends on th type. For must, it 184 | # is just a single number. But for the string types, it is a 185 | # string of any length. 186 | if dtype == 'S': 187 | return np.bytes_(random_bytes(random.randint(1, 188 | max_string_length))) 189 | elif dtype == 'U': 190 | return np.unicode_(random_str_ascii( 191 | random.randint(1, 192 | max_string_length))) 193 | else: 194 | return random_numpy(tuple(), dtype, \ 195 | object_element_dtypes=object_element_dtypes)[()] 196 | 197 | 198 | def random_numpy_shape(dimensions, max_length, min_length=1): 199 | # Makes a random shape tuple having the specified number of 200 | # dimensions. The maximum size along each axis is max_length. 201 | return tuple([random.randint(min_length, max_length) 202 | for x in range(0, dimensions)]) 203 | 204 | 205 | def random_list(N, python_or_numpy='numpy'): 206 | # Makes a random list of the specified type. If instructed, it 207 | # will be composed entirely from random numpy arrays (make a 208 | # random object array and then convert that to a 209 | # list). Otherwise, it will be a list of random bytes. 210 | if python_or_numpy == 'numpy': 211 | return random_numpy((N,), dtype='object').tolist() 212 | else: 213 | data = [] 214 | for i in range(0, N): 215 | data.append(random_bytes(random.randint(1, 216 | max_string_length))) 217 | return data 218 | 219 | 220 | def random_dict(tp='dict'): 221 | # Makes a random dict or dict-like object tp (random number of 222 | # randomized keys with random numpy arrays as values). 
The only 223 | # supported values of tp are 'dict' and 'OrderedDict'. 224 | data = dict() 225 | for i in range(0, random.randint(min_dict_keys, \ 226 | max_dict_keys)): 227 | name = random_str_ascii(max_dict_key_length) 228 | data[name] = \ 229 | random_numpy(random_numpy_shape( \ 230 | dict_value_subarray_dimensions, \ 231 | max_dict_value_subarray_axis_length), \ 232 | dtype=random.choice(dtypes)) 233 | 234 | # If tp is 'dict', return as is. Otherwise, randomize the order. 235 | if tp == 'dict': 236 | return data 237 | elif tp == 'OrderedDict': 238 | # An ordered dict is made by randomizing the field order. 239 | itms = list(data.items()) 240 | random.shuffle(itms) 241 | return collections.OrderedDict(itms) 242 | else: 243 | return data 244 | 245 | 246 | def random_structured_numpy_array(shape, field_shapes=None, 247 | nonascii_fields=False, 248 | nondigits_fields=False, 249 | names=None): 250 | # Make random field names (if not provided with field names), 251 | # dtypes, and sizes. Though, if field_shapes is explicitly given, 252 | # the sizes should be random. The field names must all be of type 253 | # str, not unicode in Python 2. Optionally include non-ascii 254 | # characters in the field names (will have to be encoded in Python 255 | # 2.x). String types will not be used due to the difficulty in 256 | # assigning the length. 
257 | if names is None: 258 | if nonascii_fields: 259 | name_func = random_str_some_unicode 260 | elif nondigits_fields: 261 | name_func = random_str_ascii_letters 262 | else: 263 | name_func = random_str_ascii 264 | names = [name_func( 265 | max_structured_ndarray_field_lengths) 266 | for i in range(0, random.randint( 267 | min_structured_ndarray_fields, 268 | max_structured_ndarray_fields))] 269 | dts = [random.choice(list(set(dtypes) 270 | - set(('S', 'U')))) 271 | for i in range(len(names))] 272 | if field_shapes is None: 273 | shapes = [random_numpy_shape( 274 | structured_ndarray_subarray_dimensions, 275 | max_structured_ndarray_subarray_axis_length) 276 | for i in range(len(names))] 277 | else: 278 | shapes = [field_shapes] * len(names) 279 | # Construct the type of the whole thing. 280 | dt = np.dtype([(names[i], dts[i], shapes[i]) 281 | for i in range(len(names))]) 282 | # Make the array. If dt.itemsize is 0, then we need to make an 283 | # array of int8's the size in shape and convert it to work 284 | # around a numpy bug. Otherwise, we will just create an empty 285 | # array and then proceed by assigning each field. 286 | if dt.itemsize == 0: 287 | return np.zeros(shape=shape, dtype='int8').astype(dt) 288 | else: 289 | data = np.empty(shape=shape, dtype=dt) 290 | for index, x in np.ndenumerate(data): 291 | for i, name in enumerate(names): 292 | data[name][index] = random_numpy(shapes[i], \ 293 | dts[i], allow_nan=False) 294 | return data 295 | 296 | 297 | def random_name(): 298 | # Makes a random POSIX path of a random depth. 299 | depth = random.randint(1, max_posix_path_depth) 300 | path = '/' 301 | for i in range(0, depth): 302 | path = posixpath.join(path, random_str_ascii( 303 | random.randint(1, 304 | max_posix_path_lengths))) 305 | return path 306 | -------------------------------------------------------------------------------- /doc/source/introduction.rst: -------------------------------------------------------------------------------- 1 | .. 
currentmodule:: hdf5storage 2 | 3 | ============ 4 | Introduction 5 | ============ 6 | 7 | Getting Started 8 | =============== 9 | 10 | Most of the functionality that one will use is contained in the main 11 | module :: 12 | 13 | import hdf5storage 14 | 15 | Lower level functionality needed mostly for extending this package to 16 | work with more datatypes is in its submodules. 17 | 18 | The main functions in this module are :py:func:`write` and 19 | :py:func:`read` which write a single Python variable to an HDF5 file or 20 | read the specified contents at one location in an HDF5 file and convert 21 | to Python types. 22 | 23 | HDF5 files are structured much like a Unix filesystem, so everything can 24 | be referenced with a POSIX style path, which looks like 25 | ``'/pyth/hf'``. Unlike a Windows path, forward slashes (``'/'``) are 26 | used as directory separators instead of backward slashes (``'\\'``) and 27 | the base of the file system is just ``'/'`` instead of something like 28 | ``'C:\\'``. In the language of HDF5, what we call directories and files 29 | in filesystems are called groups and datasets. 30 | 31 | More information about paths, the supported escapes, etc. can be found 32 | at :ref:`Paths`. 33 | 34 | .. versionadded:: 0.2 35 | 36 | Ability to escape characters not allowed in Group or Dataset names. 37 | 38 | :py:func:`write` has many options for controlling how the data is 39 | stored, and what metadata is stored, but we can ignore that for now. If 40 | we have a variable named ``foo`` that we want to write to an HDF5 file 41 | named ``data.h5``, we would write it by :: 42 | 43 | hdf5storage.write(foo, path='/foo', filename='data.h5') 44 | 45 | And then we can read it back from the file with the :py:func:`read` 46 | function, which returns the read data.
Here, we will put the data we 47 | read back into the variable ``bar`` :: 48 | 49 | bar = hdf5storage.read(path='/foo', filename='data.h5') 50 | 51 | Writing And Reading Several Python Variables at Once 52 | ==================================================== 53 | 54 | To write and read more than one Python variable, one could use 55 | :py:func:`write` and :py:func:`read` for each variable individually. 56 | This can incur a major performance penalty, especially for large HDF5 57 | files, since each call opens and closes the HDF5 file (sometimes more 58 | than once). 59 | 60 | Version ``0.1.10`` added a way to do this without incurring this 61 | performance penalty by adding two new functions: :py:func:`writes` and 62 | :py:func:`reads`. 63 | 64 | They can write and read more than one Python variable at once, though 65 | they can still work with a single variable. In fact, :py:func:`write` 66 | and :py:func:`read` are now wrappers around them. :py:func:`savemat` 67 | and :py:func:`loadmat` currently use them for the improved performance. 68 | 69 | .. versionadded:: 0.1.10 70 | 71 | Ability to write and read more than one Python variable at a time 72 | without opening and closing the HDF5 file each time. 73 | 74 | Main Options Controlling Writing/Reading Data 75 | ============================================= 76 | 77 | There are many individual options that control how data is written and 78 | read to/from file. These can be set by passing an :py:class:`Options` 79 | object to :py:func:`write` and :py:func:`read` by :: 80 | 81 | options = hdf5storage.Options(...) 82 | hdf5storage.write(... , options=options) 83 | hdf5storage.read(... , options=options) 84 | 85 | or passing the individual keyword arguments used by the 86 | :py:class:`Options` constructor to :py:func:`write` and 87 | :py:func:`read`. The two methods cannot be mixed (the functions will 88 | give precedence to the given :py:class:`Options` object). 89 | 90 | ..
note:: 91 | 92 | Functions in the various submodules only support the 93 | :py:class:`Options` object method of passing options. 94 | 95 | The two main options are :py:attr:`Options.store_python_metadata` and 96 | :py:attr:`Options.matlab_compatible`. A more minor option is 97 | :py:attr:`Options.oned_as`. 98 | 99 | 100 | .. versionadded:: 0.1.9 101 | 102 | Support for the transparent compression of data has been added. It 103 | is enabled by default, compressing all python objects resulting in 104 | HDF5 Datasets larger than 16 KB with the GZIP/Deflate algorithm. 105 | 106 | 107 | store_python_metadata 108 | --------------------- 109 | 110 | ``bool`` 111 | 112 | Setting this option causes metadata to be written so that the written 113 | objects can be read back into Python accurately. As HDF5 does not 114 | natively support many Python data types (essentially only Numpy types), 115 | most Python data types have to be converted before being written. If 116 | metadata isn't also written, the data cannot be read back to its 117 | original form and will instead be read back as the Python type most 118 | closely resembling how it is stored, which will be a Numpy type of some 119 | sort. 120 | 121 | .. note:: 122 | 123 | This option is especially important when we consider that when 124 | ``matlab_compatible == True``, many additional conversions and 125 | manipulations will be done to the data that cannot be reversed 126 | without this metadata. 127 | 128 | matlab_compatible 129 | ----------------- 130 | 131 | ``bool`` 132 | 133 | Setting this option causes the writing of HDF5 files to be done in a way 134 | compatible with MATLAB v7.3 MAT files.
This consists of writing some 135 | file metadata so that MATLAB recognizes the file, adding specific 136 | metadata to every stored object so that MATLAB recognizes them, and 137 | transforming the data to be in the form that MATLAB expects for certain 138 | types (for example, MATLAB expects everything to be at least a 2D array 139 | and strings to be stored in UTF-16 but with no doublets). 140 | 141 | .. note:: 142 | 143 | There are many individual small options in the :py:class:`Options` 144 | class that this option sets to specific values. Setting 145 | ``matlab_compatible`` automatically sets them, while changing their 146 | values to something else automatically turns ``matlab_compatible`` 147 | off. 148 | 149 | action_for_matlab_incompatible 150 | ------------------------------ 151 | 152 | {``'ignore'``, ``'discard'``, ``'error'``} 153 | 154 | The action to perform when doing MATLAB compatibility 155 | (``matlab_compatible == True``) but a type 156 | being written is not MATLAB compatible. The actions are to write the 157 | data anyways ('ignore'), don't write the incompatible data ('discard'), 158 | or throw a :py:exc:`exceptions.TypeNotMatlabCompatibleError` 159 | exception. The default is 'error'. 160 | 161 | oned_as 162 | ------- 163 | 164 | {'row', 'column'} 165 | 166 | This option is only actually relevant when 167 | ``matlab_compatible == True``. MATLAB only supports 2D and higher 168 | dimensionality arrays, but Numpy supports 1D arrays. So, 1D arrays have 169 | to be made 2 dimensional making them either into row vectors or column 170 | vectors. This option sets which they become when imported into MATLAB. 171 | 172 | 173 | compress 174 | -------- 175 | 176 | .. versionadded:: 0.1.9 177 | 178 | ``bool`` 179 | 180 | Whether to use compression when writing data. Enabled (``True``) by default. See :ref:`Compression` for more information. 
181 | 182 | 183 | Convenience Functions for MATLAB MAT Files 184 | ========================================== 185 | 186 | Two functions are provided for reading and writing to MATLAB MAT files 187 | in a convenient way. They are :py:func:`savemat` and :py:func:`loadmat`, 188 | which are modelled after the SciPy functions of the same name 189 | (:py:func:`scipy.io.savemat` and :py:func:`scipy.io.loadmat`), which 190 | work with non-HDF5 based MAT files. They take not only the same options, 191 | but dispatch calls automatically to the SciPy versions when instructed 192 | to write to a non-HDF5 based MAT file, or read a MAT file that is not 193 | HDF5 based. SciPy must be installed to take advantage of this 194 | functionality. 195 | 196 | :py:func:`savemat` takes a ``dict`` having data (values) and the names 197 | to give each piece of data (keys), and writes them to a MATLAB 198 | compatible MAT file. The `format` keyword sets the MAT file format, with 199 | ``'7.3'`` being the HDF5 based format supported by this package and 200 | ``'5'`` and ``'4'`` being the non HDF5 based formats supported by 201 | SciPy. If you want the data to be able to be read accurately back into 202 | Python, you should set ``store_python_metadata=True``. Writing a couple 203 | variables to a file looks like :: 204 | 205 | hdf5storage.savemat('data.mat', {'foo': 2.3, 'bar': (1+2j)}, format='7.3', oned_as='column', store_python_metadata=True) 206 | 207 | Then, to read variables back, we can either explicitly name the 208 | variables we want :: 209 | 210 | out = hdf5storage.loadmat('data.mat', variable_names=['foo', 'bar']) 211 | 212 | or grab all variables by either not giving the `variable_names` option 213 | or setting it to ``None``. 
:: 214 | 215 | out = hdf5storage.loadmat('data.mat') 216 | 217 | 218 | Example: Write And Readback Including Different Metadata 219 | ======================================================== 220 | 221 | Making The Data 222 | --------------- 223 | 224 | Make a ``dict`` containing many different types in it that we want to 225 | store to disk in an HDF5 file. The initialization method depends on 226 | the Python version. 227 | 228 | .. versionchanged:: 0.2 229 | The ``dict`` keys no longer have to all be ``str`` (the unicode 230 | string type). However, if python metadata is not included, other 231 | string type keys can get converted to ``str`` when read back or one 232 | reads back a ``dict`` with two fields, ``keys`` and ``values``, 233 | holding all the keys and values if at least one key is not a string 234 | type. 235 | 236 | >>> import numpy as np 237 | >>> import hdf5storage 238 | >>> a = {'a': True, 239 | ... 'b': None, 240 | ... 'c': 2, 241 | ... 'd': -3.2, 242 | ... 'e': (1-2.3j), 243 | ... 'f': 'hello', 244 | ... 'g': b'goodbye', 245 | ... 'h': ['list', 'of', 'stuff', [30, 2.3]], 246 | ... 'i': np.zeros(shape=(2,), dtype=[('bi', 'uint8')]), 247 | ... 'j':{'aa': np.bool_(False), 248 | ... 'bb': np.uint8(4), 249 | ... 'cc': np.uint32([70, 8]), 250 | ... 'dd': np.int32([]), 251 | ... 'ee': np.float32([[3.3], [5.3e3]]), 252 | ... 'ff': np.complex128([[3.4, 3], [9+2j, 0]]), 253 | ... 'gg': np.array(['one', 'two', 'three'], dtype='str'), 254 | ... 'hh': np.bytes_(b'how many?'), 255 | ... 'ii': np.object_(['text', np.int8([1, -3, 0])])}} 256 | 257 | Using No Metadata 258 | ----------------- 259 | 260 | Write it to a file at the ``'/a'`` directory, but include no Python or 261 | MATLAB metadata. Then, read it back and notice that many objects come 262 | back quite different from what was written. Namely, everything was 263 | converted to Numpy types. This even included the dictionaries which were 264 | converted to structured ``np.ndarray``s. 
This happens because all 265 | other types (other than ``dict``) must be converted to these types 266 | before being written to the HDF5 file, and without metadata, the 267 | conversion cannot be reversed (while ``dict`` isn't converted, it has 268 | the same form and thus cannot be extracted reversibly). 269 | 270 | >>> hdf5storage.write(data=a, path='/a', filename='data.h5', 271 | ... store_python_metadata=False, 272 | ... matlab_compatible=False) 273 | >>> hdf5storage.read(path='/a', filename='data.h5') 274 | array([ (True, 275 | [], 276 | 2, 277 | -3.2, 278 | (1-2.3j), 279 | b'hello', 280 | b'goodbye', 281 | [array(b'list', dtype='|S4'), 282 | array(b'of', dtype='|S2'), 283 | array(b'stuff', dtype='|S5'), 284 | array([array(30), array(2.3)], dtype=object)], 285 | [(0,), (0,)], 286 | [(False, 287 | 4, 288 | array([70, 8], dtype=uint32), 289 | array([], dtype=int32), 290 | array([[ 3.29999995e+00], [ 5.30000000e+03]], dtype=float32), 291 | array([[ 3.4+0.j, 3.0+0.j], [ 9.0+2.j, 0.0+0.j]]), 292 | array([111, 110, 101, 0, 0, 116, 119, 111, 0, 0, 116, 104, 114, 293 | 101, 101], dtype=uint32), 294 | b'how many?', 295 | array([array(b'text', dtype='|S4'), 296 | array([ 1, -3, 0], dtype=int8)], 297 | dtype=object))])], 298 | dtype=[('a', '?'), 299 | ('b', '>> hdf5storage.write(data=a, path='/a', filename='data_typeinfo.h5', 326 | ... store_python_metadata=True, 327 | ... 
matlab_compatible=False) 328 | >>> hdf5storage.read(path='/a', filename='data_typeinfo.h5') 329 | {'a': True, 330 | 'b': None, 331 | 'c': 2, 332 | 'd': -3.2, 333 | 'e': (1-2.3j), 334 | 'f': 'hello', 335 | 'g': b'goodbye', 336 | 'h': ['list', 'of', 'stuff', [30, 2.3]], 337 | 'i': array([(0,), (0,)], 338 | dtype=[('bi', 'u1')]), 339 | 'j': {'aa': False, 340 | 'bb': 4, 341 | 'cc': array([70, 8], dtype=uint32), 342 | 'dd': array([], dtype=int32), 343 | 'ee': array([[ 3.29999995e+00], 344 | [ 5.30000000e+03]], dtype=float32), 345 | 'ff': array([[ 3.4+0.j, 3.0+0.j], 346 | [ 9.0+2.j, 0.0+0.j]]), 347 | 'gg': array(['one', 'two', 'three'], 348 | dtype='>> hdf5storage.write(data=a, path='/a', filename='data.mat', 366 | ... store_python_metadata=False, 367 | ... matlab_compatible=True) 368 | >>> hdf5storage.read(path='/a', filename='data.mat') 369 | array([ ([[True]], 370 | [[]], 371 | [[2]], 372 | [[-3.2]], 373 | [[(1-2.3j)]], 374 | [['hello']], 375 | [['goodbye']], 376 | [[array([['list']], dtype='>> hdf5storage.write(data=a, path='/a', filename='data_typeinfo.mat', 424 | ... store_python_metadata=True, 425 | ... 
matlab_compatible=True) 426 | >>> hdf5storage.read(path='/a', filename='data_typeinfo.mat') 427 | {'a': True, 428 | 'b': None, 429 | 'c': 2, 430 | 'd': -3.2, 431 | 'e': (1-2.3j), 432 | 'f': 'hello', 433 | 'g': b'goodbye', 434 | 'h': ['list', 'of', 'stuff', [30, 2.3]], 435 | 'i': array([(0,), (0,)], 436 | dtype=[('bi', 'u1')]), 437 | 'j': {'aa': False, 438 | 'bb': 4, 439 | 'cc': array([70, 8], dtype=uint32), 440 | 'dd': array([], dtype=int32), 441 | 'ee': array([[ 3.29999995e+00], 442 | [ 5.30000000e+03]], dtype=float32), 443 | 'ff': array([[ 3.4+0.j, 3.0+0.j], 444 | [ 9.0+2.j, 0.0+0.j]]), 445 | 'gg': array(['one', 'two', 'three'], 446 | dtype=' 2**63 or b < -(2**63 - 1): 174 | assert_equal_none_format(a, np.bytes_(b), options) 175 | else: 176 | assert_equal_none_format(a, np.int64(b), options) 177 | else: 178 | assert_equal_none_format(a, np.array(b)[()], options) 179 | elif isinstance(b, np.recarray): 180 | assert_equal_none_format(a, b.view(np.ndarray), 181 | options) 182 | else: 183 | if b.dtype.name != 'object': 184 | if b.dtype.char in ('U', 'S'): 185 | if b.dtype.char == 'S' and b.shape == tuple() \ 186 | and len(b) == 0: 187 | assert_equal(a, \ 188 | np.zeros(shape=tuple(), dtype=b.dtype.char), \ 189 | options) 190 | elif b.dtype.char == 'U': 191 | if b.shape == tuple() and len(b) == 0: 192 | c = np.uint32(()) 193 | else: 194 | c = np.atleast_1d(b).view(np.uint32) 195 | assert_equal_nose(a.dtype, c.dtype) 196 | assert_equal_nose(a.shape, c.shape) 197 | npt.assert_equal(a, c) 198 | else: 199 | assert_equal_nose(a.dtype, b.dtype) 200 | assert_equal_nose(a.shape, b.shape) 201 | npt.assert_equal(a, b) 202 | else: 203 | # Check that the dtype's shape matches. 204 | assert_equal_nose(a.dtype.shape, b.dtype.shape) 205 | 206 | # Now, if b.shape is just all ones, then a.shape will 207 | # just be (1,). Otherwise, we need to compare the shapes 208 | # directly. Also, dimensions need to be squeezed before 209 | # comparison in this case. 
                # NOTE(review): this is the continuation of
                # assert_equal_none_format; the function's head lies
                # before this chunk.
                assert_equal_nose(np.prod(a.shape), np.prod(b.shape))
                if a.shape != b.shape:
                    assert_equal_nose(np.prod(b.shape), 1)
                    assert_equal_nose(a.shape, (1, ))
                if np.prod(a.shape) == 1:
                    a = np.squeeze(a)
                    b = np.squeeze(b)
                # If there was a null in the dtype or the dtype of one
                # of its fields (or subfields) has a 0 in its shape,
                # then it was written as a Group so the field order
                # could have changed.
                has_zero_shape = False
                if b.dtype.names is not None:
                    # Walk the dtype tree (fields, subfields, and base
                    # dtypes) looking for any 0 in a shape.
                    parts = [b.dtype]
                    while 0 != len(parts):
                        part = parts.pop()
                        if 0 in part.shape:
                            has_zero_shape = True
                        if part.names is not None:
                            parts.extend([v[0] for v
                                          in part.fields.values()])
                        if part.base != part:
                            parts.append(part.base)
                if b.dtype.names is not None \
                        and ('\\x00' in str(b.dtype) \
                        or has_zero_shape):
                    # Written as a Group: compare field sets and then
                    # each field individually (order may differ).
                    assert_equal_nose(a.shape, b.shape)
                    assert_equal_nose(set(a.dtype.names),
                                      set(b.dtype.names))
                    for n in b.dtype.names:
                        assert_equal_none_format(a[n], b[n], options)
                else:
                    assert_equal_nose(a.dtype, b.dtype)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, b)
        else:
            # If the original is structured, it is possible that the
            # fields got out of order, in which case the dtype won't
            # quite match. It will need to be checked just to make sure
            # all pieces are there. Otherwise, the dtypes can be
            # directly compared.
            if b.dtype.fields is None:
                assert_equal_nose(a.dtype, b.dtype)
            else:
                assert_equal_nose(dict(a.dtype.fields),
                                  dict(b.dtype.fields))
            assert_equal_nose(a.shape, b.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_none_format(a[index], b[index], options)


def assert_equal_matlab_format(a, b, options=None):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which their values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. All strings are converted to numpy.str_
    # on read unless they were stored as a numpy.bytes_ due to having
    # non-ASCII characters. If it is empty, it has shape (1, 0). A
    # numpy.str_ has all of its strings per row compacted together. A
    # numpy.bytes_ string has to have the same thing done, but then it
    # needs to be converted up to UTF-32 and to numpy.str_ through
    # uint32. Big longs and ints end up getting converted to UTF-16
    # uint16's when written and read back as UTF-32 numpy.unicode_.
    #
    # In all cases, we expect things to be at least two dimensional
    # arrays.
    if type(b) == dict or type(b) == collections.OrderedDict:
        assert_equal_nose(type(a), np.ndarray)
        assert a.dtype.names is not None

        # Determine if any of the keys could not be stored as str. If
        # they all can be, then the dtype field names should be the
        # keys. Otherwise, they should be 'keys' and 'values'.
        all_str_keys = True
        tp_str = str
        tp_bytes = bytes
        converters = {tp_str: lambda x: x,
                      tp_bytes: lambda x: x.decode('UTF-8'),
                      np.bytes_:
                      lambda x: bytes(x).decode('UTF-8'),
                      np.unicode_: lambda x: str(x)}
        tp_conv = lambda x: converters[type(x)](x)
        tp_conv_str = lambda x: tp_conv(x)
        tps = tuple(converters.keys())
        for k in b.keys():
            if type(k) not in tps:
                all_str_keys = False
                break
            try:
                # Only checking convertibility here; the converted
                # value itself is unused.
                k_str = tp_conv(k)
            except:
                # NOTE(review): bare except — presumably meant to
                # catch UnicodeDecodeError/KeyError; confirm.
                all_str_keys = False
                break
        if all_str_keys:
            assert_equal_nose(set(a.dtype.names),
                              set([tp_conv_str(k)
                                   for k in b.keys()]))
            for k in b:
                assert_equal_matlab_format(a[tp_conv_str(k)][0],
                                           b[k], options)
        else:
            # Keys and values are stored under two dedicated fields
            # whose names come from the options.
            names = (options.dict_like_keys_name,
                     options.dict_like_values_name)
            assert_equal_nose(set(a.dtype.names), set(names))
            keys = a[names[0]][0]
            values = a[names[1]][0]
            assert_equal_matlab_format(keys, tuple(b.keys()), options)
            assert_equal_matlab_format(values, tuple(b.values()),
                                       options)
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        # Compare against b converted to an object ndarray.
        b_conv = np.zeros(dtype='object', shape=(len(b), ))
        for i, v in enumerate(b):
            b_conv[i] = v
        assert_equal_matlab_format(a, b_conv, options)
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None:
            # It should be np.zeros(shape=(1, 0), dtype='float64')
            assert_equal_nose(type(a), np.ndarray)
            assert_equal_nose(a.dtype, np.dtype('float64'))
            assert_equal_nose(a.shape, (1, 0))
        elif isinstance(b, (bytes, str, bytearray)):
            if len(b) == 0:
                assert_equal(a, np.zeros(shape=(1, 0), dtype='U'),
                             options)
            elif isinstance(b, (bytes, bytearray)):
                try:
                    c = np.unicode_(b.decode('ASCII'))
                except:
                    # Non-ASCII bytes are kept as numpy.bytes_.
                    c = np.bytes_(b)
                assert_equal(a, np.atleast_2d(c), options)
            else:
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
        elif type(b) == int:
            # Ints outside the int64 range get stored as strings.
            if b > 2**63 or b < -(2**63 - 1):
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
            else:
                assert_equal(a, np.atleast_2d(np.int64(b)), options)
        else:
            assert_equal(a, np.atleast_2d(np.array(b)), options)
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if len(b) == 0 and (b.shape == tuple() \
                        or b.shape == (0, )):
                    # Empty strings read back as a (1, 0) 'U' array.
                    assert_equal(a, np.zeros(shape=(1, 0),
                                             dtype='U'), options)
                elif b.dtype.char == 'U':
                    # Compact all strings per row into one wide 'U'
                    # string (itemsize is 4 bytes per character).
                    c = np.atleast_1d(b)
                    c = np.atleast_2d(c.view(np.dtype('U' \
                        + str(c.shape[-1]*c.dtype.itemsize//4))))
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                elif b.dtype.char == 'S':
                    c = np.atleast_1d(b).view(np.ndarray)
                    # All-ASCII bytes get converted up to UTF-32
                    # ('U') through uint32; non-ASCII stays bytes.
                    if np.all(c.view(np.uint8) < 128):
                        c = c.view(np.dtype('S' \
                            + str(c.shape[-1]*c.dtype.itemsize)))
                        # NOTE(review): the next uint8 view is
                        # immediately repeated inside np.uint32 —
                        # apparent leftover, left as-is.
                        c = c.view(np.dtype('uint8'))
                        c = np.uint32(c.view(np.dtype('uint8')))
                        c = c.view(np.dtype('U' + str(c.shape[-1])))
                    c = np.atleast_2d(c)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                    # NOTE(review): stray no-op pass, left as-is.
                    pass
                else:
                    c = np.atleast_2d(b)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
            else:
                c = np.atleast_2d(b)
                # An empty complex number gets turned into a real
                # number when it is stored.
                if np.prod(c.shape) == 0 \
                        and b.dtype.name.startswith('complex'):
                    c = np.real(c)
                # If it is structured, check that the field names are
                # the same, in the same order, and then go through them
                # one by one. Otherwise, make sure the dtypes and shapes
                # are the same before comparing all values.
                if b.dtype.names is None and a.dtype.names is None:
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
                else:
                    assert a.dtype.names is not None
                    assert b.dtype.names is not None
                    assert_equal_nose(set(a.dtype.names),
                                      set(b.dtype.names))
                    # The ordering of fields must be preserved if the
                    # MATLAB_fields attribute could be used, which can
                    # only be done if there are no non-ascii characters
                    # in any of the field names.
                    allfields = ''.join(b.dtype.names)
                    if np.all(np.array([ord(ch) < 128 \
                            for ch in allfields])):
                        assert_equal_nose(a.dtype.names, b.dtype.names)
                    a = a.flatten()
                    b = b.flatten()
                    for k in b.dtype.names:
                        for index, x in np.ndenumerate(a):
                            assert_equal_from_matlab(a[k][index],
                                                     b[k][index],
                                                     options)
        else:
            # Object arrays: compare element by element.
            c = np.atleast_2d(b)
            assert_equal_nose(a.dtype, c.dtype)
            assert_equal_nose(a.shape, c.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_matlab_format(a[index], c[index], options)


def assert_equal_from_matlab(a, b, options=None):
    # Compares a and b for equality. They are all going to be numpy
    # types. hdf5storage and scipy behave differently when importing
    # arrays as to whether they are 2D or not, so we will make them all
    # at least 2D regardless. For strings, the two packages produce
    # transposed results of each other, so one just needs to be
    # transposed. For object arrays, each element must be iterated over
    # to be compared. For structured ndarrays, their fields need to be
    # compared and then they can be compared element and field
    # wise. Otherwise, they can be directly compared. Note, the type is
    # often converted by scipy (or on route to the file before scipy
    # gets it), so comparisons are done by value, which is not perfect.
    a = np.atleast_2d(a)
    b = np.atleast_2d(b)
    if a.dtype.char == 'U':
        a = a.T
    if b.dtype.name == 'object':
        a = a.flatten()
        b = b.flatten()
        for index, x in np.ndenumerate(a):
            assert_equal_from_matlab(a[index], b[index], options)
    elif b.dtype.names is not None or a.dtype.names is not None:
        assert a.dtype.names is not None
        assert b.dtype.names is not None
        assert set(a.dtype.names) == set(b.dtype.names)
        a = a.flatten()
        b = b.flatten()
        for k in b.dtype.names:
            for index, x in np.ndenumerate(a):
                assert_equal_from_matlab(a[k][index], b[k][index],
                                         options)
    else:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', RuntimeWarning)
            npt.assert_equal(a, b)
--------------------------------------------------------------------------------