├── requirements.txt ├── requirements_tests.txt ├── examples └── example_hdf5storage_marshaller_plugin │ ├── COPYING.txt │ ├── setup.cfg │ ├── pyproject.toml │ ├── README.rst │ ├── setup.py │ └── example_hdf5storage_marshaller_plugin.py ├── requirements_doc.txt ├── doc ├── source │ ├── thanks.rst │ ├── information.rst │ ├── api.rst │ ├── index.rst │ ├── hdf5storage.exceptions.rst │ ├── hdf5storage.rst │ ├── paths.rst │ ├── hdf5storage.utilities.rst │ ├── development.rst │ ├── hdf5storage.Marshallers.rst │ ├── compression.rst │ ├── conf.py │ └── introduction.rst ├── make.bat └── Makefile ├── setup.cfg ├── pyproject.toml ├── MANIFEST.in ├── .gitattributes ├── THANKS.rst ├── .gitignore ├── .travis.yml ├── COPYING.txt ├── tests ├── julia_read_mat.jl ├── read_write_mat.m ├── test_string_utf16_conversion.py ├── test_str_conv_utils.py ├── test_ndarray_O_field.py ├── test_marshaller_plugins.py ├── test_matlab_compatibility.py ├── test_multi_io.py ├── make_mat_with_all_types.m ├── test_marshaller_collection_priority.py ├── test_julia_mat_compatibility.py ├── test_marshallers_requiring_modules.py ├── test_path_escaping.py ├── test_dict_like_storage_methods.py ├── test_hdf5_filters.py ├── make_randoms.py └── asserts.py ├── hdf5storage └── exceptions.py └── setup.py /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | numpy 3 | h5py>=2.3 4 | -------------------------------------------------------------------------------- /requirements_tests.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | nose>=1.0 3 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/COPYING.txt: -------------------------------------------------------------------------------- 1 | ../../COPYING.txt -------------------------------------------------------------------------------- /requirements_doc.txt: 
-------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | sphinx>=1.7 3 | sphinx_rtd_theme 4 | -------------------------------------------------------------------------------- /doc/source/thanks.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | THANKS 3 | ====== 4 | 5 | .. include:: ../../THANKS.rst 6 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /doc/source/information.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | hdf5storage 3 | =========== 4 | 5 | .. include:: ../../README.rst 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [build_sphinx] 5 | all-files=1 6 | build-dir=doc/build 7 | source-dir=doc/source -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = ["setuptools"] # PEP 508 specifications. 4 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = ["setuptools"] # PEP 508 specifications. 
4 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | hdf5storage 8 | hdf5storage.exceptions 9 | hdf5storage.Marshallers 10 | hdf5storage.utilities 11 | 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.cfg 2 | include *.toml 3 | include *.txt 4 | include *.rst 5 | include *.py 6 | recursive-include tests *.py *.m *.jl 7 | recursive-include examples *.py *.txt *.rst *.cfg 8 | recursive-include doc * 9 | prune doc/build 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set default behaviour, in case users don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files we want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.py text 7 | *.txt text eol=crlf 8 | *.cfg text 9 | *.toml text -------------------------------------------------------------------------------- /THANKS.rst: -------------------------------------------------------------------------------- 1 | The following people helped contributed code to fix bugs, add features, etc. 
2 | 3 | * `Steven Dee `_ 4 | * `WANG Longqi `_ 5 | * `Jakub Urban `_ 6 | * `Ghislain Antony Vaillant `_ 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # autosaves 39 | *.py~ 40 | *.yml~ 41 | *.rst~ 42 | *.txt~ 43 | *.toml~ -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. hdf5storage documentation master file, created by 2 | sphinx-quickstart on Sun Dec 22 00:05:54 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to hdf5storage's documentation! 7 | ======================================= 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | information 15 | introduction 16 | paths 17 | compression 18 | storage_format 19 | development 20 | thanks 21 | api 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | 30 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.exceptions.rst: -------------------------------------------------------------------------------- 1 | hdf5storage.exceptions 2 | ====================== 3 | 4 | .. currentmodule:: hdf5storage.exceptions 5 | 6 | .. 
automodule:: hdf5storage.exceptions 7 | 8 | .. autosummary:: 9 | 10 | Hdf5storageError 11 | CantReadError 12 | TypeNotMatlabCompatibleError 13 | 14 | 15 | Hdf5storageError 16 | ---------------- 17 | 18 | .. autoexception:: Hdf5storageError 19 | :show-inheritance: 20 | 21 | 22 | CantReadError 23 | ------------- 24 | 25 | .. autoexception:: CantReadError 26 | :show-inheritance: 27 | 28 | 29 | TypeNotMatlabCompatibleError 30 | ---------------------------- 31 | 32 | .. autoexception:: TypeNotMatlabCompatibleError 33 | :show-inheritance: 34 | 35 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/README.rst: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | This is an example plugin package for providing Marshallers for the 5 | `hdf5storage `_ package (this 6 | example package is included in it). 7 | 8 | The base package's documentation is found at 9 | http://pythonhosted.org/hdf5storage/ 10 | 11 | The base package's source code is found at 12 | https://github.com/frejanordsiek/hdf5storage 13 | with this example package's source code being at 14 | https://github.com/frejanordsiek/hdf5storage/tests/example_hdf5storage_marshaller_plugin 15 | 16 | The package is licensed under a 2-clause BSD license 17 | (https://github.com/frejanordsiek/hdf5storage/blob/master/COPYING.txt). 
18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | sudo: required 3 | 4 | language: python 5 | cache: pip 6 | 7 | notifications: 8 | email: false 9 | 10 | env: 11 | - H5PY_VERSION="==2.3" 12 | - H5PY_VERSION="==2.4" 13 | - H5PY_VERSION="==2.5" 14 | - H5PY_VERSION="==2.6" 15 | - H5PY_VERSION="==2.7" 16 | - H5PY_VERSION="==2.8" 17 | - H5PY_VERSION="==2.9" 18 | - H5PY_VERSION="" 19 | 20 | python: 21 | - "3.5" 22 | - "3.6" 23 | - "3.7" 24 | - "3.8" 25 | - "3.9-dev" 26 | 27 | before_install: 28 | - sudo apt-get -qq update 29 | - sudo apt-get install -y gcc libhdf5-serial-dev libblas-dev liblapack-dev libatlas-dev libatlas-base-dev libquadmath0 30 | 31 | # command to install dependencies 32 | # 33 | # Seem to need newer Numpy as well. 34 | install: 35 | - pip install -U numpy 36 | - pip install h5py$H5PY_VERSION 37 | - pip install -r requirements_tests.txt 38 | - pip install -e examples/example_hdf5storage_marshaller_plugin 39 | 40 | # command to run tests 41 | script: nosetests 42 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.rst: -------------------------------------------------------------------------------- 1 | hdf5storage 2 | =========== 3 | 4 | .. currentmodule:: hdf5storage 5 | 6 | .. automodule:: hdf5storage 7 | 8 | .. autosummary:: 9 | 10 | write 11 | writes 12 | read 13 | reads 14 | savemat 15 | loadmat 16 | get_default_MarshallerCollection 17 | make_new_default_MarshallerCollection 18 | Options 19 | MarshallerCollection 20 | 21 | 22 | write 23 | ----- 24 | 25 | .. autofunction:: write 26 | 27 | 28 | writes 29 | ------ 30 | 31 | .. autofunction:: writes 32 | 33 | 34 | read 35 | ----- 36 | 37 | .. autofunction:: read 38 | 39 | 40 | reads 41 | ----- 42 | 43 | .. autofunction:: reads 44 | 45 | 46 | savemat 47 | ------- 48 | 49 | .. 
autofunction:: savemat 50 | 51 | 52 | loadmat 53 | ------- 54 | 55 | .. autofunction:: loadmat 56 | 57 | 58 | get_default_MarshallerCollection 59 | -------------------------------- 60 | 61 | .. autofunction:: get_default_MarshallerCollection 62 | 63 | 64 | make_new_default_MarshallerCollection 65 | ------------------------------------- 66 | 67 | .. autofunction:: make_new_default_MarshallerCollection 68 | 69 | 70 | Options 71 | ------- 72 | 73 | .. autoclass:: Options 74 | :members: 75 | :show-inheritance: 76 | 77 | 78 | MarshallerCollection 79 | -------------------- 80 | 81 | .. autoclass:: MarshallerCollection 82 | :members: 83 | :show-inheritance: 84 | 85 | -------------------------------------------------------------------------------- /COPYING.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2016, Freja Nordsiek 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /tests/julia_read_mat.jl: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | using MAT 28 | 29 | a = matread(ARGS[1]) 30 | matwrite(ARGS[2], a) 31 | 32 | quit() 33 | -------------------------------------------------------------------------------- /tests/read_write_mat.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) 2013-2016, Freja Nordsiek 2 | % All rights reserved. 3 | % 4 | % Redistribution and use in source and binary forms, with or without 5 | % modification, are permitted provided that the following conditions are 6 | % met: 7 | % 8 | % 1. Redistributions of source code must retain the above copyright 9 | % notice, this list of conditions and the following disclaimer. 10 | % 11 | % 2. Redistributions in binary form must reproduce the above copyright 12 | % notice, this list of conditions and the following disclaimer in the 13 | % documentation and/or other materials provided with the distribution. 14 | % 15 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | % "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | % LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | % A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | % HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | % SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | % LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | % DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | % THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | % (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | % OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | a = load('python_v7p3.mat'); 28 | save('python_v7.mat','-struct','a','-v7'); 29 | 30 | exit; 31 | -------------------------------------------------------------------------------- /hdf5storage/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | """ Module of Exceptions. """ 28 | 29 | 30 | class Hdf5storageError(IOError): 31 | """ Base class of hdf5storage package exceptions.""" 32 | pass 33 | 34 | 35 | class CantReadError(Hdf5storageError): 36 | """ Exception for a failure to read the desired data.""" 37 | pass 38 | 39 | 40 | class TypeNotMatlabCompatibleError(Hdf5storageError): 41 | """ Exception for trying to write non-MATLAB compatible data. 42 | 43 | In the event that MATLAB compatibility is being done 44 | (``Options.matlab_compatible``) and a Python type is not importable 45 | by MATLAB, the data is either not written or this exception is 46 | thrown depending on the value of 47 | ``Options.action_for_matlab_incompatible``. 48 | 49 | See Also 50 | -------- 51 | hdf5storage.Options.matlab_compatible 52 | hdf5storage.Options.action_for_matlab_incompatible 53 | 54 | """ 55 | pass 56 | -------------------------------------------------------------------------------- /tests/test_string_utf16_conversion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import tempfile 30 | 31 | import numpy as np 32 | import h5py 33 | 34 | from nose.tools import assert_equal as assert_equal_nose 35 | 36 | import hdf5storage 37 | 38 | 39 | # A test to make sure that the following are written as UTF-16 40 | # (uint16) if they don't contain doublets and the 41 | # convert_numpy_str_to_utf16 option is set. 
42 | # 43 | # * str 44 | # * numpy.unicode_ scalars 45 | 46 | def check_conv_utf16(tp): 47 | name = '/a' 48 | data = tp('abcdefghijklmnopqrstuvwxyz') 49 | fld = None 50 | try: 51 | fld = tempfile.mkstemp() 52 | os.close(fld[0]) 53 | filename = fld[1] 54 | hdf5storage.write(data, path=name, filename=filename, 55 | matlab_compatible=False, 56 | store_python_metadata=False, 57 | convert_numpy_str_to_utf16=True) 58 | with h5py.File(filename, mode='r') as f: 59 | assert_equal_nose(f[name].dtype.type, np.uint16) 60 | except: 61 | raise 62 | finally: 63 | if fld is not None: 64 | os.remove(fld[1]) 65 | 66 | 67 | def test_conv_utf16(): 68 | tps = (str, np.unicode_) 69 | for tp in tps: 70 | yield check_conv_utf16, tp 71 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | from setuptools import setup 28 | 29 | with open('README.rst') as file: 30 | long_description = file.read() 31 | 32 | setup(name='example_hdf5storage_marshaller_plugin', 33 | version='0.2', 34 | description='Example marshaller plugin for hdf5storage package.', 35 | long_description=long_description, 36 | author='Freja Nordsiek', 37 | author_email='fnordsie at gmail dt com', 38 | url='https://github.com/frejanordsiek/hdf5storage/tests/example_hdf5storage_marshaller_plugin', 39 | py_modules=['example_hdf5storage_marshaller_plugin'], 40 | entry_points={'hdf5storage.marshallers.plugins': 41 | '1.0 = example_hdf5storage_marshaller_plugin:get_marshallers_1p0'}, 42 | license='BSD', 43 | keywords='hdf5storage', 44 | zip_safe=True, 45 | classifiers=[ 46 | "Programming Language :: Python :: 3 :: Only", 47 | "Development Status :: 3 - Alpha", 48 | "License :: OSI Approved :: BSD License", 49 | "Operating System :: OS Independent", 50 | "Intended Audience :: Developers", 51 | "Intended Audience :: Information Technology", 52 | "Intended Audience :: Science/Research", 53 | "Topic :: Scientific/Engineering", 54 | "Topic :: Database", 55 | "Topic :: Software Development :: Libraries :: Python Modules" 56 | ] 57 | ) 58 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import sys 28 | from setuptools import setup 29 | 30 | if sys.hexversion < 0x3050000: 31 | raise NotImplementedError('Python < 3.5 not supported.') 32 | 33 | with open('README.rst') as file: 34 | long_description = file.read() 35 | 36 | setup(name='hdf5storage', 37 | version='0.2', 38 | description='Utilities to read/write Python types to/from HDF5 files, including MATLAB v7.3 MAT files.', 39 | long_description=long_description, 40 | author='Freja Nordsiek', 41 | author_email='fnordsie@gmail.com', 42 | url='https://github.com/frejanordsiek/hdf5storage', 43 | packages=['hdf5storage'], 44 | install_requires=["setuptools", "numpy", "h5py>=2.3"], 45 | tests_require=['nose>=1.0'], 46 | test_suite='nose.collector', 47 | license='BSD', 48 | keywords='hdf5 matlab', 49 | zip_safe=True, 50 | classifiers=[ 51 | "Programming Language :: Python :: 3 :: Only", 52 | "Development Status :: 3 - Alpha", 53 | "License :: OSI Approved :: BSD License", 54 | "Operating System :: OS Independent", 55 | "Intended Audience :: Developers", 56 | "Intended Audience :: Information Technology", 57 | "Intended Audience :: Science/Research", 58 | "Topic :: Scientific/Engineering", 59 | "Topic :: Database", 60 | "Topic :: Software Development :: Libraries :: Python Modules" 61 | ] 62 | ) 63 | -------------------------------------------------------------------------------- /tests/test_str_conv_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import string 28 | 29 | import numpy as np 30 | 31 | import hdf5storage.utilities as utils 32 | 33 | from nose.tools import assert_equal as assert_equal_nose 34 | 35 | from asserts import assert_equal 36 | 37 | 38 | # Make two strings, one with the main ascii characters and another with 39 | # the same characters plus a lot of unicode characters. 
40 | str_ascii = string.ascii_letters + string.digits 41 | str_unicode = str_ascii + ''.join([chr(500 + i) 42 | for i in range(1000)]) 43 | 44 | 45 | def test_numpy_str_ascii_to_uint16_back(): 46 | for i in range(100): 47 | data = np.unicode_(str_ascii) 48 | intermed = utils.convert_numpy_str_to_uint16(data) 49 | out = utils.convert_to_numpy_str(intermed)[0] 50 | assert_equal_nose(out.tostring(), data.tostring()) 51 | assert_equal(out, data) 52 | 53 | 54 | def test_numpy_str_someunicode_to_uint16_back(): 55 | for i in range(100): 56 | data = np.unicode_(str_unicode) 57 | intermed = utils.convert_numpy_str_to_uint16(data) 58 | out = utils.convert_to_numpy_str(intermed)[0] 59 | assert_equal_nose(out.tostring(), data.tostring()) 60 | assert_equal(out, data) 61 | 62 | 63 | def test_numpy_str_ascii_to_uint32_back(): 64 | for i in range(100): 65 | data = np.unicode_(str_ascii) 66 | intermed = utils.convert_numpy_str_to_uint32(data) 67 | out = utils.convert_to_numpy_str(intermed)[0] 68 | assert_equal_nose(intermed.tostring(), data.tostring()) 69 | assert_equal_nose(out.tostring(), data.tostring()) 70 | assert_equal(out, data) 71 | 72 | 73 | def test_numpy_str_someunicode_to_uint32_back(): 74 | for i in range(100): 75 | data = np.unicode_(str_unicode) 76 | intermed = utils.convert_numpy_str_to_uint32(data) 77 | out = utils.convert_to_numpy_str(intermed)[0] 78 | assert_equal_nose(intermed.tostring(), data.tostring()) 79 | assert_equal_nose(out.tostring(), data.tostring()) 80 | assert_equal(out, data) 81 | -------------------------------------------------------------------------------- /examples/example_hdf5storage_marshaller_plugin/example_hdf5storage_marshaller_plugin.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017, Freja Nordsiek 2 | # All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | """ 27 | This is an example package for providing hdf5storage plugins. 28 | 29 | """ 30 | 31 | __version__ = '0.2' 32 | 33 | import hdf5storage.Marshallers as hm 34 | 35 | 36 | # Going to make a class that subclasses lists but doesn't really do 37 | # anything else, but it will be enought to need a new marshaller. 38 | 39 | class SubList(list): 40 | def __init__(self, *args, **keywords): 41 | list.__init__(self, *args, **keywords) 42 | 43 | 44 | # The marshaller for this will be rather trivial. It inherits from 45 | # PythonTupleSetDequeMarshaller which inherits from 46 | # PythonListMarshaller. 
The only thing that requires any work is
Unlike a Windows path, forward slashes (``'/'``) are 15 | used as directory separators instead of backward slashes (``'\\'``) and 16 | the base of the file system is just ``'/'`` instead of something like 17 | ``'C:\\'``. In the language of HDF5, what we call directories and files 18 | in filesystems are called groups and datasets. 19 | 20 | Limitations of HDF5 Paths 21 | ========================= 22 | 23 | The HDF5 format and library do not support having Dataset or Group names 24 | containing nulls (``'\x00'``), containing forward slashes (``'/'``), or 25 | starting out with one or more periods (``'.'``). 26 | 27 | Solution - Escaping 28 | =================== 29 | 30 | .. versionadded:: 0.2 31 | 32 | Ability to escape characters not allowed in Group or Dataset names. 33 | 34 | .. warning:: 35 | 36 | Before version 0.2, no escaping is supported and errors are thrown 37 | when a workaround cannot be found. 38 | 39 | In order to work around these limitations in HDF5 Dataset and Group 40 | names, the ability to escape these characters is provided. They are 41 | escaped as hexidecimal specifications or as doubling, which is fairly 42 | standard. The conversions are 43 | 44 | ============== ========== =========== 45 | Name Character Escaped 46 | ============== ========== =========== 47 | null ``'\x00'`` ``'\\x00'`` 48 | forward slash ``'/'`` ``'\\x2f'`` 49 | backward slash ``'\\'`` ``'\\\\'`` 50 | ============== ========== =========== 51 | 52 | The backward slash has to be escaped or else it will be impossible to 53 | accurately unescape. 54 | 55 | When unescaping, all the hex and unicode escapes allowed in python 56 | strings as well as how backward slashes are entered are used. 
They are 57 | 58 | ================= ================ ========== 59 | Escape Kind Conversion 60 | ================= ================ ========== 61 | ``'\\\\'`` double backslash ``'\\'`` 62 | ``'\\xYY'`` hex ``chr(N)`` 63 | ``'\\uYYYY'`` unicode ``chr(N)`` 64 | ``'\\UYYYYYYYY'`` unicode ``chr(N)`` 65 | ================= ================ ========== 66 | 67 | Where the Y are hexidecimal digits and N is the value of the hexidecimal 68 | number (the unicode character codepoint). 69 | 70 | Supported Paths 71 | =============== 72 | 73 | Paths can be given in a number of ways. 74 | 75 | No Escaping 76 | ----------- 77 | 78 | The path is given as a ``str`` or ``bytes``. It is the responsibility of 79 | the caller to make sure all escaping has been done. Forward slashes are 80 | interpreted as path separators. 81 | 82 | Escaping 83 | -------- 84 | 85 | The path is given as an iterable (e.g. ``list``, ``tuple``, etc.) of 86 | separated parts of the path (split at the separators) which must each be 87 | ``str`` and ``bytes``. These parts will each be escaped before being 88 | joined. 89 | 90 | Escaping/Unescaping Functions 91 | ============================= 92 | 93 | .. versionadded:: 0.2 94 | 95 | The functions described here. 96 | 97 | :py:func:`utilities.escape_path` is the function to escape an individual 98 | part of a path with. 99 | 100 | :py:func:`utilities.unescape_path` is the function to unescape a path. 101 | 102 | :py:func:`utilities.process_path` is a function that will take a path of 103 | any form, escape it if it is meant to be escaped, and get the Group that 104 | the target of the path is in as well as the name of the target inside 105 | that Group the path is pointing at. 106 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.utilities.rst: -------------------------------------------------------------------------------- 1 | hdf5storage.utilities 2 | ===================== 3 | 4 | .. 
currentmodule:: hdf5storage.utilities 5 | 6 | .. automodule:: hdf5storage.utilities 7 | 8 | .. autosummary:: 9 | 10 | escape_path 11 | unescape_path 12 | process_path 13 | does_dtype_have_a_zero_shape 14 | write_data 15 | read_data 16 | write_object_array 17 | read_object_array 18 | next_unused_name_in_group 19 | convert_numpy_str_to_uint16 20 | convert_numpy_str_to_uint32 21 | convert_to_str 22 | convert_to_numpy_str 23 | convert_to_numpy_bytes 24 | decode_complex 25 | encode_complex 26 | get_attribute 27 | convert_attribute_to_string 28 | get_attribute_string 29 | convert_attribute_to_string_array 30 | get_attribute_string_array 31 | set_attribute 32 | set_attribute_string 33 | set_attribute_string_array 34 | set_attributes_all 35 | del_attribute 36 | 37 | 38 | escape_path 39 | ----------- 40 | 41 | .. autofunction:: escape_path 42 | 43 | 44 | unescape_path 45 | ------------- 46 | 47 | .. autofunction:: unescape_path 48 | 49 | 50 | process_path 51 | ------------ 52 | 53 | .. autofunction:: process_path 54 | 55 | 56 | does_dtype_have_a_zero_shape 57 | ---------------------------- 58 | 59 | .. autofunction:: does_dtype_have_a_zero_shape 60 | 61 | 62 | write_data 63 | ---------- 64 | 65 | .. autofunction:: write_data 66 | 67 | 68 | read_data 69 | --------- 70 | 71 | .. autofunction:: read_data 72 | 73 | 74 | write_object_array 75 | ------------------ 76 | 77 | .. autofunction:: write_object_array 78 | 79 | 80 | read_object_array 81 | ------------------ 82 | 83 | .. autofunction:: read_object_array 84 | 85 | 86 | next_unused_name_in_group 87 | ------------------------- 88 | 89 | .. autofunction:: next_unused_name_in_group 90 | 91 | 92 | convert_numpy_str_to_uint16 93 | --------------------------- 94 | 95 | .. autofunction:: convert_numpy_str_to_uint16 96 | 97 | 98 | convert_numpy_str_to_uint32 99 | --------------------------- 100 | 101 | .. autofunction:: convert_numpy_str_to_uint32 102 | 103 | 104 | convert_to_str 105 | -------------- 106 | 107 | .. 
autofunction:: convert_to_str 108 | 109 | 110 | convert_to_numpy_str 111 | -------------------- 112 | 113 | .. autofunction:: convert_to_numpy_str 114 | 115 | 116 | convert_to_numpy_bytes 117 | ---------------------- 118 | 119 | .. autofunction:: convert_to_numpy_bytes 120 | 121 | 122 | decode_complex 123 | -------------- 124 | 125 | .. autofunction:: decode_complex 126 | 127 | 128 | encode_complex 129 | -------------- 130 | 131 | .. autofunction:: encode_complex 132 | 133 | 134 | get_attribute 135 | ------------- 136 | 137 | .. autofunction:: get_attribute 138 | 139 | 140 | 141 | convert_attribute_to_string 142 | --------------------------- 143 | 144 | .. autofunction:: convert_attribute_to_string 145 | 146 | 147 | get_attribute_string 148 | -------------------- 149 | 150 | .. autofunction:: get_attribute_string 151 | 152 | 153 | convert_attribute_to_string_array 154 | --------------------------------- 155 | 156 | .. autofunction:: convert_attribute_to_string_array 157 | 158 | 159 | get_attribute_string_array 160 | -------------------------- 161 | 162 | .. autofunction:: get_attribute_string_array 163 | 164 | 165 | set_attribute 166 | ------------- 167 | 168 | .. autofunction:: set_attribute 169 | 170 | 171 | set_attribute_string 172 | -------------------- 173 | 174 | .. autofunction:: set_attribute_string 175 | 176 | 177 | set_attribute_string_array 178 | -------------------------- 179 | 180 | .. autofunction:: set_attribute_string_array 181 | 182 | 183 | set_attributes_all 184 | ------------------ 185 | 186 | .. autofunction:: set_attributes_all 187 | 188 | 189 | del_attribute 190 | ------------- 191 | 192 | .. autofunction:: del_attribute 193 | 194 | -------------------------------------------------------------------------------- /tests/test_ndarray_O_field.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import tempfile 30 | 31 | import numpy as np 32 | import h5py 33 | 34 | import hdf5storage 35 | 36 | 37 | # A series of tests to make sure that structured ndarrays with a field 38 | # that has an object dtype are written like structs (are HDF5 Groups) 39 | # but are written as an HDF5 COMPOUND Dataset otherwise (even in the 40 | # case that a field's name is 'O'). 
41 | 42 | 43 | def test_O_field_compound(): 44 | name = '/a' 45 | data = np.empty(shape=(1, ), dtype=[('O', 'int8'), ('a', 'uint16')]) 46 | fld = None 47 | try: 48 | fld = tempfile.mkstemp() 49 | os.close(fld[0]) 50 | filename = fld[1] 51 | hdf5storage.write(data, path=name, filename=filename, 52 | matlab_compatible=False, 53 | structured_numpy_ndarray_as_struct=False) 54 | with h5py.File(filename, mode='r') as f: 55 | assert isinstance(f[name], h5py.Dataset) 56 | except: 57 | raise 58 | finally: 59 | if fld is not None: 60 | os.remove(fld[1]) 61 | 62 | 63 | def test_object_field_group(): 64 | name = '/a' 65 | data = np.empty(shape=(1, ), dtype=[('a', 'O'), ('b', 'uint16')]) 66 | data['a'][0] = [1, 2] 67 | fld = None 68 | try: 69 | fld = tempfile.mkstemp() 70 | os.close(fld[0]) 71 | filename = fld[1] 72 | hdf5storage.write(data, path=name, filename=filename, 73 | matlab_compatible=False, 74 | structured_numpy_ndarray_as_struct=False) 75 | with h5py.File(filename, mode='r') as f: 76 | assert isinstance(f[name], h5py.Group) 77 | except: 78 | raise 79 | finally: 80 | if fld is not None: 81 | os.remove(fld[1]) 82 | 83 | 84 | def test_O_and_object_field_group(): 85 | name = '/a' 86 | data = np.empty(shape=(1, ), dtype=[('a', 'O'), ('O', 'uint16')]) 87 | data['a'][0] = [1, 2] 88 | fld = None 89 | try: 90 | fld = tempfile.mkstemp() 91 | os.close(fld[0]) 92 | filename = fld[1] 93 | hdf5storage.write(data, path=name, filename=filename, 94 | matlab_compatible=False, 95 | structured_numpy_ndarray_as_struct=False) 96 | with h5py.File(filename, mode='r') as f: 97 | assert isinstance(f[name], h5py.Group) 98 | except: 99 | raise 100 | finally: 101 | if fld is not None: 102 | os.remove(fld[1]) 103 | -------------------------------------------------------------------------------- /tests/test_marshaller_plugins.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import tempfile 30 | 31 | import pkg_resources 32 | 33 | from nose.tools import assert_equal as assert_equal_nose 34 | 35 | import unittest 36 | 37 | import hdf5storage 38 | 39 | # Check if the example package is installed because some tests will 40 | # depend on it. 
41 | try: 42 | import example_hdf5storage_marshaller_plugin 43 | has_example_hdf5storage_marshaller_plugin = True 44 | except: 45 | has_example_hdf5storage_marshaller_plugin = False 46 | 47 | 48 | def test_marshaller_api_versions(): 49 | assert_equal_nose(('1.0', ), 50 | hdf5storage.supported_marshaller_api_versions()) 51 | 52 | 53 | def test_find_thirdparty_marshaller_plugins(): 54 | found_example = False 55 | apivs = hdf5storage.supported_marshaller_api_versions() 56 | plugins = hdf5storage.find_thirdparty_marshaller_plugins() 57 | assert isinstance(plugins, dict) 58 | assert_equal_nose(set(apivs), set(plugins)) 59 | for k, v in plugins.items(): 60 | assert isinstance(k, str) 61 | assert isinstance(v, dict) 62 | for k2, v2 in v.items(): 63 | assert isinstance(k2, str) 64 | assert isinstance(v2, pkg_resources.EntryPoint) 65 | if k2 == 'example_hdf5storage_marshaller_plugin': 66 | found_example = True 67 | assert_equal_nose(has_example_hdf5storage_marshaller_plugin, 68 | found_example) 69 | 70 | 71 | @unittest.skipUnless(has_example_hdf5storage_marshaller_plugin, 72 | 'requires example_hdf5storage_marshaller_plugin') 73 | def test_plugin_marshaller_SubList(): 74 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 75 | lazy_loading=True) 76 | options = hdf5storage.Options(store_python_metadata=True, 77 | matlab_compatible=False, 78 | marshaller_collection=mc) 79 | ell = [1, 2, 'b1', b'3991', True, None] 80 | data = example_hdf5storage_marshaller_plugin.SubList(ell) 81 | f = None 82 | name = '/a' 83 | try: 84 | f = tempfile.mkstemp() 85 | os.close(f[0]) 86 | filename = f[1] 87 | hdf5storage.write(data, path=name, filename=filename, 88 | options=options) 89 | out = hdf5storage.read(path=name, filename=filename, 90 | options=options) 91 | except: 92 | raise 93 | finally: 94 | if f is not None: 95 | os.remove(f[1]) 96 | assert_equal_nose(ell, list(out)) 97 | assert_equal_nose(type(out), 98 | example_hdf5storage_marshaller_plugin.SubList) 99 | 
-------------------------------------------------------------------------------- /tests/test_matlab_compatibility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import os 28 | import os.path 29 | import subprocess 30 | 31 | from nose.plugins.skip import SkipTest 32 | 33 | import hdf5storage 34 | 35 | from asserts import assert_equal_from_matlab 36 | 37 | mat_files = ['types_v7p3.mat', 'types_v7.mat', 38 | 'python_v7p3.mat', 'python_v7.mat'] 39 | for i in range(0, len(mat_files)): 40 | mat_files[i] = os.path.join(os.path.dirname(__file__), mat_files[i]) 41 | 42 | script_names = ['make_mat_with_all_types.m', 'read_write_mat.m'] 43 | for i in range(0, len(script_names)): 44 | script_names[i] = os.path.join(os.path.dirname(__file__), 45 | script_names[i]) 46 | 47 | types_v7 = dict() 48 | types_v7p3 = dict() 49 | python_v7 = dict() 50 | python_v7p3 = dict() 51 | 52 | 53 | # Have a flag for whether matlab was found and run successfully or not, 54 | # so tests can be skipped if not. 55 | ran_matlab_successful = [False] 56 | 57 | 58 | def setup_module(): 59 | teardown_module() 60 | try: 61 | import scipy.io 62 | matlab_command = "run('" + script_names[0] + "')" 63 | subprocess.check_call(['matlab', '-nosplash', '-nodesktop', 64 | '-nojvm', '-r', matlab_command]) 65 | scipy.io.loadmat(file_name=mat_files[1], mdict=types_v7) 66 | hdf5storage.loadmat(file_name=mat_files[0], mdict=types_v7p3) 67 | 68 | hdf5storage.savemat(file_name=mat_files[2], mdict=types_v7p3) 69 | matlab_command = "run('" + script_names[1] + "')" 70 | subprocess.check_call(['matlab', '-nosplash', '-nodesktop', 71 | '-nojvm', '-r', matlab_command]) 72 | scipy.io.loadmat(file_name=mat_files[3], mdict=python_v7) 73 | hdf5storage.loadmat(file_name=mat_files[2], mdict=python_v7p3) 74 | except: 75 | pass 76 | else: 77 | ran_matlab_successful[0] = True 78 | 79 | 80 | def teardown_module(): 81 | for name in mat_files: 82 | if os.path.exists(name): 83 | os.remove(name) 84 | 85 | 86 | def test_read_from_matlab(): 87 | if not ran_matlab_successful[0]: 88 | raise SkipTest 89 | for k in (set(types_v7.keys()) - set(['__version__', '__header__', \ 90 | 
'__globals__'])): 91 | yield check_variable_from_matlab, k 92 | 93 | 94 | def test_to_matlab_back(): 95 | if not ran_matlab_successful[0]: 96 | raise SkipTest 97 | for k in set(types_v7p3.keys()): 98 | yield check_variable_to_matlab_back, k 99 | 100 | 101 | def check_variable_from_matlab(name): 102 | assert_equal_from_matlab(types_v7p3[name], types_v7[name]) 103 | 104 | 105 | def check_variable_to_matlab_back(name): 106 | assert_equal_from_matlab(python_v7p3[name], types_v7[name]) 107 | -------------------------------------------------------------------------------- /tests/test_multi_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | import os 29 | import os.path 30 | import random 31 | import tempfile 32 | 33 | import hdf5storage 34 | 35 | from asserts import assert_equal 36 | from make_randoms import min_dict_keys, max_dict_keys, random_name, \ 37 | random_numpy, random_numpy_shape, dict_value_subarray_dimensions, \ 38 | max_dict_value_subarray_axis_length, dtypes 39 | 40 | 41 | random.seed() 42 | 43 | 44 | # A series of tests to make sure that more than one data item can be 45 | # written or read at a time using the writes and reads functions. 46 | 47 | def test_multi_write(): 48 | # Makes a random dict of random paths and variables (random number 49 | # of randomized paths with random numpy arrays as values). 50 | data = dict() 51 | for i in range(0, random.randint(min_dict_keys, \ 52 | max_dict_keys)): 53 | name = random_name() 54 | data[name] = \ 55 | random_numpy(random_numpy_shape( \ 56 | dict_value_subarray_dimensions, \ 57 | max_dict_value_subarray_axis_length), \ 58 | dtype=random.choice(dtypes)) 59 | 60 | # Write it and then read it back item by item. 
61 | fld = None 62 | try: 63 | fld = tempfile.mkstemp() 64 | os.close(fld[0]) 65 | filename = fld[1] 66 | hdf5storage.writes(mdict=data, filename=filename) 67 | out = dict() 68 | for p in data: 69 | out[p] = hdf5storage.read(path=p, filename=filename) 70 | except: 71 | raise 72 | finally: 73 | if fld is not None: 74 | os.remove(fld[1]) 75 | 76 | # Compare data and out. 77 | assert_equal(out, data) 78 | 79 | 80 | def test_multi_read(): 81 | # Makes a random dict of random paths and variables (random number 82 | # of randomized paths with random numpy arrays as values). 83 | data = dict() 84 | for i in range(0, random.randint(min_dict_keys, \ 85 | max_dict_keys)): 86 | name = random_name() 87 | data[name] = \ 88 | random_numpy(random_numpy_shape( \ 89 | dict_value_subarray_dimensions, \ 90 | max_dict_value_subarray_axis_length), \ 91 | dtype=random.choice(dtypes)) 92 | 93 | paths = data.keys() 94 | # Write it item by item and then read it back in one unit. 95 | fld = None 96 | try: 97 | fld = tempfile.mkstemp() 98 | os.close(fld[0]) 99 | filename = fld[1] 100 | for p in paths: 101 | hdf5storage.write(data=data[p], path=p, filename=filename) 102 | out = hdf5storage.reads(paths=list(data.keys()), 103 | filename=filename) 104 | except: 105 | raise 106 | finally: 107 | if fld is not None: 108 | os.remove(fld[1]) 109 | 110 | # Compare data and out. 111 | for i, p in enumerate(paths): 112 | assert_equal(out[i], data[p]) 113 | -------------------------------------------------------------------------------- /tests/make_mat_with_all_types.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) 2013-2016, Freja Nordsiek 2 | % All rights reserved. 3 | % 4 | % Redistribution and use in source and binary forms, with or without 5 | % modification, are permitted provided that the following conditions are 6 | % met: 7 | % 8 | % 1. 
Redistributions of source code must retain the above copyright 9 | % notice, this list of conditions and the following disclaimer. 10 | % 11 | % 2. Redistributions in binary form must reproduce the above copyright 12 | % notice, this list of conditions and the following disclaimer in the 13 | % documentation and/or other materials provided with the distribution. 14 | % 15 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | % "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | % LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | % A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | % HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | % SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | % LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | % DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | % THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | % (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | % OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | 28 | clear a 29 | 30 | % Main types as scalars and arrays. 
31 | 32 | a.logical = true; 33 | 34 | a.uint8 = uint8(2); 35 | a.uint16 = uint16(28); 36 | a.uint32 = uint32(28347394); 37 | a.uint64 = uint64(234392); 38 | 39 | a.int8 = int8(-32); 40 | a.int16 = int16(284); 41 | a.int32 = int32(-7394); 42 | a.int64 = int64(2334322); 43 | 44 | a.single = single(4.2134e-2); 45 | a.single_complex = single(33.4 + 3i); 46 | a.single_nan = single(NaN); 47 | a.single_inf = single(inf); 48 | 49 | a.double = 14.2134e200; 50 | a.double_complex = 8e-30 - 3.2e40i; 51 | a.double_nan = NaN; 52 | a.double_inf = -inf; 53 | 54 | a.char = 'p'; 55 | 56 | a.logical_array = logical([1 0 0 0; 0 1 1 0]); 57 | 58 | a.uint8_array = uint8([0 1 3 4; 92 3 2 8]); 59 | a.uint16_array = uint16([0 1; 3 4; 92 3; 2 8]); 60 | a.uint32_array = uint32([0 1 3 4 92 3 2 8]); 61 | a.uint64_array = uint64([0; 1; 3; 4; 92; 3; 2; 8]); 62 | 63 | a.int8_array = int8([0 1 3 4; 92 3 2 8]); 64 | a.int16_array = int16([0 1; 3 4; 92 3; 2 8]); 65 | a.int32_array = int32([0 1 3 4 92 3 2 8]); 66 | a.int64_array = int64([0; 1; 3; 4; 92; 3; 2; 8]); 67 | 68 | a.single_array = single(rand(4, 9)); 69 | a.single_array_complex = single(rand(2,7) + 1i*rand(2,7)); 70 | 71 | a.double_array = rand(3, 2); 72 | a.double_array_complex = rand(5,2) + 1i*rand(5,2); 73 | 74 | a.char_array = ['ivkea'; 'avvai']; 75 | a.char_cell_array = {'v83nv', 'aADvai98v3'}; 76 | 77 | % Empties of main types. 78 | 79 | a.logical_empty = logical([]); 80 | a.uint8_empty = uint8([]); 81 | a.uint16_empty = uint16([]); 82 | a.uint32_empty = uint32([]); 83 | a.uint64_empty = uint64([]); 84 | a.int8_empty = int8([]); 85 | a.int16_empty = int16([]); 86 | a.int32_empty = int32([]); 87 | a.int64_empty = int64([]); 88 | a.single_empty = single([]); 89 | a.double_empty = []; 90 | 91 | % Main container types. 
92 | 93 | a.cell = {5.34+9i}; 94 | a.cell_array = {1, [2 3]; 8.3, -[3; 3]; [], 20}; 95 | a.cell_empty = {}; 96 | 97 | a.struct = struct('a', {3.3}, 'bc', {[1 4 5]}); 98 | a.struct_empty = struct('vea', {}, 'b', {}); 99 | a.struct_array = struct('a', {3.3; 3}, 'avav_Ab', {[1 4 5]; []}); 100 | 101 | % % Function handles. 102 | % 103 | % ab = 1:6; 104 | % a.fhandle = @sin; 105 | % a.fhandle_args = @(x, y) x .* cos(y); 106 | % a.fhandle_args_environment = @(m, n) m*(b.*rand(size(b))) + n; 107 | % 108 | % % Map type. 109 | % 110 | % a.map_char = containers.Map({'4v', 'u', '2vn'}, {4, uint8(9), 'bafd'}); 111 | % a.map_single = containers.Map({single(3), single(38.3), single(2e-3)}, {4, uint8(9), 'bafd'}); 112 | % a.map_empty = containers.Map; 113 | % 114 | % % The categorical type. 115 | % 116 | % b = {'small', 'medium', 'small', 'medium', 'medium', 'large', 'medium'}; 117 | % c = {'small', 'medium', 'large'}; 118 | % d = round(2*rand(10,3)); 119 | % 120 | % a.categorical = categorical(b); 121 | % a.categorical_ordinal = categorical(b, c, 'Ordinal', true); 122 | % a.categorical_ordinal_int = categorical(d, 0:2, c, 'Ordinal', true); 123 | % 124 | % a.categorical_empty = categorical({}); 125 | % a.categorical_ordinal_empty = categorical({}, c, 'Ordinal', true); 126 | % a.categorical_ordinal_int_empty = categorical([], 0:2, c, 'Ordinal', true); 127 | % 128 | % % Tables. 129 | % 130 | % a.table = readtable('patients.dat'); 131 | % a.table_oneentry = a.table(1,:); 132 | % a.table_empty = a.table([], :); 133 | % 134 | % % Not doing time series yet. 135 | 136 | save('types_v7p3.mat','-struct','a','-v7.3') 137 | save('types_v7.mat','-struct','a','-v7') 138 | 139 | exit 140 | -------------------------------------------------------------------------------- /doc/source/development.rst: -------------------------------------------------------------------------------- 1 | .. 
currentmodule:: hdf5storage 2 | 3 | ======================= 4 | Development Information 5 | ======================= 6 | 7 | The source code can be found on Github at 8 | https://github.com/frejanordsiek/hdf5storage 9 | 10 | Package Overview 11 | ================ 12 | 13 | The package is currently a pure Python package; using no Cython, C/C++, 14 | or other languages. 15 | 16 | Also, pickling is not used at all and should not be added. It is a 17 | security risk since pickled data is read through the interpreter 18 | allowing arbitrary code (which could be malicious) to be executed in the 19 | interpreter. One wants to be able to read possibly HDF5 and MAT files 20 | from untrusted sources, so pickling is avoided in this package. 21 | 22 | The :py:mod:`hdf5storage` module contains the high level reading and 23 | writing functions, as well as the :py:class:`Options` class for 24 | encapsulating all the various options governing how data is read and 25 | written. The high level reading and writing functions can either be 26 | given an :py:class:`Options` object, or be given the keyword arguments 27 | that its constructur takes (they will make one from those 28 | arguments). There is also the :py:class:`MarshallerCollection` which 29 | holds all the Marshallers (more below) and provides functions to find 30 | the appropriate Marshaller given the ``type`` of a Python object, the 31 | type string used for the 'Python.Type' Attribute, or the MATLAB class 32 | string (contained in the 'MATLAB_class' Attribute). One can give the 33 | collection additional user provided Marshallers. 34 | 35 | The :py:mod:`hdf5storage.exceptions` module contains the special 36 | exceptions/errors required for this package not covered by existing 37 | Python exceptions/errors or those from the h5py package. 38 | 39 | :py:mod:`hdf5storage.Marshallers` contains all the Marshallers for the 40 | different Python data types that can be read from or written to an HDF5 41 | file. 
They are all automatically added to any
:py:class:`MarshallerCollection` which inspects this module and grabs
all classes within it (if a class other than a Marshaller is added to
this module, :py:class:`MarshallerCollection` will need to be
modified).
Then there are functions
to convert between different string representations, as well as encode
for writing and decode after reading complex types.
108 | 109 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\hdf5storage.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\hdf5storage.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 
113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 
178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " 
texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/hdf5storage.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/hdf5storage.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/hdf5storage" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/hdf5storage" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /tests/test_marshaller_collection_priority.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. 
Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import random 28 | 29 | from nose.tools import raises 30 | from nose.tools import assert_equal as assert_equal_nose 31 | 32 | import hdf5storage 33 | import hdf5storage.Marshallers 34 | 35 | random.seed() 36 | 37 | # Check if the example package is installed because some tests will 38 | # depend on it. 39 | try: 40 | from example_hdf5storage_marshaller_plugin import SubListMarshaller 41 | has_example_hdf5storage_marshaller_plugin = True 42 | except: 43 | has_example_hdf5storage_marshaller_plugin = False 44 | 45 | 46 | # Need a new marshaller that does nothing. 
47 | class JunkMarshaller(hdf5storage.Marshallers.TypeMarshaller): 48 | pass 49 | 50 | 51 | @raises(TypeError) 52 | def check_error_non_tuplelist(obj): 53 | hdf5storage.MarshallerCollection(priority=obj) 54 | 55 | 56 | def test_error_non_tuplelist(): 57 | for v in (None, True, 1, 2.3, '39va', b'391', set(), dict()): 58 | yield check_error_non_tuplelist, v 59 | 60 | 61 | @raises(ValueError) 62 | def test_error_missing_element(): 63 | need = ('builtin', 'user', 'plugin') 64 | hdf5storage.MarshallerCollection(priority=[random.choice(need) 65 | for i in range(2)]) 66 | 67 | 68 | @raises(ValueError) 69 | def test_error_extra_element(): 70 | hdf5storage.MarshallerCollection(priority=('builtin', 'user', 71 | 'plugin', 'extra')) 72 | 73 | 74 | def test_builtin_plugin_user(): 75 | m = JunkMarshaller() 76 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 77 | priority=('builtin', 'plugin', 78 | 'user'), 79 | marshallers=(m, )) 80 | assert_equal_nose(m, mc._marshallers[-1]) 81 | if has_example_hdf5storage_marshaller_plugin: 82 | assert isinstance(mc._marshallers[-2], 83 | SubListMarshaller) 84 | 85 | 86 | def test_builtin_user_plugin(): 87 | m = JunkMarshaller() 88 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 89 | priority=('builtin', 'user', 90 | 'plugin'), 91 | marshallers=(m, )) 92 | if has_example_hdf5storage_marshaller_plugin: 93 | assert isinstance(mc._marshallers[-1], 94 | SubListMarshaller) 95 | assert_equal_nose(m, mc._marshallers[-2]) 96 | else: 97 | assert_equal_nose(m, mc._marshallers[-1]) 98 | 99 | 100 | def test_plugin_builtin_user(): 101 | m = JunkMarshaller() 102 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 103 | priority=('plugin', 'builtin', 104 | 'user'), 105 | marshallers=(m, )) 106 | assert_equal_nose(m, mc._marshallers[-1]) 107 | if has_example_hdf5storage_marshaller_plugin: 108 | assert isinstance(mc._marshallers[0], 109 | SubListMarshaller) 110 | 111 | 112 | def test_plugin_user_builtin(): 113 | m = JunkMarshaller() 
114 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 115 | priority=('plugin', 'user', 116 | 'builtin'), 117 | marshallers=(m, )) 118 | if has_example_hdf5storage_marshaller_plugin: 119 | assert isinstance(mc._marshallers[0], 120 | SubListMarshaller) 121 | assert_equal_nose(m, mc._marshallers[1]) 122 | else: 123 | assert_equal_nose(m, mc._marshallers[0]) 124 | 125 | 126 | def test_user_builtin_plugin(): 127 | m = JunkMarshaller() 128 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 129 | priority=('user', 'builtin', 130 | 'plugin'), 131 | marshallers=(m, )) 132 | assert_equal_nose(m, mc._marshallers[0]) 133 | if has_example_hdf5storage_marshaller_plugin: 134 | assert isinstance(mc._marshallers[-1], 135 | SubListMarshaller) 136 | 137 | 138 | def test_user_plugin_builtin(): 139 | m = JunkMarshaller() 140 | mc = hdf5storage.MarshallerCollection(load_plugins=True, 141 | priority=('user', 'plugin', 142 | 'builtin'), 143 | marshallers=(m, )) 144 | assert_equal_nose(m, mc._marshallers[0]) 145 | if has_example_hdf5storage_marshaller_plugin: 146 | assert isinstance(mc._marshallers[1], 147 | SubListMarshaller) 148 | -------------------------------------------------------------------------------- /tests/test_julia_mat_compatibility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 
14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import subprocess 30 | import tempfile 31 | 32 | import numpy as np 33 | 34 | from nose.plugins.skip import SkipTest 35 | 36 | import hdf5storage 37 | 38 | from asserts import assert_equal_from_matlab 39 | from make_randoms import dtypes, random_numpy_scalar, random_numpy, \ 40 | random_numpy_shape, random_structured_numpy_array 41 | 42 | # Have a flag for whether julia was found and run successfully or not, 43 | # so tests can be skipped if not. 44 | ran_julia_successful = [False] 45 | 46 | mat_files = ['to_julia_v7.mat', 'to_julia_v7p3.mat', 47 | 'julia_v7_to_v7p3.mat', 'julia_v7p3_to_v7p3.mat'] 48 | 49 | script_names = ['julia_read_mat.jl'] 50 | for i in range(0, len(script_names)): 51 | script_names[i] = os.path.join(os.path.dirname(__file__), 52 | script_names[i]) 53 | 54 | to_julia = dict() 55 | 56 | 57 | # Julia MAT tends to squeeze extra singleton dimensions beyond 2, 58 | # meaning a (1, 1, 1) goes to (1, 1). In addition, string conversions go 59 | # on when going back and forth. Thus, string types will be excluded and 60 | # the minimum length along each dimension will be 2. 
61 | 62 | dtypes_exclude = set(('S', 'U')) 63 | dtypes_to_do = tuple(set(dtypes).difference(dtypes_exclude)) 64 | 65 | for dt in dtypes_to_do: 66 | to_julia[dt] = random_numpy_scalar(dt) 67 | for dm in (2, 3): 68 | for dt in dtypes_to_do: 69 | to_julia[dt + '_array_' + str(dm)] = \ 70 | random_numpy(random_numpy_shape(dm, 6, min_length=2), dt) 71 | for dt in dtypes_to_do: 72 | if dt in ('S', 'U'): 73 | to_julia[dt + '_empty'] = np.array([], dtype=dt + str(6)) 74 | else: 75 | to_julia[dt + '_empty'] = np.array([], dtype=dt) 76 | 77 | to_julia['float32_nan'] = np.float32(np.NaN) 78 | to_julia['float32_inf'] = np.float32(np.inf) 79 | to_julia['float64_nan'] = np.float64(np.NaN) 80 | to_julia['float64_inf'] = np.float64(-np.inf) 81 | 82 | to_julia['object'] = random_numpy_scalar('object', \ 83 | object_element_dtypes=dtypes_to_do) 84 | to_julia['object_array_2'] = random_numpy( \ 85 | random_numpy_shape(2, 6, min_length=2), \ 86 | 'object', object_element_dtypes=dtypes_to_do) 87 | to_julia['object_array_3'] = random_numpy( \ 88 | random_numpy_shape(3, 6, min_length=2), \ 89 | 'object', object_element_dtypes=dtypes_to_do) 90 | 91 | 92 | # Julia MAT doesn't seem to read and then write back empty object 93 | # types. 94 | 95 | #to_julia['object_empty'] = np.array([], dtype='object') 96 | 97 | to_julia['struct'] = random_structured_numpy_array((1,), \ 98 | nondigits_fields=True) 99 | to_julia['struct_empty'] = random_structured_numpy_array(tuple(), \ 100 | nondigits_fields=True) 101 | 102 | # Something goes wrong with 2 dimensional structure arrays that warrants 103 | # further investigation. 
104 | 105 | #to_julia['struct_array_2'] = random_structured_numpy_array((3, 5), \ 106 | # nondigits_fields=True) 107 | 108 | 109 | from_julia_v7_to_v7p3 = dict() 110 | from_julia_v7p3_to_v7p3 = dict() 111 | 112 | 113 | 114 | def julia_command(julia_file, fin, fout): 115 | subprocess.check_call(['julia', julia_file, 116 | fin, fout]) 117 | 118 | 119 | def setup_module(): 120 | temp_dir = None 121 | try: 122 | import scipy.io 123 | temp_dir = tempfile.mkdtemp() 124 | for i in range(0, len(mat_files)): 125 | mat_files[i] = os.path.join(temp_dir, mat_files[i]) 126 | scipy.io.savemat(file_name=mat_files[0], mdict=to_julia) 127 | hdf5storage.savemat(file_name=mat_files[1], mdict=to_julia) 128 | 129 | #julia_command(script_names[0], mat_files[0], mat_files[2]) 130 | julia_command(script_names[0], mat_files[1], mat_files[3]) 131 | 132 | #hdf5storage.loadmat(file_name=mat_files[2], 133 | # mdict=from_julia_v7_to_v7p3) 134 | hdf5storage.loadmat(file_name=mat_files[3], 135 | mdict=from_julia_v7p3_to_v7p3) 136 | except: 137 | pass 138 | else: 139 | ran_julia_successful[0] = True 140 | finally: 141 | for name in mat_files: 142 | if os.path.exists(name): 143 | os.remove(name) 144 | if temp_dir is not None and os.path.exists(temp_dir): 145 | os.rmdir(temp_dir) 146 | 147 | 148 | def teardown_module(): 149 | pass 150 | 151 | 152 | #def test_julia_v7_to_v7p3(): 153 | # for k in to_julia.keys(): 154 | # yield check_variable_julia_v7_to_v7p3, k 155 | 156 | 157 | def test_julia_v7p3_to_v7p3(): 158 | if not ran_julia_successful[0]: 159 | raise SkipTest 160 | for k in to_julia.keys(): 161 | yield check_variable_julia_v7p3_to_v7p3, k 162 | 163 | 164 | def check_variable_julia_v7_to_v7p3(name): 165 | assert name in from_julia_v7_to_v7p3 166 | assert_equal_from_matlab(from_julia_v7_to_v7p3[name], 167 | to_julia[name]) 168 | 169 | 170 | def check_variable_julia_v7p3_to_v7p3(name): 171 | assert name in from_julia_v7p3_to_v7p3 172 | assert_equal_from_matlab(from_julia_v7p3_to_v7p3[name], 173 | 
to_julia[name]) 174 | -------------------------------------------------------------------------------- /doc/source/hdf5storage.Marshallers.rst: -------------------------------------------------------------------------------- 1 | hdf5storage.Marshallers 2 | ======================= 3 | 4 | .. currentmodule:: hdf5storage.Marshallers 5 | 6 | .. automodule:: hdf5storage.Marshallers 7 | 8 | .. autosummary:: 9 | 10 | TypeMarshaller 11 | NumpyScalarArrayMarshaller 12 | PythonScalarMarshaller 13 | PythonStringMarshaller 14 | PythonNoneMarshaller 15 | PythonDictMarshaller 16 | PythonListMarshaller 17 | PythonTupleSetDequeMarshaller 18 | 19 | 20 | TypeMarshaller 21 | -------------- 22 | 23 | .. autoclass:: TypeMarshaller 24 | :members: update_type_lookups, get_type_string, read, read_approximate, write, write_metadata 25 | :show-inheritance: 26 | 27 | .. autoinstanceattribute:: TypeMarshaller.required_parent_modules 28 | :annotation: = () 29 | 30 | .. autoinstanceattribute:: TypeMarshaller.required_modules 31 | :annotation: = () 32 | 33 | .. autoinstanceattribute:: TypeMarshaller.python_attributes 34 | :annotation: = {'Python.Type'} 35 | 36 | .. autoinstanceattribute:: TypeMarshaller.matlab_attributes 37 | :annotation: = {'H5PATH'} 38 | 39 | .. autoinstanceattribute:: TypeMarshaller.types 40 | :annotation: = () 41 | 42 | .. autoinstanceattribute:: TypeMarshaller.python_type_strings 43 | :annotation: = () 44 | 45 | .. autoinstanceattribute:: TypeMarshaller.matlab_classes 46 | :annotation: = () 47 | 48 | .. autoinstanceattribute:: TypeMarshaller.type_to_typestring 49 | :annotation: = dict() 50 | 51 | .. autoinstanceattribute:: TypeMarshaller.typestring_to_type 52 | :annotation: = dict() 53 | 54 | 55 | NumpyScalarArrayMarshaller 56 | -------------------------- 57 | 58 | .. 
autoclass:: NumpyScalarArrayMarshaller 59 | :members: read, write, write_metadata 60 | :show-inheritance: 61 | 62 | Handles the following :: 63 | 64 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 65 | 'Python.numpy.UnderlyingType', 66 | 'Python.numpy.Container', 'Python.Fields'} 67 | 68 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 69 | 'MATLAB_int_decode', 'MATLAB_fields'} 70 | 71 | types = (np.ndarray, np.matrix, 72 | np.chararray, np.core.records.recarray, 73 | np.bool_, np.void, 74 | np.uint8, np.uint16, np.uint32, np.uint64, 75 | np.int8, np.int16, np.int32, np.int64, 76 | np.float16, np.float32, np.float64, 77 | np.complex64, np.complex128, 78 | np.bytes_, np.str_, np.object_) 79 | 80 | python_type_strings = ('numpy.ndarray', 'numpy.matrix', 81 | 'numpy.chararray', 'numpy.recarray', 82 | 'numpy.bool_', 'numpy.void', 83 | 'numpy.uint8', 'numpy.uint16', 84 | 'numpy.uint32', 'numpy.uint64', 'numpy.int8', 85 | 'numpy.int16', 'numpy.int32', 'numpy.int64', 86 | 'numpy.float16', 'numpy.float32', 'numpy.float64', 87 | 'numpy.complex64', 'numpy.complex128', 88 | 'numpy.bytes_', 'numpy.str_', 'numpy.object_') 89 | 90 | matlab_classes = ('logical', 'char', 'single', 'double', 'uint8', 91 | 'uint16', 'uint32', 'uint64', 'int8', 'int16', 92 | 'int32', 'int64', 'cell', 'canonical empty') 93 | 94 | 95 | PythonScalarMarshaller 96 | ---------------------- 97 | 98 | .. 
autoclass:: PythonScalarMarshaller 99 | :members: read, write 100 | :show-inheritance: 101 | 102 | Handles the following :: 103 | 104 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 105 | 'Python.numpy.UnderlyingType', 106 | 'Python.numpy.Container', 'Python.Fields'} 107 | 108 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 109 | 'MATLAB_int_decode'} 110 | 111 | types = (bool, int, float, complex) 112 | 113 | python_type_strings = ('bool', 'int', 'float', 'complex') 114 | 115 | matlab_classes = () 116 | 117 | 118 | PythonStringMarshaller 119 | ---------------------- 120 | 121 | .. autoclass:: PythonStringMarshaller 122 | :members: read, write 123 | :show-inheritance: 124 | 125 | Handles the following :: 126 | 127 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 128 | 'Python.numpy.UnderlyingType', 129 | 'Python.numpy.Container', 'Python.Fields'} 130 | 131 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 132 | 'MATLAB_int_decode'} 133 | 134 | types = (str, bytes, bytearray) 135 | 136 | python_type_strings = ('str', 'bytes', 'bytearray') 137 | 138 | matlab_classes = () 139 | 140 | 141 | PythonNoneMarshaller 142 | -------------------- 143 | 144 | .. autoclass:: PythonNoneMarshaller 145 | :members: read, write 146 | :show-inheritance: 147 | 148 | Handles the following :: 149 | 150 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 151 | 'Python.numpy.UnderlyingType', 152 | 'Python.numpy.Container', 'Python.Fields'} 153 | 154 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 155 | 'MATLAB_int_decode'} 156 | 157 | types = (builtins.NoneType, ) 158 | 159 | python_type_strings = ('builtins.NoneType', ) 160 | 161 | matlab_classes = () 162 | 163 | 164 | PythonDictMarshaller 165 | -------------------- 166 | 167 | .. 
autoclass:: PythonDictMarshaller 168 | :members: read, write, write_metadata 169 | :show-inheritance: 170 | 171 | Handles the following :: 172 | 173 | python_attributes = {'Python.Type', 'Python.Fields'} 174 | 175 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_fields'} 176 | 177 | types = (dict, collections.OrderedDict) 178 | 179 | python_type_strings = ('dict', 'collections.OrderedDict') 180 | 181 | matlab_classes = () 182 | 183 | 184 | PythonListMarshaller 185 | -------------------- 186 | 187 | .. autoclass:: PythonListMarshaller 188 | :members: read, write 189 | :show-inheritance: 190 | 191 | Handles the following :: 192 | 193 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 194 | 'Python.numpy.UnderlyingType', 195 | 'Python.numpy.Container', 'Python.Fields'} 196 | 197 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 198 | 'MATLAB_int_decode'} 199 | 200 | types = (list, ) 201 | 202 | python_type_strings = ('list', ) 203 | 204 | matlab_classes = () 205 | 206 | 207 | PythonTupleSetDequeMarshaller 208 | ----------------------------- 209 | 210 | .. autoclass:: PythonTupleSetDequeMarshaller 211 | :members: read, write 212 | :show-inheritance: 213 | 214 | Handles the following :: 215 | 216 | python_attributes = {'Python.Type', 'Python.Shape', 'Python.Empty', 217 | 'Python.numpy.UnderlyingType', 218 | 'Python.numpy.Container', 'Python.Fields'} 219 | 220 | matlab_attributes = {'H5PATH', 'MATLAB_class', 'MATLAB_empty', 221 | 'MATLAB_int_decode'} 222 | 223 | types = (tuple, set, frozenset, collections.deque) 224 | 225 | python_type_strings = ('tuple', 'set', 'frozenset', 'collections.deque') 226 | 227 | matlab_classes = () 228 | 229 | -------------------------------------------------------------------------------- /doc/source/compression.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: hdf5storage 2 | 3 | .. 
_Compression:

===========
Compression
===========

The HDF5 libraries and the :py:mod:`h5py` module support transparent
compression of data in HDF5 files.

The use of compression can sometimes drastically reduce file size, often
makes it faster to read the data from the file, and sometimes makes it
faster to write the data. However, not all data compresses very well and
can occasionally end up larger after compression than it was
uncompressed. Compression does cost CPU time both when compressing the
data and when decompressing it. The reason this can sometimes lead to
faster read and write times is because disks are very slow and the space
savings can save enough disk access time to make up for the CPU time.

All versions of this package can read compressed data, but not all
versions can write compressed data.

.. versionadded:: 0.1.9

   HDF5 write compression features added along with several options to
   control them in :py:class:`Options`.


.. versionadded:: 0.1.7

   :py:class:`Options` takes the compression options but ignores
   them.


.. warning::

   Passing the compression options for versions earlier than ``0.1.7``
   will result in an error.


Enabling Compression
====================

Compression, which is enabled by default, is controlled by setting
:py:attr:`Options.compress` to ``True`` or passing ``compress=X`` to
:py:func:`write` and :py:func:`savemat` where ``X`` is ``True`` or
``False``.


.. note::

   Not all python objects written to the HDF5 file will be compressed,
   or even support compression. For one, :py:mod:`numpy` scalars or any
   type that is stored as one do not support compression due to
   limitations of the HDF5 library, though compressing them would be a
   waste (hence the lack of support).
58 | 59 | 60 | Setting The Minimum Data Size for Compression 61 | ============================================= 62 | 63 | Compressing small pieces of data often wastes space (compressed size is 64 | larger than uncompressed size) and CPU time. Due to this, python objects 65 | have to be larger than a particular size before this package will 66 | compress them. The threshold, in bytes, is controlled by setting 67 | :py:attr:`Options.compress_size_threshold` or passing 68 | ``compress_size_threshold=X`` to :py:func:`write` and 69 | :py:func:`savemat` where ``X`` is a non-negative integer. The default 70 | value is 16 KB. 71 | 72 | 73 | Controlling The Compression Algorithm And Level 74 | =============================================== 75 | 76 | Many compression algorithms can be used with HDF5 files, though only 77 | three are common. The Deflate algorithm (sometimes known as the GZIP 78 | algorithm), LZF algorithm, and SZIP algorithms are the algorithms that 79 | the HDF5 library is explicitly setup to support. The library has a 80 | mechanism for adding additional algorithms. Popular ones include the 81 | BZIP2 and BLOSC algorithms. 82 | 83 | The compression algorithm used is controlled by setting 84 | :py:attr:`Options.compression_algorithm` or passing 85 | ``compression_algorithm=X`` to :py:func:`write` and :py:func:`savemat`. 86 | ``X`` is the ``str`` name of the algorithm. The default is ``'gzip'`` 87 | corresponding to the Deflate/GZIP algorithm. 88 | 89 | .. note:: 90 | 91 | As of version ``0.2``, only the Deflate (``X = 'gzip'``), LZF 92 | (``X = 'lzf'``), and SZIP (``X = 'szip'``) algorithms are supported. 93 | 94 | 95 | .. note:: 96 | 97 | If doing MATLAB compatibility (:py:attr:`Options.matlab_compatible` 98 | is ``True``), only the Deflate algorithm is supported. 
99 | 100 | 101 | The algorithms, in more detail 102 | 103 | GZIP / Deflate (``'gzip'``) 104 | The common Deflate algorithm seen in the Unix and Linux ``gzip`` 105 | utility and the most common compression algorithm used in ZIP files. 106 | It is the most compatible algorithm. It achieves good compression and 107 | is reasonably fast. It has no patent or license restrictions. 108 | 109 | LZF (``'lzf'``) 110 | A very fast algorithm but with inferior compression to GZIP/Deflate. 111 | It is less commonly used than GZIP/Deflate, but similarly has no 112 | patent or license restrictions. 113 | 114 | SZIP (``'szip'``) 115 | This compression algorithm isn't always available and has patent 116 | and license restrictions. See 117 | `SZIP License `_. 118 | 119 | 120 | If GZIP/Deflate compression is being used, the compression level can be 121 | adjusted by setting :py:attr:`Options.gzip_compression_level` or passing 122 | ``gzip_compression_level=X`` to :py:func:`write` and :py:func:`savemat` 123 | where ``X`` is an integer between ``0`` and ``9`` inclusive. ``0`` is 124 | the lowest compression, but is the fastest. ``9`` gives the best 125 | compression, but is the slowest. The default is ``7``. 126 | 127 | For all compression algorithms, there is an additional filter which can 128 | help achieve better compression at relatively low cost in CPU time. It 129 | is the shuffle filter. It is controlled by setting 130 | :py:attr:`Options.shuffle_filter` or passing ``shuffle_filter=X`` to 131 | :py:func:`write` and :py:func:`savemat` where ``X`` is ``True`` or 132 | ``False``. The default is ``True``. 133 | 134 | 135 | Using Checksums 136 | =============== 137 | 138 | Fletcher32 checksums can be calculated and stored for most types of 139 | stored data in an HDF5 file. These are then checked when the data is 140 | read to catch file corruption, which will cause an error when reading 141 | the data informing the user that there is data corruption. 
The filter 142 | can be enabled or disabled separately for data that is compressed and 143 | data that is not compressed (e.g. compression is disabled, the python 144 | object can't be compressed, or the python object's data size is smaller 145 | than the compression threshold). 146 | 147 | For compressed data, it is controlled by setting 148 | :py:attr:`Options.compressed_fletcher32_filter` or passing 149 | ``compressed_fletcher32_filter=X`` to :py:func:`write` and 150 | :py:func:`savemat` where ``X`` is ``True`` or ``False``. The default is 151 | ``True``. 152 | 153 | For uncompressed data, it is controlled by setting 154 | :py:attr:`Options.uncompressed_fletcher32_filter` or passing 155 | ``uncompressed_fletcher32_filter=X`` to :py:func:`write` and 156 | :py:func:`savemat` where ``X`` is ``True`` or ``False``. The default is 157 | ``False``. 158 | 159 | 160 | .. note:: 161 | 162 | Fletcher32 checksums are not computed for anything that is stored 163 | as a :py:mod:`numpy` scalar. 164 | 165 | 166 | Chunking 167 | ======== 168 | 169 | When no filters are used (compression and Fletcher32), this package 170 | stores data in HDF5 files in a contiguous manner. The use of any filter 171 | requires that the data use chunked storage. Chunk sizes are determined 172 | automatically using the autochunk feature of :py:mod:`h5py`. The HDF5 173 | libraries make reading contiguous and chunked data transparent, though 174 | access speeds can differ and the chunk size affects the compression 175 | ratio. 176 | 177 | 178 | Further Reading 179 | =============== 180 | 181 | .. seealso:: 182 | 183 | `HDF5 Datasets Filter pipeline `_ 184 | Description of the Dataset filter pipeline in the :py:mod:`h5py` 185 | 186 | `Using Compression in HDF5 `_ 187 | FAQ on compression from the HDF Group. 
188 | 189 | `HDF5 Tutorial: Learning The Basics: Dataset Storage Layout `_ 190 | Information on Dataset storage format from the HDF Group 191 | 192 | `SZIP License `_ 193 | The license for using the SZIP compression algorithm. 194 | 195 | `SZIP COMPRESSION IN HDF PRODUCTS `_ 196 | Information on using SZIP compression from the HDF Group. 197 | 198 | `3rd Party Compression Algorithms for HDF5 `_ 199 | List of common additional compression algorithms. 200 | 201 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # hdf5storage documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Dec 22 00:05:54 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys, os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | #sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | needs_sphinx = '1.7' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
# Sphinx extensions used to build this documentation: automatic API
# documentation, cross-project reference linking, highlighted source
# viewing, summary tables, and NumPy/Google-style docstring support.
extensions = ['sphinx.ext.autodoc',
              'sphinx.ext.intersphinx',
              'sphinx.ext.viewcode',
              'sphinx.ext.autosummary',
              'sphinx.ext.napoleon']

# Directories (relative to this file) searched for page templates.
templates_path = ['_templates']

# Filename extension of the reST source files.
source_suffix = '.rst'

# Encoding of the source files (defaults to 'utf-8-sig').
#source_encoding = 'utf-8-sig'

# Document holding the root of the toctree.
master_doc = 'index'

# Project name and copyright notice shown throughout the built docs.
project = 'hdf5storage'
copyright = '2013-2020, Freja Nordsiek'

# Version information substituted for |version| (short X.Y form) and
# |release| (full version string including any alpha/beta/rc tags).
version = '0.2'
release = '0.2'

# Language for content autogenerated by Sphinx (defaults to English).
#language = None

# Override for the |today| substitution, or a strftime format used to
# generate it.
#today = ''
#today_fmt = '%B %d, %Y'

# Patterns (relative to the source directory) of files and directories
# to skip when looking for source files.
exclude_patterns = []

# Default role applied to `text` markup (defaults to no role).
#default_role = None

# Whether '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# Whether description unit titles (such as .. function::) are prefixed
# with the current module name.
#add_module_names = True

# Whether sectionauthor and moduleauthor directives produce output
# (they are ignored by default).
#show_authors = False

# Pygments style used for syntax highlighting.
pygments_style = 'sphinx'

# Prefixes ignored when sorting the module index.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# Theme for the HTML and HTML Help pages (see the Sphinx documentation
# for the builtin alternatives).
html_theme = 'sphinx_rtd_theme'

# Theme-specific look-and-feel overrides (see the theme documentation).
#html_theme_options = {}

# Directories containing custom themes, relative to this file.
#html_theme_path = []

# Title of this documentation set; defaults to
# "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar; defaults to html_title.
#html_short_title = None

# Image file (relative to this directory) placed at the top of the
# sidebar.
#html_logo = None

# Favicon for the docs; must be a Windows icon file (.ico) that is
# 16x16 or 32x32 pixels.
#html_favicon = None

# Directories of custom static files (such as style sheets). They are
# copied after the builtin static files, so a file named "default.css"
# overrides the builtin "default.css".
html_static_path = ['_static']

# strftime format for a 'Last updated on:' timestamp inserted at the
# bottom of every page ('' disables the timestamp).
#html_last_updated_fmt = '%b %d, %Y'

# Whether SmartyPants converts quotes and dashes to typographically
# correct entities.
#html_use_smartypants = True

# Custom sidebar templates (maps document names to template names).
#html_sidebars = {}

# Extra templates rendered to pages (maps page names to template
# names).
#html_additional_pages = {}

# Whether the module index is generated.
#html_domain_indices = True

# Whether the general index is generated.
#html_use_index = True

# Whether the index is split into one page per letter.
#html_split_index = False

# Link from the generated pages back to the reST sources.
html_show_sourcelink = True

# Whether "Created using Sphinx" appears in the HTML footer.
#html_show_sphinx = True

# Whether "(C) Copyright ..." appears in the HTML footer.
#html_show_copyright = True

# Base URL from which the finished HTML is served; if set, an
# OpenSearch description file is output and every page gets a <link>
# tag referring to it.
#html_use_opensearch = ''

# Filename suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Basename of the HTML help builder's output file.
htmlhelp_basename = 'hdf5storagedoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # Paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # Font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Extra content for the LaTeX preamble.
    #'preamble': '',
}

# How the document tree is grouped into LaTeX files: tuples of (source
# start file, target name, title, author, documentclass
# [howto/manual]).
latex_documents = [
    ('index', 'hdf5storage.tex', 'hdf5storage Documentation',
     'Freja Nordsiek', 'manual'),
]

# Image file (relative to this directory) placed at the top of the
# title page.
#latex_logo = None

# For "manual" documents, whether toplevel headings become parts
# instead of chapters.
#latex_use_parts = False

# Whether page references are shown after internal links.
#latex_show_pagerefs = False

# Whether URL addresses are shown after external links.
#latex_show_urls = False

# Documents appended as an appendix to all manuals.
#latex_appendices = []

# Whether the module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page: tuples of (source start file, name,
# description, authors, manual section).
man_pages = [
    ('index', 'hdf5storage', 'hdf5storage Documentation',
     ['Freja Nordsiek'], 1)
]

# Whether URL addresses are shown after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# How the document tree is grouped into Texinfo files: tuples of
# (source start file, target name, title, author, dir menu entry,
# description, category).
texinfo_documents = [
    ('index', 'hdf5storage', 'hdf5storage Documentation',
     'Freja Nordsiek', 'hdf5storage', 'One line description of project.',
     'Miscellaneous'),
]

# Documents appended as an appendix to all manuals.
#texinfo_appendices = []

# Whether the module index is generated.
244 | #texinfo_domain_indices = True 245 | 246 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 247 | #texinfo_show_urls = 'footnote' 248 | 249 | 250 | # Example configuration for intersphinx: refer to the Python standard library. 251 | 252 | intersphinx_mapping = {'python': ('http://docs.python.org/3', None), 253 | 'numpy': ('http://docs.scipy.org/doc/numpy', None), 254 | 'scipy': ('http://docs.scipy.org/doc/scipy/reference', None), 255 | 'h5py': ('http://docs.h5py.org/en/latest/', None)} 256 | 257 | # -- Options for Autosummary --------------------------------------------------- 258 | 259 | autosummary_generate = True 260 | 261 | # -- Options for Napoleon ------------------------------------------------------ 262 | 263 | napoleon_use_ivar = True 264 | napoleon_use_param = True 265 | napoleon_use_rtype = True 266 | -------------------------------------------------------------------------------- /tests/test_marshallers_requiring_modules.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import sys 29 | import tempfile 30 | 31 | import numpy as np 32 | import h5py 33 | 34 | import hdf5storage 35 | import hdf5storage.utilities 36 | import hdf5storage.Marshallers 37 | 38 | from nose.tools import assert_is_not_none, assert_is_none, \ 39 | assert_false, assert_equal, assert_not_in, assert_in 40 | 41 | 42 | class Tmarshaller(hdf5storage.Marshallers.TypeMarshaller): 43 | def read(self, f, dsetgrp, attributes, options): 44 | return 'read' 45 | 46 | def read_approximate(self, f, dsetgrp, attributes, options): 47 | return 'read_approximate' 48 | 49 | 50 | def test_missing_required_parent(): 51 | m = hdf5storage.Marshallers.TypeMarshaller() 52 | m.required_parent_modules = ['ainivieanvueaq'] 53 | m.python_type_strings = ['vi8vaeaniea'] 54 | m.types = [s for s in m.python_type_strings] 55 | m.update_type_lookups() 56 | mc = hdf5storage.MarshallerCollection(marshallers=[m]) 57 | assert_false(mc._has_required_modules[-1]) 58 | assert_false(mc._imported_required_modules[-1]) 59 | mback, has_modules = mc.get_marshaller_for_type_string( \ 60 | m.python_type_strings[0]) 61 | assert_is_not_none(mback) 62 | assert_false(has_modules) 63 | assert_false(mc._has_required_modules[-1]) 64 | assert_false(mc._imported_required_modules[-1]) 65 | for name in m.required_parent_modules: 66 | assert_not_in(name, sys.modules) 67 | 68 | 69 | def test_missing_required_lazy(): 70 | m = 
hdf5storage.Marshallers.TypeMarshaller() 71 | m.required_parent_modules = ['numpy'] 72 | m.required_modules = ['ainivieanvueaq'] 73 | m.python_type_strings = ['vi8vaeaniea'] 74 | m.types = [s for s in m.python_type_strings] 75 | m.update_type_lookups() 76 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 77 | marshallers=[m]) 78 | assert mc._has_required_modules[-1] 79 | assert_false(mc._imported_required_modules[-1]) 80 | mback, has_modules = mc.get_marshaller_for_type_string( \ 81 | m.python_type_strings[0]) 82 | assert_is_not_none(mback) 83 | assert_false(has_modules) 84 | assert_false(mc._has_required_modules[-1]) 85 | assert_false(mc._imported_required_modules[-1]) 86 | for name in m.required_modules: 87 | assert_not_in(name, sys.modules) 88 | 89 | 90 | def test_missing_required_non_lazy(): 91 | m = hdf5storage.Marshallers.TypeMarshaller() 92 | m.required_parent_modules = ['numpy'] 93 | m.required_modules = ['ainivieanvueaq'] 94 | m.python_type_strings = ['vi8vaeaniea'] 95 | m.types = [s for s in m.python_type_strings] 96 | m.update_type_lookups() 97 | mc = hdf5storage.MarshallerCollection(lazy_loading=False, 98 | marshallers=[m]) 99 | assert_false(mc._has_required_modules[-1]) 100 | assert_false(mc._imported_required_modules[-1]) 101 | mback, has_modules = mc.get_marshaller_for_type_string( \ 102 | m.python_type_strings[0]) 103 | assert_is_not_none(mback) 104 | assert_false(has_modules) 105 | assert_false(mc._has_required_modules[-1]) 106 | assert_false(mc._imported_required_modules[-1]) 107 | for name in m.required_modules: 108 | assert_not_in(name, sys.modules) 109 | 110 | 111 | def test_has_required_lazy(): 112 | m = hdf5storage.Marshallers.TypeMarshaller() 113 | m.required_parent_modules = ['json'] 114 | m.required_modules = ['json'] 115 | m.python_type_strings = ['ellipsis'] 116 | m.types = ['builtins.ellipsis'] 117 | m.update_type_lookups() 118 | for name in m.required_modules: 119 | assert_not_in(name, sys.modules) 120 | mc = 
hdf5storage.MarshallerCollection(lazy_loading=True, 121 | marshallers=[m]) 122 | for name in m.required_modules: 123 | assert_not_in(name, sys.modules) 124 | assert mc._has_required_modules[-1] 125 | assert_false(mc._imported_required_modules[-1]) 126 | mback, has_modules = mc.get_marshaller_for_type_string( \ 127 | m.python_type_strings[0]) 128 | assert_is_not_none(mback) 129 | assert has_modules 130 | assert mc._has_required_modules[-1] 131 | assert mc._imported_required_modules[-1] 132 | for name in m.required_modules: 133 | assert_in(name, sys.modules) 134 | 135 | # Do it again, but this time the modules are already loaded so that 136 | # flag should be set. 137 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 138 | marshallers=[m]) 139 | assert mc._has_required_modules[-1] 140 | assert mc._imported_required_modules[-1] 141 | mback, has_modules = mc.get_marshaller_for_type_string( \ 142 | m.python_type_strings[0]) 143 | assert_is_not_none(mback) 144 | assert has_modules 145 | assert mc._has_required_modules[-1] 146 | assert mc._imported_required_modules[-1] 147 | 148 | 149 | def test_has_required_non_lazy(): 150 | m = hdf5storage.Marshallers.TypeMarshaller() 151 | m.required_parent_modules = ['csv'] 152 | m.required_modules = ['csv'] 153 | m.python_type_strings = ['ellipsis'] 154 | m.types = ['builtins.ellipsis'] 155 | m.update_type_lookups() 156 | for name in m.required_modules: 157 | assert_not_in(name, sys.modules) 158 | mc = hdf5storage.MarshallerCollection(lazy_loading=False, 159 | marshallers=[m]) 160 | for name in m.required_modules: 161 | assert_in(name, sys.modules) 162 | assert mc._has_required_modules[-1] 163 | assert mc._imported_required_modules[-1] 164 | mback, has_modules = mc.get_marshaller_for_type_string( \ 165 | m.python_type_strings[0]) 166 | assert_is_not_none(mback) 167 | assert has_modules 168 | assert mc._has_required_modules[-1] 169 | assert mc._imported_required_modules[-1] 170 | 171 | 172 | def test_marshaller_read(): 173 | 
m = Tmarshaller() 174 | m.required_parent_modules = ['json'] 175 | m.required_modules = ['json'] 176 | m.python_type_strings = ['ellipsis'] 177 | m.types = ['builtins.ellipsis'] 178 | m.update_type_lookups() 179 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 180 | marshallers=[m]) 181 | options = hdf5storage.Options(marshaller_collection=mc) 182 | 183 | fld = None 184 | name = '/the' 185 | try: 186 | fld = tempfile.mkstemp() 187 | os.close(fld[0]) 188 | filename = fld[1] 189 | with h5py.File(filename, mode='w') as f: 190 | f.create_dataset(name, data=np.int64([1])) 191 | f[name].attrs.create('Python.Type', 192 | b'ellipsis') 193 | out = hdf5storage.utilities.read_data(f, f, name, options) 194 | except: 195 | raise 196 | finally: 197 | if fld is not None: 198 | os.remove(fld[1]) 199 | 200 | assert_equal(out, 'read') 201 | 202 | 203 | def test_marshaller_read_approximate_missing_parent(): 204 | m = Tmarshaller() 205 | m.required_parent_modules = ['aiveneiavie'] 206 | m.required_modules = ['json'] 207 | m.python_type_strings = ['ellipsis'] 208 | m.types = ['builtins.ellipsis'] 209 | m.update_type_lookups() 210 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 211 | marshallers=[m]) 212 | options = hdf5storage.Options(marshaller_collection=mc) 213 | 214 | fld = None 215 | name = '/the' 216 | try: 217 | fld = tempfile.mkstemp() 218 | os.close(fld[0]) 219 | filename = fld[1] 220 | with h5py.File(filename, mode='w') as f: 221 | f.create_dataset(name, data=np.int64([1])) 222 | f[name].attrs.create('Python.Type', 223 | b'ellipsis') 224 | out = hdf5storage.utilities.read_data(f, f, name, options) 225 | except: 226 | raise 227 | finally: 228 | if fld is not None: 229 | os.remove(fld[1]) 230 | 231 | assert_equal(out, 'read_approximate') 232 | 233 | 234 | def test_marshaller_read_approximate_missing_import(): 235 | m = Tmarshaller() 236 | m.required_parent_modules = ['json'] 237 | m.required_modules = ['aiveneiavie'] 238 | m.python_type_strings = 
['ellipsis'] 239 | m.types = ['builtins.ellipsis'] 240 | m.update_type_lookups() 241 | mc = hdf5storage.MarshallerCollection(lazy_loading=True, 242 | marshallers=[m]) 243 | options = hdf5storage.Options(marshaller_collection=mc) 244 | 245 | fld = None 246 | name = '/the' 247 | try: 248 | fld = tempfile.mkstemp() 249 | os.close(fld[0]) 250 | filename = fld[1] 251 | with h5py.File(filename, mode='w') as f: 252 | f.create_dataset(name, data=np.int64([1])) 253 | f[name].attrs.create('Python.Type', 254 | b'ellipsis') 255 | out = hdf5storage.utilities.read_data(f, f, name, options) 256 | except: 257 | raise 258 | finally: 259 | if fld is not None: 260 | os.remove(fld[1]) 261 | 262 | assert_equal(out, 'read_approximate') 263 | -------------------------------------------------------------------------------- /tests/test_path_escaping.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import posixpath 28 | import random 29 | 30 | from hdf5storage.utilities import escape_path, unescape_path, process_path 31 | 32 | from make_randoms import random_str_ascii, random_str_some_unicode 33 | 34 | from nose.tools import assert_equal 35 | 36 | random.seed() 37 | 38 | 39 | # Get the characters that have to be escaped and make sure they are str 40 | # instead of bytes. 41 | chars_to_escape = ['\\', '/', '\x00'] 42 | substitutions = ['\\\\', '\\x2f', '\\x00'] 43 | period = '.' 
44 | period_substitute = '\\x2e' 45 | if isinstance(chars_to_escape[0], bytes): 46 | chars_to_escape = [c.decode('utf-8') for c in chars_to_escape] 47 | substitutions = [c.decode('utf-8') for c in substitutions] 48 | period = period.decode('utf-8') 49 | period_substitute = period_substitute.decode('utf-8') 50 | 51 | 52 | def make_str_for_esc(include_escapes=None, 53 | include_leading_periods=False, 54 | no_unicode=False, 55 | pack_digits=True): 56 | sl = list(random_str_ascii(10)) 57 | if not no_unicode: 58 | sl += list(random_str_some_unicode(10)) 59 | if pack_digits: 60 | chars = b'0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F' 61 | sl += chars.decode('ascii').split(b' '.decode('ascii')) * 10 62 | sl += [period] * 10 63 | if include_escapes is not None: 64 | for c in include_escapes: 65 | sl += [c] * 3 66 | random.shuffle(sl) 67 | s = b''.decode('ascii').join(sl).lstrip(period) 68 | if include_leading_periods: 69 | s = period * random.randint(1, 10) + s 70 | return s 71 | 72 | 73 | def test_escaping(): 74 | for i in range(20): 75 | s = make_str_for_esc(include_escapes=chars_to_escape, 76 | include_leading_periods=True) 77 | s_e = s 78 | for j, c in enumerate(chars_to_escape): 79 | s_e = s_e.replace(c, substitutions[j]) 80 | length = len(s_e) 81 | s_e = s_e.lstrip(period) 82 | s_e = period_substitute * (length - len(s_e)) + s_e 83 | assert_equal(s_e, escape_path(s)) 84 | 85 | 86 | def test_unescaping_x(): 87 | fmts = [b'{0:02x}'.decode('ascii'), b'{0:02X}'.decode('ascii')] 88 | prefix = b'\\x'.decode('ascii') 89 | for i in range(20): 90 | s = make_str_for_esc(no_unicode=True, 91 | pack_digits=True) 92 | index = random.randrange(1, len(s) - 1) 93 | c = s[index] 94 | n = ord(c) 95 | c_e = prefix + random.choice(fmts).format(n) 96 | s_e = s[:index] + c_e + s[(index + 1):] 97 | assert_equal(s, unescape_path(s_e)) 98 | 99 | 100 | def test_unescaping_u(): 101 | fmts = [b'{0:04x}'.decode('ascii'), b'{0:04X}'.decode('ascii')] 102 | prefix = b'\\u'.decode('ascii') 103 | for 
i in range(20): 104 | s = make_str_for_esc(pack_digits=True) 105 | index = random.randrange(1, len(s) - 1) 106 | c = s[index] 107 | n = ord(c) 108 | c_e = prefix + random.choice(fmts).format(n) 109 | s_e = s[:index] + c_e + s[(index + 1):] 110 | assert_equal(s, unescape_path(s_e)) 111 | 112 | 113 | def test_unescaping_U(): 114 | fmts = [b'{0:08x}'.decode('ascii'), b'{0:08X}'.decode('ascii')] 115 | prefix = b'\\U'.decode('ascii') 116 | for i in range(20): 117 | s = make_str_for_esc(pack_digits=True) 118 | index = random.randrange(1, len(s) - 1) 119 | c = s[index] 120 | n = ord(c) 121 | c_e = prefix + random.choice(fmts).format(n) 122 | s_e = s[:index] + c_e + s[(index + 1):] 123 | assert_equal(s, unescape_path(s_e)) 124 | 125 | 126 | def test_escape_reversibility_no_escapes(): 127 | for i in range(20): 128 | s = make_str_for_esc() 129 | s_e = escape_path(s) 130 | s_e_u = unescape_path(s_e) 131 | assert_equal(s, s_e) 132 | assert_equal(s, s_e_u) 133 | 134 | 135 | def test_escape_reversibility_no_escapes_bytes(): 136 | for i in range(20): 137 | s = make_str_for_esc() 138 | s = s.encode('utf-8') 139 | s_e = escape_path(s) 140 | s_e_u = unescape_path(s_e) 141 | assert_equal(s, s_e.encode('utf-8')) 142 | assert_equal(s, s_e_u.encode('utf-8')) 143 | 144 | 145 | def test_escape_reversibility_escapes(): 146 | for i in range(20): 147 | s = make_str_for_esc(include_escapes=chars_to_escape) 148 | s_e = escape_path(s) 149 | s_e_u = unescape_path(s_e) 150 | assert_equal(s, s_e_u) 151 | 152 | 153 | def test_escape_reversibility_escapes_bytes(): 154 | for i in range(20): 155 | s = make_str_for_esc(include_escapes=chars_to_escape) 156 | s = s.encode('utf-8') 157 | s_e = escape_path(s) 158 | s_e_u = unescape_path(s_e) 159 | assert_equal(s, s_e_u.encode('utf-8')) 160 | 161 | 162 | def test_escape_reversibility_leading_periods(): 163 | for i in range(20): 164 | s = make_str_for_esc(include_leading_periods=True) 165 | s_e = escape_path(s) 166 | s_e_u = unescape_path(s_e) 167 | 
assert_equal(s, s_e_u) 168 | 169 | 170 | def test_escape_reversibility_leading_periods_bytes(): 171 | for i in range(20): 172 | s = make_str_for_esc(include_leading_periods=True) 173 | s = s.encode('utf-8') 174 | s_e = escape_path(s) 175 | s_e_u = unescape_path(s_e) 176 | assert_equal(s, s_e_u.encode('utf-8')) 177 | 178 | 179 | def test_escape_reversibility_escapes_leading_periods(): 180 | for i in range(20): 181 | s = make_str_for_esc(include_escapes=chars_to_escape, 182 | include_leading_periods=True) 183 | s_e = escape_path(s) 184 | s_e_u = unescape_path(s_e) 185 | assert_equal(s, s_e_u) 186 | 187 | 188 | def test_escape_reversibility_escapes_leading_periods_bytes(): 189 | for i in range(20): 190 | s = make_str_for_esc(include_escapes=chars_to_escape, 191 | include_leading_periods=True) 192 | s = s.encode('utf-8') 193 | s_e = escape_path(s) 194 | s_e_u = unescape_path(s_e) 195 | assert_equal(s, s_e_u.encode('utf-8')) 196 | 197 | 198 | def test_process_path_no_escapes(): 199 | for i in range(10): 200 | pth = [make_str_for_esc() for j in range(10)] 201 | beginning = tuple(pth[:-1]) 202 | gs = posixpath.join(*beginning) 203 | ts = pth[-1] 204 | gname, tname = process_path(pth) 205 | assert_equal(gs, gname) 206 | assert_equal(ts, tname) 207 | 208 | 209 | def test_process_path_no_escapes_bytes(): 210 | for i in range(10): 211 | pth = [make_str_for_esc().encode('utf-8') for j in range(10)] 212 | beginning = tuple(pth[:-1]) 213 | gs = posixpath.join(*beginning).decode('utf-8') 214 | ts = pth[-1].decode('utf-8') 215 | gname, tname = process_path(pth) 216 | assert_equal(gs, gname) 217 | assert_equal(ts, tname) 218 | 219 | 220 | def test_process_path_escapes(): 221 | for i in range(10): 222 | pth = [make_str_for_esc(include_escapes=chars_to_escape) 223 | for j in range(10)] 224 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 225 | gs = posixpath.join(*beginning) 226 | ts = escape_path(pth[-1]) 227 | gname, tname = process_path(pth) 228 | assert_equal(gs, gname) 
229 | assert_equal(ts, tname) 230 | 231 | 232 | def test_process_path_escapes_bytes(): 233 | for i in range(10): 234 | pth = [make_str_for_esc( 235 | include_escapes=chars_to_escape).encode('utf-8') 236 | for j in range(10)] 237 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 238 | gs = posixpath.join(*beginning) 239 | ts = escape_path(pth[-1]) 240 | gname, tname = process_path(pth) 241 | assert_equal(gs, gname) 242 | assert_equal(ts, tname) 243 | 244 | 245 | def test_process_path_leading_periods(): 246 | for i in range(10): 247 | pth = [make_str_for_esc(include_leading_periods=True) 248 | for j in range(10)] 249 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 250 | gs = posixpath.join(*beginning) 251 | ts = escape_path(pth[-1]) 252 | gname, tname = process_path(pth) 253 | assert_equal(gs, gname) 254 | assert_equal(ts, tname) 255 | 256 | 257 | def test_process_path_leading_periods_bytes(): 258 | for i in range(10): 259 | pth = [make_str_for_esc( 260 | include_leading_periods=True).encode('utf-8') 261 | for j in range(10)] 262 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 263 | gs = posixpath.join(*beginning) 264 | ts = escape_path(pth[-1]) 265 | gname, tname = process_path(pth) 266 | assert_equal(gs, gname) 267 | assert_equal(ts, tname) 268 | 269 | 270 | def test_process_path_escapes_leading_periods(): 271 | for i in range(10): 272 | pth = [make_str_for_esc(include_escapes=chars_to_escape, 273 | include_leading_periods=True) 274 | for j in range(10)] 275 | beginning = tuple([escape_path(s) for s in pth[:-1]]) 276 | gs = posixpath.join(*beginning) 277 | ts = escape_path(pth[-1]) 278 | gname, tname = process_path(pth) 279 | assert_equal(gs, gname) 280 | assert_equal(ts, tname) 281 | 282 | 283 | def test_process_path_escapes_leading_periods_bytes(): 284 | for i in range(10): 285 | pth = [make_str_for_esc( 286 | include_escapes=chars_to_escape, 287 | include_leading_periods=True).encode('utf-8') 288 | for j in range(10)] 289 | beginning = 
tuple([escape_path(s) for s in pth[:-1]]) 290 | gs = posixpath.join(*beginning) 291 | ts = escape_path(pth[-1]) 292 | gname, tname = process_path(pth) 293 | assert_equal(gs, gname) 294 | assert_equal(ts, tname) 295 | -------------------------------------------------------------------------------- /tests/test_dict_like_storage_methods.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2020, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | import os 28 | import os.path 29 | import random 30 | import tempfile 31 | 32 | import numpy as np 33 | 34 | import h5py 35 | 36 | import hdf5storage 37 | from hdf5storage.utilities import escape_path 38 | 39 | from nose.tools import assert_equal as assert_equal_nose 40 | 41 | from make_randoms import random_name, random_dict, random_int, \ 42 | random_str_ascii, random_str_some_unicode, max_dict_key_length 43 | 44 | random.seed() 45 | 46 | 47 | 48 | # Need a list of dict-like types, which will depend on Python 49 | # version. 50 | dict_like = ['dict', 'OrderedDict'] 51 | 52 | # Need a list of previously invalid characters. 53 | invalid_characters = ('\x00', '/') 54 | 55 | 56 | def check_all_valid_str_keys(tp, option_keywords): 57 | options = hdf5storage.Options(**option_keywords) 58 | key_value_names = (options.dict_like_keys_name, 59 | options.dict_like_values_name) 60 | 61 | data = random_dict(tp) 62 | for k in key_value_names: 63 | if k in data: 64 | del data[k] 65 | 66 | # Make a random name. 67 | name = random_name() 68 | 69 | # Write the data to the proper file with the given name with the 70 | # provided options. The file needs to be deleted after to keep junk 71 | # from building up. 
72 | fld = None 73 | try: 74 | fld = tempfile.mkstemp() 75 | os.close(fld[0]) 76 | filename = fld[1] 77 | hdf5storage.write(data, path=name, filename=filename, 78 | options=options) 79 | 80 | with h5py.File(filename, mode='r') as f: 81 | for k in key_value_names: 82 | assert escape_path(k) not in f[name] 83 | for k in data: 84 | assert escape_path(k) in f[name] 85 | except: 86 | raise 87 | finally: 88 | if fld is not None: 89 | os.remove(fld[1]) 90 | 91 | 92 | def check_str_key_previously_invalid_char(tp, ch, option_keywords): 93 | options = hdf5storage.Options(**option_keywords) 94 | key_value_names = (options.dict_like_keys_name, 95 | options.dict_like_values_name) 96 | 97 | data = random_dict(tp) 98 | for k in key_value_names: 99 | if k in data: 100 | del data[k] 101 | 102 | # Add a random invalid str key using the provided character 103 | key = key_value_names[0] 104 | while key in key_value_names: 105 | key = ch.join([random_str_ascii(max_dict_key_length) 106 | for i in range(2)]) 107 | data[key] = random_int() 108 | 109 | # Make a random name. 110 | name = random_name() 111 | 112 | # Write the data to the proper file with the given name with the 113 | # provided options. The file needs to be deleted after to keep junk 114 | # from building up. 
115 | fld = None 116 | try: 117 | fld = tempfile.mkstemp() 118 | os.close(fld[0]) 119 | filename = fld[1] 120 | hdf5storage.write(data, path=name, filename=filename, 121 | options=options) 122 | 123 | with h5py.File(filename, mode='r') as f: 124 | for k in key_value_names: 125 | assert escape_path(k) not in f[name] 126 | for k in data: 127 | assert escape_path(k) in f[name] 128 | except: 129 | raise 130 | finally: 131 | if fld is not None: 132 | os.remove(fld[1]) 133 | 134 | 135 | def check_string_type_non_str_key(tp, other_tp, option_keywords): 136 | options = hdf5storage.Options(**option_keywords) 137 | key_value_names = (options.dict_like_keys_name, 138 | options.dict_like_values_name) 139 | 140 | data = random_dict(tp) 141 | for k in key_value_names: 142 | if k in data: 143 | del data[k] 144 | keys = list(data.keys()) 145 | 146 | key_gen = random_str_some_unicode(max_dict_key_length) 147 | if other_tp == 'numpy.bytes_': 148 | key = np.bytes_(key_gen.encode('UTF-8')) 149 | elif other_tp == 'numpy.unicode_': 150 | key = np.unicode_(key_gen) 151 | elif other_tp == 'bytes': 152 | key = key_gen.encode('UTF-8') 153 | data[key] = random_int() 154 | keys.append(key_gen) 155 | 156 | # Make a random name. 157 | name = random_name() 158 | 159 | # Write the data to the proper file with the given name with the 160 | # provided options. The file needs to be deleted after to keep junk 161 | # from building up. 
162 | fld = None 163 | try: 164 | fld = tempfile.mkstemp() 165 | os.close(fld[0]) 166 | filename = fld[1] 167 | hdf5storage.write(data, path=name, filename=filename, 168 | options=options) 169 | 170 | with h5py.File(filename, mode='r') as f: 171 | assert_equal_nose(set(keys), set(f[name].keys())) 172 | 173 | except: 174 | raise 175 | finally: 176 | if fld is not None: 177 | os.remove(fld[1]) 178 | 179 | 180 | def check_int_key(tp, option_keywords): 181 | options = hdf5storage.Options(**option_keywords) 182 | key_value_names = (options.dict_like_keys_name, 183 | options.dict_like_values_name) 184 | 185 | data = random_dict(tp) 186 | for k in key_value_names: 187 | if k in data: 188 | del data[k] 189 | 190 | key = random_int() 191 | data[key] = random_int() 192 | 193 | # Make a random name. 194 | name = random_name() 195 | 196 | # Write the data to the proper file with the given name with the 197 | # provided options. The file needs to be deleted after to keep junk 198 | # from building up. 
199 | fld = None 200 | try: 201 | fld = tempfile.mkstemp() 202 | os.close(fld[0]) 203 | filename = fld[1] 204 | hdf5storage.write(data, path=name, filename=filename, 205 | options=options) 206 | 207 | with h5py.File(filename, mode='r') as f: 208 | assert_equal_nose(set(key_value_names), set(f[name].keys())) 209 | except: 210 | raise 211 | finally: 212 | if fld is not None: 213 | os.remove(fld[1]) 214 | 215 | 216 | def test_all_valid_str_keys(): 217 | # generate some random keys_values_names 218 | keys_values_names = [('keys', 'values')] 219 | for i in range(3): 220 | names = ('a', 'a') 221 | while names[0] == names[1]: 222 | names = [random_str_ascii(8) for i in range(2)] 223 | keys_values_names.append(names) 224 | for pyth_meta in (True, False): 225 | for mat_meta in (True, False): 226 | for tp in dict_like: 227 | for names in keys_values_names: 228 | options_keywords = { \ 229 | 'store_python_metadata': pyth_meta, \ 230 | 'matlab_compatible': mat_meta, \ 231 | 'dict_like_keys_name': names[0], \ 232 | 'dict_like_values_name': names[1]} 233 | yield check_all_valid_str_keys, tp, options_keywords 234 | 235 | 236 | def test_str_key_previously_invalid_char(): 237 | # generate some random keys_values_names 238 | keys_values_names = [('keys', 'values')] 239 | for i in range(3): 240 | names = ('a', 'a') 241 | while names[0] == names[1]: 242 | names = [random_str_ascii(8) for i in range(2)] 243 | keys_values_names.append(names) 244 | for pyth_meta in (True, False): 245 | for mat_meta in (True, False): 246 | for tp in dict_like: 247 | for c in invalid_characters: 248 | for names in keys_values_names: 249 | options_keywords = { \ 250 | 'store_python_metadata': pyth_meta, \ 251 | 'matlab_compatible': mat_meta, \ 252 | 'dict_like_keys_name': names[0], \ 253 | 'dict_like_values_name': names[1]} 254 | yield check_str_key_previously_invalid_char, tp, c, options_keywords 255 | 256 | 257 | def test_string_type_non_str_key(): 258 | # Set the other key types. 
259 | other_tps = ['bytes', 'numpy.bytes_', 'numpy.unicode_'] 260 | # generate some random keys_values_names 261 | keys_values_names = [('keys', 'values')] 262 | for i in range(1): 263 | names = ('a', 'a') 264 | while names[0] == names[1]: 265 | names = [random_str_ascii(8) for i in range(2)] 266 | keys_values_names.append(names) 267 | for pyth_meta in (True, False): 268 | for mat_meta in (True, False): 269 | for tp in dict_like: 270 | for other_tp in other_tps: 271 | for names in keys_values_names: 272 | options_keywords = { \ 273 | 'store_python_metadata': pyth_meta, \ 274 | 'matlab_compatible': mat_meta, \ 275 | 'dict_like_keys_name': names[0], \ 276 | 'dict_like_values_name': names[1]} 277 | yield check_string_type_non_str_key, tp, other_tp, options_keywords 278 | 279 | 280 | def test_int_key(): 281 | # generate some random keys_values_names 282 | keys_values_names = [('keys', 'values')] 283 | for i in range(3): 284 | names = ('a', 'a') 285 | while names[0] == names[1]: 286 | names = [random_str_ascii(8) for i in range(2)] 287 | keys_values_names.append(names) 288 | for pyth_meta in (True, False): 289 | for mat_meta in (True, False): 290 | for tp in dict_like: 291 | for names in keys_values_names: 292 | options_keywords = { \ 293 | 'store_python_metadata': pyth_meta, \ 294 | 'matlab_compatible': mat_meta, \ 295 | 'dict_like_keys_name': names[0], \ 296 | 'dict_like_values_name': names[1]} 297 | yield check_int_key, tp, options_keywords 298 | -------------------------------------------------------------------------------- /tests/test_hdf5_filters.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2013-2016, Freja Nordsiek 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are 6 | # met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | import os 28 | import os.path 29 | import random 30 | import tempfile 31 | 32 | import h5py 33 | 34 | import hdf5storage 35 | 36 | from nose.tools import assert_equal as assert_equal_nose 37 | 38 | from asserts import assert_equal 39 | from make_randoms import random_numpy, random_numpy_shape, \ 40 | max_array_axis_length, dtypes, random_name 41 | 42 | random.seed() 43 | 44 | 45 | 46 | def check_read_filters(filters): 47 | # Read out the filter arguments. 48 | filts = {'compression': 'gzip', 49 | 'shuffle': True, 50 | 'fletcher32': True, 51 | 'gzip_level': 7} 52 | for k, v in filters.items(): 53 | filts[k] = v 54 | if filts['compression'] == 'gzip': 55 | filts['compression_opts'] = filts['gzip_level'] 56 | del filts['gzip_level'] 57 | 58 | # Make some random data. 
59 | dims = random.randint(1, 4) 60 | data = random_numpy(shape=random_numpy_shape(dims, 61 | max_array_axis_length), 62 | dtype=random.choice(tuple( 63 | set(dtypes) - set(['U'])))) 64 | # Make a random name. 65 | name = random_name() 66 | 67 | # Write the data to the proper file with the given name with the 68 | # provided filters and read it backt. The file needs to be deleted 69 | # after to keep junk from building up. 70 | fld = None 71 | try: 72 | fld = tempfile.mkstemp() 73 | os.close(fld[0]) 74 | filename = fld[1] 75 | with h5py.File(filename, mode='w') as f: 76 | f.create_dataset(name, data=data, chunks=True, **filts) 77 | out = hdf5storage.read(path=name, filename=filename, 78 | matlab_compatible=False) 79 | except: 80 | raise 81 | finally: 82 | if fld is not None: 83 | os.remove(fld[1]) 84 | 85 | # Compare 86 | assert_equal(out, data) 87 | 88 | 89 | def check_write_filters(filters): 90 | # Read out the filter arguments. 91 | filts = {'compression': 'gzip', 92 | 'shuffle': True, 93 | 'fletcher32': True, 94 | 'gzip_level': 7} 95 | for k, v in filters.items(): 96 | filts[k] = v 97 | 98 | # Make some random data. The dtype must be restricted so that it can 99 | # be read back reliably. 100 | dims = random.randint(1, 4) 101 | dts = tuple(set(dtypes) - set(['U', 'S', 'bool', 'complex64', \ 102 | 'complex128'])) 103 | 104 | data = random_numpy(shape=random_numpy_shape(dims, 105 | max_array_axis_length), 106 | dtype=random.choice(dts)) 107 | # Make a random name. 108 | name = random_name() 109 | 110 | # Write the data to the proper file with the given name with the 111 | # provided filters and read it backt. The file needs to be deleted 112 | # after to keep junk from building up. 
113 | fld = None 114 | try: 115 | fld = tempfile.mkstemp() 116 | os.close(fld[0]) 117 | filename = fld[1] 118 | hdf5storage.write(data, path=name, filename=filename, \ 119 | store_python_metadata=False, matlab_compatible=False, \ 120 | compress=True, compress_size_threshold=0, \ 121 | compression_algorithm=filts['compression'], \ 122 | gzip_compression_level=filts['gzip_level'], \ 123 | shuffle_filter=filts['shuffle'], \ 124 | compressed_fletcher32_filter=filts['fletcher32']) 125 | 126 | with h5py.File(filename, mode='r') as f: 127 | d = f[name] 128 | fletcher32 = d.fletcher32 129 | shuffle = d.shuffle 130 | compression = d.compression 131 | gzip_level = d.compression_opts 132 | out = d[...] 133 | except: 134 | raise 135 | finally: 136 | if fld is not None: 137 | os.remove(fld[1]) 138 | 139 | # Check the filters 140 | assert_equal_nose(fletcher32, filts['fletcher32']) 141 | assert_equal_nose(shuffle, filts['shuffle']) 142 | assert_equal_nose(compression, filts['compression']) 143 | if filts['compression'] == 'gzip': 144 | assert_equal_nose(gzip_level, filts['gzip_level']) 145 | 146 | # Compare 147 | assert_equal(out, data) 148 | 149 | 150 | def check_uncompressed_write_filters(method, 151 | uncompressed_fletcher32_filter, 152 | filters): 153 | # Read out the filter arguments. 154 | filts = {'compression': 'gzip', 155 | 'shuffle': True, 156 | 'fletcher32': True, 157 | 'gzip_level': 7} 158 | for k, v in filters.items(): 159 | filts[k] = v 160 | 161 | # Make some random data. The dtype must be restricted so that it can 162 | # be read back reliably. 163 | dims = random.randint(1, 4) 164 | dts = tuple(set(dtypes) - set(['U', 'S', 'bool', 'complex64', \ 165 | 'complex128'])) 166 | 167 | data = random_numpy(shape=random_numpy_shape(dims, 168 | max_array_axis_length), 169 | dtype=random.choice(dts)) 170 | # Make a random name. 
171 | name = random_name() 172 | 173 | # Make the options to disable compression by the method specified, 174 | # which is either that it is outright disabled or that the data is 175 | # smaller than the compression threshold. 176 | if method == 'compression_disabled': 177 | opts = {'compress': False, 'compress_size_threshold': 0} 178 | else: 179 | opts = {'compress': True, 180 | 'compress_size_threshold': data.nbytes + 1} 181 | 182 | # Write the data to the proper file with the given name with the 183 | # provided filters and read it backt. The file needs to be deleted 184 | # after to keep junk from building up. 185 | fld = None 186 | try: 187 | fld = tempfile.mkstemp() 188 | os.close(fld[0]) 189 | filename = fld[1] 190 | hdf5storage.write(data, path=name, filename=filename, \ 191 | store_python_metadata=False, matlab_compatible=False, \ 192 | compression_algorithm=filts['compression'], \ 193 | gzip_compression_level=filts['gzip_level'], \ 194 | shuffle_filter=filts['shuffle'], \ 195 | compressed_fletcher32_filter=filts['fletcher32'], \ 196 | uncompressed_fletcher32_filter= \ 197 | uncompressed_fletcher32_filter, \ 198 | **opts) 199 | 200 | with h5py.File(filename, mode='r') as f: 201 | d = f[name] 202 | fletcher32 = d.fletcher32 203 | shuffle = d.shuffle 204 | compression = d.compression 205 | gzip_level = d.compression_opts 206 | out = d[...] 
207 | except: 208 | raise 209 | finally: 210 | if fld is not None: 211 | os.remove(fld[1]) 212 | 213 | # Check the filters 214 | assert_equal_nose(compression, None) 215 | assert_equal_nose(shuffle, False) 216 | assert_equal_nose(fletcher32, uncompressed_fletcher32_filter) 217 | 218 | # Compare 219 | assert_equal(out, data) 220 | 221 | 222 | def test_read_filtered_data(): 223 | for compression in ('gzip', 'lzf'): 224 | for shuffle in (True, False): 225 | for fletcher32 in (True, False): 226 | if compression != 'gzip': 227 | filters = {'compression': compression, 228 | 'shuffle': shuffle, 229 | 'fletcher32': fletcher32} 230 | yield check_read_filters, filters 231 | else: 232 | for level in range(10): 233 | filters = {'compression': compression, 234 | 'shuffle': shuffle, 235 | 'fletcher32': fletcher32, 236 | 'gzip_level': level} 237 | yield check_read_filters, filters 238 | 239 | 240 | def test_write_filtered_data(): 241 | for compression in ('gzip', 'lzf'): 242 | for shuffle in (True, False): 243 | for fletcher32 in (True, False): 244 | if compression != 'gzip': 245 | filters = {'compression': compression, 246 | 'shuffle': shuffle, 247 | 'fletcher32': fletcher32} 248 | yield check_read_filters, filters 249 | else: 250 | for level in range(10): 251 | filters = {'compression': compression, 252 | 'shuffle': shuffle, 253 | 'fletcher32': fletcher32, 254 | 'gzip_level': level} 255 | yield check_write_filters, filters 256 | 257 | 258 | def test_uncompressed_write_filtered_data(): 259 | for method in ('compression_disabled', 'data_too_small'): 260 | for uncompressed_fletcher32_filter in (True, False): 261 | for compression in ('gzip', 'lzf'): 262 | for shuffle in (True, False): 263 | for fletcher32 in (True, False): 264 | if compression != 'gzip': 265 | filters = {'compression': compression, 266 | 'shuffle': shuffle, 267 | 'fletcher32': fletcher32} 268 | yield check_read_filters, filters 269 | else: 270 | for level in range(10): 271 | filters = {'compression': compression, 
272 | 'shuffle': shuffle, 273 | 'fletcher32': fletcher32, 274 | 'gzip_level': level} 275 | yield check_uncompressed_write_filters,\ 276 | method, uncompressed_fletcher32_filter,\ 277 | filters 278 | -------------------------------------------------------------------------------- /tests/make_randoms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2013-2020, Freja Nordsiek 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are 8 | # met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in the 15 | # documentation and/or other materials provided with the distribution. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | import collections 30 | import posixpath 31 | import random 32 | import string 33 | import warnings 34 | 35 | import numpy as np 36 | import numpy.random 37 | 38 | 39 | random.seed() 40 | 41 | 42 | # The dtypes that can be made 43 | dtypes = ['bool', 'uint8', 'uint16', 'uint32', 'uint64', 44 | 'int8', 'int16', 'int32', 'int64', 45 | 'float32', 'float64', 'complex64', 'complex128', 46 | 'S', 'U'] 47 | 48 | # Define the sizes of random datasets to use. 49 | max_string_length = 10 50 | max_array_axis_length = 8 51 | max_list_length = 6 52 | max_posix_path_depth = 5 53 | max_posix_path_lengths = 17 54 | object_subarray_dimensions = 2 55 | max_object_subarray_axis_length = 5 56 | min_dict_keys = 4 57 | max_dict_keys = 12 58 | max_dict_key_length = 10 59 | dict_value_subarray_dimensions = 2 60 | max_dict_value_subarray_axis_length = 5 61 | min_structured_ndarray_fields = 2 62 | max_structured_ndarray_fields = 5 63 | max_structured_ndarray_field_lengths = 10 64 | max_structured_ndarray_axis_length = 2 65 | structured_ndarray_subarray_dimensions = 2 66 | max_structured_ndarray_subarray_axis_length = 4 67 | 68 | 69 | def random_str_ascii_letters(length): 70 | # Makes a random ASCII str of the specified length. 71 | ltrs = string.ascii_letters 72 | return ''.join([random.choice(ltrs) for i in 73 | range(0, length)]) 74 | 75 | 76 | def random_str_ascii(length): 77 | # Makes a random ASCII str of the specified length. 78 | ltrs = string.ascii_letters + string.digits 79 | return ''.join([random.choice(ltrs) for i in 80 | range(0, length)]) 81 | 82 | 83 | def random_str_some_unicode(length): 84 | # Makes a random ASCII+limited unicode str of the specified 85 | # length. 
86 | ltrs = random_str_ascii(10) 87 | ltrs += 'αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩς' 88 | c = '' 89 | return c.join([random.choice(ltrs) for i in range(0, length)]) 90 | 91 | 92 | def random_bytes(length): 93 | # Makes a random sequence of bytes of the specified length from 94 | # the ASCII set. 95 | ltrs = bytes(range(1, 127)) 96 | return bytes([random.choice(ltrs) for i in range(0, length)]) 97 | 98 | 99 | def random_bytes_fullrange(length): 100 | # Makes a random sequence of bytes of the specified length from 101 | # the ASCII set. 102 | ltrs = bytes(range(1, 255)) 103 | return bytes([random.choice(ltrs) for i in range(0, length)]) 104 | 105 | def random_int(): 106 | return random.randint(-(2**31 - 1), 2**31) 107 | 108 | 109 | def random_float(): 110 | return random.uniform(-1.0, 1.0) \ 111 | * 10.0**random.randint(-300, 300) 112 | 113 | 114 | def random_numpy(shape, dtype, allow_nan=True, 115 | allow_unicode=False, 116 | object_element_dtypes=None): 117 | # Makes a random numpy array of the specified shape and dtype 118 | # string. The method is slightly different depending on the 119 | # type. For 'bytes', 'str', and 'object'; an array of the 120 | # specified size is made and then each element is set to either 121 | # a numpy.bytes_, numpy.str_, or some other object of any type 122 | # (here, it is a randomly typed random numpy array). If it is 123 | # any other type, then it is just a matter of constructing the 124 | # right sized ndarray from a random sequence of bytes (all must 125 | # be forced to 0 and 1 for bool). Optionally include unicode 126 | # characters. Optionally, for object dtypes, the allowed dtypes for 127 | # their elements can be given. 
128 | if dtype == 'S': 129 | length = random.randint(1, max_string_length) 130 | data = np.zeros(shape=shape, dtype='S' + str(length)) 131 | for index, x in np.ndenumerate(data): 132 | if allow_unicode: 133 | chars = random_bytes_fullrange(length) 134 | else: 135 | chars = random_bytes(length) 136 | data[index] = np.bytes_(chars) 137 | return data 138 | elif dtype == 'U': 139 | length = random.randint(1, max_string_length) 140 | data = np.zeros(shape=shape, dtype='U' + str(length)) 141 | for index, x in np.ndenumerate(data): 142 | if allow_unicode: 143 | chars = random_str_some_unicode(length) 144 | else: 145 | chars = random_str_ascii(length) 146 | data[index] = np.unicode_(chars) 147 | return data 148 | elif dtype == 'object': 149 | if object_element_dtypes is None: 150 | object_element_dtypes = dtypes 151 | data = np.zeros(shape=shape, dtype='object') 152 | for index, x in np.ndenumerate(data): 153 | data[index] = random_numpy( \ 154 | shape=random_numpy_shape( \ 155 | object_subarray_dimensions, \ 156 | max_object_subarray_axis_length), \ 157 | dtype=random.choice(object_element_dtypes)) 158 | return data 159 | else: 160 | nbytes = np.ndarray(shape=(1,), dtype=dtype).nbytes 161 | bts = np.random.bytes(nbytes * np.prod(shape)) 162 | if dtype == 'bool': 163 | bts = b''.join([{True: b'\x01', False: b'\x00'}[ \ 164 | ch > 127] for ch in bts]) 165 | data = np.ndarray(shape=shape, dtype=dtype, buffer=bts) 166 | # If it is a floating point type and we are supposed to 167 | # remove NaN's, then turn them to zeros. Numpy will throw 168 | # RuntimeWarnings for some NaN values, so those warnings need to 169 | # be caught and ignored. 
170 | if not allow_nan and data.dtype.kind in ('f', 'c'): 171 | data = data.copy() 172 | with warnings.catch_warnings(): 173 | warnings.simplefilter('ignore', RuntimeWarning) 174 | if data.dtype.kind == 'f': 175 | data[np.isnan(data)] = 0.0 176 | else: 177 | data.real[np.isnan(data.real)] = 0.0 178 | data.imag[np.isnan(data.imag)] = 0.0 179 | return data 180 | 181 | 182 | def random_numpy_scalar(dtype, object_element_dtypes=None): 183 | # How a random scalar is made depends on th type. For must, it 184 | # is just a single number. But for the string types, it is a 185 | # string of any length. 186 | if dtype == 'S': 187 | return np.bytes_(random_bytes(random.randint(1, 188 | max_string_length))) 189 | elif dtype == 'U': 190 | return np.unicode_(random_str_ascii( 191 | random.randint(1, 192 | max_string_length))) 193 | else: 194 | return random_numpy(tuple(), dtype, \ 195 | object_element_dtypes=object_element_dtypes)[()] 196 | 197 | 198 | def random_numpy_shape(dimensions, max_length, min_length=1): 199 | # Makes a random shape tuple having the specified number of 200 | # dimensions. The maximum size along each axis is max_length. 201 | return tuple([random.randint(min_length, max_length) 202 | for x in range(0, dimensions)]) 203 | 204 | 205 | def random_list(N, python_or_numpy='numpy'): 206 | # Makes a random list of the specified type. If instructed, it 207 | # will be composed entirely from random numpy arrays (make a 208 | # random object array and then convert that to a 209 | # list). Otherwise, it will be a list of random bytes. 210 | if python_or_numpy == 'numpy': 211 | return random_numpy((N,), dtype='object').tolist() 212 | else: 213 | data = [] 214 | for i in range(0, N): 215 | data.append(random_bytes(random.randint(1, 216 | max_string_length))) 217 | return data 218 | 219 | 220 | def random_dict(tp='dict'): 221 | # Makes a random dict or dict-like object tp (random number of 222 | # randomized keys with random numpy arrays as values). 
The only 223 | # supported values of tp are 'dict' and 'OrderedDict'. 224 | data = dict() 225 | for i in range(0, random.randint(min_dict_keys, \ 226 | max_dict_keys)): 227 | name = random_str_ascii(max_dict_key_length) 228 | data[name] = \ 229 | random_numpy(random_numpy_shape( \ 230 | dict_value_subarray_dimensions, \ 231 | max_dict_value_subarray_axis_length), \ 232 | dtype=random.choice(dtypes)) 233 | 234 | # If tp is 'dict', return as is. Otherwise, randomize the order. 235 | if tp == 'dict': 236 | return data 237 | elif tp == 'OrderedDict': 238 | # An ordered dict is made by randomizing the field order. 239 | itms = list(data.items()) 240 | random.shuffle(itms) 241 | return collections.OrderedDict(itms) 242 | else: 243 | return data 244 | 245 | 246 | def random_structured_numpy_array(shape, field_shapes=None, 247 | nonascii_fields=False, 248 | nondigits_fields=False, 249 | names=None): 250 | # Make random field names (if not provided with field names), 251 | # dtypes, and sizes. Though, if field_shapes is explicitly given, 252 | # the sizes should be random. The field names must all be of type 253 | # str, not unicode in Python 2. Optionally include non-ascii 254 | # characters in the field names (will have to be encoded in Python 255 | # 2.x). String types will not be used due to the difficulty in 256 | # assigning the length. 
257 | if names is None: 258 | if nonascii_fields: 259 | name_func = random_str_some_unicode 260 | elif nondigits_fields: 261 | name_func = random_str_ascii_letters 262 | else: 263 | name_func = random_str_ascii 264 | names = [name_func( 265 | max_structured_ndarray_field_lengths) 266 | for i in range(0, random.randint( 267 | min_structured_ndarray_fields, 268 | max_structured_ndarray_fields))] 269 | dts = [random.choice(list(set(dtypes) 270 | - set(('S', 'U')))) 271 | for i in range(len(names))] 272 | if field_shapes is None: 273 | shapes = [random_numpy_shape( 274 | structured_ndarray_subarray_dimensions, 275 | max_structured_ndarray_subarray_axis_length) 276 | for i in range(len(names))] 277 | else: 278 | shapes = [field_shapes] * len(names) 279 | # Construct the type of the whole thing. 280 | dt = np.dtype([(names[i], dts[i], shapes[i]) 281 | for i in range(len(names))]) 282 | # Make the array. If dt.itemsize is 0, then we need to make an 283 | # array of int8's the size in shape and convert it to work 284 | # around a numpy bug. Otherwise, we will just create an empty 285 | # array and then proceed by assigning each field. 286 | if dt.itemsize == 0: 287 | return np.zeros(shape=shape, dtype='int8').astype(dt) 288 | else: 289 | data = np.empty(shape=shape, dtype=dt) 290 | for index, x in np.ndenumerate(data): 291 | for i, name in enumerate(names): 292 | data[name][index] = random_numpy(shapes[i], \ 293 | dts[i], allow_nan=False) 294 | return data 295 | 296 | 297 | def random_name(): 298 | # Makes a random POSIX path of a random depth. 299 | depth = random.randint(1, max_posix_path_depth) 300 | path = '/' 301 | for i in range(0, depth): 302 | path = posixpath.join(path, random_str_ascii( 303 | random.randint(1, 304 | max_posix_path_lengths))) 305 | return path 306 | -------------------------------------------------------------------------------- /doc/source/introduction.rst: -------------------------------------------------------------------------------- 1 | .. 
currentmodule:: hdf5storage 2 | 3 | ============ 4 | Introduction 5 | ============ 6 | 7 | Getting Started 8 | =============== 9 | 10 | Most of the functionality that one will use is contained in the main 11 | module :: 12 | 13 | import hdf5storage 14 | 15 | Lower level functionality needed mostly for extending this package to 16 | work with more datatypes is in its submodules. 17 | 18 | The main functions in this module are :py:func:`write` and 19 | :py:func:`read` which write a single Python variable to an HDF5 file or 20 | read the specified contents at one location in an HDF5 file and convert 21 | to Python types. 22 | 23 | HDF5 files are structured much like a Unix filesystem, so everything can 24 | be referenced with a POSIX style path, which looks like 25 | ``'/pyth/hf'``. Unlike a Windows path, forward slashes (``'/'``) are 26 | used as directory separators instead of backward slashes (``'\\'``) and 27 | the base of the file system is just ``'/'`` instead of something like 28 | ``'C:\\'``. In the language of HDF5, what we call directories and files 29 | in filesystems are called groups and datasets. 30 | 31 | More information about paths, the supported escapes, etc. can be found 32 | at :ref:`Paths`. 33 | 34 | .. versionadded:: 0.2 35 | 36 | Ability to escape characters not allowed in Group or Dataset names. 37 | 38 | :py:func:`write` has many options for controlling how the data is 39 | stored, and what metadata is stored, but we can ignore that for now. If 40 | we have a variable named ``foo`` that we want to write to an HDF5 file 41 | named ``data.h5``, we would write it by :: 42 | 43 | hdf5storage.write(foo, path='/foo', filename='data.h5') 44 | 45 | And then we can read it back from the file with the :py:func:`read` 46 | function, which returns the read data.
Here, we will put the data we 47 | read back into the variable ``bar`` :: 48 | 49 | bar = hdf5storage.read(path='/foo', filename='data.h5') 50 | 51 | Writing And Reading Several Python Variables at Once 52 | ==================================================== 53 | 54 | To write and read more than one Python variable, one could use 55 | :py:func:`write` and :py:func:`read` for each variable individually. 56 | This can incur a major performance penalty, especially for large HDF5 57 | files, since each call opens and closes the HDF5 file (sometimes more 58 | than once). 59 | 60 | Version ``0.1.10`` added a way to do this without incurring this 61 | performance penalty by adding two new functions: :py:func:`writes` and 62 | :py:func:`reads`. 63 | 64 | They can write and read more than one Python variable at once, though 65 | they can still work with a single variable. In fact, :py:func:`write` 66 | and :py:func:`read` are now wrappers around them. :py:func:`savemat` 67 | and :py:func:`loadmat` currently use them for the improved performance. 68 | 69 | .. versionadded:: 0.1.10 70 | 71 | Ability to write and read more than one Python variable at a time 72 | without opening and closing the HDF5 file each time. 73 | 74 | Main Options Controlling Writing/Reading Data 75 | ============================================= 76 | 77 | There are many individual options that control how data is written and 78 | read to/from file. These can be set by passing an :py:class:`Options` 79 | object to :py:func:`write` and :py:func:`read` by :: 80 | 81 | options = hdf5storage.Options(...) 82 | hdf5storage.write(... , options=options) 83 | hdf5storage.read(... , options=options) 84 | 85 | or passing the individual keyword arguments used by the 86 | :py:class:`Options` constructor to :py:func:`write` and 87 | :py:func:`read`. The two methods cannot be mixed (the functions will 88 | give precedence to the given :py:class:`Options` object). 89 | 90 | ..
note:: 91 | 92 | Functions in the various submodules only support the 93 | :py:class:`Options` object method of passing options. 94 | 95 | The two main options are :py:attr:`Options.store_python_metadata` and 96 | :py:attr:`Options.matlab_compatible`. A more minor option is 97 | :py:attr:`Options.oned_as`. 98 | 99 | 100 | .. versionadded:: 0.1.9 101 | 102 | Support for the transparent compression of data has been added. It 103 | is enabled by default, compressing all python objects resulting in 104 | HDF5 Datasets larger than 16 KB with the GZIP/Deflate algorithm. 105 | 106 | 107 | store_python_metadata 108 | --------------------- 109 | 110 | ``bool`` 111 | 112 | Setting this option causes metadata to be written so that the written 113 | objects can be read back into Python accurately. As HDF5 does not 114 | natively support many Python data types (essentially only Numpy types), 115 | most Python data types have to be converted before being written. If 116 | metadata isn't also written, the data cannot be read back to its 117 | original form and will instead be read back as the Python type most 118 | closely resembling how it is stored, which will be a Numpy type of some 119 | sort. 120 | 121 | .. note:: 122 | 123 | This option is especially important when we consider that when 124 | ``matlab_compatible == True``, many additional conversions and 125 | manipulations will be done to the data that cannot be reversed 126 | without this metadata. 127 | 128 | matlab_compatible 129 | ----------------- 130 | 131 | ``bool`` 132 | 133 | Setting this option causes the writing of HDF5 files to be done in a way 134 | compatible with MATLAB v7.3 MAT files.
This consists of writing some 135 | file metadata so that MATLAB recognizes the file, adding specific 136 | metadata to every stored object so that MATLAB recognizes them, and 137 | transforming the data to be in the form that MATLAB expects for certain 138 | types (for example, MATLAB expects everything to be at least a 2D array 139 | and strings to be stored in UTF-16 but with no doublets). 140 | 141 | .. note:: 142 | 143 | There are many individual small options in the :py:class:`Options` 144 | class that this option sets to specific values. Setting 145 | ``matlab_compatible`` automatically sets them, while changing their 146 | values to something else automatically turns ``matlab_compatible`` 147 | off. 148 | 149 | action_for_matlab_incompatible 150 | ------------------------------ 151 | 152 | {``'ignore'``, ``'discard'``, ``'error'``} 153 | 154 | The action to perform when doing MATLAB compatibility 155 | (``matlab_compatible == True``) but a type 156 | being written is not MATLAB compatible. The actions are to write the 157 | data anyways ('ignore'), don't write the incompatible data ('discard'), 158 | or throw a :py:exc:`exceptions.TypeNotMatlabCompatibleError` 159 | exception. The default is 'error'. 160 | 161 | oned_as 162 | ------- 163 | 164 | {'row', 'column'} 165 | 166 | This option is only actually relevant when 167 | ``matlab_compatible == True``. MATLAB only supports 2D and higher 168 | dimensionality arrays, but Numpy supports 1D arrays. So, 1D arrays have 169 | to be made 2 dimensional making them either into row vectors or column 170 | vectors. This option sets which they become when imported into MATLAB. 171 | 172 | 173 | compress 174 | -------- 175 | 176 | .. versionadded:: 0.1.9 177 | 178 | ``bool`` 179 | 180 | Whether to use compression when writing data. Enabled (``True``) by default. See :ref:`Compression` for more information. 
181 | 182 | 183 | Convenience Functions for MATLAB MAT Files 184 | ========================================== 185 | 186 | Two functions are provided for reading and writing to MATLAB MAT files 187 | in a convenient way. They are :py:func:`savemat` and :py:func:`loadmat`, 188 | which are modelled after the SciPy functions of the same name 189 | (:py:func:`scipy.io.savemat` and :py:func:`scipy.io.loadmat`), which 190 | work with non-HDF5 based MAT files. They take not only the same options, 191 | but dispatch calls automatically to the SciPy versions when instructed 192 | to write to a non-HDF5 based MAT file, or read a MAT file that is not 193 | HDF5 based. SciPy must be installed to take advantage of this 194 | functionality. 195 | 196 | :py:func:`savemat` takes a ``dict`` having data (values) and the names 197 | to give each piece of data (keys), and writes them to a MATLAB 198 | compatible MAT file. The `format` keyword sets the MAT file format, with 199 | ``'7.3'`` being the HDF5 based format supported by this package and 200 | ``'5'`` and ``'4'`` being the non HDF5 based formats supported by 201 | SciPy. If you want the data to be able to be read accurately back into 202 | Python, you should set ``store_python_metadata=True``. Writing a couple 203 | variables to a file looks like :: 204 | 205 | hdf5storage.savemat('data.mat', {'foo': 2.3, 'bar': (1+2j)}, format='7.3', oned_as='column', store_python_metadata=True) 206 | 207 | Then, to read variables back, we can either explicitly name the 208 | variables we want :: 209 | 210 | out = hdf5storage.loadmat('data.mat', variable_names=['foo', 'bar']) 211 | 212 | or grab all variables by either not giving the `variable_names` option 213 | or setting it to ``None``. 
:: 214 | 215 | out = hdf5storage.loadmat('data.mat') 216 | 217 | 218 | Example: Write And Readback Including Different Metadata 219 | ======================================================== 220 | 221 | Making The Data 222 | --------------- 223 | 224 | Make a ``dict`` containing many different types in it that we want to 225 | store to disk in an HDF5 file. The initialization method depends on 226 | the Python version. 227 | 228 | .. versionchanged:: 0.2 229 | The ``dict`` keys no longer have to all be ``str`` (the unicode 230 | string type). However, if python metadata is not included, other 231 | string type keys can get converted to ``str`` when read back or one 232 | reads back a ``dict`` with two fields, ``keys`` and ``values``, 233 | holding all the keys and values if at least one key is not a string 234 | type. 235 | 236 | >>> import numpy as np 237 | >>> import hdf5storage 238 | >>> a = {'a': True, 239 | ... 'b': None, 240 | ... 'c': 2, 241 | ... 'd': -3.2, 242 | ... 'e': (1-2.3j), 243 | ... 'f': 'hello', 244 | ... 'g': b'goodbye', 245 | ... 'h': ['list', 'of', 'stuff', [30, 2.3]], 246 | ... 'i': np.zeros(shape=(2,), dtype=[('bi', 'uint8')]), 247 | ... 'j':{'aa': np.bool_(False), 248 | ... 'bb': np.uint8(4), 249 | ... 'cc': np.uint32([70, 8]), 250 | ... 'dd': np.int32([]), 251 | ... 'ee': np.float32([[3.3], [5.3e3]]), 252 | ... 'ff': np.complex128([[3.4, 3], [9+2j, 0]]), 253 | ... 'gg': np.array(['one', 'two', 'three'], dtype='str'), 254 | ... 'hh': np.bytes_(b'how many?'), 255 | ... 'ii': np.object_(['text', np.int8([1, -3, 0])])}} 256 | 257 | Using No Metadata 258 | ----------------- 259 | 260 | Write it to a file at the ``'/a'`` directory, but include no Python or 261 | MATLAB metadata. Then, read it back and notice that many objects come 262 | back quite different from what was written. Namely, everything was 263 | converted to Numpy types. This even included the dictionaries which were 264 | converted to structured ``np.ndarray``s. 
This happens because all 265 | other types (other than ``dict``) must be converted to these types 266 | before being written to the HDF5 file, and without metadata, the 267 | conversion cannot be reversed (while ``dict`` isn't converted, it has 268 | the same form and thus cannot be extracted reversibly). 269 | 270 | >>> hdf5storage.write(data=a, path='/a', filename='data.h5', 271 | ... store_python_metadata=False, 272 | ... matlab_compatible=False) 273 | >>> hdf5storage.read(path='/a', filename='data.h5') 274 | array([ (True, 275 | [], 276 | 2, 277 | -3.2, 278 | (1-2.3j), 279 | b'hello', 280 | b'goodbye', 281 | [array(b'list', dtype='|S4'), 282 | array(b'of', dtype='|S2'), 283 | array(b'stuff', dtype='|S5'), 284 | array([array(30), array(2.3)], dtype=object)], 285 | [(0,), (0,)], 286 | [(False, 287 | 4, 288 | array([70, 8], dtype=uint32), 289 | array([], dtype=int32), 290 | array([[ 3.29999995e+00], [ 5.30000000e+03]], dtype=float32), 291 | array([[ 3.4+0.j, 3.0+0.j], [ 9.0+2.j, 0.0+0.j]]), 292 | array([111, 110, 101, 0, 0, 116, 119, 111, 0, 0, 116, 104, 114, 293 | 101, 101], dtype=uint32), 294 | b'how many?', 295 | array([array(b'text', dtype='|S4'), 296 | array([ 1, -3, 0], dtype=int8)], 297 | dtype=object))])], 298 | dtype=[('a', '?'), 299 | ('b', '>> hdf5storage.write(data=a, path='/a', filename='data_typeinfo.h5', 326 | ... store_python_metadata=True, 327 | ... 
matlab_compatible=False) 328 | >>> hdf5storage.read(path='/a', filename='data_typeinfo.h5') 329 | {'a': True, 330 | 'b': None, 331 | 'c': 2, 332 | 'd': -3.2, 333 | 'e': (1-2.3j), 334 | 'f': 'hello', 335 | 'g': b'goodbye', 336 | 'h': ['list', 'of', 'stuff', [30, 2.3]], 337 | 'i': array([(0,), (0,)], 338 | dtype=[('bi', 'u1')]), 339 | 'j': {'aa': False, 340 | 'bb': 4, 341 | 'cc': array([70, 8], dtype=uint32), 342 | 'dd': array([], dtype=int32), 343 | 'ee': array([[ 3.29999995e+00], 344 | [ 5.30000000e+03]], dtype=float32), 345 | 'ff': array([[ 3.4+0.j, 3.0+0.j], 346 | [ 9.0+2.j, 0.0+0.j]]), 347 | 'gg': array(['one', 'two', 'three'], 348 | dtype='>> hdf5storage.write(data=a, path='/a', filename='data.mat', 366 | ... store_python_metadata=False, 367 | ... matlab_compatible=True) 368 | >>> hdf5storage.read(path='/a', filename='data.mat') 369 | array([ ([[True]], 370 | [[]], 371 | [[2]], 372 | [[-3.2]], 373 | [[(1-2.3j)]], 374 | [['hello']], 375 | [['goodbye']], 376 | [[array([['list']], dtype='>> hdf5storage.write(data=a, path='/a', filename='data_typeinfo.mat', 424 | ... store_python_metadata=True, 425 | ... 
matlab_compatible=True) 426 | >>> hdf5storage.read(path='/a', filename='data_typeinfo.mat') 427 | {'a': True, 428 | 'b': None, 429 | 'c': 2, 430 | 'd': -3.2, 431 | 'e': (1-2.3j), 432 | 'f': 'hello', 433 | 'g': b'goodbye', 434 | 'h': ['list', 'of', 'stuff', [30, 2.3]], 435 | 'i': array([(0,), (0,)], 436 | dtype=[('bi', 'u1')]), 437 | 'j': {'aa': False, 438 | 'bb': 4, 439 | 'cc': array([70, 8], dtype=uint32), 440 | 'dd': array([], dtype=int32), 441 | 'ee': array([[ 3.29999995e+00], 442 | [ 5.30000000e+03]], dtype=float32), 443 | 'ff': array([[ 3.4+0.j, 3.0+0.j], 444 | [ 9.0+2.j, 0.0+0.j]]), 445 | 'gg': array(['one', 'two', 'three'], 446 | dtype=' 2**63 or b < -(2**63 - 1): 174 | assert_equal_none_format(a, np.bytes_(b), options) 175 | else: 176 | assert_equal_none_format(a, np.int64(b), options) 177 | else: 178 | assert_equal_none_format(a, np.array(b)[()], options) 179 | elif isinstance(b, np.recarray): 180 | assert_equal_none_format(a, b.view(np.ndarray), 181 | options) 182 | else: 183 | if b.dtype.name != 'object': 184 | if b.dtype.char in ('U', 'S'): 185 | if b.dtype.char == 'S' and b.shape == tuple() \ 186 | and len(b) == 0: 187 | assert_equal(a, \ 188 | np.zeros(shape=tuple(), dtype=b.dtype.char), \ 189 | options) 190 | elif b.dtype.char == 'U': 191 | if b.shape == tuple() and len(b) == 0: 192 | c = np.uint32(()) 193 | else: 194 | c = np.atleast_1d(b).view(np.uint32) 195 | assert_equal_nose(a.dtype, c.dtype) 196 | assert_equal_nose(a.shape, c.shape) 197 | npt.assert_equal(a, c) 198 | else: 199 | assert_equal_nose(a.dtype, b.dtype) 200 | assert_equal_nose(a.shape, b.shape) 201 | npt.assert_equal(a, b) 202 | else: 203 | # Check that the dtype's shape matches. 204 | assert_equal_nose(a.dtype.shape, b.dtype.shape) 205 | 206 | # Now, if b.shape is just all ones, then a.shape will 207 | # just be (1,). Otherwise, we need to compare the shapes 208 | # directly. Also, dimensions need to be squeezed before 209 | # comparison in this case. 
                # NOTE(review): this is the continuation of
                # assert_equal_none_format; the function's head lies
                # before this chunk.
                assert_equal_nose(np.prod(a.shape), np.prod(b.shape))
                if a.shape != b.shape:
                    assert_equal_nose(np.prod(b.shape), 1)
                    assert_equal_nose(a.shape, (1, ))
                if np.prod(a.shape) == 1:
                    a = np.squeeze(a)
                    b = np.squeeze(b)
                # If there was a null in the dtype or the dtype of one
                # of its fields (or subfields) has a 0 in its shape,
                # then it was written as a Group so the field order
                # could have changed.
                has_zero_shape = False
                if b.dtype.names is not None:
                    # Walk the dtype tree (fields, subfields, and base
                    # dtypes) looking for any 0 in a shape.
                    parts = [b.dtype]
                    while 0 != len(parts):
                        part = parts.pop()
                        if 0 in part.shape:
                            has_zero_shape = True
                        if part.names is not None:
                            parts.extend([v[0] for v
                                          in part.fields.values()])
                        if part.base != part:
                            parts.append(part.base)
                if b.dtype.names is not None \
                        and ('\\x00' in str(b.dtype) \
                        or has_zero_shape):
                    # Written as a Group: compare field sets and then
                    # each field individually (order may differ).
                    assert_equal_nose(a.shape, b.shape)
                    assert_equal_nose(set(a.dtype.names),
                                      set(b.dtype.names))
                    for n in b.dtype.names:
                        assert_equal_none_format(a[n], b[n], options)
                else:
                    assert_equal_nose(a.dtype, b.dtype)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, b)
        else:
            # If the original is structured, it is possible that the
            # fields got out of order, in which case the dtype won't
            # quite match. It will need to be checked just to make sure
            # all pieces are there. Otherwise, the dtypes can be
            # directly compared.
            if b.dtype.fields is None:
                assert_equal_nose(a.dtype, b.dtype)
            else:
                assert_equal_nose(dict(a.dtype.fields),
                                  dict(b.dtype.fields))
            assert_equal_nose(a.shape, b.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_none_format(a[index], b[index], options)


def assert_equal_matlab_format(a, b, options=None):
    # Compares a and b for equality. b is always the original. If they
    # are dictionaries, a must be a structured ndarray and they must
    # have the same set of keys, after which their values must all be
    # compared. If they are a collection type (list, tuple, set,
    # frozenset, or deque), then the comparison must be made with b
    # converted to an object array. If the original is not a numpy type
    # (isn't or doesn't inherit from np.generic or np.ndarray), then it
    # is a matter of converting it to the appropriate numpy
    # type. Otherwise, both are supposed to be numpy types. For object
    # arrays, each element must be iterated over to be compared. Then,
    # if it isn't a string type, then they must have the same dtype,
    # shape, and all elements. All strings are converted to numpy.str_
    # on read unless they were stored as a numpy.bytes_ due to having
    # non-ASCII characters. If it is empty, it has shape (1, 0). A
    # numpy.str_ has all of its strings per row compacted together. A
    # numpy.bytes_ string has to have the same thing done, but then it
    # needs to be converted up to UTF-32 and to numpy.str_ through
    # uint32. Big longs and ints end up getting converted to UTF-16
    # uint16's when written and read back as UTF-32 numpy.unicode_.
    #
    # In all cases, we expect things to be at least two dimensional
    # arrays.
    if type(b) == dict or type(b) == collections.OrderedDict:
        assert_equal_nose(type(a), np.ndarray)
        assert a.dtype.names is not None

        # Determine if any of the keys could not be stored as str. If
        # they all can be, then the dtype field names should be the
        # keys. Otherwise, they should be 'keys' and 'values'.
        all_str_keys = True
        tp_str = str
        tp_bytes = bytes
        converters = {tp_str: lambda x: x,
                      tp_bytes: lambda x: x.decode('UTF-8'),
                      np.bytes_:
                      lambda x: bytes(x).decode('UTF-8'),
                      np.unicode_: lambda x: str(x)}
        tp_conv = lambda x: converters[type(x)](x)
        tp_conv_str = lambda x: tp_conv(x)
        tps = tuple(converters.keys())
        for k in b.keys():
            if type(k) not in tps:
                all_str_keys = False
                break
            try:
                # Only checking convertibility here; the converted
                # value itself is unused.
                k_str = tp_conv(k)
            except:
                # NOTE(review): bare except — presumably meant to
                # catch UnicodeDecodeError/KeyError; confirm.
                all_str_keys = False
                break
        if all_str_keys:
            assert_equal_nose(set(a.dtype.names),
                              set([tp_conv_str(k)
                                   for k in b.keys()]))
            for k in b:
                assert_equal_matlab_format(a[tp_conv_str(k)][0],
                                           b[k], options)
        else:
            # Keys and values are stored under two dedicated fields
            # whose names come from the options.
            names = (options.dict_like_keys_name,
                     options.dict_like_values_name)
            assert_equal_nose(set(a.dtype.names), set(names))
            keys = a[names[0]][0]
            values = a[names[1]][0]
            assert_equal_matlab_format(keys, tuple(b.keys()), options)
            assert_equal_matlab_format(values, tuple(b.values()),
                                       options)
    elif type(b) in (list, tuple, set, frozenset, collections.deque):
        # Compare against b converted to an object ndarray.
        b_conv = np.zeros(dtype='object', shape=(len(b), ))
        for i, v in enumerate(b):
            b_conv[i] = v
        assert_equal_matlab_format(a, b_conv, options)
    elif not isinstance(b, (np.generic, np.ndarray)):
        if b is None:
            # It should be np.zeros(shape=(1, 0), dtype='float64')
            assert_equal_nose(type(a), np.ndarray)
            assert_equal_nose(a.dtype, np.dtype('float64'))
            assert_equal_nose(a.shape, (1, 0))
        elif isinstance(b, (bytes, str, bytearray)):
            if len(b) == 0:
                assert_equal(a, np.zeros(shape=(1, 0), dtype='U'),
                             options)
            elif isinstance(b, (bytes, bytearray)):
                try:
                    c = np.unicode_(b.decode('ASCII'))
                except:
                    # Non-ASCII bytes are kept as numpy.bytes_.
                    c = np.bytes_(b)
                assert_equal(a, np.atleast_2d(c), options)
            else:
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
        elif type(b) == int:
            # Ints outside the int64 range get stored as strings.
            if b > 2**63 or b < -(2**63 - 1):
                assert_equal(a, np.atleast_2d(np.unicode_(b)), options)
            else:
                assert_equal(a, np.atleast_2d(np.int64(b)), options)
        else:
            assert_equal(a, np.atleast_2d(np.array(b)), options)
    else:
        if b.dtype.name != 'object':
            if b.dtype.char in ('U', 'S'):
                if len(b) == 0 and (b.shape == tuple() \
                        or b.shape == (0, )):
                    # Empty strings read back as a (1, 0) 'U' array.
                    assert_equal(a, np.zeros(shape=(1, 0),
                                             dtype='U'), options)
                elif b.dtype.char == 'U':
                    # Compact all strings per row into one wide 'U'
                    # string (itemsize is 4 bytes per character).
                    c = np.atleast_1d(b)
                    c = np.atleast_2d(c.view(np.dtype('U' \
                        + str(c.shape[-1]*c.dtype.itemsize//4))))
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                elif b.dtype.char == 'S':
                    c = np.atleast_1d(b).view(np.ndarray)
                    # All-ASCII bytes get converted up to UTF-32
                    # ('U') through uint32; non-ASCII stays bytes.
                    if np.all(c.view(np.uint8) < 128):
                        c = c.view(np.dtype('S' \
                            + str(c.shape[-1]*c.dtype.itemsize)))
                        # NOTE(review): the next uint8 view is
                        # immediately repeated inside np.uint32 —
                        # apparent leftover, left as-is.
                        c = c.view(np.dtype('uint8'))
                        c = np.uint32(c.view(np.dtype('uint8')))
                        c = c.view(np.dtype('U' + str(c.shape[-1])))
                    c = np.atleast_2d(c)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    npt.assert_equal(a, c)
                    # NOTE(review): stray no-op pass, left as-is.
                    pass
                else:
                    c = np.atleast_2d(b)
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
            else:
                c = np.atleast_2d(b)
                # An empty complex number gets turned into a real
                # number when it is stored.
                if np.prod(c.shape) == 0 \
                        and b.dtype.name.startswith('complex'):
                    c = np.real(c)
                # If it is structured, check that the field names are
                # the same, in the same order, and then go through them
                # one by one. Otherwise, make sure the dtypes and shapes
                # are the same before comparing all values.
                if b.dtype.names is None and a.dtype.names is None:
                    assert_equal_nose(a.dtype, c.dtype)
                    assert_equal_nose(a.shape, c.shape)
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore', RuntimeWarning)
                        npt.assert_equal(a, c)
                else:
                    assert a.dtype.names is not None
                    assert b.dtype.names is not None
                    assert_equal_nose(set(a.dtype.names),
                                      set(b.dtype.names))
                    # The ordering of fields must be preserved if the
                    # MATLAB_fields attribute could be used, which can
                    # only be done if there are no non-ascii characters
                    # in any of the field names.
                    allfields = ''.join(b.dtype.names)
                    if np.all(np.array([ord(ch) < 128 \
                            for ch in allfields])):
                        assert_equal_nose(a.dtype.names, b.dtype.names)
                    a = a.flatten()
                    b = b.flatten()
                    for k in b.dtype.names:
                        for index, x in np.ndenumerate(a):
                            assert_equal_from_matlab(a[k][index],
                                                     b[k][index],
                                                     options)
        else:
            # Object arrays: compare element by element.
            c = np.atleast_2d(b)
            assert_equal_nose(a.dtype, c.dtype)
            assert_equal_nose(a.shape, c.shape)
            for index, x in np.ndenumerate(a):
                assert_equal_matlab_format(a[index], c[index], options)


def assert_equal_from_matlab(a, b, options=None):
    # Compares a and b for equality. They are all going to be numpy
    # types. hdf5storage and scipy behave differently when importing
    # arrays as to whether they are 2D or not, so we will make them all
    # at least 2D regardless. For strings, the two packages produce
    # transposed results of each other, so one just needs to be
    # transposed. For object arrays, each element must be iterated over
    # to be compared. For structured ndarrays, their fields need to be
    # compared and then they can be compared element and field
    # wise. Otherwise, they can be directly compared. Note, the type is
    # often converted by scipy (or on route to the file before scipy
    # gets it), so comparisons are done by value, which is not perfect.
    a = np.atleast_2d(a)
    b = np.atleast_2d(b)
    if a.dtype.char == 'U':
        a = a.T
    if b.dtype.name == 'object':
        a = a.flatten()
        b = b.flatten()
        for index, x in np.ndenumerate(a):
            assert_equal_from_matlab(a[index], b[index], options)
    elif b.dtype.names is not None or a.dtype.names is not None:
        assert a.dtype.names is not None
        assert b.dtype.names is not None
        assert set(a.dtype.names) == set(b.dtype.names)
        a = a.flatten()
        b = b.flatten()
        for k in b.dtype.names:
            for index, x in np.ndenumerate(a):
                assert_equal_from_matlab(a[k][index], b[k][index],
                                         options)
    else:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', RuntimeWarning)
            npt.assert_equal(a, b)
--------------------------------------------------------------------------------