├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── pyproject.toml ├── requirements_dev.txt ├── setup.cfg ├── setup.py ├── split_dataset ├── __init__.py ├── blocks.py └── split_dataset.py └── tests ├── __init__.py ├── test_blocks.py └── test_split_dataset.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * Split Dataset version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: push 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - name: Set up Python 11 | uses: actions/setup-python@v2 12 | with: 13 | python-version: 3.8 14 | - name: Install dependencies 15 | run: | 16 | python -m pip install --upgrade pip 17 | pip install .[dev] 18 | - name: Lint 19 | run: | 20 | black . --check 21 | flake8 . 22 | isort . 
--check 23 | test: 24 | needs: lint 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v2 28 | - name: Set up Python 29 | uses: actions/setup-python@v2 30 | with: 31 | python-version: 3.8 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install .[dev] 36 | - name: Test 37 | run: pytest --cov 38 | - name: Coveralls 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | run: | 42 | pip install coveralls 43 | coveralls --service=github 44 | 45 | deploy: 46 | needs: test 47 | runs-on: ubuntu-latest 48 | if: startsWith(github.ref, 'refs/tags/v') 49 | steps: 50 | - uses: actions/checkout@v2 51 | - name: Set up Python 52 | uses: actions/setup-python@v1 53 | with: 54 | python-version: "3.x" 55 | - name: Install dependencies 56 | run: | 57 | python -m pip install --upgrade pip 58 | pip install -U setuptools setuptools_scm wheel twine 59 | - name: Build and publish 60 | env: 61 | TWINE_USERNAME: __token__ 62 | TWINE_PASSWORD: ${{ secrets.TWINE_API_KEY }} 63 | run: | 64 | git tag 65 | python setup.py sdist bdist_wheel 66 | twine upload dist/* 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | # Idea 108 | .idea/ 109 | \.idea/ 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | A package for HDF5-based chunked arrays 5 | Copyright (C) 2020 Vilim Stih & Luigi Petrucco @portugueslab 6 | 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | 20 | Also add information on how to contact you by electronic and paper mail. 21 | 22 | You should also get your employer (if you work as a programmer) or school, 23 | if any, to sign a "copyright disclaimer" for the program, if necessary. 24 | For more information on this, and how to apply and follow the GNU GPL, see 25 | . 26 | 27 | The GNU General Public License does not permit incorporating your program 28 | into proprietary programs. If your program is a subroutine library, you 29 | may consider it more useful to permit linking proprietary applications with 30 | the library. If this is what you want to do, use the GNU Lesser General 31 | Public License instead of this License. But first, please read 32 | . 
33 | 34 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | 4 | recursive-include tests * 5 | recursive-exclude * __pycache__ 6 | recursive-exclude * *.py[co] 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Python Version](https://img.shields.io/pypi/pyversions/split_dataset.svg)](https://pypi.org/project/split_dataset) 3 | [![PyPI](https://img.shields.io/pypi/v/split_dataset.svg)]( 4 | https://pypi.python.org/pypi/split_dataset) 5 | [![Tests](https://img.shields.io/github/workflow/status/portugueslab/split_dataset/tests)]( 6 | https://github.com/portugueslab/split_dataset/actions) 7 | [![Coverage Status](https://coveralls.io/repos/github/portugueslab/split_dataset/badge.svg?branch=master)](https://coveralls.io/github/portugueslab/split_dataset?branch=master) 8 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) 9 | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 10 | 11 | 12 | 13 | A minimal package for saving and reading large HDF5-based chunked arrays. 14 | 15 | This package has been developed in the [`Portugues lab`](http://www.portugueslab.com) for volumetric calcium imaging data. `split_dataset` is extensively used in the calcium imaging analysis package [`fimpy`](https://github.com/portugueslab/fimpy); the microscope control libraries [`sashimi`](https://github.com/portugueslab/sashimi) and [`brunoise`](https://github.com/portugueslab/brunoise) save files as split datasets. 16 | 17 | [`napari-split-dataset`](https://github.com/portugueslab/napari-split-dataset) supports the visualization of SplitDatasets in `napari`. 18 | 19 | ## Why use split datasets? 20 | Split datasets are numpy-like arrays saved over multiple h5 files. The concept of split datasets is similar to that of e.g. [zarr arrays](https://zarr.readthedocs.io/en/stable/); however, relying on h5 files allows for partial reading even within the same file, which is crucial for visualizing volumetric time series, the main application `split_dataset` has been developed for (see [this discussion](https://github.com/zarr-developers/zarr-python/issues/521) on the limitations of zarr arrays). 21 | 22 | # Structure of a split dataset 23 | A split dataset is a folder containing multiple, numbered h5 files (one file per chunk) and a metadata json file with information on the shape of the full dataset and of its chunks. 24 | The h5 files are saved using the [flammkuchen](https://github.com/portugueslab/flammkuchen) library (formerly [deepdish](https://deepdish.readthedocs.io/en/latest/)). Each file contains a dictionary with the data under the `stack` keyword. 25 | 26 | `SplitDataset` objects can then be instantiated from the dataset path, and numpy-style indexing can be used to load data as numpy arrays. Any number of dimensions and block sizes are supported in principle; the package has been used mainly with 3D and 4D arrays.
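For illustration, a split dataset folder could look like the sketch below (the folder name and shapes are made-up examples); individual chunks can also be opened directly with `flammkuchen`:

```python
# Hypothetical layout of a split dataset folder:
#
#   my_dataset/
#   ├── stack_metadata.json   # shape of the full dataset, shape of the blocks, etc.
#   ├── 0000.h5               # one chunk per file
#   ├── 0001.h5
#   └── 0002.h5

import flammkuchen as fl

# Each chunk stores its data under a "stack" key (older files may use e.g. "stack_4D"),
# so a single chunk can be inspected without going through a SplitDataset object:
chunk = fl.load("my_dataset/0000.h5", "/stack")
print(chunk.shape)
```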
27 | 28 | 29 | 30 | ## Minimal example 31 | ```python 32 | # Open an existing split dataset via a SplitDataset object: 33 | from split_dataset import SplitDataset 34 | ds = SplitDataset(path_to_dataset) 35 | 36 | # Retrieve data in an interval: 37 | data_array = ds[n_start:n_end, :, :, :] 38 | ``` 39 | 40 | ## Creating split datasets 41 | New split datasets can be created with the `split_dataset.save_to_split_dataset` function, provided that the original data is fully loaded in memory. Alternatively, e.g. for time acquisitions, a split dataset can be saved one chunk at a time: it is enough to save correctly formatted .h5 files with `flammkuchen`, together with the corresponding json metadata file describing the full split dataset shape (this is [what happens in sashimi](https://github.com/portugueslab/sashimi/blob/01046f2f24483ab702be379843a1782ababa7d2d/sashimi/processes/streaming_save.py#L186)). 42 | 43 | 44 | # TODO 45 | * provide utilities for partial saving of split datasets 46 | * support for more advanced indexing (step and vector indexing) 47 | * support for cropping a `SplitDataset` 48 | * support for resolution and frequency metadata 49 | 50 | 51 | # History 52 | 53 | ### 0.4.0 (2021-03-23) 54 | * Added support to use a `SplitDataset` as data in a `napari` layer. 55 | 56 | ... 57 | 58 | ### 0.1.0 (2020-05-06) 59 | * First release on PyPI. 60 | 61 | 62 | Credits 63 | ------- 64 | 65 | Part of this package was inspired by [Cookiecutter](https://github.com/audreyr/cookiecutter) and [this](https://github.com/audreyr/cookiecutter-pypackage) template. 66 | 67 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | target-version = ['py36', 'py37', 'py38'] 3 | skip-string-normalization = false 4 | exclude = ''' 5 | ( 6 | /( 7 | \.eggs 8 | | \.git 9 | | \.hg 10 | | \.mypy_cache 11 | | \.tox 12 | | \.venv 13 | | _build 14 | | buck-out 15 | | build 16 | | dist 17 | | examples 18 | )/ 19 | ) 20 | ''' 21 | 22 | [tool.isort] 23 | multi_line_output = 3 24 | include_trailing_comma = true 25 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pip 2 | bump2version 3 | wheel 4 | flake8 5 | coverage 6 | Sphinx 7 | twine 8 | black 9 | isort 10 | 11 | pytest 12 | pytest-runner 13 | pytest-cov 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.4.3 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version="{current_version}" 8 | replace = version="{new_version}" 9 | 10 | [bumpversion:file:split_dataset/__init__.py] 11 | search = __version__ = "{current_version}" 12 | replace = __version__ = "{new_version}" 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | ignore = E203, W503 19 | max-line-length = 88 20 | exclude = __init__.py 21 | 22 | [aliases] 23 | test = pytest 24 | 25 | [tool:pytest] 26 | collect_ignore = ['setup.py'] 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """The setup script.""" 4
| 5 | from setuptools import find_packages, setup 6 | 7 | with open("README.md") as readme_file: 8 | readme = readme_file.read() 9 | 10 | 11 | requirements = ["flammkuchen", "numpy"] 12 | 13 | with open("requirements_dev.txt") as f: 14 | requirements_dev = f.read().splitlines() 15 | 16 | setup( 17 | author="Vilim Stih & Luigi Petrucco @portugueslab", 18 | author_email="luigi.petrucco@gmail.com", 19 | python_requires=">=3.5", 20 | classifiers=[ 21 | "Development Status :: 2 - Pre-Alpha", 22 | "Intended Audience :: Developers", 23 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 24 | "Natural Language :: English", 25 | "Programming Language :: Python :: 3", 26 | "Programming Language :: Python :: 3.5", 27 | "Programming Language :: Python :: 3.6", 28 | "Programming Language :: Python :: 3.7", 29 | "Programming Language :: Python :: 3.8", 30 | ], 31 | description="A package for HDF5-based chunked arrays", 32 | install_requires=requirements, 33 | extras_require=dict(dev=requirements_dev), 34 | license="GNU General Public License v3", 35 | long_description=readme, 36 | long_description_content_type="text/markdown", 37 | include_package_data=True, 38 | keywords="split_dataset", 39 | name="split_dataset", 40 | packages=find_packages(include=["split_dataset", "split_dataset.*"]), 41 | test_suite="tests", 42 | url="https://github.com/portugueslab/split_dataset", 43 | version="0.4.3", 44 | zip_safe=False, 45 | ) 46 | -------------------------------------------------------------------------------- /split_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for Split Dataset.""" 2 | 3 | __author__ = """Vilim Stih & Luigi Petrucco @portugueslab""" 4 | __version__ = "0.4.3" 5 | 6 | from split_dataset.blocks import Blocks 7 | from split_dataset.split_dataset import ( 8 | EmptySplitDataset, 9 | SplitDataset, 10 | save_to_split_dataset, 11 | ) 12 | -------------------------------------------------------------------------------- /split_dataset/blocks.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from typing import Optional, Tuple, Union 3 | 4 | import numpy as np 5 | 6 | 7 | def _drop_ith(xs, dim_to_drop): 8 | return tuple(x for i, x in enumerate(xs) if i != dim_to_drop) 9 | 10 | 11 | class BlockIterator: 12 | def __init__(self, blocks, slices=True): 13 | self.blocks = blocks 14 | self.current_block = 0 15 | self.slices = slices 16 | 17 | def __iter__(self): 18 | return self 19 | 20 | def __next__(self): 21 | if self.current_block == self.blocks.n_blocks: 22 | raise StopIteration 23 | else: 24 | idx = self.blocks.linear_to_cartesian(self.current_block) 25 | self.current_block += 1 26 | if self.slices: 27 | return ( 28 | idx, 29 | tuple( 30 | slice(s, e) 31 | for s, e in zip( 32 | self.blocks.block_starts[idx], self.blocks.block_ends[idx] 33 | ) 34 | ), 35 | ) 36 | else: 37 | return ( 38 | idx, 39 | tuple( 40 | (s, e) 41 | for s, e in zip( 42 | self.blocks.block_starts[idx], self.blocks.block_ends[idx] 43 | ) 44 | ), 45 | ) 46 | 47 | 48 | def _make_iterable(input_var, n_rep=1): 49 | try: 50 | iter(input_var) 51 | return input_var 52 | except TypeError: 53 | return (input_var,) * n_rep 54 | 55 | 56 | class Blocks: 57 | """ 58 | Blocks have two indexing systems: 59 | - linear: 60 | - cartesian: gives the position of the block in the general block tiling. 
61 | """ 62 | 63 | def __init__( 64 | self, 65 | shape_full: Tuple, 66 | shape_block: Optional[Tuple] = None, 67 | dim_split: Optional[int] = None, 68 | blocks_number: Optional[int] = None, 69 | padding: Union[int, Tuple] = 0, 70 | crop: Optional[Tuple] = None, 71 | ): 72 | """Make a block structure. It can be defined using block size or number 73 | of blocks (number of blocks if specified will overwrite size). 74 | For example, one split over the 2nd and 3rd dimensions of a 100x20x40x10 block 75 | can equivalently defined as: 76 | BlockSplitter((100,20,40,10), block_size=(10,10,20,30)) 77 | BlockSplitter((100,20,40,10), blocks_number=(1, 2, 2, 1)) 78 | BlockSplitter((100,20,40,10), dim_split=(1,2), block_size=(10,20)) 79 | BlockSplitter((100,20,40,10), dim_split=(1,2), blocks_number=(2,2)) 80 | 81 | :param shape_full: dimensions of the whole stack 82 | :param dim_split: dimension along which to split (if undefined, start 83 | counting from the first dimension) 84 | :param shape_block: size of blocks along each dimension 85 | :param blocks_number: number of blocks along each dimension 86 | :param padding: amount of overlap between blocks 87 | :param crop: iterable of tuples giving the amount of cropping in 88 | each dimension 89 | """ 90 | self._shape_full = shape_full 91 | 92 | if crop is None: 93 | crop = ((0, 0),) * len(shape_full) if shape_full is not None else None 94 | 95 | self._crop = crop 96 | self.shape_cropped = shape_full 97 | 98 | self.starts = None 99 | self.block_starts = None 100 | self.block_ends = None 101 | 102 | self.update_stack_dims() 103 | 104 | # Define shape block and padding allowing multiple input types. 105 | 106 | # Initialize block size as full stack size and 0 padding: 107 | self._shape_block = list(self.shape_cropped) 108 | self._padding = [0 for _ in range(len(self.shape_cropped))] 109 | 110 | if not dim_split: 111 | dim_split = [j for j, d in enumerate(shape_block) if d is not None] 112 | 113 | # Make tuple if single numbers 114 | self.dim_split = _make_iterable(dim_split) 115 | shape_block = _make_iterable(shape_block, max(self.dim_split) + 1) 116 | pad_amount = _make_iterable(padding, max(self.dim_split) + 1) 117 | 118 | if blocks_number: # define from required number of blocks 119 | shape_block = [] 120 | blocks_number = _make_iterable(blocks_number, len(self.dim_split)) 121 | for dim, n in zip(self.dim_split, blocks_number): 122 | shape_block.append(int(np.ceil(self.shape_cropped[dim] / n))) 123 | 124 | for dim in self.dim_split: 125 | self._shape_block[dim] = min(shape_block[dim], self.shape_cropped[dim]) 126 | self._padding[dim] = pad_amount[dim] 127 | 128 | # set property: 129 | self.shape_block = tuple(self._shape_block) 130 | 131 | @property 132 | def n_blocks(self): 133 | return np.product(self.block_starts.shape[:-1]) 134 | 135 | @property 136 | def n_dims(self): 137 | return len(self.shape_cropped) 138 | 139 | @property 140 | def shape_full(self): 141 | return self._shape_full 142 | 143 | @shape_full.setter 144 | def shape_full(self, value): 145 | self._shape_full = value 146 | self.update_stack_dims() 147 | self.update_block_structure() 148 | 149 | @property 150 | def crop(self): 151 | return self._crop 152 | 153 | @crop.setter 154 | def crop(self, value): 155 | if value is None: 156 | value = ((0, 0),) * len(self.shape_full) 157 | self._crop = value 158 | self.update_stack_dims() 159 | self.update_block_structure() 160 | 161 | @property 162 | def shape_block(self): 163 | return self._shape_block 164 | 165 | @shape_block.setter 166 | def 
shape_block(self, value): 167 | self._shape_block = value 168 | self.update_block_structure() 169 | 170 | @property 171 | def padding(self): 172 | return self._padding 173 | 174 | @padding.setter 175 | def padding(self, value): 176 | self._padding = value 177 | self.update_block_structure() 178 | 179 | def update_stack_dims(self): 180 | """Update stack dimensions and cropping, if shape_full or cropping 181 | is changed. 182 | :return: 183 | """ 184 | 185 | if self.shape_full is not None: 186 | self.shape_cropped = tuple( 187 | d - cl - ch for d, (cl, ch) in zip(self.shape_full, self.crop) 188 | ) 189 | self.starts = tuple(cl for cl, ch in self.crop) 190 | 191 | def update_block_structure(self): 192 | """ 193 | Update the Blocks structure, e.g. when block 194 | shape or padding are changed. 195 | """ 196 | # Cartesian product for generating a list of indexes on every split 197 | # dimension (i.e., dimensions where int(np.ceil(stack_size / block_size) 198 | # is != 1). 199 | # For example, splitting one time in 2nd and 3rd dims, 200 | # idx_blocks = (0, 0, 0, 0), (0, 0, 1, 0), (0, 1, 0, 0), (0, 1, 1, 0). 201 | 202 | # block_starts and block_ends will be arrays of shape 203 | # (n_blocks_dim0, n_blocks_dim1, n_blocks_dim2 ..., shape_full) 204 | # by addressing the N-1 dimensions with the index of the block we 205 | # will get a vector with the starting position of the block on all 206 | # original dimensions of the full stack. 207 | if self.shape_block is not None: 208 | self.block_starts = np.empty( 209 | tuple( 210 | int(np.ceil((stack_size - pad_size) / block_size)) 211 | for stack_size, block_size, pad_size in zip( 212 | self.shape_cropped, self.shape_block, self.padding 213 | ) 214 | ) 215 | + (len(self.shape_cropped),), 216 | dtype=np.int32, 217 | ) 218 | self.block_ends = np.empty_like(self.block_starts) 219 | for idx_blocks in product( 220 | *(range(s) for s in self.block_starts.shape[:-1]) 221 | ): 222 | self.block_starts[idx_blocks + (slice(None),)] = [ 223 | st + i_bd * bs 224 | for i_bd, bs, st in zip(idx_blocks, self.shape_block, self.starts) 225 | ] 226 | self.block_ends[idx_blocks + (slice(None),)] = [ 227 | min(maxdim + st, (i_bd + 1) * bs + pd + st) 228 | for i_bd, bs, pd, maxdim, st in zip( 229 | idx_blocks, 230 | self.shape_block, 231 | self.padding, 232 | self.shape_cropped, 233 | self.starts, 234 | ) 235 | ] 236 | 237 | def slices(self, as_tuples=False): 238 | return BlockIterator(self, slices=not as_tuples) 239 | 240 | def linear_to_cartesian(self, lin_idx): 241 | """ 242 | Convert block linear index into cartesian index. 243 | Example: in a 3D stack split in 2x2x3 blocks, 244 | 245 | self.linear_to_cartesian(0) = (0,0,0) # first block 246 | bs.linear_to_cartesian(11) = (1,1,2) # last block 247 | :param lin_idx: block linear index (int) 248 | :return: block cartesian index (tuple of ints) 249 | """ 250 | return np.unravel_index(lin_idx, self.block_starts.shape[:-1]) 251 | 252 | def cartesian_to_linear(self, ca_idx): 253 | """ 254 | Convert block cartesian index in linear index. 
255 | Example: in a 3D stack split in 2x2x3 blocks 256 | 257 | self.cartesian_to_linear0,0,0) = 0 # first block 258 | bs.cartesian_to_linear(1,1,2) = 11 # last block 259 | 260 | :param ca_idx: block cartesian index (tuple of ints) 261 | :return: block linear index (int) 262 | """ 263 | return np.ravel_multi_index(ca_idx, self.block_starts.shape[:-1]) 264 | 265 | def __getitem__(self, item): 266 | """ 267 | :param item: 268 | :return: 269 | """ 270 | # TODO make less brittle, support also indexing by tuples 271 | 272 | # TODO decide what should be returned: slices are tricky 273 | # with multiprocessing 274 | if isinstance(item, int): 275 | idx = self.linear_to_cartesian(item) 276 | return tuple( 277 | slice(s, e) 278 | for s, e in zip(self.block_starts[idx], self.block_ends[idx]) 279 | ) 280 | 281 | def neighbour_blocks(self, i_block, dims=None): 282 | """ 283 | Return neighbouring blocks across given dimensions 284 | :param i_block: 285 | :param dims: 286 | :return: 287 | """ 288 | block_idx = self.linear_to_cartesian(i_block) 289 | act_dims = np.ones(self.n_dims, dtype=bool) 290 | if dims is not None: 291 | act_dims[dims] = True 292 | 293 | neighbors = [] 294 | for idx_neighbour in product( 295 | *[ 296 | ( 297 | range( 298 | max(block_idx[i_dim] - 1, 0), 299 | min(block_idx[i_dim] + 1, self.block_starts.shape[i_dim]), 300 | ) 301 | if act_dims[i_dim] 302 | else [block_idx[i_dim]] 303 | ) 304 | for i_dim in range(self.n_dims) 305 | ] 306 | ): 307 | if idx_neighbour != block_idx: 308 | neighbors.append(idx_neighbour) 309 | if neighbors: 310 | return np.ravel_multi_index( 311 | np.stack(neighbors, 1), self.block_starts.shape[:-1] 312 | ) 313 | else: 314 | return np.array([]) 315 | 316 | def blocks_to_take(self, start_take, end_take): 317 | """ 318 | Find which blocks to take to cover the range: 319 | :param start_take: starting points in the N dims (tuple) 320 | :param end_take: ending points in the N dims (tuple) 321 | :return: tuple of tuples with the extremes of blocks to take in N dims; 322 | starting index of data in the first block; 323 | ending index of data in the last block. 
324 | """ 325 | # n_dims = len(start_take) 326 | block_slices = [] 327 | take_block_s_idx = [] 328 | take_block_e_idx = [] 329 | for i_dim, (start, end) in enumerate(zip(start_take, end_take)): 330 | axis_index = tuple( 331 | 0 if i != i_dim else slice(None) for i in range(self.n_dims) 332 | ) + (i_dim,) 333 | s = max( 334 | 0, 335 | min( 336 | np.searchsorted(self.block_starts[axis_index], start) - 1, 337 | len(self.block_starts[axis_index]) - 1, 338 | ), 339 | ) 340 | e = np.searchsorted(self.block_starts[axis_index], end) 341 | block_start = start - self.block_starts[axis_index][s] 342 | block_end = end - self.block_starts[axis_index][e - 1] 343 | 344 | block_slices.append((s, e)) 345 | take_block_s_idx.append(block_start) 346 | take_block_e_idx.append(block_end) 347 | return block_slices, take_block_s_idx, take_block_e_idx 348 | 349 | @staticmethod 350 | def block_to_slices(block): 351 | return tuple(slice(lb, rb) for lb, rb in block) 352 | 353 | def centres(self): 354 | return (self.block_ends + self.block_starts) / 2 355 | 356 | def block_containing_coords(self, coords): 357 | """ 358 | Find the linear index of a block containing the given coordinates 359 | 360 | :param coords: a tuple of the coordinates 361 | :return: 362 | """ 363 | dims = [] 364 | for ic, c in enumerate(coords): 365 | # Create a tuple with the starting points on current dimension 366 | # for all the blocks: 367 | starts = self.block_starts[ 368 | tuple(slice(None) if i == ic else 0 for i in range(self.n_dims)) + (ic,) 369 | ] 370 | 371 | # find in which position our guy should be ordered, correcting 372 | # for 0 value: 373 | dims.append(max((np.searchsorted(starts, c)) - 1, 0)) 374 | return dims 375 | 376 | def drop_dim(self, dim_to_drop): 377 | """ 378 | Return a new BlockSplitter object with a dimension dropped, 379 | useful for getting spatial from spatio-temporal blocks. 380 | 381 | :param dim_to_drop: dimension to be dropped (int) 382 | :return: new BlockSplitter object 383 | """ 384 | return Blocks( 385 | _drop_ith(self.shape_full, dim_to_drop), 386 | shape_block=_drop_ith(self.shape_block, dim_to_drop), 387 | padding=_drop_ith(self.padding, dim_to_drop), 388 | crop=_drop_ith(self.crop, dim_to_drop), 389 | ) 390 | 391 | def serialize(self): 392 | """ 393 | Returns a dictionary with a complete description of the 394 | BlockSplitter, e.g. to save its structure as json file. 395 | :return: 396 | """ 397 | # TODO it should be possible to initialize the BlockSplitter from 398 | # this dictionary! 
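        # The serialized dictionary is also the starting point for the
        # stack_metadata.json file written by EmptySplitDataset.finalize(), which
        # overrides "shape_full" (with the cropped shape) and the crop fields and
        # adds "resolution" and "axis_order" before saving; "padding" keeps the
        # per-dimension overlap between blocks.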
399 | return dict( 400 | shape_full=self.shape_full, 401 | shape_block=self.shape_block, 402 | crop_start=tuple(c[0] for c in self.crop), 403 | crop_end=tuple(c[1] for c in self.crop), 404 | padding=self.padding, 405 | ) 406 | -------------------------------------------------------------------------------- /split_dataset/split_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import warnings 3 | from itertools import product 4 | from pathlib import Path 5 | 6 | import flammkuchen as fl 7 | import numpy as np 8 | 9 | from split_dataset.blocks import Blocks 10 | 11 | 12 | # TODO this should probably be done as a constructor of the SplitDataset 13 | def save_to_split_dataset( 14 | data, 15 | root_name, 16 | block_size=None, 17 | crop=None, 18 | padding=0, 19 | prefix="", 20 | compression="blosc", 21 | ): 22 | """Function to save block of data into a split_dataset.""" 23 | 24 | new_name = prefix + ("_cropped" if crop is not None else "") 25 | padding = ( 26 | data.padding if padding is not None and isinstance(data, Blocks) else padding 27 | ) 28 | blocks = EmptySplitDataset( 29 | shape_full=data.shape, 30 | shape_block=data.shape_block if block_size is None else block_size, 31 | crop=crop, 32 | padding=padding, 33 | root=root_name, 34 | name=new_name, 35 | ) 36 | for filename, (idxs, slices) in zip(blocks.files, blocks.slices()): 37 | fl.save( 38 | str(blocks.root / filename), 39 | {"stack_{}D".format(len(blocks.shape_cropped)): data[slices]}, 40 | compression=compression, 41 | ) 42 | 43 | return blocks.finalize() 44 | 45 | 46 | class SplitDataset(Blocks): 47 | """ 48 | Manages datasets split over multiple h5 file across arbitrary dimensions. 49 | To do so, uses the BlockSplitter class functions, and define blocks as 50 | files. 51 | 52 | """ 53 | 54 | def __init__(self, root, prefix=None): 55 | """ 56 | :param root: The directory containing the files 57 | :param prefix: The class assumes individual file names to be xxxx.h5. 58 | If there is a prefix to this, for example if the files are stack_xxxx.h5 59 | this has to be passed to the object as a string, in this 60 | particular case it would be prefix="stack_" 61 | """ 62 | 63 | # Load information about stack and splitting. 
Use the json metadata 64 | # file if possible: 65 | self.root = Path(root) 66 | try: 67 | stack_meta_f = next(self.root.glob("*stack_metadata.json")) 68 | 69 | with open(str(stack_meta_f), "r") as f: 70 | block_metadata = json.load(f) 71 | except StopIteration: 72 | last_data_f = sorted(list(self.root.glob("{}*.h5".format(prefix))))[-1] 73 | block_metadata = fl.load(str(last_data_f), "/stack_metadata") 74 | 75 | # Ugly keyword fix to handle transition to new json system: 76 | for new_k, old_k in zip( 77 | ["shape_block", "shape_full"], ["block_size", "full_size"] 78 | ): 79 | block_metadata[new_k] = block_metadata.pop(old_k) 80 | 81 | # By putting this here, we generate the proper stack_metadata 82 | # file when we open old version data (int conversion for some 83 | # weird format problem with flammkuchen dictionary): 84 | clean_metadata = dict() 85 | _save_metadata_json(block_metadata, self.root) 86 | for k in block_metadata.keys(): 87 | if isinstance(block_metadata[k], tuple): 88 | clean_metadata[k] = tuple( 89 | int(n) if n is not None else None for n in block_metadata[k] 90 | ) 91 | else: 92 | clean_metadata[k] = block_metadata[k] 93 | with open(str(), "w") as f: 94 | json.dump(clean_metadata, f) 95 | 96 | # Start the parent BlockSplitter: 97 | super().__init__( 98 | shape_full=block_metadata["shape_full"], 99 | shape_block=block_metadata["shape_block"], 100 | ) 101 | 102 | if prefix is None: 103 | files = sorted(self.root.glob("*[0-9]*.h5")) 104 | else: 105 | files = sorted(self.root.glob("*{}_[0-9]*.h5".format(prefix))) 106 | 107 | self.files = np.array(files).reshape(self.block_starts.shape[:-1]) 108 | 109 | # If available, read resolution 110 | try: 111 | self.resolution = block_metadata["resolution"] 112 | except KeyError: 113 | self.resolution = (1, 1, 1) 114 | # TODO check this 115 | self.shape = self.shape_cropped 116 | 117 | @property 118 | def ndim(self): 119 | return len(self.shape) 120 | 121 | @property 122 | def dtype(self): 123 | px = fl.load( 124 | str(self.files.flatten()[0]), 125 | "/" + self.data_key, 126 | sel=(0,) * len(self.shape), 127 | ) 128 | return px.dtype 129 | 130 | @property 131 | def data_key(self): 132 | """To migrate smoothly to removal of stack_ND key in favour of only stack""" 133 | return [k for k in fl.meta(self.files.flatten()[0]).keys() if "stack" in k][0] 134 | 135 | def __getitem__(self, item): 136 | """ 137 | Implement usage of the H5SplitDataset as normal numpy array. 
138 | :param item: 139 | :return: 140 | """ 141 | # Lot of input munging to emulate indexing in numpy array 142 | if np.any(self.padding) != 0: 143 | raise ValueError( 144 | "Indexing in datasets with overlap (padding) is" 145 | " not supported, merge them first with an" 146 | " appropriate merging function" 147 | ) 148 | 149 | if isinstance(item, int): 150 | item = (slice(item, item + 1),) 151 | 152 | if isinstance(item, slice): 153 | item = (item,) 154 | 155 | if isinstance(item, tuple): 156 | # Take care of the case when only the first few dimensions 157 | # are specified: 158 | if len(item) < len(self.shape): 159 | item = item + (None,) * (len(self.shape) - len(item)) 160 | 161 | # Loop over dimensions creating a list of starting and ending 162 | # points 163 | 164 | starts = [] 165 | ends = [] 166 | singletons = np.zeros(len(item), dtype=bool) 167 | for i_dim, (dim_slc, dim_full) in enumerate(zip(item, self.shape)): 168 | # i_dim: index of current dimension 169 | # dim_slc: slice/index for current dimension 170 | # fd: length of dataset on current dimension 171 | 172 | # If nothing specified, start from 0 and finish at end: 173 | if dim_slc is None: 174 | starts.append(0) 175 | ends.append(dim_full) 176 | 177 | # If a slice is specified: 178 | elif isinstance(dim_slc, slice): 179 | if dim_slc.start is None: 180 | starts.append(0) 181 | else: 182 | if dim_slc.start >= 0: 183 | starts.append(dim_slc.start) 184 | else: 185 | starts.append(max(0, dim_full + dim_slc.start)) 186 | 187 | if dim_slc.stop is None: 188 | ends.append(dim_full) 189 | else: 190 | if dim_slc.stop >= 0: 191 | ends.append(min(dim_slc.stop, dim_full)) 192 | else: 193 | ends.append(max(0, dim_full + dim_slc.stop)) 194 | elif type(dim_slc) in [int, np.int32, np.int64]: 195 | singletons[i_dim] = True 196 | if dim_slc >= 0: 197 | if dim_slc > dim_full - 1: 198 | raise IndexError( 199 | "Indexes {} out of dimensions {}!".format( 200 | item, self.shape 201 | ) 202 | ) 203 | starts.append(dim_slc) 204 | ends.append(dim_slc + 1) 205 | else: 206 | if -dim_slc > dim_full: 207 | raise IndexError( 208 | "Indexes {} out of dimensions {}!".format( 209 | item, self.shape 210 | ) 211 | ) 212 | starts.append(dim_full + dim_slc) 213 | ends.append(dim_full + dim_slc + 1) 214 | else: 215 | raise IndexError("Unsupported indexing") 216 | else: 217 | raise IndexError("Unsupported indexing") 218 | 219 | file_slices, take_block_s_idx, take_block_e_idx = self.blocks_to_take( 220 | starts, ends 221 | ) 222 | output_size = tuple(e - s for s, e in zip(starts, ends)) 223 | 224 | output = None 225 | 226 | # A lot of indexing tricks to achieve multidimensional generality 227 | for f_idx in product(*(range(s, e) for s, e in file_slices)): 228 | abs_idx = [ri - s for ri, (s, e) in zip(f_idx, file_slices)] 229 | sel_slices = tuple( 230 | slice(0 if ci != s else si, None if ci < e - 1 else ei) 231 | for ci, (s, e), si, ei in zip( 232 | f_idx, file_slices, take_block_s_idx, take_block_e_idx 233 | ) 234 | ) 235 | arr = fl.load( 236 | str(self.files[f_idx]), 237 | "/" + self.data_key, 238 | sel=fl.aslice[sel_slices], 239 | ) 240 | 241 | if output is None: 242 | output = np.empty(output_size, arr.dtype) 243 | 244 | output_sel_tuple = tuple( 245 | slice( 246 | 0 if st_idx == 0 else bs - first_idx + (st_idx - 1) * (bs), 247 | (0 if st_idx == 0 else bs - first_idx + (st_idx - 1) * (bs)) + sz, 248 | ) 249 | for st_idx, bs, first_idx, sz in zip( 250 | abs_idx, self.shape_block, take_block_s_idx, arr.shape 251 | ) 252 | ) 253 | output[output_sel_tuple] = arr 254 
| 255 | if output is None: 256 | raise IndexError( 257 | "Trying to index the split dataset outside of bounds, between " 258 | + str(starts) 259 | + " and " 260 | + str(ends) 261 | ) 262 | 263 | output_sel = tuple(0 if singleton else slice(None) for singleton in singletons) 264 | 265 | return output[output_sel] 266 | 267 | def apply_crop(self, crop): 268 | """Take out the data with a crop""" 269 | # TODO there is the crop atrribute, which is a lazy crop, this should actually 270 | # return a non-cropped dataset 271 | ds_cropped = EmptySplitDataset( 272 | shape_full=self.shape, 273 | shape_block=self.shape_block, 274 | padding=self.padding, 275 | crop=crop, 276 | root=self.root.parent, 277 | name=self.root.name + "_cropped", 278 | ) 279 | # the slices iterator does not return just the slices, but also the indicesS 280 | for (i_slice, block_slices), file_name in zip( 281 | ds_cropped.slices(), ds_cropped.files 282 | ): 283 | fl.save( 284 | str(self.root / file_name), 285 | {"stack": self[block_slices]}, 286 | ) 287 | 288 | ds_cropped.finalize() 289 | 290 | 291 | class EmptySplitDataset(Blocks): 292 | """Class to initialize an empty dataset for which we have to save metadata 293 | after filling its blocks. 294 | """ 295 | 296 | def __init__(self, root, name, *args, resolution=None, **kwargs): 297 | """ 298 | :param root: folder where the stack will be saved; 299 | :param name: name of the dataset, for the folder name; 300 | :param resolution: resolution of the stack, in microns; 301 | """ 302 | super().__init__(*args, **kwargs) 303 | self.root = Path(root) / name 304 | if not self.root.is_dir(): 305 | self.root.mkdir(parents=True) 306 | else: 307 | warnings.warn("Existing directory") 308 | 309 | self.files = ["{:04d}.h5".format(i) for i in range(self.n_blocks)] 310 | self.resolution = resolution 311 | 312 | def save_block_data(self, n, data, verbose=False): 313 | """Optional method to save data in a block. Often we don't use it, 314 | as we directly save data in the parallelized function. Might be good to 315 | find ways of centralizing saving here? 
316 | :param n: n of the block we are saving in; 317 | :param data: data to be pured in the block; 318 | :param verbose: 319 | :return: 320 | """ 321 | fname = "{:04d}.h5".format(n) 322 | if verbose: 323 | print("Saving ", str(self.root / fname)) 324 | 325 | if data.shape != self.shape_block: 326 | print(" - data has different dimension from block!") 327 | 328 | to_save = {"stack": data} 329 | 330 | fl.save(str(self.root / fname), to_save, compression="blosc") 331 | 332 | def finalize(self): 333 | n_dims = len(self.shape_block) 334 | block_dict = self.serialize() 335 | block_dict["shape_full"] = self.shape_cropped 336 | block_dict["crop_start"] = (0,) * n_dims 337 | block_dict["crop_end"] = (0,) * n_dims 338 | block_dict["resolution"] = ( 339 | self.resolution if self.resolution is not None else (1,) * n_dims 340 | ) 341 | 342 | block_dict["axis_order"] = "tzyx" if n_dims == 4 else "zyx" 343 | 344 | _save_metadata_json(block_dict, self.root) 345 | return SplitDataset(self.root) 346 | 347 | 348 | def _save_metadata_json(dictionary, root): 349 | """Save json file preventing type failures for stack shapes 350 | :param path: path for saving 351 | :param dictionary: dictionary to be saved 352 | :return: 353 | """ 354 | METADATA_FILENAME = "stack_metadata.json" 355 | for k in dictionary.keys(): 356 | if type(dictionary[k]) is tuple: 357 | # funny fix for variable type mysterious error: 358 | if type(dictionary[k][0]) == np.int64 or type(dictionary[k][0]) == int: 359 | dictionary[k] = tuple([int(i) for i in dictionary[k]]) 360 | 361 | json.dump(dictionary, open(root / METADATA_FILENAME, "w")) 362 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for split_dataset.""" 2 | -------------------------------------------------------------------------------- /tests/test_blocks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from split_dataset import Blocks 4 | 5 | 6 | def test_cartesian_blocks(): 7 | test_size = (20, 20) 8 | a = np.ones(test_size) 9 | blocks = Blocks(test_size, shape_block=(3, 7), padding=(1, 2)) 10 | for idx, block in blocks.slices(): 11 | a[block] = 0 12 | np.testing.assert_array_equal(a, np.zeros(test_size)) 13 | 14 | 15 | def test_dropped_dimension(): 16 | test_size = (5, 15, 20) 17 | blocks = Blocks( 18 | test_size, shape_block=(3, 7), padding=(1, 2), crop=((1, 1), (0, 0), (0, 0)) 19 | ) 20 | np.testing.assert_equal(blocks.drop_dim(1).shape_full, (5, 20)) 21 | -------------------------------------------------------------------------------- /tests/test_split_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `split_dataset` package.""" 4 | 5 | import shutil 6 | import tempfile 7 | import unittest 8 | 9 | import numpy as np 10 | 11 | from split_dataset import save_to_split_dataset 12 | 13 | 14 | class TestSplitDataset(unittest.TestCase): 15 | def setUp(self): 16 | self.test_dir = tempfile.mkdtemp() 17 | 18 | def tearDown(self): 19 | shutil.rmtree(self.test_dir) 20 | 21 | def test_SplitDataset(self): 22 | dims = [(10, 3, 3, 3), (5, 5, 5), (5, 5), (1, 5, 5, 5)] 23 | block_sizes = [(2, None, None, None), (1, None, 3), (2, None), (None, 2, 5, 5)] 24 | all_slices = [ 25 | [(slice(3, 8), slice(None))], 26 | [(slice(0, 1),), (slice(0, 2), slice(0, 1), slice(None))], 27 | [slice(0, 
2)], 28 | [ 29 | (slice(0, 1),), 30 | (slice(0, 2), slice(0, 1), slice(None)), 31 | (0, slice(0, 2), slice(0, 1)), 32 | ], 33 | ] 34 | 35 | for i, (di, bs, slices) in enumerate(zip(dims, block_sizes, all_slices)): 36 | test_data = np.arange(np.product(di)).reshape(di) 37 | 38 | sd = save_to_split_dataset( 39 | test_data, 40 | block_size=bs, 41 | root_name=self.test_dir, 42 | prefix="te{:02d}".format(i), 43 | ) 44 | for sl in slices: 45 | a = sd[sl] 46 | b = test_data[sl] 47 | np.testing.assert_equal( 48 | a, 49 | b, 50 | err_msg="Testing " 51 | + str(di) 52 | + " " 53 | + str(sl) 54 | + " of shape " 55 | + str(a.shape) 56 | + " and shape" 57 | + str(b.shape), 58 | ) 59 | --------------------------------------------------------------------------------
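
As a rough usage sketch based on the `EmptySplitDataset` API defined in `split_dataset/split_dataset.py` above (the shapes, block size and temporary folder are arbitrary example values, not part of the repository): a split dataset can be written one chunk at a time and then finalized into a readable `SplitDataset`.

```python
import shutil
import tempfile

import numpy as np

from split_dataset import EmptySplitDataset, SplitDataset

tmp_dir = tempfile.mkdtemp()

# Arbitrary example data: a small (t, z, y, x) stack.
full_shape = (10, 4, 6, 6)
data = np.arange(np.prod(full_shape)).reshape(full_shape)

# Declare the layout of the dataset first; here each block spans 2 time points.
new_ds = EmptySplitDataset(
    root=tmp_dir,
    name="example_ds",
    shape_full=full_shape,
    shape_block=(2, 4, 6, 6),
)

# Fill one block (= one .h5 file) at a time, e.g. as data arrives from an acquisition.
for i_block, (_, block_slices) in enumerate(new_ds.slices()):
    new_ds.save_block_data(i_block, data[block_slices])

# Write the stack_metadata.json file and reopen the folder as a SplitDataset.
ds = new_ds.finalize()
assert isinstance(ds, SplitDataset)
np.testing.assert_array_equal(ds[3:7, :, :, :], data[3:7, :, :, :])

shutil.rmtree(tmp_dir)
```

In a real time-lapse acquisition, the loop body would receive each chunk from the instrument instead of slicing a pre-existing array; this is essentially the streaming-save pattern that `sashimi` uses.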