├── .github
│   └── workflows
│       ├── cd.yml
│       ├── ci.yml
│       └── version-cmp.py
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── contributing.md
│   ├── index.md
│   └── make.bat
├── hatch.toml
├── pyproject.toml
├── python
│   └── zarrs
│       ├── __init__.py
│       ├── _internal.pyi
│       ├── pipeline.py
│       ├── py.typed
│       └── utils.py
├── src
│   ├── bin
│   │   └── stub_gen.rs
│   ├── chunk_item.rs
│   ├── concurrency.rs
│   ├── lib.rs
│   ├── metadata_v2.rs
│   ├── runtime.rs
│   ├── store.rs
│   ├── store
│   │   ├── filesystem.rs
│   │   ├── http.rs
│   │   └── manager.rs
│   ├── tests.rs
│   └── utils.rs
└── tests
    ├── conftest.py
    ├── test_blosc.py
    ├── test_codecs.py
    ├── test_endian.py
    ├── test_gzip.py
    ├── test_pipeline.py
    ├── test_sharding.py
    ├── test_transpose.py
    ├── test_v2.py
    ├── test_version.py
    ├── test_vlen.py
    ├── test_zarrs_http.py
    └── test_zstd.py

/.github/workflows/cd.yml:
--------------------------------------------------------------------------------
1 | name: cd
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |     tags:
8 |       - '**'
9 |   pull_request:
10 | 
11 | env:
12 |   CARGO_TERM_COLOR: always
13 | 
14 | jobs:
15 |   build:
16 |     name: build on ${{ matrix.os }} (${{ matrix.target }}${{ matrix.os == 'linux' && format(' - {0}', matrix.manylinux == 'auto' && 'manylinux' || matrix.manylinux) || '' }})
17 |     # only run on push to tags, main branch, or explicit full build
18 |     # keep condition in sync with `build-sdist` job
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         os: [linux, macos, windows]
23 |         target: [x86_64, aarch64]
24 |         manylinux: ['2_28']
25 |         include:
26 |           # manylinux for various platforms
27 |           #- { os: linux, manylinux: '2_28', target: i686 }
28 |           - { os: linux, manylinux: '2_28', target: armv7 }
29 |           - { os: linux, manylinux: '2_28', target: ppc64le }
30 |           #- { os: linux, manylinux: '2_28', target: s390x }
31 |           # musl
32 |           - { os: linux, manylinux: musllinux_1_2, target: x86_64 }
33 |           - { os: linux, manylinux: musllinux_1_2, target: aarch64 }
34 |           - { os: linux, manylinux: musllinux_1_2, target: armv7 }
35 |           # windows
36 |           - { os: windows, target: i686, python-architecture: x86 }
37 |         exclude:
38 |           # https://github.com/rust-cross/cargo-xwin/issues/76
39 |           - os: windows
40 |             target: aarch64
41 |     runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest
42 |     steps:
43 |       - uses: actions/checkout@v4
44 |       - uses: actions/setup-python@v5
45 |         with:
46 |           python-version: '3.13'
47 |           architecture: ${{ matrix.python-architecture || 'x64' }}
48 |       - run: pip install twine
49 |       - uses: PyO3/maturin-action@v1
50 |         with:
51 |           target: ${{ matrix.target }}
52 |           manylinux: ${{ matrix.manylinux }}
53 |           args: --release --out dist --interpreter '3.11 3.12 3.13'
54 |           rust-toolchain: stable
55 |           docker-options: -e CI
56 |           # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846
57 |           maturin-version: 1.7.4
58 |           before-script-linux: |
59 |             # If we're running on RHEL/CentOS, install needed packages.
60 | if command -v yum &> /dev/null; then 61 | yum update -y && yum install -y perl-core 62 | # https://github.com/PyO3/maturin-action/discussions/152 63 | if [[ "${{ matrix.os }}" == "linux" && "${{ matrix.target }}" == "x86_64" && "${{ matrix.manylinux }}" == "2_28" ]]; then 64 | yum update -y && yum install -y clang 65 | fi 66 | fi 67 | - run: ${{ (matrix.os == 'windows' && 'dir') || 'ls -lh' }} dist/ 68 | - run: twine check --strict dist/* 69 | - uses: actions/upload-artifact@v4 70 | with: 71 | name: pypi-files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux }} 72 | path: dist 73 | 74 | build-sdist: 75 | name: build sdist 76 | # keep condition in sync with `build` job 77 | runs-on: ubuntu-latest 78 | steps: 79 | - uses: actions/checkout@v4 80 | - uses: actions/setup-python@v5 81 | with: 82 | python-version: '3.13' 83 | - uses: PyO3/maturin-action@v1 84 | with: 85 | # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846 86 | maturin-version: 1.7.4 87 | command: sdist 88 | args: --out dist 89 | rust-toolchain: stable 90 | - uses: actions/upload-artifact@v4 91 | with: 92 | name: pypi-files-sdist 93 | path: dist 94 | 95 | inspect: 96 | needs: [build, build-sdist] 97 | runs-on: ubuntu-latest 98 | steps: 99 | - uses: actions/download-artifact@v4 100 | with: 101 | pattern: pypi-files-* 102 | merge-multiple: true 103 | path: dist/ 104 | - run: ls -lh dist/ 105 | # TODO: some more checks? `twine` is already run above 106 | 107 | # If git tag is a version, verify that it matches the package metadata version (or fail job and skip `publish`) 108 | # If git tag is not a version, set output `version` to "" (also skipping `publish`) 109 | version: 110 | if: github.ref_type == 'tag' && startsWith(github.ref_name, 'v') 111 | needs: build 112 | outputs: 113 | version: ${{ steps.version.outputs.version }} 114 | is_prerelease: ${{ steps.version.outputs.is_prerelease }} 115 | runs-on: ubuntu-latest 116 | steps: 117 | - uses: actions/checkout@v4 118 | - uses: actions/setup-python@v5 119 | with: 120 | python-version: '3.13' 121 | - uses: actions/download-artifact@v4 122 | with: 123 | name: pypi-files-linux-x86_64-2_28 124 | path: dist/ 125 | - name: Install zarrs-python 126 | run: pip install packaging dist/*manylinux_2_28_x86_64.whl 127 | - name: Get zarrs-python version and tag 128 | id: version 129 | run: python .github/workflows/version-cmp.py 130 | 131 | publish: 132 | if: needs.version.outputs.version != '' 133 | runs-on: ubuntu-latest 134 | needs: [inspect, version] 135 | environment: pypi 136 | permissions: 137 | contents: write # to create a github release 138 | id-token: write # to authenticate as Trusted Publisher to pypi.org 139 | steps: 140 | - uses: actions/download-artifact@v4 141 | with: 142 | pattern: pypi-files-* 143 | merge-multiple: true 144 | path: dist/ 145 | - name: "Publishing version ${{ needs.version.outputs.version }}" 146 | uses: pypa/gh-action-pypi-publish@release/v1 147 | with: 148 | packages-dir: dist/ 149 | verbose: true 150 | - uses: ncipollo/release-action@v1 151 | with: 152 | name: ${{ needs.version.outputs.version }} 153 | prerelease: ${{ needs.version.outputs.is_prerelease }} 154 | generateReleaseNotes: true 155 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ 
github.event.pull_request.number || github.ref }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | CARGO_TERM_COLOR: always 15 | PYTEST_ADDOPTS: '--color=yes' 16 | 17 | jobs: 18 | build_and_test: 19 | name: build and test 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | rust_toolchain: ["stable"] # "nightly" 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | # Rust in GH runner images can lag behind stable releases + act does not include Rust 29 | - name: Install Rust 30 | uses: dtolnay/rust-toolchain@master 31 | with: 32 | toolchain: ${{ matrix.rust_toolchain }} 33 | components: rustfmt 34 | 35 | - name: Install rust-cache 36 | uses: Swatinem/rust-cache@v2 37 | 38 | - name: Install Python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: "3.x" 42 | 43 | - name: Install UV 44 | uses: astral-sh/setup-uv@v3 45 | with: 46 | version: "0.5.0" 47 | enable-cache: true 48 | cache-dependency-glob: | 49 | pyproject.toml 50 | Cargo.toml 51 | 52 | - name: Install python deps + Build 53 | run: | 54 | uv pip install --system -e ".[test,dev]" --verbose 55 | 56 | - name: Python Tests 57 | run: pytest -n auto 58 | 59 | - name: Rust Tests 60 | run: cargo test 61 | 62 | - name: Check formatting 63 | # see “Type hints” section in contributing.md 64 | run: | 65 | cargo run --bin stub_gen 66 | pre-commit run --all-files --show-diff-on-failure || true 67 | git diff --exit-code HEAD 68 | -------------------------------------------------------------------------------- /.github/workflows/version-cmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Can’t be an isolated script since we want to access zarrs’ metadata 3 | 4 | import importlib.metadata as im 5 | import os 6 | import sys 7 | from pathlib import Path 8 | 9 | from packaging.version import InvalidVersion, Version 10 | 11 | 12 | def set_outputs(version: Version | str) -> None: 13 | is_prerelease = version.is_prerelease if isinstance(version, Version) else False 14 | is_prerelease_json = "true" if is_prerelease else "false" 15 | print(f"{version=!s} {is_prerelease=}") 16 | with Path(os.environ["GITHUB_OUTPUT"]).open("a") as f: 17 | print(f"version={version}", file=f) 18 | print(f"is_prerelease={is_prerelease_json}", file=f) 19 | 20 | 21 | version_tag_str = os.environ["GITHUB_REF_NAME"] 22 | assert version_tag_str.startswith("v"), "should be enforced in `if:` condition" 23 | try: 24 | version_tag = Version(version_tag_str[1:]) 25 | except InvalidVersion: 26 | set_outputs("") 27 | sys.exit(0) 28 | 29 | if version_tag_str[1:] != str(version_tag): 30 | sys.exit(f"Tag version not normalized: {version_tag_str} should be v{version_tag}") 31 | 32 | if version_tag != (version_meta := Version(im.version("zarrs"))): 33 | sys.exit(f"Version mismatch: {version_tag} (tag) != {version_meta} (metadata)") 34 | 35 | set_outputs(version_meta) 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | /.idea/ 3 | /.vscode/ 4 | 5 | # Caches 6 | .DS_Store 7 | __pycache__/ 8 | /.*cache/ 9 | /.hypothesis/ 10 | 11 | # Build 12 | *.so 13 | /target/ 14 | /dist/ 15 | /docs/_build/ 16 | 17 | # Coverage 18 | /.coverage 19 | /coverage.xml 20 | 21 | # Docs 22 | docs/generated/ 23 | 24 | # Lock files 25 | Cargo.lock 26 | uv.lock 27 | -------------------------------------------------------------------------------- 
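An aside on the normalization check in `version-cmp.py` above: `packaging` canonicalizes PEP 440 versions, so a tag that parses but is not in canonical form fails the job before the metadata comparison. A small illustration (the tag is hypothetical, not from this repository):

```python
from packaging.version import Version

# "v1.0.0.RC1" parses, but its canonical form differs from the tag text,
# so the workflow would exit with "Tag version not normalized".
tag = "v1.0.0.RC1"
print(str(Version(tag[1:])))  # -> 1.0.0rc1, which != 1.0.0.RC1
```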
/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-added-large-files 8 | - id: check-case-conflict 9 | - id: check-toml 10 | - id: check-yaml 11 | - id: check-merge-conflict 12 | - id: detect-private-key 13 | - id: no-commit-to-branch 14 | args: ["--branch=main"] 15 | - repo: local 16 | hooks: 17 | - id: rustfmt 18 | name: rustfmt 19 | description: Check if all files follow the rustfmt style 20 | entry: cargo fmt --all -- --color always 21 | language: system 22 | pass_filenames: false 23 | - repo: https://github.com/astral-sh/ruff-pre-commit 24 | rev: v0.11.9 25 | hooks: 26 | - id: ruff 27 | args: ["--fix"] 28 | - id: ruff-format 29 | # The following can be removed once PLR0917 is out of preview 30 | - name: ruff preview rules 31 | id: ruff 32 | args: ["--preview", "--select=PLR0917"] 33 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-24.04 4 | apt_packages: 5 | - clang 6 | tools: 7 | python: "3.12" 8 | rust: "latest" 9 | jobs: 10 | post_checkout: 11 | # unshallow so version can be derived from tag 12 | - git fetch --unshallow || true 13 | sphinx: 14 | configuration: docs/conf.py 15 | fail_on_warning: true 16 | python: 17 | install: 18 | - method: pip 19 | path: . 20 | extra_requirements: 21 | - doc 22 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zarrs-python" 3 | version = "0.1.4" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "zarrs_python" 9 | crate-type = ["cdylib", "rlib"] 10 | 11 | [dependencies] 12 | pyo3 = { version = "0.23.2", features = ["abi3-py311"] } 13 | zarrs = { version = "0.20.0", features = ["async", "zlib", "pcodec", "bz2"] } 14 | rayon_iter_concurrent_limit = "0.2.0" 15 | rayon = "1.10.0" 16 | # fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path 17 | openssl = { version = "0.10", features = ["vendored"] } 18 | numpy = "0.23.0" 19 | unsafe_cell_slice = "0.2.0" 20 | serde_json = "1.0.128" 21 | pyo3-stub-gen = "0.7.0" 22 | opendal = { version = "0.53.0", features = ["services-http"] } 23 | tokio = { version = "1.41.1", features = ["rt-multi-thread"] } 24 | zarrs_opendal = "0.7.2" 25 | itertools = "0.9.0" 26 | 27 | [profile.release] 28 | lto = true 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ilan Gold, Lachlan Deakin, Philipp Angerer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # zarrs-python
2 | 
3 | [![PyPI](https://img.shields.io/pypi/v/zarrs.svg)](https://pypi.org/project/zarrs)
4 | [![Downloads](https://static.pepy.tech/badge/zarrs/month)](https://pepy.tech/project/zarrs)
5 | [![Downloads](https://static.pepy.tech/badge/zarrs)](https://pepy.tech/project/zarrs)
6 | [![Stars](https://img.shields.io/github/stars/zarrs/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/zarrs/zarrs-python/stargazers)
7 | ![CI](https://github.com/zarrs/zarrs-python/actions/workflows/ci.yml/badge.svg)
8 | ![CD](https://github.com/zarrs/zarrs-python/actions/workflows/cd.yml/badge.svg)
9 | 
10 | This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up I/O (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
11 | 
12 | To use the project, simply install our package (which depends on `zarr-python>=3.0.0`) and run:
13 | 
14 | ```python
15 | import zarr
16 | import zarrs
17 | zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
18 | ```
19 | 
20 | You can then use your `zarr` as normal (with some caveats)!
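A minimal end-to-end sketch of what that looks like (the shape, chunking, and `example.zarr` path below are illustrative assumptions, not part of the project):

```python
import numpy as np
import zarr
import zarrs  # noqa: F401

zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})

# Reads and writes on this array are now routed through the Rust pipeline.
arr = zarr.open(
    "example.zarr", mode="w", shape=(1000, 1000), chunks=(100, 100), dtype="float32"
)
arr[:] = np.arange(1_000_000, dtype="float32").reshape(1000, 1000)
assert arr[0, 999] == 999.0
```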
21 | 
22 | ## API
23 | 
24 | We export a `ZarrsCodecPipeline` class for `zarr-python` to use. It is not meant to be instantiated directly, and we do not guarantee the stability of its API beyond what `zarr-python` requires; therefore, it is not documented here.
25 | 
26 | At the moment, we only support a subset of the `zarr-python` stores:
27 | 
28 | - [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem)
29 | - [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore)
30 |   - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)
31 | 
32 | A `NotImplementedError` will be raised if a store is not supported.
33 | We intend to support more stores in the future: https://github.com/zarrs/zarrs-python/issues/44.
34 | 
35 | ### Configuration
36 | 
37 | `ZarrsCodecPipeline` options are exposed through `zarr.config`.
38 | 
39 | Standard `zarr.config` options control some functionality (see the defaults in the [config.py](https://github.com/zarr-developers/zarr-python/blob/main/src/zarr/core/config.py) of `zarr-python`):
40 | - `threading.max_workers`: the maximum number of threads used internally by the `ZarrsCodecPipeline` on the Rust side.
41 |   - Defaults to the number of threads in the global `rayon` thread pool if set to `None`, which is [typically the number of logical CPUs](https://docs.rs/rayon/latest/rayon/struct.ThreadPoolBuilder.html#method.num_threads).
42 | - `array.write_empty_chunks`: whether or not to store empty chunks.
43 |   - Defaults to false if `None`. Note that checking for emptiness has some overhead; see [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#store-empty-chunks) for more info.
44 | 
45 | The `ZarrsCodecPipeline`-specific options are:
46 | - `codec_pipeline.chunk_concurrent_maximum`: the maximum number of chunks stored/retrieved concurrently.
47 |   - Defaults to the number of logical CPUs if `None`. It is also constrained by `threading.max_workers`.
48 | - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
49 |   - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
50 | - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
51 |   - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
52 | 
53 | For example:
54 | ```python
55 | zarr.config.set({
56 |     "threading.max_workers": None,
57 |     "array.write_empty_chunks": False,
58 |     "codec_pipeline": {
59 |         "path": "zarrs.ZarrsCodecPipeline",
60 |         "validate_checksums": True,
61 |         "store_empty_chunks": False,
62 |         "chunk_concurrent_maximum": None,
63 |         "chunk_concurrent_minimum": 4,
64 |     }
65 | })
66 | ```
67 | 
68 | If a `ZarrsCodecPipeline` is pickled and then un-pickled, and in the meantime one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are fixed. This may change in the future as guidance from the `zarr` community becomes clear.
69 | 
70 | ## Concurrency
71 | 
72 | Concurrency can be classified into two types:
73 | - chunk (outer) concurrency: the number of chunks retrieved/stored concurrently.
74 |   - This is chosen automatically based on various factors, such as the chunk size and codecs.
75 |   - It is constrained between `codec_pipeline.chunk_concurrent_minimum` and `codec_pipeline.chunk_concurrent_maximum` for operations involving multiple chunks.
76 | - codec (inner) concurrency: the number of threads encoding/decoding a chunk.
77 |   - This is chosen automatically in combination with the chunk concurrency.
78 | 
79 | The product of the chunk and codec concurrency will approximately match `threading.max_workers`, as illustrated below.
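To make that budget concrete, here is a hypothetical tuning (the numbers are illustrative; the actual split is chosen internally by `zarrs` based on the chunk size and codecs):

```python
import zarr

# A thread budget of 16 split between the two levels: zarrs may then process
# roughly 4 chunks concurrently with ~4 codec threads each (4 × 4 ≈ 16).
zarr.config.set({
    "threading.max_workers": 16,
    "codec_pipeline.chunk_concurrent_minimum": 4,
    "codec_pipeline.chunk_concurrent_maximum": 8,
})
```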
80 | 
81 | Chunk concurrency is typically favored because:
82 | - parallel encoding/decoding can have a high overhead with some codecs, especially with small chunks, and
83 | - it is advantageous to retrieve/store multiple chunks concurrently, especially with high-latency stores.
84 | 
85 | `zarrs-python` will often favor codec concurrency with sharded arrays, as they are well suited to it.
86 | 
87 | ## Supported Indexing Methods
88 | 
89 | The following indexing methods will trigger a fallback to the old `zarr-python` pipeline:
90 | 
91 | 1. Any `oindex` or `vindex` integer `np.ndarray` indexing with dimensionality >= 3, i.e.,
92 | 
93 |    ```python
94 |    arr[np.array([...]), :, np.array([...])]
95 |    arr[np.array([...]), np.array([...]), np.array([...])]
96 |    arr[np.array([...]), np.array([...]), np.array([...])] = ...
97 |    arr.oindex[np.array([...]), np.array([...]), np.array([...])] = ...
98 |    ```
99 | 
100 | 2. Any `vindex` or `oindex` discontinuous integer `np.ndarray` indexing for writes in 2D
101 | 
102 |    ```python
103 |    arr[np.array([0, 5]), :] = ...
104 |    arr.oindex[np.array([0, 5]), :] = ...
105 |    ```
106 | 
107 | 3. `vindex` writes in 2D where both indexers are integer `np.ndarray` indices, i.e.,
108 | 
109 |    ```python
110 |    arr[np.array([...]), np.array([...])] = ...
111 |    ```
112 | 
113 | 4. Ellipsis indexing. We have tested some cases, but others fail even with `zarr-python`'s default codec pipeline, so for now we advise proceeding with caution here.
114 | 
115 |    ```python
116 |    arr[0:10, ..., 0:5]
117 |    ```
118 | 
119 | 
120 | Furthermore, using anything other than contiguous indexers (i.e., slices or consecutive-integer `np.ndarray`s) for numeric data will fall back to the default `zarr-python` implementation.
121 | 
122 | Please file an issue if you believe we have more holes in our coverage than we are aware of, or if you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock much wider use of the Rust pipeline for that use case (very useful for mini-batch training, perhaps!).
123 | 
124 | Further, any codecs not supported by `zarrs` will also automatically fall back to the Python implementation.
125 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 | 
6 | from importlib.metadata import distribution
7 | 
8 | # -- Project information -----------------------------------------------------
9 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
10 | 
11 | _dist = distribution("zarrs")
12 | project = _dist.name
13 | copyright = f"2024, {_dist.metadata['Author']}"
14 | author = _dist.metadata["Author"]
15 | release = _dist.version
16 | 
17 | # -- General configuration ---------------------------------------------------
18 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
19 | 
20 | extensions = [
21 |     "myst_parser",
22 |     "sphinx.ext.autosummary",
23 | ]
24 | source_suffix = [".rst", ".md"]
25 | 
26 | templates_path = ["_templates"]
27 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
28 | 
29 | 
30 | # -- Options for HTML output -------------------------------------------------
31 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
32 | 
33 | html_theme = "alabaster"
34 | # html_static_path = ["_static"]
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | ## Rust
4 | 
5 | You will need `rust` and `cargo` installed on your local system. For more info, see [the rust docs](https://doc.rust-lang.org/cargo/getting-started/installation.html).
6 | 
7 | ## Environment management
8 | 
9 | We encourage the use of [uv](https://docs.astral.sh/uv/) for environment management. To install the package for development, run
10 | 
11 | ```shell
12 | uv pip install -e ".[test,dev,doc]"
13 | ```
14 | 
15 | However, note that while this does build the Rust package, the package will not be rebuilt upon edits despite the `-e` flag. You will need to rebuild it manually using either `uv pip install -e .` or `maturin develop`, as sketched below. Note that for benchmarking/speed testing, it is advisable to build a release version of the Rust package by passing the `-r` flag to `maturin`. For more information on the `rust`-`python` bridge, see the [`PyO3` docs](https://pyo3.rs/v0.22.6/).
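A minimal sketch of that rebuild loop (the commands come from the paragraph above):

```shell
# after editing files under src/, rebuild the extension in place
maturin develop      # debug build, fast to compile
maturin develop -r   # release build, use this for benchmarking
```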
16 | 
17 | ## Testing
18 | 
19 | To run the tests (the test dependencies are installed via the `test` extra above), simply run
20 | 
21 | ```shell
22 | pytest
23 | ```
24 | 
25 | or
26 | 
27 | ```shell
28 | pytest -n auto
29 | ```
30 | 
31 | for parallelized tests. Most tests have been copied from the `zarr-python` repository, with the exception of `test_pipeline.py`, which we have written ourselves.
32 | 
33 | ## Type hints
34 | 
35 | Thanks to [`pyo3-stub-gen`][], we can generate type stubs for the `zarrs._internal` module.
36 | If the “Check formatting” CI step fails, run `cargo run --bin stub_gen`, then `pre-commit run --all-files`, and commit the changes.
37 | 
38 | Once `maturin` can be run as a `hatchling` plugin, this can be made automatic.
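For example, the full loop looks like this (a sketch; reviewing before committing is assumed, not mandated):

```shell
cargo run --bin stub_gen    # regenerate python/zarrs/_internal.pyi
pre-commit run --all-files  # re-apply formatting to the regenerated stub
git diff                    # review, then commit the changes
```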
39 | 40 | [`pyo3-stub-gen`]: https://github.com/Jij-Inc/pyo3-stub-gen 41 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | ``` 3 | 4 | ```{toctree} 5 | :hidden: true 6 | :maxdepth: 1 7 | 8 | contributing 9 | ``` 10 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /hatch.toml: -------------------------------------------------------------------------------- 1 | [envs.default] 2 | installer = "uv" 3 | features = ["dev"] 4 | 5 | [envs.hatch-test] 6 | default-args = [] 7 | features = ["test"] 8 | 9 | [envs.docs] 10 | features = ["doc"] 11 | extra-dependencies = ["setuptools"] # https://bitbucket.org/pybtex-devs/pybtex/issues/169 12 | scripts.build = "sphinx-build -M html docs docs/_build -W --keep-going {args}" 13 | scripts.clean = "git clean -fdX -- {args:docs}" 14 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=1.5,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "zarrs" 7 | requires-python = ">=3.11" 8 | authors = [ 9 | { name = "Ilan Gold" }, 10 | { name = "Lachlan Deakin" }, 11 | { name = "Philipp Angerer" }, 12 | ] 13 | license = "MIT" 14 | classifiers = [ 15 | "Programming Language :: Rust", 16 | "Programming Language :: Python :: Implementation :: CPython", 17 | "Programming Language :: Python :: Implementation :: PyPy", 18 | "Typing :: Typed", 19 | ] 20 | dynamic = ["version"] 21 | dependencies = [ 22 | 'asciitree', 23 | 'numpy>=1.24', 24 | 'fasteners', 25 | 'numcodecs[msgpack]>=0.10.0', 26 | 'fsspec>2024', 27 | 'crc32c', 28 | 'zstandard', 29 | 'typing_extensions', 30 | 'donfig', 31 | 'pytest', 32 | 'universal_pathlib>=0.2.0', 33 | "zarr>=3.0.3", 34 | ] 35 | 36 | [project.optional-dependencies] 37 | test = [ 38 | "coverage", 39 | "pytest", 40 | "pytest-cov", 41 | "msgpack", 42 | "lmdb", 43 | "s3fs", 44 | "pytest-asyncio", 45 | "moto[s3]", 46 | "flask-cors", 47 | "flask", 48 | "requests", 49 | "mypy", 50 | "hypothesis", 51 | "pytest-xdist", 52 | ] 53 | dev = ["maturin", "pip", "pre-commit"] 54 | doc = ["sphinx>=7.4.6", "myst-parser"] 55 | 56 | [tool.maturin] 57 | python-source = "python" 58 | module-name = "zarrs._internal" 59 
| features = ["pyo3/extension-module"] 60 | 61 | [tool.pytest.ini_options] 62 | minversion = "7" 63 | testpaths = ["tests"] 64 | log_cli_level = "INFO" 65 | xfail_strict = true 66 | asyncio_mode = "auto" 67 | asyncio_default_fixture_loop_scope = "function" 68 | doctest_optionflags = [ 69 | "NORMALIZE_WHITESPACE", 70 | "ELLIPSIS", 71 | "IGNORE_EXCEPTION_DETAIL", 72 | ] 73 | addopts = [ 74 | "--durations=10", 75 | "-ra", 76 | "--strict-config", 77 | "--strict-markers", 78 | "--import-mode=importlib", 79 | ] 80 | filterwarnings = [ 81 | "error:::zarr.*", 82 | "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning", 83 | "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", 84 | "ignore:Creating a zarr.buffer.gpu.*:UserWarning", 85 | "ignore:Duplicate name:UserWarning", # from ZipFile 86 | "ignore:.*not part in the Zarr format 3.*:UserWarning", 87 | ] 88 | markers = ["gpu: mark a test as requiring CuPy and GPU"] 89 | 90 | [tool.ruff] 91 | src = ["src", "tests"] 92 | 93 | [tool.ruff.format] 94 | docstring-code-format = true 95 | 96 | [tool.ruff.lint] 97 | select = [ 98 | "E", # Error detected by Pycodestyle 99 | "F", # Errors detected by Pyflakes 100 | "W", # Warning detected by Pycodestyle 101 | "UP", # pyupgrade 102 | "I", # isort 103 | "TC", # manage type checking blocks 104 | "TID251", # Banned imports 105 | "ICN", # Follow import conventions 106 | "PTH", # Pathlib instead of os.path 107 | "PLR0917", # Ban APIs with too many positional parameters 108 | "FBT", # No positional boolean parameters 109 | "PT", # Pytest style 110 | "SIM", # Simplify control flow 111 | ] 112 | ignore = [ 113 | # line too long -> we accept long comment lines; black gets rid of long code lines 114 | "E501", 115 | # module level import not at top of file -> required to circumvent circular imports for Scanpys API 116 | "E402", 117 | # E266 too many leading '#' for block comment -> Scanpy allows them for comments into sections 118 | "E262", 119 | # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation 120 | "E741", 121 | ] 122 | [tool.ruff.lint.per-file-ignores] 123 | "**/*.pyi" = ["ICN001"] 124 | [tool.ruff.lint.isort] 125 | known-first-party = ["zarrs"] 126 | -------------------------------------------------------------------------------- /python/zarrs/__init__.py: -------------------------------------------------------------------------------- 1 | from zarr.registry import register_pipeline 2 | 3 | from ._internal import __version__ 4 | from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline 5 | from .utils import CollapsedDimensionError, DiscontiguousArrayError 6 | 7 | 8 | # Need to do this redirection so people can access the pipeline as `zarrs.ZarrsCodecPipeline` instead of `zarrs.pipeline.ZarrsCodecPipeline` 9 | class ZarrsCodecPipeline(_ZarrsCodecPipeline): 10 | pass 11 | 12 | 13 | register_pipeline(ZarrsCodecPipeline) 14 | 15 | __all__ = [ 16 | "ZarrsCodecPipeline", 17 | "DiscontiguousArrayError", 18 | "CollapsedDimensionError", 19 | "__version__", 20 | ] 21 | -------------------------------------------------------------------------------- /python/zarrs/_internal.pyi: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by pyo3_stub_gen 2 | # ruff: noqa: E501, F401 3 | 4 | import builtins 5 | import typing 6 | from enum import Enum, auto 7 | 8 | import numpy.typing 9 | 10 | class Basic: 11 | def __new__(cls, byte_interface: typing.Any, chunk_spec: 
typing.Any): ... 12 | ... 13 | 14 | class CodecPipelineImpl: 15 | def __new__( 16 | cls, 17 | metadata: builtins.str, 18 | *, 19 | validate_checksums: builtins.bool | None = None, 20 | store_empty_chunks: builtins.bool | None = None, 21 | chunk_concurrent_minimum: builtins.int | None = None, 22 | chunk_concurrent_maximum: builtins.int | None = None, 23 | num_threads: builtins.int | None = None, 24 | ): ... 25 | def retrieve_chunks_and_apply_index( 26 | self, 27 | chunk_descriptions: typing.Sequence[WithSubset], 28 | value: numpy.typing.NDArray[typing.Any], 29 | ) -> None: ... 30 | def store_chunks_with_indices( 31 | self, 32 | chunk_descriptions: typing.Sequence[WithSubset], 33 | value: numpy.typing.NDArray[typing.Any], 34 | ) -> None: ... 35 | 36 | class FilesystemStoreConfig: 37 | root: builtins.str 38 | 39 | class HttpStoreConfig: 40 | endpoint: builtins.str 41 | 42 | class WithSubset: 43 | def __new__( 44 | cls, 45 | item: Basic, 46 | chunk_subset: typing.Sequence[slice], 47 | subset: typing.Sequence[slice], 48 | shape: typing.Sequence[builtins.int], 49 | ): ... 50 | ... 51 | 52 | class StoreConfig(Enum): 53 | Filesystem = auto() 54 | Http = auto() 55 | -------------------------------------------------------------------------------- /python/zarrs/pipeline.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import json 5 | import re 6 | from dataclasses import dataclass 7 | from typing import TYPE_CHECKING, TypedDict 8 | 9 | import numpy as np 10 | from zarr.abc.codec import Codec, CodecPipeline 11 | from zarr.core import BatchedCodecPipeline 12 | from zarr.core.config import config 13 | 14 | if TYPE_CHECKING: 15 | from collections.abc import Generator, Iterable, Iterator 16 | from typing import Any, Self 17 | 18 | from zarr.abc.store import ByteGetter, ByteSetter 19 | from zarr.core.array_spec import ArraySpec 20 | from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer 21 | from zarr.core.chunk_grids import ChunkGrid 22 | from zarr.core.common import ChunkCoords 23 | from zarr.core.indexing import SelectorTuple 24 | 25 | from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3 26 | from .utils import ( 27 | CollapsedDimensionError, 28 | DiscontiguousArrayError, 29 | FillValueNoneError, 30 | make_chunk_info_for_rust_with_indices, 31 | ) 32 | 33 | 34 | class UnsupportedDataTypeError(Exception): 35 | pass 36 | 37 | 38 | class UnsupportedMetadataError(Exception): 39 | pass 40 | 41 | 42 | def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | None: 43 | try: 44 | return CodecPipelineImpl( 45 | codec_metadata_json, 46 | validate_checksums=config.get("codec_pipeline.validate_checksums", None), 47 | store_empty_chunks=config.get("array.write_empty_chunks", None), 48 | chunk_concurrent_minimum=config.get( 49 | "codec_pipeline.chunk_concurrent_minimum", None 50 | ), 51 | chunk_concurrent_maximum=config.get( 52 | "codec_pipeline.chunk_concurrent_maximum", None 53 | ), 54 | num_threads=config.get("threading.max_workers", None), 55 | ) 56 | except TypeError as e: 57 | if re.match(r"codec (delta|zlib) is not supported", str(e)): 58 | return None 59 | else: 60 | raise e 61 | 62 | 63 | def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]: 64 | for codec in codecs: 65 | if codec.__class__.__name__ == "V2Codec": 66 | codec_dict = codec.to_dict() 67 | if codec_dict.get("filters", None) is not None: 68 | filters = [ 69 | 
json.dumps(filter.get_config()) 70 | for filter in codec_dict.get("filters") 71 | ] 72 | else: 73 | filters = None 74 | if codec_dict.get("compressor", None) is not None: 75 | compressor_json = codec_dict.get("compressor").get_config() 76 | compressor = json.dumps(compressor_json) 77 | else: 78 | compressor = None 79 | codecs_v3 = codec_metadata_v2_to_v3(filters, compressor) 80 | for codec in codecs_v3: 81 | yield json.loads(codec) 82 | else: 83 | yield codec.to_dict() 84 | 85 | 86 | class ZarrsCodecPipelineState(TypedDict): 87 | codec_metadata_json: str 88 | codecs: tuple[Codec, ...] 89 | 90 | 91 | @dataclass 92 | class ZarrsCodecPipeline(CodecPipeline): 93 | codecs: tuple[Codec, ...] 94 | impl: CodecPipelineImpl | None 95 | codec_metadata_json: str 96 | python_impl: BatchedCodecPipeline 97 | 98 | def __getstate__(self) -> ZarrsCodecPipelineState: 99 | return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs} 100 | 101 | def __setstate__(self, state: ZarrsCodecPipelineState): 102 | self.codecs = state["codecs"] 103 | self.codec_metadata_json = state["codec_metadata_json"] 104 | self.impl = get_codec_pipeline_impl(self.codec_metadata_json) 105 | self.python_impl = BatchedCodecPipeline.from_codecs(self.codecs) 106 | 107 | def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: 108 | raise NotImplementedError("evolve_from_array_spec") 109 | 110 | @classmethod 111 | def from_codecs(cls, codecs: Iterable[Codec]) -> Self: 112 | codec_metadata = list(codecs_to_dict(codecs)) 113 | codec_metadata_json = json.dumps(codec_metadata) 114 | # TODO: upstream zarr-python has not settled on how to deal with configs yet 115 | # Should they be checked when an array is created, or when an operation is performed? 116 | # https://github.com/zarr-developers/zarr-python/issues/2409 117 | # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567 118 | return cls( 119 | codec_metadata_json=codec_metadata_json, 120 | codecs=tuple(codecs), 121 | impl=get_codec_pipeline_impl(codec_metadata_json), 122 | python_impl=BatchedCodecPipeline.from_codecs(codecs), 123 | ) 124 | 125 | @property 126 | def supports_partial_decode(self) -> bool: 127 | return False 128 | 129 | @property 130 | def supports_partial_encode(self) -> bool: 131 | return False 132 | 133 | def __iter__(self) -> Iterator[Codec]: 134 | yield from self.codecs 135 | 136 | def validate( 137 | self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid 138 | ) -> None: 139 | raise NotImplementedError("validate") 140 | 141 | def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: 142 | raise NotImplementedError("compute_encoded_size") 143 | 144 | async def decode( 145 | self, 146 | chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], 147 | ) -> Iterable[NDBuffer | None]: 148 | raise NotImplementedError("decode") 149 | 150 | async def encode( 151 | self, 152 | chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], 153 | ) -> Iterable[Buffer | None]: 154 | raise NotImplementedError("encode") 155 | 156 | async def read( 157 | self, 158 | batch_info: Iterable[ 159 | tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 160 | ], 161 | out: NDBuffer, # type: ignore 162 | drop_axes: tuple[int, ...] 
= (), # FIXME: unused 163 | ) -> None: 164 | # FIXME: Error if array is not in host memory 165 | if not out.dtype.isnative: 166 | raise RuntimeError("Non-native byte order not supported") 167 | try: 168 | if self.impl is None: 169 | raise UnsupportedMetadataError() 170 | self._raise_error_on_unsupported_batch_dtype(batch_info) 171 | chunks_desc = make_chunk_info_for_rust_with_indices( 172 | batch_info, drop_axes, out.shape 173 | ) 174 | except ( 175 | UnsupportedMetadataError, 176 | DiscontiguousArrayError, 177 | CollapsedDimensionError, 178 | UnsupportedDataTypeError, 179 | FillValueNoneError, 180 | ): 181 | await self.python_impl.read(batch_info, out, drop_axes) 182 | return None 183 | else: 184 | out: NDArrayLike = out.as_ndarray_like() 185 | await asyncio.to_thread( 186 | self.impl.retrieve_chunks_and_apply_index, 187 | chunks_desc, 188 | out, 189 | ) 190 | return None 191 | 192 | async def write( 193 | self, 194 | batch_info: Iterable[ 195 | tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 196 | ], 197 | value: NDBuffer, # type: ignore 198 | drop_axes: tuple[int, ...] = (), 199 | ) -> None: 200 | try: 201 | if self.impl is None: 202 | raise UnsupportedMetadataError() 203 | self._raise_error_on_unsupported_batch_dtype(batch_info) 204 | chunks_desc = make_chunk_info_for_rust_with_indices( 205 | batch_info, drop_axes, value.shape 206 | ) 207 | except ( 208 | UnsupportedMetadataError, 209 | DiscontiguousArrayError, 210 | CollapsedDimensionError, 211 | UnsupportedDataTypeError, 212 | FillValueNoneError, 213 | ): 214 | await self.python_impl.write(batch_info, value, drop_axes) 215 | return None 216 | else: 217 | # FIXME: Error if array is not in host memory 218 | value_np: NDArrayLike | np.ndarray = value.as_ndarray_like() 219 | if not value_np.dtype.isnative: 220 | value_np = np.ascontiguousarray( 221 | value_np, dtype=value_np.dtype.newbyteorder("=") 222 | ) 223 | elif not value_np.flags.c_contiguous: 224 | value_np = np.ascontiguousarray(value_np) 225 | await asyncio.to_thread( 226 | self.impl.store_chunks_with_indices, chunks_desc, value_np 227 | ) 228 | return None 229 | 230 | def _raise_error_on_unsupported_batch_dtype( 231 | self, 232 | batch_info: Iterable[ 233 | tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 234 | ], 235 | ): 236 | # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm 237 | # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out 238 | if any( 239 | info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"} 240 | for (_, info, _, _, _) in batch_info 241 | ): 242 | raise UnsupportedDataTypeError() 243 | -------------------------------------------------------------------------------- /python/zarrs/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zarrs/zarrs-python/bbe36d68e28617088b8d831641432d39b69867a0/python/zarrs/py.typed -------------------------------------------------------------------------------- /python/zarrs/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import operator 4 | import os 5 | from functools import reduce 6 | from typing import TYPE_CHECKING, Any 7 | 8 | import numpy as np 9 | from zarr.core.array_spec import ArraySpec 10 | from zarr.core.indexing import SelectorTuple, is_integer 11 | from 
zarr.core.metadata.v2 import _default_fill_value 12 | 13 | from zarrs._internal import Basic, WithSubset 14 | 15 | if TYPE_CHECKING: 16 | from collections.abc import Iterable 17 | from types import EllipsisType 18 | 19 | from zarr.abc.store import ByteGetter, ByteSetter 20 | 21 | 22 | # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor 23 | def get_max_threads() -> int: 24 | return (os.cpu_count() or 1) + 4 25 | 26 | 27 | class DiscontiguousArrayError(Exception): 28 | pass 29 | 30 | 31 | class CollapsedDimensionError(Exception): 32 | pass 33 | 34 | 35 | class FillValueNoneError(Exception): 36 | pass 37 | 38 | 39 | # This is a (mostly) copy of the function from zarr.core.indexing that fixes: 40 | # DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated 41 | # TODO: Upstream this fix 42 | def make_slice_selection(selection: tuple[np.ndarray | float]) -> list[slice]: 43 | ls: list[slice] = [] 44 | for dim_selection in selection: 45 | if is_integer(dim_selection): 46 | ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) 47 | elif isinstance(dim_selection, np.ndarray): 48 | dim_selection = dim_selection.ravel() 49 | if len(dim_selection) == 1: 50 | ls.append( 51 | slice(int(dim_selection.item()), int(dim_selection.item()) + 1, 1) 52 | ) 53 | else: 54 | diff = np.diff(dim_selection) 55 | if (diff != 1).any() and (diff != 0).any(): 56 | raise DiscontiguousArrayError(diff) 57 | ls.append(slice(dim_selection[0], dim_selection[-1] + 1, 1)) 58 | else: 59 | ls.append(dim_selection) 60 | return ls 61 | 62 | 63 | def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[slice]: 64 | if isinstance(selector_tuple, slice): 65 | return [selector_tuple] 66 | if all(isinstance(s, slice) for s in selector_tuple): 67 | return list(selector_tuple) 68 | return make_slice_selection(selector_tuple) 69 | 70 | 71 | def resulting_shape_from_index( 72 | array_shape: tuple[int, ...], 73 | index_tuple: tuple[int | slice | EllipsisType | np.ndarray], 74 | drop_axes: tuple[int, ...], 75 | *, 76 | pad: bool, 77 | ) -> tuple[int, ...]: 78 | result_shape = [] 79 | advanced_index_shapes = [ 80 | idx.shape for idx in index_tuple if isinstance(idx, np.ndarray) 81 | ] 82 | basic_shape_index = 0 83 | 84 | # Broadcast all advanced indices, if any 85 | if advanced_index_shapes: 86 | result_shape += np.broadcast_shapes(*advanced_index_shapes) 87 | # Consume dimensions from array_shape 88 | basic_shape_index += len(advanced_index_shapes) 89 | 90 | # Process each remaining index in index_tuple 91 | for idx in index_tuple: 92 | if isinstance(idx, int): 93 | # Integer index reduces dimension, so skip this dimension in array_shape 94 | basic_shape_index += 1 95 | elif isinstance(idx, slice): 96 | if idx.step is not None and idx.step > 1: 97 | raise DiscontiguousArrayError( 98 | "Step size greater than 1 is not supported" 99 | ) 100 | # Slice keeps dimension, adjust size accordingly 101 | start, stop, _ = idx.indices(array_shape[basic_shape_index]) 102 | result_shape.append(stop - start) 103 | basic_shape_index += 1 104 | elif idx is Ellipsis: 105 | # Calculate number of dimensions that Ellipsis should fill 106 | num_to_fill = len(array_shape) - len(index_tuple) + 1 107 | result_shape += array_shape[ 108 | basic_shape_index : basic_shape_index + num_to_fill 109 | ] 110 | basic_shape_index += num_to_fill 111 | elif not isinstance(idx, np.ndarray): 112 | raise ValueError(f"Invalid index type: {type(idx)}") 113 | 114 | # Step 
4: Append remaining dimensions from array_shape if fewer indices were used 115 | if basic_shape_index < len(array_shape) and pad: 116 | result_shape += array_shape[basic_shape_index:] 117 | 118 | return tuple(size for idx, size in enumerate(result_shape) if idx not in drop_axes) 119 | 120 | 121 | def prod_op(x: Iterable[int]) -> int: 122 | return reduce(operator.mul, x, 1) 123 | 124 | 125 | def get_shape_for_selector( 126 | selector_tuple: SelectorTuple, 127 | shape: tuple[int, ...], 128 | *, 129 | pad: bool, 130 | drop_axes: tuple[int, ...] = (), 131 | ) -> tuple[int, ...]: 132 | if isinstance(selector_tuple, slice | np.ndarray): 133 | return resulting_shape_from_index( 134 | shape, 135 | (selector_tuple,), 136 | drop_axes, 137 | pad=pad, 138 | ) 139 | return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad) 140 | 141 | 142 | def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any: 143 | if fill_value is None: 144 | fill_value = _default_fill_value(dtype) 145 | return fill_value 146 | 147 | 148 | def make_chunk_info_for_rust_with_indices( 149 | batch_info: Iterable[ 150 | tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 151 | ], 152 | drop_axes: tuple[int, ...], 153 | shape: tuple[int, ...], 154 | ) -> list[WithSubset]: 155 | shape = shape if shape else (1,) # constant array 156 | chunk_info_with_indices: list[WithSubset] = [] 157 | for ( 158 | byte_getter, 159 | chunk_spec, 160 | chunk_selection, 161 | out_selection, 162 | _, 163 | ) in batch_info: 164 | if chunk_spec.fill_value is None: 165 | chunk_spec = ArraySpec( 166 | chunk_spec.shape, 167 | chunk_spec.dtype, 168 | get_implicit_fill_value(chunk_spec.dtype, chunk_spec.fill_value), 169 | chunk_spec.config, 170 | chunk_spec.prototype, 171 | ) 172 | chunk_info = Basic(byte_getter, chunk_spec) 173 | out_selection_as_slices = selector_tuple_to_slice_selection(out_selection) 174 | chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection) 175 | shape_chunk_selection_slices = get_shape_for_selector( 176 | tuple(chunk_selection_as_slices), 177 | chunk_spec.shape, 178 | pad=True, 179 | drop_axes=drop_axes, 180 | ) 181 | shape_chunk_selection = get_shape_for_selector( 182 | chunk_selection, chunk_spec.shape, pad=True, drop_axes=drop_axes 183 | ) 184 | if prod_op(shape_chunk_selection) != prod_op(shape_chunk_selection_slices): 185 | raise CollapsedDimensionError( 186 | f"{shape_chunk_selection} != {shape_chunk_selection_slices}" 187 | ) 188 | chunk_info_with_indices.append( 189 | WithSubset( 190 | chunk_info, 191 | chunk_subset=chunk_selection_as_slices, 192 | subset=out_selection_as_slices, 193 | shape=shape, 194 | ) 195 | ) 196 | return chunk_info_with_indices 197 | -------------------------------------------------------------------------------- /src/bin/stub_gen.rs: -------------------------------------------------------------------------------- 1 | use pyo3_stub_gen::Result; 2 | 3 | fn main() -> Result<()> { 4 | let stub = zarrs_python::stub_info()?; 5 | stub.generate()?; 6 | Ok(()) 7 | } 8 | -------------------------------------------------------------------------------- /src/chunk_item.rs: -------------------------------------------------------------------------------- 1 | use std::num::NonZeroU64; 2 | 3 | use pyo3::{ 4 | exceptions::{PyRuntimeError, PyValueError}, 5 | pyclass, pymethods, 6 | types::{PyAnyMethods, PyBytes, PyBytesMethods, PyInt, PySlice, PySliceMethods as _}, 7 | Bound, PyAny, PyErr, PyResult, 8 | }; 9 | use 
pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; 10 | use zarrs::{ 11 | array::{ChunkRepresentation, DataType, FillValue}, 12 | array_subset::ArraySubset, 13 | metadata::v3::MetadataV3, 14 | storage::StoreKey, 15 | }; 16 | 17 | use crate::{store::StoreConfig, utils::PyErrExt}; 18 | 19 | pub(crate) trait ChunksItem { 20 | fn store_config(&self) -> StoreConfig; 21 | fn key(&self) -> &StoreKey; 22 | fn representation(&self) -> &ChunkRepresentation; 23 | } 24 | 25 | #[derive(Clone)] 26 | #[gen_stub_pyclass] 27 | #[pyclass] 28 | pub(crate) struct Basic { 29 | store: StoreConfig, 30 | key: StoreKey, 31 | representation: ChunkRepresentation, 32 | } 33 | 34 | fn fill_value_to_bytes(dtype: &str, fill_value: &Bound<'_, PyAny>) -> PyResult> { 35 | if dtype == "string" { 36 | // Match zarr-python 2.x.x string fill value behaviour with a 0 fill value 37 | // See https://github.com/zarr-developers/zarr-python/issues/2792#issuecomment-2644362122 38 | if let Ok(fill_value_downcast) = fill_value.downcast::() { 39 | let fill_value_usize: usize = fill_value_downcast.extract()?; 40 | if fill_value_usize == 0 { 41 | return Ok(vec![]); 42 | } 43 | Err(PyErr::new::(format!( 44 | "Cannot understand non-zero integer {fill_value_usize} fill value for dtype {dtype}" 45 | )))?; 46 | } 47 | } 48 | 49 | if let Ok(fill_value_downcast) = fill_value.downcast::() { 50 | Ok(fill_value_downcast.as_bytes().to_vec()) 51 | } else if fill_value.hasattr("tobytes")? { 52 | Ok(fill_value.call_method0("tobytes")?.extract()?) 53 | } else { 54 | Err(PyErr::new::(format!( 55 | "Unsupported fill value {fill_value:?}" 56 | ))) 57 | } 58 | } 59 | 60 | #[gen_stub_pymethods] 61 | #[pymethods] 62 | impl Basic { 63 | #[new] 64 | fn new(byte_interface: &Bound<'_, PyAny>, chunk_spec: &Bound<'_, PyAny>) -> PyResult { 65 | let store: StoreConfig = byte_interface.getattr("store")?.extract()?; 66 | let path: String = byte_interface.getattr("path")?.extract()?; 67 | 68 | let chunk_shape = chunk_spec.getattr("shape")?.extract()?; 69 | let mut dtype: String = chunk_spec 70 | .getattr("dtype")? 71 | .call_method0("__str__")? 
72 | .extract()?; 73 | if dtype == "object" { 74 | // zarrs doesn't understand `object` which is the output of `np.dtype("|O").__str__()` 75 | // but maps it to "string" internally https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L288 76 | dtype = String::from("string"); 77 | } 78 | let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?; 79 | let fill_value_bytes = fill_value_to_bytes(&dtype, &fill_value)?; 80 | Ok(Self { 81 | store, 82 | key: StoreKey::new(path).map_py_err::()?, 83 | representation: get_chunk_representation(chunk_shape, &dtype, fill_value_bytes)?, 84 | }) 85 | } 86 | } 87 | 88 | #[derive(Clone)] 89 | #[gen_stub_pyclass] 90 | #[pyclass] 91 | pub(crate) struct WithSubset { 92 | pub item: Basic, 93 | pub chunk_subset: ArraySubset, 94 | pub subset: ArraySubset, 95 | } 96 | 97 | #[gen_stub_pymethods] 98 | #[pymethods] 99 | impl WithSubset { 100 | #[new] 101 | #[allow(clippy::needless_pass_by_value)] 102 | fn new( 103 | item: Basic, 104 | chunk_subset: Vec>, 105 | subset: Vec>, 106 | shape: Vec, 107 | ) -> PyResult { 108 | let chunk_subset = 109 | selection_to_array_subset(&chunk_subset, &item.representation.shape_u64())?; 110 | let subset = selection_to_array_subset(&subset, &shape)?; 111 | Ok(Self { 112 | item, 113 | chunk_subset, 114 | subset, 115 | }) 116 | } 117 | } 118 | 119 | impl ChunksItem for Basic { 120 | fn store_config(&self) -> StoreConfig { 121 | self.store.clone() 122 | } 123 | fn key(&self) -> &StoreKey { 124 | &self.key 125 | } 126 | fn representation(&self) -> &ChunkRepresentation { 127 | &self.representation 128 | } 129 | } 130 | 131 | impl ChunksItem for WithSubset { 132 | fn store_config(&self) -> StoreConfig { 133 | self.item.store.clone() 134 | } 135 | fn key(&self) -> &StoreKey { 136 | &self.item.key 137 | } 138 | fn representation(&self) -> &ChunkRepresentation { 139 | &self.item.representation 140 | } 141 | } 142 | 143 | fn get_chunk_representation( 144 | chunk_shape: Vec, 145 | dtype: &str, 146 | fill_value: Vec, 147 | ) -> PyResult { 148 | // Get the chunk representation 149 | let data_type = DataType::from_metadata( 150 | &MetadataV3::new(dtype), 151 | zarrs::config::global_config().data_type_aliases_v3(), 152 | ) 153 | .map_py_err::()?; 154 | let chunk_shape = chunk_shape 155 | .into_iter() 156 | .map(|x| NonZeroU64::new(x).expect("chunk shapes should always be non-zero")) 157 | .collect(); 158 | let chunk_representation = 159 | ChunkRepresentation::new(chunk_shape, data_type, FillValue::new(fill_value)) 160 | .map_py_err::()?; 161 | Ok(chunk_representation) 162 | } 163 | 164 | fn slice_to_range(slice: &Bound<'_, PySlice>, length: isize) -> PyResult> { 165 | let indices = slice.indices(length)?; 166 | if indices.start < 0 { 167 | Err(PyErr::new::( 168 | "slice start must be greater than or equal to 0".to_string(), 169 | )) 170 | } else if indices.stop < 0 { 171 | Err(PyErr::new::( 172 | "slice stop must be greater than or equal to 0".to_string(), 173 | )) 174 | } else if indices.step != 1 { 175 | Err(PyErr::new::( 176 | "slice step must be equal to 1".to_string(), 177 | )) 178 | } else { 179 | Ok(u64::try_from(indices.start)?..u64::try_from(indices.stop)?) 
180 | } 181 | } 182 | 183 | fn selection_to_array_subset( 184 | selection: &[Bound<'_, PySlice>], 185 | shape: &[u64], 186 | ) -> PyResult { 187 | if selection.is_empty() { 188 | Ok(ArraySubset::new_with_shape(vec![1; shape.len()])) 189 | } else { 190 | let chunk_ranges = selection 191 | .iter() 192 | .zip(shape) 193 | .map(|(selection, &shape)| slice_to_range(selection, isize::try_from(shape)?)) 194 | .collect::>>()?; 195 | Ok(ArraySubset::new_with_ranges(&chunk_ranges)) 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/concurrency.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{exceptions::PyRuntimeError, PyErr, PyResult}; 2 | use zarrs::array::{ 3 | codec::CodecOptions, concurrency::calc_concurrency_outer_inner, ArrayCodecTraits, 4 | RecommendedConcurrency, 5 | }; 6 | 7 | use crate::{chunk_item::ChunksItem, CodecPipelineImpl}; 8 | 9 | pub trait ChunkConcurrentLimitAndCodecOptions { 10 | fn get_chunk_concurrent_limit_and_codec_options( 11 | &self, 12 | codec_pipeline_impl: &CodecPipelineImpl, 13 | ) -> PyResult>; 14 | } 15 | 16 | impl ChunkConcurrentLimitAndCodecOptions for Vec 17 | where 18 | T: ChunksItem, 19 | { 20 | fn get_chunk_concurrent_limit_and_codec_options( 21 | &self, 22 | codec_pipeline_impl: &CodecPipelineImpl, 23 | ) -> PyResult> { 24 | let num_chunks = self.len(); 25 | let Some(chunk_descriptions0) = self.first() else { 26 | return Ok(None); 27 | }; 28 | let chunk_representation = chunk_descriptions0.representation(); 29 | 30 | let codec_concurrency = codec_pipeline_impl 31 | .codec_chain 32 | .recommended_concurrency(chunk_representation) 33 | .map_err(|err| PyErr::new::(err.to_string()))?; 34 | 35 | let min_concurrent_chunks = 36 | std::cmp::min(codec_pipeline_impl.chunk_concurrent_minimum, num_chunks); 37 | let max_concurrent_chunks = 38 | std::cmp::max(codec_pipeline_impl.chunk_concurrent_maximum, num_chunks); 39 | let (chunk_concurrent_limit, codec_concurrent_limit) = calc_concurrency_outer_inner( 40 | codec_pipeline_impl.num_threads, 41 | &RecommendedConcurrency::new(min_concurrent_chunks..max_concurrent_chunks), 42 | &codec_concurrency, 43 | ); 44 | let codec_options = codec_pipeline_impl 45 | .codec_options 46 | .into_builder() 47 | .concurrent_target(codec_concurrent_limit) 48 | .build(); 49 | Ok(Some((chunk_concurrent_limit, codec_options))) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic)] 2 | #![allow(clippy::module_name_repetitions)] 3 | 4 | use std::borrow::Cow; 5 | use std::collections::HashMap; 6 | use std::ptr::NonNull; 7 | use std::sync::Arc; 8 | 9 | use chunk_item::WithSubset; 10 | use itertools::Itertools; 11 | use numpy::npyffi::PyArrayObject; 12 | use numpy::{PyArrayDescrMethods, PyUntypedArray, PyUntypedArrayMethods}; 13 | use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; 14 | use pyo3::prelude::*; 15 | use pyo3_stub_gen::define_stub_info_gatherer; 16 | use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; 17 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; 18 | use rayon_iter_concurrent_limit::iter_concurrent_limit; 19 | use unsafe_cell_slice::UnsafeCellSlice; 20 | use utils::is_whole_chunk; 21 | use zarrs::array::codec::{ 22 | ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder, 23 | }; 24 | 
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![warn(clippy::pedantic)]
2 | #![allow(clippy::module_name_repetitions)]
3 | 
4 | use std::borrow::Cow;
5 | use std::collections::HashMap;
6 | use std::ptr::NonNull;
7 | use std::sync::Arc;
8 | 
9 | use chunk_item::WithSubset;
10 | use itertools::Itertools;
11 | use numpy::npyffi::PyArrayObject;
12 | use numpy::{PyArrayDescrMethods, PyUntypedArray, PyUntypedArrayMethods};
13 | use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError};
14 | use pyo3::prelude::*;
15 | use pyo3_stub_gen::define_stub_info_gatherer;
16 | use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
17 | use rayon::iter::{IntoParallelIterator, ParallelIterator};
18 | use rayon_iter_concurrent_limit::iter_concurrent_limit;
19 | use unsafe_cell_slice::UnsafeCellSlice;
20 | use utils::is_whole_chunk;
21 | use zarrs::array::codec::{
22 |     ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder,
23 | };
24 | use zarrs::array::{
25 |     copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize,
26 |     CodecChain, FillValue,
27 | };
28 | use zarrs::array_subset::ArraySubset;
29 | use zarrs::metadata::v3::MetadataV3;
30 | use zarrs::storage::StoreKey;
31 | 
32 | mod chunk_item;
33 | mod concurrency;
34 | mod metadata_v2;
35 | mod runtime;
36 | mod store;
37 | #[cfg(test)]
38 | mod tests;
39 | mod utils;
40 | 
41 | use crate::chunk_item::ChunksItem;
42 | use crate::concurrency::ChunkConcurrentLimitAndCodecOptions;
43 | use crate::metadata_v2::codec_metadata_v2_to_v3;
44 | use crate::store::StoreManager;
45 | use crate::utils::{PyErrExt as _, PyUntypedArrayExt as _};
46 | 
47 | // TODO: Use a OnceLock for store with get_or_try_init when stabilised?
48 | #[gen_stub_pyclass]
49 | #[pyclass]
50 | pub struct CodecPipelineImpl {
51 |     pub(crate) stores: StoreManager,
52 |     pub(crate) codec_chain: Arc<CodecChain>,
53 |     pub(crate) codec_options: CodecOptions,
54 |     pub(crate) chunk_concurrent_minimum: usize,
55 |     pub(crate) chunk_concurrent_maximum: usize,
56 |     pub(crate) num_threads: usize,
57 | }
58 | 
59 | impl CodecPipelineImpl {
60 |     fn retrieve_chunk_bytes<'a, I: ChunksItem>(
61 |         &self,
62 |         item: &I,
63 |         codec_chain: &CodecChain,
64 |         codec_options: &CodecOptions,
65 |     ) -> PyResult<ArrayBytes<'a>> {
66 |         let value_encoded = self.stores.get(item)?;
67 |         let value_decoded = if let Some(value_encoded) = value_encoded {
68 |             let value_encoded: Vec<u8> = value_encoded.into(); // zero-copy in this case
69 |             codec_chain
70 |                 .decode(value_encoded.into(), item.representation(), codec_options)
71 |                 .map_py_err::<PyRuntimeError>()?
72 |         } else {
73 |             let array_size = ArraySize::new(
74 |                 item.representation().data_type().size(),
75 |                 item.representation().num_elements(),
76 |             );
77 |             ArrayBytes::new_fill_value(array_size, item.representation().fill_value())
78 |         };
79 |         Ok(value_decoded)
80 |     }
81 | 
82 |     fn store_chunk_bytes<I: ChunksItem>(
83 |         &self,
84 |         item: &I,
85 |         codec_chain: &CodecChain,
86 |         value_decoded: ArrayBytes,
87 |         codec_options: &CodecOptions,
88 |     ) -> PyResult<()> {
89 |         value_decoded
90 |             .validate(
91 |                 item.representation().num_elements(),
92 |                 item.representation().data_type().size(),
93 |             )
94 |             .map_py_err::<PyValueError>()?;
95 | 
96 |         if value_decoded.is_fill_value(item.representation().fill_value()) {
97 |             self.stores.erase(item)
98 |         } else {
99 |             let value_encoded = codec_chain
100 |                 .encode(value_decoded, item.representation(), codec_options)
101 |                 .map(Cow::into_owned)
102 |                 .map_py_err::<PyRuntimeError>()?;
103 | 
104 |             // Store the encoded chunk
105 |             self.stores.set(item, value_encoded.into())
106 |         }
107 |     }
108 | 
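    // Note: because `store_chunk_bytes` erases chunks whose decoded content
    // equals the fill value, writing the fill value over a whole chunk deletes
    // its key instead of storing an encoded chunk. This is what
    // tests/test_codecs.py::test_delete_empty_chunks observes (illustrative
    // cross-reference): after writing zeros into a fill_value=1 array and then
    // restoring the data, `store.get(f"{path}/c0/0", ...)` can still be None
    // for all-fill chunks.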
109 |     fn store_chunk_subset_bytes<I: ChunksItem>(
110 |         &self,
111 |         item: &I,
112 |         codec_chain: &CodecChain,
113 |         chunk_subset_bytes: ArrayBytes,
114 |         chunk_subset: &ArraySubset,
115 |         codec_options: &CodecOptions,
116 |     ) -> PyResult<()> {
117 |         let array_shape = item.representation().shape_u64();
118 |         if !chunk_subset.inbounds_shape(&array_shape) {
119 |             return Err(PyErr::new::<PyValueError, _>(format!(
120 |                 "chunk subset ({chunk_subset}) is out of bounds for array shape ({array_shape:?})"
121 |             )));
122 |         }
123 |         let data_type_size = item.representation().data_type().size();
124 | 
125 |         if chunk_subset.start().iter().all(|&o| o == 0) && chunk_subset.shape() == array_shape {
126 |             // Fast path if the chunk subset spans the entire chunk, no read required
127 |             self.store_chunk_bytes(item, codec_chain, chunk_subset_bytes, codec_options)
128 |         } else {
129 |             // Validate the chunk subset bytes
130 |             chunk_subset_bytes
131 |                 .validate(chunk_subset.num_elements(), data_type_size)
132 |                 .map_py_err::<PyValueError>()?;
133 | 
134 |             // Retrieve the chunk
135 |             let chunk_bytes_old = self.retrieve_chunk_bytes(item, codec_chain, codec_options)?;
136 | 
137 |             // Update the chunk
138 |             let chunk_bytes_new = update_array_bytes(
139 |                 chunk_bytes_old,
140 |                 &array_shape,
141 |                 chunk_subset,
142 |                 &chunk_subset_bytes,
143 |                 data_type_size,
144 |             )
145 |             .map_py_err::<PyRuntimeError>()?;
146 | 
147 |             // Store the updated chunk
148 |             self.store_chunk_bytes(item, codec_chain, chunk_bytes_new, codec_options)
149 |         }
150 |     }
151 | 
152 |     fn py_untyped_array_to_array_object<'a>(
153 |         value: &'a Bound<'_, PyUntypedArray>,
154 |     ) -> &'a PyArrayObject {
155 |         // TODO: Upstream a PyUntypedArray.as_array_ref()?
156 |         // https://github.com/zarrs/zarrs-python/pull/80/files/75be39184905d688ac04a5f8bca08c5241c458cd#r1918365296
157 |         let array_object_ptr: NonNull<PyArrayObject> = NonNull::new(value.as_array_ptr())
158 |             .expect("bug in numpy crate: Bound<'_, PyUntypedArray>::as_array_ptr unexpectedly returned a null pointer");
159 |         let array_object: &'a PyArrayObject = unsafe {
160 |             // SAFETY: the array object pointed to by array_object_ptr is valid for 'a
161 |             array_object_ptr.as_ref()
162 |         };
163 |         array_object
164 |     }
165 | 
166 |     fn nparray_to_slice<'a>(value: &'a Bound<'_, PyUntypedArray>) -> Result<&'a [u8], PyErr> {
167 |         if !value.is_c_contiguous() {
168 |             return Err(PyErr::new::<PyValueError, _>(
169 |                 "input array must be a C contiguous array".to_string(),
170 |             ));
171 |         }
172 |         let array_object: &PyArrayObject = Self::py_untyped_array_to_array_object(value);
173 |         let array_data = array_object.data.cast::<u8>();
174 |         let array_len = value.len() * value.dtype().itemsize();
175 |         let slice = unsafe {
176 |             // SAFETY: array_data is a valid pointer to a u8 array of length array_len
177 |             debug_assert!(!array_data.is_null());
178 |             std::slice::from_raw_parts(array_data, array_len)
179 |         };
180 |         Ok(slice)
181 |     }
182 | 
183 |     fn nparray_to_unsafe_cell_slice<'a>(
184 |         value: &'a Bound<'_, PyUntypedArray>,
185 |     ) -> Result<UnsafeCellSlice<'a, u8>, PyErr> {
186 |         if !value.is_c_contiguous() {
187 |             return Err(PyErr::new::<PyValueError, _>(
188 |                 "input array must be a C contiguous array".to_string(),
189 |             ));
190 |         }
191 |         let array_object: &PyArrayObject = Self::py_untyped_array_to_array_object(value);
192 |         let array_data = array_object.data.cast::<u8>();
193 |         let array_len = value.len() * value.dtype().itemsize();
194 |         let output = unsafe {
195 |             // SAFETY: array_data is a valid pointer to a u8 array of length array_len
196 |             debug_assert!(!array_data.is_null());
197 |             std::slice::from_raw_parts_mut(array_data, array_len)
198 |         };
199 |         Ok(UnsafeCellSlice::new(output))
200 |     }
201 | }
202 | 
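// How the Python side reaches the methods below (see python/zarrs/pipeline.py
// and tests/conftest.py; the config key is what the test suite itself sets):
//
//     zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
//
// after which zarr-python routes chunk encode/decode calls through this impl.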
203 | #[gen_stub_pymethods]
204 | #[pymethods]
205 | impl CodecPipelineImpl {
206 |     #[pyo3(signature = (
207 |         metadata,
208 |         *,
209 |         validate_checksums=None,
210 |         store_empty_chunks=None,
211 |         chunk_concurrent_minimum=None,
212 |         chunk_concurrent_maximum=None,
213 |         num_threads=None,
214 |     ))]
215 |     #[new]
216 |     fn new(
217 |         metadata: &str,
218 |         validate_checksums: Option<bool>,
219 |         store_empty_chunks: Option<bool>,
220 |         chunk_concurrent_minimum: Option<usize>,
221 |         chunk_concurrent_maximum: Option<usize>,
222 |         num_threads: Option<usize>,
223 |     ) -> PyResult<Self> {
224 |         let metadata: Vec<MetadataV3> =
225 |             serde_json::from_str(metadata).map_py_err::<PyTypeError>()?;
226 |         let codec_chain =
227 |             Arc::new(CodecChain::from_metadata(&metadata).map_py_err::<PyTypeError>()?);
228 |         let mut codec_options = CodecOptionsBuilder::new();
229 |         if let Some(validate_checksums) = validate_checksums {
230 |             codec_options = codec_options.validate_checksums(validate_checksums);
231 |         }
232 |         if let Some(store_empty_chunks) = store_empty_chunks {
233 |             codec_options = codec_options.store_empty_chunks(store_empty_chunks);
234 |         }
235 |         let codec_options = codec_options.build();
236 | 
237 |         let chunk_concurrent_minimum = chunk_concurrent_minimum
238 |             .unwrap_or(zarrs::config::global_config().chunk_concurrent_minimum());
239 |         let chunk_concurrent_maximum =
240 |             chunk_concurrent_maximum.unwrap_or(rayon::current_num_threads());
241 |         let num_threads = num_threads.unwrap_or(rayon::current_num_threads());
242 | 
243 |         Ok(Self {
244 |             stores: StoreManager::default(),
245 |             codec_chain,
246 |             codec_options,
247 |             chunk_concurrent_minimum,
248 |             chunk_concurrent_maximum,
249 |             num_threads,
250 |         })
251 |     }
252 | 
253 |     fn retrieve_chunks_and_apply_index(
254 |         &self,
255 |         py: Python,
256 |         chunk_descriptions: Vec<chunk_item::WithSubset>, // FIXME: Ref / iterable?
257 |         value: &Bound<'_, PyUntypedArray>,
258 |     ) -> PyResult<()> {
259 |         // Get input array
260 |         let output = Self::nparray_to_unsafe_cell_slice(value)?;
261 |         let output_shape: Vec<u64> = value.shape_zarr()?;
262 | 
263 |         // Adjust the concurrency based on the codec chain and the first chunk description
264 |         let Some((chunk_concurrent_limit, codec_options)) =
265 |             chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)?
266 |         else {
267 |             return Ok(());
268 |         };
269 | 
270 |         // Assemble partial decoders ahead of time and in parallel
271 |         let partial_chunk_descriptions = chunk_descriptions
272 |             .iter()
273 |             .filter(|item| !(is_whole_chunk(item)))
274 |             .unique_by(|item| item.key())
275 |             .collect::<Vec<_>>();
276 |         let mut partial_decoder_cache: HashMap<StoreKey, Arc<dyn ArrayPartialDecoderTraits>> =
277 |             HashMap::new();
278 |         if !partial_chunk_descriptions.is_empty() {
279 |             let key_decoder_pairs = iter_concurrent_limit!(
280 |                 chunk_concurrent_limit,
281 |                 partial_chunk_descriptions,
282 |                 map,
283 |                 |item| {
284 |                     let input_handle = self.stores.decoder(item)?;
285 |                     let partial_decoder = self
286 |                         .codec_chain
287 |                         .clone()
288 |                         .partial_decoder(
289 |                             Arc::new(input_handle),
290 |                             item.representation(),
291 |                             &codec_options,
292 |                         )
293 |                         .map_py_err::<PyRuntimeError>()?;
294 |                     Ok((item.key().clone(), partial_decoder))
295 |                 }
296 |             )
297 |             .collect::<PyResult<Vec<_>>>()?;
298 |             partial_decoder_cache.extend(key_decoder_pairs);
299 |         }
300 | 
301 |         py.allow_threads(move || {
302 |             // FIXME: the `decode_into` methods only support fixed length data types.
303 |             // For variable length data types, need a codepath with non `_into` methods.
304 |             // Collect all the subsets and copy into value on the Python side?
305 |             let update_chunk_subset = |item: chunk_item::WithSubset| {
306 |                 let chunk_item::WithSubset {
307 |                     item,
308 |                     subset,
309 |                     chunk_subset,
310 |                 } = item;
311 |                 let mut output_view = unsafe {
312 |                     // TODO: Is the following correct?
313 |                     // can we guarantee that when this function is called from Python with arbitrary arguments?
314 |                     // SAFETY: chunks represent disjoint array subsets
315 |                     ArrayBytesFixedDisjointView::new(
316 |                         output,
317 |                         // TODO: why is data_type in `item`, it should be derived from `output`, no?
318 |                         item.representation()
319 |                             .data_type()
320 |                             .fixed_size()
321 |                             .ok_or("variable length data type not supported")
322 |                             .map_py_err::<PyRuntimeError>()?,
323 |                         &output_shape,
324 |                         subset,
325 |                     )
326 |                     .map_py_err::<PyRuntimeError>()?
327 |                 };
328 | 
329 |                 // See zarrs::array::Array::retrieve_chunk_subset_into
330 |                 if chunk_subset.start().iter().all(|&o| o == 0)
331 |                     && chunk_subset.shape() == item.representation().shape_u64()
332 |                 {
333 |                     // See zarrs::array::Array::retrieve_chunk_into
334 |                     if let Some(chunk_encoded) = self.stores.get(&item)? {
335 |                         // Decode the encoded data into the output buffer
336 |                         let chunk_encoded: Vec<u8> = chunk_encoded.into();
337 |                         self.codec_chain.decode_into(
338 |                             Cow::Owned(chunk_encoded),
339 |                             item.representation(),
340 |                             &mut output_view,
341 |                             &codec_options,
342 |                         )
343 |                     } else {
344 |                         // The chunk is missing, write the fill value
345 |                         copy_fill_value_into(
346 |                             item.representation().data_type(),
347 |                             item.representation().fill_value(),
348 |                             &mut output_view,
349 |                         )
350 |                     }
351 |                 } else {
352 |                     let key = item.key();
353 |                     let partial_decoder = partial_decoder_cache.get(key).ok_or_else(|| {
354 |                         PyRuntimeError::new_err(format!("Partial decoder not found for key: {key}"))
355 |                     })?;
356 |                     partial_decoder.partial_decode_into(
357 |                         &chunk_subset,
358 |                         &mut output_view,
359 |                         &codec_options,
360 |                     )
361 |                 }
362 |                 .map_py_err::<PyRuntimeError>()
363 |             };
364 | 
365 |             iter_concurrent_limit!(
366 |                 chunk_concurrent_limit,
367 |                 chunk_descriptions,
368 |                 try_for_each,
369 |                 update_chunk_subset
370 |             )?;
371 | 
372 |             Ok(())
373 |         })
374 |     }
375 | 
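    // Read-path summary (illustrative): a selection covering a whole chunk is
    // decoded (or fill-value-filled) straight into the output view, while a
    // sub-chunk selection goes through the cached partial decoder for that
    // chunk's store key. E.g. with (5, 5) chunks, reading `[0:5, 0:5]` takes
    // the whole-chunk branch and `[0:3, 0:3]` the partial-decode branch.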
376 |     fn store_chunks_with_indices(
377 |         &self,
378 |         py: Python,
379 |         chunk_descriptions: Vec<chunk_item::WithSubset>,
380 |         value: &Bound<'_, PyUntypedArray>,
381 |     ) -> PyResult<()> {
382 |         enum InputValue<'a> {
383 |             Array(ArrayBytes<'a>),
384 |             Constant(FillValue),
385 |         }
386 | 
387 |         // Get input array
388 |         let input_slice = Self::nparray_to_slice(value)?;
389 |         let input = if value.ndim() > 0 {
390 |             // FIXME: Handle variable length data types, convert value to bytes and offsets
391 |             InputValue::Array(ArrayBytes::new_flen(Cow::Borrowed(input_slice)))
392 |         } else {
393 |             InputValue::Constant(FillValue::new(input_slice.to_vec()))
394 |         };
395 |         let input_shape: Vec<u64> = value.shape_zarr()?;
396 | 
397 |         // Adjust the concurrency based on the codec chain and the first chunk description
398 |         let Some((chunk_concurrent_limit, codec_options)) =
399 |             chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)?
400 |         else {
401 |             return Ok(());
402 |         };
403 | 
404 |         py.allow_threads(move || {
405 |             let store_chunk = |item: chunk_item::WithSubset| match &input {
406 |                 InputValue::Array(input) => {
407 |                     let chunk_subset_bytes = input
408 |                         .extract_array_subset(
409 |                             &item.subset,
410 |                             &input_shape,
411 |                             item.item.representation().data_type(),
412 |                         )
413 |                         .map_py_err::<PyRuntimeError>()?;
414 |                     self.store_chunk_subset_bytes(
415 |                         &item,
416 |                         &self.codec_chain,
417 |                         chunk_subset_bytes,
418 |                         &item.chunk_subset,
419 |                         &codec_options,
420 |                     )
421 |                 }
422 |                 InputValue::Constant(constant_value) => {
423 |                     let chunk_subset_bytes = ArrayBytes::new_fill_value(
424 |                         ArraySize::new(
425 |                             item.representation().data_type().size(),
426 |                             item.chunk_subset.num_elements(),
427 |                         ),
428 |                         constant_value,
429 |                     );
430 | 
431 |                     self.store_chunk_subset_bytes(
432 |                         &item,
433 |                         &self.codec_chain,
434 |                         chunk_subset_bytes,
435 |                         &item.chunk_subset,
436 |                         &codec_options,
437 |                     )
438 |                 }
439 |             };
440 | 
441 |             iter_concurrent_limit!(
442 |                 chunk_concurrent_limit,
443 |                 chunk_descriptions,
444 |                 try_for_each,
445 |                 store_chunk
446 |             )?;
447 | 
448 |             Ok(())
449 |         })
450 |     }
451 | }
452 | 
453 | /// A Python module implemented in Rust.
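///
/// Assumed Python-side view (the module builds as `zarrs._internal`, see
/// `python/zarrs/_internal.pyi`, and is wrapped by the `zarrs` package):
///
///     from zarrs._internal import CodecPipelineImpl, codec_metadata_v2_to_v3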
454 | #[pymodule]
455 | fn _internal(m: &Bound<'_, PyModule>) -> PyResult<()> {
456 |     m.add("__version__", env!("CARGO_PKG_VERSION"))?;
457 |     m.add_class::<CodecPipelineImpl>()?;
458 |     m.add_class::<chunk_item::Basic>()?;
459 |     m.add_class::<chunk_item::WithSubset>()?;
460 |     m.add_function(wrap_pyfunction!(codec_metadata_v2_to_v3, m)?)?;
461 |     Ok(())
462 | }
463 | 
464 | define_stub_info_gatherer!(stub_info);
465 | 
--------------------------------------------------------------------------------
/src/metadata_v2.rs:
--------------------------------------------------------------------------------
1 | use pyo3::{exceptions::PyRuntimeError, pyfunction, PyErr, PyResult};
2 | use zarrs::metadata::{
3 |     v2::{ArrayMetadataV2Order, MetadataV2},
4 |     v3::MetadataV3,
5 | };
6 | 
7 | #[pyfunction]
8 | #[pyo3(signature = (filters=None, compressor=None))]
9 | pub fn codec_metadata_v2_to_v3(
10 |     filters: Option<Vec<String>>,
11 |     compressor: Option<String>,
12 | ) -> PyResult<Vec<String>> {
13 |     // Try and convert filters/compressor to V2 metadata
14 |     let filters = if let Some(filters) = filters {
15 |         Some(
16 |             filters
17 |                 .into_iter()
18 |                 .map(|filter| {
19 |                     serde_json::from_str::<MetadataV2>(&filter)
20 |                         .map_err(|err| PyErr::new::<PyRuntimeError, _>(err.to_string()))
21 |                 })
22 |                 .collect::<Result<Vec<_>, _>>()?,
23 |         )
24 |     } else {
25 |         None
26 |     };
27 |     let compressor = if let Some(compressor) = compressor {
28 |         Some(
29 |             serde_json::from_str::<MetadataV2>(&compressor)
30 |                 .map_err(|err| PyErr::new::<PyRuntimeError, _>(err.to_string()))?,
31 |         )
32 |     } else {
33 |         None
34 |     };
35 | 
36 |     // FIXME: The array order, dimensionality, data type, and endianness are needed to exhaustively support all Zarr V2 data that zarrs can handle.
37 |     // However, CodecPipeline.from_codecs does not supply this information, and CodecPipeline.evolve_from_array_spec is seemingly never called.
38 |     let metadata = zarrs::metadata_ext::v2_to_v3::codec_metadata_v2_to_v3(
39 |         ArrayMetadataV2Order::C,
40 |         0,                        // unused with C order
41 |         &MetadataV3::new("bool"), // FIXME
42 |         None,
43 |         &filters,
44 |         &compressor,
45 |         zarrs::config::global_config().codec_aliases_v2(),
46 |         zarrs::config::global_config().codec_aliases_v3(),
47 |     )
48 |     .map_err(|err| {
49 |         // TODO: More informative error messages from zarrs for ArrayMetadataV2ToV3ConversionError
50 |         PyErr::new::<PyRuntimeError, _>(err.to_string())
51 |     })?;
52 |     Ok(metadata
53 |         .into_iter()
54 |         .map(|metadata| serde_json::to_string(&metadata).expect("infallible")) // TODO: Add method to zarrs
55 |         .collect())
56 | }
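Note: a hedged Python sketch of the conversion above (the numcodecs-style JSON
string is illustrative, not a tested value):

    from zarrs._internal import codec_metadata_v2_to_v3

    # One JSON string per resulting V3 codec, e.g. for a V2 zstd compressor:
    v3_codecs = codec_metadata_v2_to_v3(compressor='{"id": "zstd", "level": 5}')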
--------------------------------------------------------------------------------
/src/runtime.rs:
--------------------------------------------------------------------------------
1 | use std::sync::OnceLock;
2 | use tokio::runtime::Runtime;
3 | use zarrs::storage::storage_adapter::async_to_sync::AsyncToSyncBlockOn;
4 | 
5 | static RUNTIME: OnceLock<Runtime> = OnceLock::new();
6 | 
7 | pub struct TokioBlockOn(tokio::runtime::Handle);
8 | 
9 | impl AsyncToSyncBlockOn for TokioBlockOn {
10 |     fn block_on<F: core::future::Future>(&self, future: F) -> F::Output {
11 |         self.0.block_on(future)
12 |     }
13 | }
14 | 
15 | pub fn tokio_block_on() -> TokioBlockOn {
16 |     let runtime = RUNTIME.get_or_init(|| Runtime::new().expect("Failed to create Tokio runtime"));
17 |     TokioBlockOn(runtime.handle().clone())
18 | }
--------------------------------------------------------------------------------
/src/store.rs:
--------------------------------------------------------------------------------
1 | use std::{collections::HashMap, sync::Arc};
2 | 
3 | use opendal::Builder;
4 | use pyo3::{
5 |     exceptions::{PyNotImplementedError, PyValueError},
6 |     types::{PyAnyMethods, PyStringMethods, PyTypeMethods},
7 |     Bound, FromPyObject, PyAny, PyErr, PyResult,
8 | };
9 | use pyo3_stub_gen::derive::gen_stub_pyclass_enum;
10 | use zarrs::storage::{
11 |     storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, ReadableWritableListableStorage,
12 | };
13 | 
14 | use crate::{runtime::tokio_block_on, utils::PyErrExt};
15 | 
16 | mod filesystem;
17 | mod http;
18 | mod manager;
19 | 
20 | pub use self::filesystem::FilesystemStoreConfig;
21 | pub use self::http::HttpStoreConfig;
22 | pub(crate) use self::manager::StoreManager;
23 | 
24 | #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
25 | #[gen_stub_pyclass_enum]
26 | pub enum StoreConfig {
27 |     Filesystem(FilesystemStoreConfig),
28 |     Http(HttpStoreConfig),
29 |     // TODO: Add support for more stores
30 | }
31 | 
32 | impl<'py> FromPyObject<'py> for StoreConfig {
33 |     fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult<Self> {
34 |         let name = store.get_type().name()?;
35 |         let name = name.to_str()?;
36 |         match name {
37 |             "LocalStore" => {
38 |                 let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?;
39 |                 Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root)))
40 |             }
41 |             "FsspecStore" => {
42 |                 let fs = store.getattr("fs")?;
43 |                 let fs_name = fs.get_type().name()?;
44 |                 let fs_name = fs_name.to_str()?;
45 |                 let path: String = store.getattr("path")?.extract()?;
46 |                 let storage_options: HashMap<String, Bound<'py, PyAny>> =
47 |                     fs.getattr("storage_options")?.extract()?;
48 |                 match fs_name {
49 |                     "HTTPFileSystem" => Ok(StoreConfig::Http(HttpStoreConfig::new(
50 |                         &path,
51 |                         &storage_options,
52 |                     )?)),
53 |                     _ => Err(PyErr::new::<PyNotImplementedError, _>(format!(
54 |                         "zarrs-python does not support {fs_name} (FsspecStore) stores"
55 |                     ))),
56 |                 }
57 |             }
58 |             _ => Err(PyErr::new::<PyNotImplementedError, _>(format!(
59 |                 "zarrs-python does not support {name} stores"
60 |             ))),
61 |         }
62 |     }
63 | }
64 | 
65 | impl TryFrom<&StoreConfig> for ReadableWritableListableStorage {
66 |     type Error = PyErr;
67 | 
68 |     fn try_from(value: &StoreConfig) -> Result<Self, Self::Error> {
69 |         match value {
70 |             StoreConfig::Filesystem(config) => config.try_into(),
71 |             StoreConfig::Http(config) => config.try_into(),
72 |         }
73 |     }
74 | }
75 | 
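// Store mapping sketch (follows `extract_bound` above):
//
//     zarr.storage.LocalStore(root=...)                -> StoreConfig::Filesystem
//     zarr.storage.FsspecStore with fs=HTTPFileSystem  -> StoreConfig::Http
//     any other store / filesystem                     -> PyNotImplementedError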
76 | fn opendal_builder_to_sync_store<B: Builder>(
77 |     builder: B,
78 | ) -> PyResult<ReadableWritableListableStorage> {
79 |     let operator = opendal::Operator::new(builder)
80 |         .map_py_err::<PyValueError>()?
81 |         .finish();
82 |     let store = Arc::new(zarrs_opendal::AsyncOpendalStore::new(operator));
83 |     let store = Arc::new(AsyncToSyncStorageAdapter::new(store, tokio_block_on()));
84 |     Ok(store)
85 | }
--------------------------------------------------------------------------------
/src/store/filesystem.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use pyo3::{exceptions::PyRuntimeError, pyclass, PyErr};
4 | use pyo3_stub_gen::derive::gen_stub_pyclass;
5 | use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorage};
6 | 
7 | use crate::utils::PyErrExt;
8 | 
9 | #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
10 | #[gen_stub_pyclass]
11 | #[pyclass]
12 | pub struct FilesystemStoreConfig {
13 |     #[pyo3(get, set)]
14 |     pub root: String,
15 | }
16 | 
17 | impl FilesystemStoreConfig {
18 |     pub fn new(root: String) -> Self {
19 |         Self { root }
20 |     }
21 | }
22 | 
23 | impl TryInto<ReadableWritableListableStorage> for &FilesystemStoreConfig {
24 |     type Error = PyErr;
25 | 
26 |     fn try_into(self) -> Result<ReadableWritableListableStorage, Self::Error> {
27 |         let store =
28 |             Arc::new(FilesystemStore::new(self.root.clone()).map_py_err::<PyRuntimeError>()?);
29 |         Ok(store)
30 |     }
31 | }
--------------------------------------------------------------------------------
/src/store/http.rs:
--------------------------------------------------------------------------------
1 | use std::collections::HashMap;
2 | 
3 | use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyErr, PyResult};
4 | use pyo3_stub_gen::derive::gen_stub_pyclass;
5 | use zarrs::storage::ReadableWritableListableStorage;
6 | 
7 | use super::opendal_builder_to_sync_store;
8 | 
9 | #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
10 | #[gen_stub_pyclass]
11 | #[pyclass]
12 | pub struct HttpStoreConfig {
13 |     #[pyo3(get, set)]
14 |     pub endpoint: String,
15 | }
16 | 
17 | impl HttpStoreConfig {
18 |     pub fn new(path: &str, storage_options: &HashMap<String, Bound<'_, PyAny>>) -> PyResult<Self> {
19 |         if !storage_options.is_empty() {
20 |             for storage_option in storage_options.keys() {
21 |                 match storage_option.as_str() {
22 |                     // TODO: Add support for other storage options
23 |                     "asynchronous" => {}
24 |                     _ => {
25 |                         return Err(PyValueError::new_err(format!(
26 |                             "Unsupported storage option for HTTPFileSystem: {storage_option}"
27 |                         )));
28 |                     }
29 |                 }
30 |             }
31 |         }
32 | 
33 |         Ok(Self {
34 |             endpoint: path.to_string(),
35 |         })
36 |     }
37 | }
38 | 
39 | impl TryInto<ReadableWritableListableStorage> for &HttpStoreConfig {
40 |     type Error = PyErr;
41 | 
42 |     fn try_into(self) -> Result<ReadableWritableListableStorage, Self::Error> {
43 |         let builder = opendal::services::Http::default().endpoint(&self.endpoint);
44 |         opendal_builder_to_sync_store(builder)
45 |     }
46 | }
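Note: a hedged end-to-end sketch of the HTTP path (the URL is hypothetical; see
tests/test_zarrs_http.py for the real coverage):

    import zarr
    arr = zarr.open("https://example.com/data.zarr", mode="r")  # FsspecStore + HTTPFileSystem

    # zarrs-python converts the store to an HttpStoreConfig and serves reads
    # through an opendal HTTP operator behind the async-to-sync adapter.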
--------------------------------------------------------------------------------
/src/store/manager.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 |     collections::BTreeMap,
3 |     sync::{Arc, Mutex},
4 | };
5 | 
6 | use pyo3::{exceptions::PyRuntimeError, PyResult};
7 | use zarrs::{
8 |     array::codec::StoragePartialDecoder,
9 |     storage::{Bytes, MaybeBytes, ReadableWritableListableStorage, StorageHandle},
10 | };
11 | 
12 | use crate::{chunk_item::ChunksItem, store::PyErrExt as _};
13 | 
14 | use super::StoreConfig;
15 | 
16 | #[derive(Default)]
17 | pub(crate) struct StoreManager(Mutex<BTreeMap<StoreConfig, ReadableWritableListableStorage>>);
18 | 
19 | impl StoreManager {
20 |     fn store<I: ChunksItem>(&self, item: &I) -> PyResult<ReadableWritableListableStorage> {
21 |         use std::collections::btree_map::Entry::{Occupied, Vacant};
22 |         match self
23 |             .0
24 |             .lock()
25 |             .map_py_err::<PyRuntimeError>()?
26 |             .entry(item.store_config())
27 |         {
28 |             Occupied(e) => Ok(e.get().clone()),
29 |             Vacant(e) => Ok(e.insert((&item.store_config()).try_into()?).clone()),
30 |         }
31 |     }
32 | 
33 |     pub(crate) fn get<I: ChunksItem>(&self, item: &I) -> PyResult<MaybeBytes> {
34 |         self.store(item)?
35 |             .get(item.key())
36 |             .map_py_err::<PyRuntimeError>()
37 |     }
38 | 
39 |     pub(crate) fn set<I: ChunksItem>(&self, item: &I, value: Bytes) -> PyResult<()> {
40 |         self.store(item)?
41 |             .set(item.key(), value)
42 |             .map_py_err::<PyRuntimeError>()
43 |     }
44 | 
45 |     pub(crate) fn erase<I: ChunksItem>(&self, item: &I) -> PyResult<()> {
46 |         self.store(item)?
47 |             .erase(item.key())
48 |             .map_py_err::<PyRuntimeError>()
49 |     }
50 | 
51 |     pub(crate) fn decoder<I: ChunksItem>(&self, item: &I) -> PyResult<StoragePartialDecoder> {
52 |         // Partially decode the chunk into the output buffer
53 |         let storage_handle = Arc::new(StorageHandle::new(self.store(item)?));
54 |         // NOTE: Normally a storage transformer would exist between the storage handle and the input handle
55 |         // but zarr-python does not support them nor forward them to the codec pipeline
56 |         Ok(StoragePartialDecoder::new(
57 |             storage_handle,
58 |             item.key().clone(),
59 |         ))
60 |     }
61 | }
--------------------------------------------------------------------------------
/src/tests.rs:
--------------------------------------------------------------------------------
1 | use pyo3::ffi::c_str;
2 | 
3 | use numpy::PyUntypedArray;
4 | use pyo3::{
5 |     types::{PyAnyMethods, PyModule},
6 |     Bound, PyResult, Python,
7 | };
8 | 
9 | use crate::CodecPipelineImpl;
10 | 
11 | #[test]
12 | fn test_nparray_to_unsafe_cell_slice_empty() -> PyResult<()> {
13 |     pyo3::prepare_freethreaded_python();
14 |     Python::with_gil(|py| {
15 |         let arr: Bound<'_, PyUntypedArray> = PyModule::from_code(
16 |             py,
17 |             c_str!(
18 |                 "def empty_array():
19 |                     import numpy as np
20 |                     return np.empty(0, dtype=np.uint8)"
21 |             ),
22 |             c_str!(""),
23 |             c_str!(""),
24 |         )?
25 |         .getattr("empty_array")?
26 |         .call0()?
27 |         .extract()?;
28 | 
29 |         let slice = CodecPipelineImpl::nparray_to_unsafe_cell_slice(&arr)?;
30 |         assert!(slice.is_empty());
31 |         Ok(())
32 |     })
33 | }
--------------------------------------------------------------------------------
/src/utils.rs:
--------------------------------------------------------------------------------
1 | use std::fmt::Display;
2 | 
3 | use numpy::{PyUntypedArray, PyUntypedArrayMethods};
4 | use pyo3::{Bound, PyErr, PyResult, PyTypeInfo};
5 | 
6 | use crate::{ChunksItem, WithSubset};
7 | 
8 | pub(crate) trait PyErrExt<T> {
9 |     fn map_py_err<E: PyTypeInfo>(self) -> PyResult<T>;
10 | }
11 | 
12 | impl<T, E: Display> PyErrExt<T> for Result<T, E> {
13 |     fn map_py_err<PE: PyTypeInfo>(self) -> PyResult<T> {
14 |         self.map_err(|e| PyErr::new::<PE, _>(format!("{e}")))
15 |     }
16 | }
17 | 
18 | pub(crate) trait PyUntypedArrayExt {
19 |     fn shape_zarr(&self) -> PyResult<Vec<u64>>;
20 | }
21 | 
22 | impl PyUntypedArrayExt for Bound<'_, PyUntypedArray> {
23 |     fn shape_zarr(&self) -> PyResult<Vec<u64>> {
24 |         Ok(if self.shape().is_empty() {
25 |             vec![1] // scalar value
26 |         } else {
27 |             self.shape()
28 |                 .iter()
29 |                 .map(|&i| u64::try_from(i))
30 |                 .collect::<Result<_, _>>()?
31 | }) 32 | } 33 | } 34 | 35 | pub fn is_whole_chunk(item: &WithSubset) -> bool { 36 | item.chunk_subset.start().iter().all(|&o| o == 0) 37 | && item.chunk_subset.shape() == item.representation().shape_u64() 38 | } 39 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | from typing import TYPE_CHECKING, Literal 5 | 6 | import numpy as np 7 | import numpy.typing as npt 8 | import pytest 9 | from zarr import config 10 | from zarr.core.common import ChunkCoords 11 | from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore 12 | 13 | from zarrs.utils import ( # noqa: F401 14 | CollapsedDimensionError, 15 | DiscontiguousArrayError, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from collections.abc import Iterable 20 | from typing import Any, Literal 21 | 22 | from zarr.abc.store import Store 23 | from zarr.core.common import ChunkCoords, MemoryOrder 24 | 25 | 26 | @dataclass 27 | class ArrayRequest: 28 | shape: ChunkCoords 29 | dtype: str 30 | order: MemoryOrder 31 | 32 | 33 | @pytest.fixture(autouse=True) 34 | def _setup_codec_pipeline(): 35 | config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) 36 | pass 37 | 38 | 39 | async def parse_store( 40 | store: Literal["local", "memory", "remote", "zip"], path: str 41 | ) -> LocalStore | MemoryStore | FsspecStore | ZipStore: 42 | if store == "local": 43 | return await LocalStore.open(path) 44 | if store == "memory": 45 | return await MemoryStore.open() 46 | if store == "remote": 47 | return await FsspecStore.open(url=path) 48 | if store == "zip": 49 | return await ZipStore.open(path + "/zarr.zip") 50 | raise AssertionError 51 | 52 | 53 | @pytest.fixture(params=["local"]) 54 | async def store(request: pytest.FixtureRequest, tmpdir) -> Store: 55 | param = request.param 56 | return await parse_store(param, str(tmpdir)) 57 | 58 | 59 | @pytest.fixture 60 | def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]: 61 | array_request: ArrayRequest = request.param 62 | return ( 63 | np.arange(np.prod(array_request.shape)) 64 | .reshape(array_request.shape, order=array_request.order) 65 | .astype(array_request.dtype) 66 | ) 67 | 68 | 69 | # tests that also fail with zarr-python's default codec pipeline 70 | zarr_python_default_codec_pipeline_failures = [ 71 | # ellipsis weirdness, need to report, v3 72 | "test_roundtrip[oindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 73 | "test_roundtrip[oindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 74 | "test_roundtrip[vindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 75 | "test_roundtrip[vindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 76 | "test_roundtrip[oindex-2d-across_chunks_indices_array-ellipsis-v3]", 77 | "test_roundtrip[vindex-2d-ellipsis-across_chunks_indices_array-v3]", 78 | "test_roundtrip[vindex-2d-across_chunks_indices_array-ellipsis-v3]", 79 | "test_roundtrip[vindex-2d-ellipsis-contiguous_in_chunk_array-v3]", 80 | "test_roundtrip[vindex-2d-ellipsis-discontinuous_in_chunk_array-v3]", 81 | "test_roundtrip_read_only_zarrs[oindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 82 | "test_roundtrip_read_only_zarrs[oindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 83 | "test_roundtrip_read_only_zarrs[vindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 84 | "test_roundtrip_read_only_zarrs[vindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 85 | 
"test_roundtrip_read_only_zarrs[oindex-2d-across_chunks_indices_array-ellipsis-v3]", 86 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-across_chunks_indices_array-v3]", 87 | "test_roundtrip_read_only_zarrs[vindex-2d-across_chunks_indices_array-ellipsis-v3]", 88 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-contiguous_in_chunk_array-v3]", 89 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-discontinuous_in_chunk_array-v3]", 90 | # v2 91 | "test_roundtrip[oindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 92 | "test_roundtrip[oindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 93 | "test_roundtrip[vindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 94 | "test_roundtrip[vindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 95 | "test_roundtrip[oindex-2d-across_chunks_indices_array-ellipsis-v2]", 96 | "test_roundtrip[vindex-2d-ellipsis-across_chunks_indices_array-v2]", 97 | "test_roundtrip[vindex-2d-across_chunks_indices_array-ellipsis-v2]", 98 | "test_roundtrip[vindex-2d-ellipsis-contiguous_in_chunk_array-v2]", 99 | "test_roundtrip[vindex-2d-ellipsis-discontinuous_in_chunk_array-v2]", 100 | "test_roundtrip_read_only_zarrs[oindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 101 | "test_roundtrip_read_only_zarrs[oindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 102 | "test_roundtrip_read_only_zarrs[vindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 103 | "test_roundtrip_read_only_zarrs[vindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 104 | "test_roundtrip_read_only_zarrs[oindex-2d-across_chunks_indices_array-ellipsis-v2]", 105 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-across_chunks_indices_array-v2]", 106 | "test_roundtrip_read_only_zarrs[vindex-2d-across_chunks_indices_array-ellipsis-v2]", 107 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-contiguous_in_chunk_array-v2]", 108 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-discontinuous_in_chunk_array-v2]", 109 | # need to investigate this one - it seems to fail with the default pipeline 110 | # but it makes some sense that it succeeds with ours since we fall-back to numpy indexing 111 | # in the case of a collapsed dimension 112 | # "test_roundtrip_read_only_zarrs[vindex-2d-contiguous_in_chunk_array-contiguous_in_chunk_array]", 113 | ] 114 | 115 | 116 | def pytest_collection_modifyitems( 117 | config: pytest.Config, items: Iterable[pytest.Item] 118 | ) -> None: 119 | for item in items: 120 | if item.name in zarr_python_default_codec_pipeline_failures: 121 | xfail_marker = pytest.mark.xfail( 122 | reason="This test fails with the zarr-python default codec pipeline." 
123 | ) 124 | item.add_marker(xfail_marker) 125 | -------------------------------------------------------------------------------- /tests/test_blosc.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import pytest 5 | from zarr import AsyncArray 6 | from zarr.abc.store import Store 7 | from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec 8 | from zarr.core.buffer import default_buffer_prototype 9 | from zarr.storage import StorePath 10 | 11 | 12 | @pytest.mark.parametrize("dtype", ["uint8", "uint16"]) 13 | async def test_blosc_evolve(store: Store, dtype: str) -> None: 14 | typesize = np.dtype(dtype).itemsize 15 | path = "blosc_evolve" 16 | spath = StorePath(store, path) 17 | await AsyncArray.create( 18 | spath, 19 | shape=(16, 16), 20 | chunk_shape=(16, 16), 21 | dtype=dtype, 22 | fill_value=0, 23 | codecs=[BytesCodec(), BloscCodec()], 24 | ) 25 | buf = await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype()) 26 | assert buf is not None 27 | zarr_json = json.loads(buf.to_bytes()) 28 | blosc_configuration_json = zarr_json["codecs"][1]["configuration"] 29 | assert blosc_configuration_json["typesize"] == typesize 30 | if typesize == 1: 31 | assert blosc_configuration_json["shuffle"] == "bitshuffle" 32 | else: 33 | assert blosc_configuration_json["shuffle"] == "shuffle" 34 | 35 | path2 = "blosc_evolve_sharding" 36 | spath2 = StorePath(store, path2) 37 | await AsyncArray.create( 38 | spath2, 39 | shape=(16, 16), 40 | chunk_shape=(16, 16), 41 | dtype=dtype, 42 | fill_value=0, 43 | codecs=[ 44 | ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()]) 45 | ], 46 | ) 47 | buf = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) 48 | assert buf is not None 49 | zarr_json = json.loads(buf.to_bytes()) 50 | blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1][ 51 | "configuration" 52 | ] 53 | assert blosc_configuration_json["typesize"] == typesize 54 | if typesize == 1: 55 | assert blosc_configuration_json["shuffle"] == "bitshuffle" 56 | else: 57 | assert blosc_configuration_json["shuffle"] == "shuffle" 58 | -------------------------------------------------------------------------------- /tests/test_codecs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from dataclasses import dataclass 5 | from typing import TYPE_CHECKING 6 | 7 | import numpy as np 8 | import pytest 9 | from zarr import Array, AsyncArray, config 10 | from zarr.codecs import ( 11 | BytesCodec, 12 | ShardingCodec, 13 | TransposeCodec, 14 | ) 15 | from zarr.core.buffer import default_buffer_prototype 16 | from zarr.core.indexing import Selection, morton_order_iter 17 | from zarr.storage import StorePath 18 | 19 | if TYPE_CHECKING: 20 | from zarr.abc.codec import Codec 21 | from zarr.abc.store import Store 22 | from zarr.core.buffer.core import NDArrayLike 23 | from zarr.core.common import MemoryOrder 24 | 25 | 26 | @dataclass(frozen=True) 27 | class _AsyncArrayProxy: 28 | array: AsyncArray 29 | 30 | def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: 31 | return _AsyncArraySelectionProxy(self.array, selection) 32 | 33 | 34 | @dataclass(frozen=True) 35 | class _AsyncArraySelectionProxy: 36 | array: AsyncArray 37 | selection: Selection 38 | 39 | async def get(self) -> NDArrayLike: 40 | return await self.array.getitem(self.selection) 41 | 42 | 
async def set(self, value: np.ndarray) -> None: 43 | return await self.array.setitem(self.selection, value) 44 | 45 | 46 | def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: 47 | if order == "F": 48 | return tuple(ndim - x - 1 for x in range(ndim)) 49 | else: 50 | return tuple(range(ndim)) 51 | 52 | 53 | def test_sharding_pickle() -> None: 54 | """ 55 | Test that sharding codecs can be pickled 56 | """ 57 | pass 58 | 59 | 60 | @pytest.mark.parametrize("input_order", ["F", "C"]) 61 | @pytest.mark.parametrize("store_order", ["F", "C"]) 62 | @pytest.mark.parametrize("runtime_write_order", ["C"]) 63 | @pytest.mark.parametrize("runtime_read_order", ["C"]) 64 | @pytest.mark.parametrize("with_sharding", [True, False]) 65 | async def test_order( 66 | *, 67 | store: Store, 68 | input_order: MemoryOrder, 69 | store_order: MemoryOrder, 70 | runtime_write_order: MemoryOrder, 71 | runtime_read_order: MemoryOrder, 72 | with_sharding: bool, 73 | ) -> None: 74 | data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) 75 | path = "order" 76 | spath = StorePath(store, path=path) 77 | codecs_: list[Codec] = ( 78 | [ 79 | ShardingCodec( 80 | chunk_shape=(16, 8), 81 | codecs=[ 82 | TransposeCodec(order=order_from_dim(store_order, data.ndim)), 83 | BytesCodec(), 84 | ], 85 | ) 86 | ] 87 | if with_sharding 88 | else [ 89 | TransposeCodec(order=order_from_dim(store_order, data.ndim)), 90 | BytesCodec(), 91 | ] 92 | ) 93 | 94 | with config.set({"array.order": runtime_write_order}): 95 | a = await AsyncArray.create( 96 | spath, 97 | shape=data.shape, 98 | chunk_shape=(32, 8), 99 | dtype=data.dtype, 100 | fill_value=0, 101 | chunk_key_encoding=("v2", "."), 102 | codecs=codecs_, 103 | ) 104 | 105 | await _AsyncArrayProxy(a)[:, :].set(data) 106 | read_data = await _AsyncArrayProxy(a)[:, :].get() 107 | assert np.array_equal(data, read_data) 108 | 109 | with config.set({"array.order": runtime_read_order}): 110 | a = await AsyncArray.open( 111 | spath, 112 | ) 113 | read_data = await _AsyncArrayProxy(a)[:, :].get() 114 | assert np.array_equal(data, read_data) 115 | 116 | if runtime_read_order == "F": 117 | assert read_data.flags["F_CONTIGUOUS"] 118 | assert not read_data.flags["C_CONTIGUOUS"] 119 | else: 120 | assert not read_data.flags["F_CONTIGUOUS"] 121 | assert read_data.flags["C_CONTIGUOUS"] 122 | 123 | 124 | @pytest.mark.parametrize("input_order", ["F", "C"]) 125 | @pytest.mark.parametrize("runtime_write_order", ["C"]) 126 | @pytest.mark.parametrize("runtime_read_order", ["C"]) 127 | @pytest.mark.parametrize("with_sharding", [True, False]) 128 | def test_order_implicit( 129 | *, 130 | store: Store, 131 | input_order: MemoryOrder, 132 | runtime_write_order: MemoryOrder, 133 | runtime_read_order: MemoryOrder, 134 | with_sharding: bool, 135 | ) -> None: 136 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) 137 | path = "order_implicit" 138 | spath = StorePath(store, path) 139 | codecs_: list[Codec] | None = ( 140 | [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None 141 | ) 142 | 143 | with config.set({"array.order": runtime_write_order}): 144 | a = Array.create( 145 | spath, 146 | shape=data.shape, 147 | chunk_shape=(16, 16), 148 | dtype=data.dtype, 149 | fill_value=0, 150 | codecs=codecs_, 151 | ) 152 | 153 | a[:, :] = data 154 | 155 | with config.set({"array.order": runtime_read_order}): 156 | a = Array.open(spath) 157 | read_data = a[:, :] 158 | assert np.array_equal(data, read_data) 159 | 160 | if runtime_read_order == "F": 161 | 
if runtime_read_order == "F":
161 |
assert read_data.flags["F_CONTIGUOUS"] 162 | assert not read_data.flags["C_CONTIGUOUS"] 163 | else: 164 | assert not read_data.flags["F_CONTIGUOUS"] 165 | assert read_data.flags["C_CONTIGUOUS"] 166 | 167 | 168 | def test_open(store: Store) -> None: 169 | spath = StorePath(store) 170 | a = Array.create( 171 | spath, 172 | shape=(16, 16), 173 | chunk_shape=(16, 16), 174 | dtype="int32", 175 | fill_value=0, 176 | ) 177 | b = Array.open(spath) 178 | assert a.metadata == b.metadata 179 | 180 | 181 | def test_morton() -> None: 182 | assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] 183 | assert list(morton_order_iter((2, 2, 2))) == [ 184 | (0, 0, 0), 185 | (1, 0, 0), 186 | (0, 1, 0), 187 | (1, 1, 0), 188 | (0, 0, 1), 189 | (1, 0, 1), 190 | (0, 1, 1), 191 | (1, 1, 1), 192 | ] 193 | assert list(morton_order_iter((2, 2, 2, 2))) == [ 194 | (0, 0, 0, 0), 195 | (1, 0, 0, 0), 196 | (0, 1, 0, 0), 197 | (1, 1, 0, 0), 198 | (0, 0, 1, 0), 199 | (1, 0, 1, 0), 200 | (0, 1, 1, 0), 201 | (1, 1, 1, 0), 202 | (0, 0, 0, 1), 203 | (1, 0, 0, 1), 204 | (0, 1, 0, 1), 205 | (1, 1, 0, 1), 206 | (0, 0, 1, 1), 207 | (1, 0, 1, 1), 208 | (0, 1, 1, 1), 209 | (1, 1, 1, 1), 210 | ] 211 | 212 | 213 | def test_write_partial_chunks(store: Store) -> None: 214 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 215 | spath = StorePath(store) 216 | a = Array.create( 217 | spath, 218 | shape=data.shape, 219 | chunk_shape=(20, 20), 220 | dtype=data.dtype, 221 | fill_value=1, 222 | ) 223 | a[0:16, 0:16] = data 224 | assert np.array_equal(a[0:16, 0:16], data) 225 | 226 | 227 | async def test_delete_empty_chunks(store: Store) -> None: 228 | data = np.ones((16, 16)) 229 | path = "delete_empty_chunks" 230 | spath = StorePath(store, path) 231 | a = await AsyncArray.create( 232 | spath, 233 | shape=data.shape, 234 | chunk_shape=(32, 32), 235 | dtype=data.dtype, 236 | fill_value=1, 237 | ) 238 | await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) 239 | await _AsyncArrayProxy(a)[:16, :16].set(data) 240 | assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) 241 | assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None 242 | 243 | 244 | async def test_dimension_names(store: Store) -> None: 245 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 246 | path = "dimension_names" 247 | spath = StorePath(store, path) 248 | await AsyncArray.create( 249 | spath, 250 | shape=data.shape, 251 | chunk_shape=(16, 16), 252 | dtype=data.dtype, 253 | fill_value=0, 254 | dimension_names=("x", "y"), 255 | ) 256 | 257 | assert (await AsyncArray.open(spath)).metadata.dimension_names == ( 258 | "x", 259 | "y", 260 | ) 261 | path2 = "dimension_names2" 262 | spath2 = StorePath(store, path2) 263 | await AsyncArray.create( 264 | spath2, 265 | shape=data.shape, 266 | chunk_shape=(16, 16), 267 | dtype=data.dtype, 268 | fill_value=0, 269 | ) 270 | 271 | assert (await AsyncArray.open(spath2)).metadata.dimension_names is None 272 | zarr_json_buffer = await store.get( 273 | f"{path2}/zarr.json", prototype=default_buffer_prototype() 274 | ) 275 | assert zarr_json_buffer is not None 276 | assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) 277 | 278 | 279 | def test_invalid_metadata(store: Store) -> None: 280 | # LD: Disabled for `zarrs`. Including endianness for a single-byte data type is not invalid. 
281 | # spath2 = StorePath(store, "invalid_endian") 282 | # with pytest.raises(TypeError): 283 | # Array.create( 284 | # spath2, 285 | # shape=(16, 16), 286 | # chunk_shape=(16, 16), 287 | # dtype=np.dtype("uint8"), 288 | # fill_value=0, 289 | # codecs=[ 290 | # BytesCodec(endian="big"), 291 | # TransposeCodec(order=order_from_dim("F", 2)), 292 | # ], 293 | # ) 294 | spath3 = StorePath(store, "invalid_order") 295 | with pytest.raises(TypeError): 296 | Array.create( 297 | spath3, 298 | shape=(16, 16), 299 | chunk_shape=(16, 16), 300 | dtype=np.dtype("uint8"), 301 | fill_value=0, 302 | codecs=[ 303 | BytesCodec(), 304 | TransposeCodec(order="F"), # type: ignore[arg-type] 305 | ], 306 | ) 307 | spath4 = StorePath(store, "invalid_missing_bytes_codec") 308 | with pytest.raises(ValueError, match=r".*[Cc]odec.*required"): 309 | Array.create( 310 | spath4, 311 | shape=(16, 16), 312 | chunk_shape=(16, 16), 313 | dtype=np.dtype("uint8"), 314 | fill_value=0, 315 | codecs=[ 316 | TransposeCodec(order=order_from_dim("F", 2)), 317 | ], 318 | ) 319 | spath5 = StorePath(store, "invalid_inner_chunk_shape") 320 | with pytest.raises( 321 | ValueError, match=r".*shard.*chunk_shape.*array.*shape.*need.*same.*dimensions" 322 | ): 323 | Array.create( 324 | spath5, 325 | shape=(16, 16), 326 | chunk_shape=(16, 16), 327 | dtype=np.dtype("uint8"), 328 | fill_value=0, 329 | codecs=[ 330 | ShardingCodec(chunk_shape=(8,)), 331 | ], 332 | ) 333 | spath6 = StorePath(store, "invalid_inner_chunk_shape") 334 | with pytest.raises( 335 | ValueError, match=r".*array.*chunk_shape.*divisible.*shard.*chunk_shape" 336 | ): 337 | Array.create( 338 | spath6, 339 | shape=(16, 16), 340 | chunk_shape=(16, 16), 341 | dtype=np.dtype("uint8"), 342 | fill_value=0, 343 | codecs=[ 344 | ShardingCodec(chunk_shape=(8, 7)), 345 | ], 346 | ) 347 | # LD: Disabled for `zarrs`. Such checks do not exist. 348 | # Also this is not invalid metadata, should be a separate test. 
349 |     # spath7 = StorePath(store, "warning_inefficient_codecs")
350 |     # with pytest.warns(UserWarning):
351 |     #     Array.create(
352 |     #         spath7,
353 |     #         shape=(16, 16),
354 |     #         chunk_shape=(16, 16),
355 |     #         dtype=np.dtype("uint8"),
356 |     #         fill_value=0,
357 |     #         codecs=[
358 |     #             ShardingCodec(chunk_shape=(8, 8)),
359 |     #             GzipCodec(),
360 |     #         ],
361 |     #     )
362 | 
363 | 
364 | async def test_resize(store: Store) -> None:
365 |     data = np.zeros((16, 18), dtype="uint16")
366 |     path = "resize"
367 |     spath = StorePath(store, path)
368 |     a = await AsyncArray.create(
369 |         spath,
370 |         shape=data.shape,
371 |         chunk_shape=(10, 10),
372 |         dtype=data.dtype,
373 |         chunk_key_encoding=("v2", "."),
374 |         fill_value=1,
375 |     )
376 | 
377 |     await _AsyncArrayProxy(a)[:16, :18].set(data)
378 |     assert (
379 |         await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is not None
380 |     )
381 |     assert (
382 |         await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None
383 |     )
384 |     assert (
385 |         await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
386 |     )
387 |     assert (
388 |         await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None
389 |     )
390 | 
391 |     await a.resize((10, 12))
392 |     assert a.metadata.shape == (10, 12)
393 |     assert (
394 |         await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None
395 |     )
396 |     assert (
397 |         await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
398 |     )
399 |     assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None
400 |     assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None
401 | 
--------------------------------------------------------------------------------
/tests/test_endian.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | import numpy as np
4 | import pytest
5 | from zarr import AsyncArray
6 | from zarr.abc.store import Store
7 | from zarr.codecs import BytesCodec
8 | from zarr.storage import StorePath
9 | 
10 | from .test_codecs import _AsyncArrayProxy
11 | 
12 | 
13 | @pytest.mark.parametrize("endian", ["big", "little"])
14 | async def test_endian(store: Store, endian: Literal["big", "little"]) -> None:
15 |     data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
16 |     path = "endian"
17 |     spath = StorePath(store, path)
18 |     a = await AsyncArray.create(
19 |         spath,
20 |         shape=data.shape,
21 |         chunk_shape=(16, 16),
22 |         dtype=data.dtype,
23 |         fill_value=0,
24 |         chunk_key_encoding=("v2", "."),
25 |         codecs=[BytesCodec(endian=endian)],
26 |     )
27 | 
28 |     await _AsyncArrayProxy(a)[:, :].set(data)
29 |     readback_data = await _AsyncArrayProxy(a)[:, :].get()
30 |     assert np.array_equal(data, readback_data)
31 | 
32 | 
33 | @pytest.mark.parametrize("dtype_input_endian", [">u2", "<u2"])
34 | @pytest.mark.parametrize("dtype_store_endian", ["big", "little"])
35 | async def test_endian_write(
36 |     store: Store,
37 |     dtype_input_endian: Literal[">u2", "<u2"],
38 |     dtype_store_endian: Literal["big", "little"],
39 | ) -> None:
40 |     data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16))
41 |     path = "endian"
42 |     spath = StorePath(store, path)
43 |     a = await AsyncArray.create(
44 |         spath,
45 |         shape=data.shape,
46 |         chunk_shape=(16, 16),
47 |         dtype="uint16",
48 |         fill_value=0,
49 |         chunk_key_encoding=("v2", "."),
50 |         codecs=[BytesCodec(endian=dtype_store_endian)],
51 |     )
52 | 
53 |     await _AsyncArrayProxy(a)[:, :].set(data)
54 |     readback_data = await _AsyncArrayProxy(a)[:, :].get()
55 |     assert np.array_equal(data, readback_data)
56 | 
--------------------------------------------------------------------------------
/tests/test_gzip.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from zarr import Array 3 | from zarr.abc.store import Store 4 | from zarr.codecs import BytesCodec, GzipCodec 5 | from zarr.storage import StorePath 6 | 7 | 8 | def test_gzip(store: Store) -> None: 9 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 10 | 11 | a = Array.create( 12 | StorePath(store), 13 | shape=data.shape, 14 | chunk_shape=(16, 16), 15 | dtype=data.dtype, 16 | fill_value=0, 17 | codecs=[BytesCodec(), GzipCodec()], 18 | ) 19 | 20 | a[:, :] = data 21 | assert np.array_equal(data, a[:, :]) 22 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import operator 4 | import pickle 5 | import tempfile 6 | from collections.abc import Callable 7 | from contextlib import contextmanager 8 | from functools import reduce 9 | from itertools import product 10 | from pathlib import Path 11 | from types import EllipsisType 12 | 13 | import numpy as np 14 | import pytest 15 | import zarr 16 | from zarr.storage import LocalStore 17 | 18 | import zarrs # noqa: F401 19 | 20 | axis_size_ = 10 21 | chunk_size_ = axis_size_ // 2 22 | fill_value_ = 32767 23 | dimensionalities_ = list(range(1, 5)) 24 | 25 | 26 | @pytest.fixture 27 | def fill_value() -> int: 28 | return fill_value_ 29 | 30 | 31 | non_numpy_indices = [ 32 | pytest.param(slice(1, 3), id="slice_in_chunk"), 33 | pytest.param(slice(1, 7), id="slice_across_chunks"), 34 | pytest.param(2, id="int"), 35 | pytest.param(slice(None), id="full_slice"), 36 | pytest.param(Ellipsis, id="ellipsis"), 37 | ] 38 | 39 | numpy_indices = [ 40 | pytest.param(np.array([1, 2]), id="contiguous_in_chunk_array"), 41 | pytest.param(np.array([0, 3]), id="discontinuous_in_chunk_array"), 42 | pytest.param(np.array([0, 6]), id="across_chunks_indices_array"), 43 | ] 44 | 45 | all_indices = numpy_indices + non_numpy_indices 46 | 47 | indexing_method_params = [ 48 | pytest.param(lambda x: getattr(x, "oindex"), id="oindex"), 49 | pytest.param(lambda x: x, id="vindex"), 50 | ] 51 | 52 | zarr_formats = [2, 3] 53 | 54 | 55 | def pytest_generate_tests(metafunc): 56 | old_pipeline_path = zarr.config.get("codec_pipeline.path") 57 | # need to set the codec pipeline to the zarrs pipeline because the autouse fixture doesn't apply here 58 | zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) 59 | if "test_roundtrip" in metafunc.function.__name__: 60 | arrs = [] 61 | indices = [] 62 | store_values = [] 63 | indexing_methods = [] 64 | ids = [] 65 | for format in zarr_formats: 66 | for dimensionality in dimensionalities_: 67 | indexers = non_numpy_indices if dimensionality > 2 else all_indices 68 | for index_param_prod in product(indexers, repeat=dimensionality): 69 | index = tuple( 70 | index_param.values[0] for index_param in index_param_prod 71 | ) 72 | # multi-ellipsis indexing is not supported 73 | if sum(isinstance(i, EllipsisType) for i in index) > 1: 74 | continue 75 | for indexing_method_param in indexing_method_params: 76 | arr = gen_arr( 77 | fill_value_, Path(tempfile.mktemp()), dimensionality, format 78 | ) 79 | indexing_method = indexing_method_param.values[0] 80 | dimensionality_id = f"{dimensionality}d" 81 | id = "-".join( 82 | [indexing_method_param.id, dimensionality_id] 83 | + [index_param.id for index_param in index_param_prod] 84 | + [f"v{format}"] 85 | ) 86 | 
)
86 |
ids.append(id) 87 | store_values.append( 88 | gen_store_values( 89 | indexing_method, 90 | index, 91 | full_array((axis_size_,) * dimensionality), 92 | ) 93 | ) 94 | indexing_methods.append(indexing_method) 95 | indices.append(index) 96 | arrs.append(arr) 97 | # array is used as param name to prevent collision with arr fixture 98 | metafunc.parametrize( 99 | ["array", "index", "store_values", "indexing_method"], 100 | zip(arrs, indices, store_values, indexing_methods), 101 | ids=ids, 102 | ) 103 | zarr.config.set({"codec_pipeline.path": old_pipeline_path}) 104 | 105 | 106 | def full_array(shape) -> np.ndarray: 107 | return np.arange(reduce(operator.mul, shape, 1)).reshape(shape) 108 | 109 | 110 | def gen_store_values( 111 | indexing_method: Callable, 112 | index: tuple[int | slice | np.ndarray | EllipsisType, ...], 113 | full_array: np.ndarray, 114 | ) -> np.ndarray: 115 | class smoke: 116 | oindex = "oindex" 117 | 118 | def maybe_convert( 119 | i: int | np.ndarray | slice | EllipsisType, axis: int 120 | ) -> np.ndarray: 121 | if isinstance(i, np.ndarray): 122 | return i 123 | if isinstance(i, slice): 124 | return np.arange( 125 | i.start if i.start is not None else 0, 126 | i.stop if i.stop is not None else full_array.shape[axis], 127 | ) 128 | if isinstance(i, int): 129 | return np.array([i]) 130 | if isinstance(i, EllipsisType): 131 | return np.arange(full_array.shape[axis]) 132 | raise ValueError(f"Invalid index {i}") 133 | 134 | if not isinstance(index, EllipsisType) and indexing_method(smoke()) == "oindex": 135 | index: tuple[np.ndarray, ...] = tuple( 136 | maybe_convert(i, axis) for axis, i in enumerate(index) 137 | ) 138 | res = full_array[np.ix_(*index)] 139 | # squeeze out extra dims from integer indexers 140 | if all(i.shape == (1,) for i in index): 141 | res = res.squeeze() 142 | return res 143 | res = res.squeeze( 144 | axis=tuple(axis for axis, i in enumerate(index) if i.shape == (1,)) 145 | ) 146 | return res 147 | return full_array[index] 148 | 149 | 150 | def gen_arr(fill_value, tmp_path, dimensionality, format) -> zarr.Array: 151 | return zarr.create( 152 | (axis_size_,) * dimensionality, 153 | store=LocalStore(root=tmp_path / ".zarr"), 154 | chunks=(chunk_size_,) * dimensionality, 155 | dtype=np.int16, 156 | fill_value=fill_value, 157 | codecs=[zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()] 158 | if format == 3 159 | else None, 160 | zarr_format=format, 161 | ) 162 | 163 | 164 | @pytest.fixture(params=dimensionalities_) 165 | def dimensionality(request): 166 | return request.param 167 | 168 | 169 | @pytest.fixture(params=zarr_formats) 170 | def format(request): 171 | return request.param 172 | 173 | 174 | @pytest.fixture 175 | def arr(dimensionality, tmp_path, format) -> zarr.Array: 176 | return gen_arr(fill_value_, tmp_path, dimensionality, format) 177 | 178 | 179 | def test_fill_value(arr: zarr.Array): 180 | assert np.all(arr[:] == fill_value_) 181 | 182 | 183 | def test_constant(arr: zarr.Array): 184 | arr[:] = 42 185 | assert np.all(arr[:] == 42) 186 | 187 | 188 | def test_singleton(arr: zarr.Array): 189 | singleton_index = (1,) * len(arr.shape) 190 | non_singleton_index = (0,) * len(arr.shape) 191 | arr[singleton_index] = 42 192 | assert arr[singleton_index] == 42 193 | assert arr[non_singleton_index] != 42 194 | 195 | 196 | def test_full_array(arr: zarr.Array): 197 | stored_values = full_array(arr.shape) 198 | arr[:] = stored_values 199 | assert np.all(arr[:] == stored_values) 200 | 201 | 202 | def test_roundtrip( 203 | array: zarr.Array, 204 | store_values: 
np.ndarray, 205 | index: tuple[int | slice | np.ndarray | EllipsisType, ...], 206 | indexing_method: Callable, 207 | ): 208 | indexing_method(array)[index] = store_values 209 | res = indexing_method(array)[index] 210 | assert np.all( 211 | res == store_values, 212 | ), res 213 | 214 | 215 | def test_ellipsis_indexing_invalid(arr: zarr.Array): 216 | if len(arr.shape) <= 2: 217 | pytest.skip( 218 | "Ellipsis indexing works for 1D and 2D arrays in zarr-python despite a shape mismatch" 219 | ) 220 | stored_value = np.array([1, 2, 3]) 221 | with pytest.raises(ValueError): # noqa: PT011 222 | # zarrs-python error: ValueError: operands could not be broadcast together with shapes (4,) (3,) 223 | # numpy error: ValueError: could not broadcast input array from shape (3,) into shape (4,) 224 | arr[2, ...] = stored_value 225 | 226 | 227 | def test_pickle(arr: zarr.Array, tmp_path: Path): 228 | arr[:] = np.arange(reduce(operator.mul, arr.shape, 1)).reshape(arr.shape) 229 | expected = arr[:] 230 | with Path.open(tmp_path / "arr.pickle", "wb") as f: 231 | pickle.dump(arr._async_array.codec_pipeline, f) 232 | with Path.open(tmp_path / "arr.pickle", "rb") as f: 233 | object.__setattr__(arr._async_array, "codec_pipeline", pickle.load(f)) 234 | assert (arr[:] == expected).all() 235 | 236 | 237 | @contextmanager 238 | def use_zarr_default_codec_reader(): 239 | zarr.config.set( 240 | {"codec_pipeline.path": "zarr.core.codec_pipeline.BatchedCodecPipeline"} 241 | ) 242 | yield 243 | zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) 244 | 245 | 246 | def test_roundtrip_read_only_zarrs( 247 | array: zarr.Array, 248 | store_values: np.ndarray, 249 | index: tuple[int | slice | np.ndarray | EllipsisType, ...], 250 | indexing_method: Callable, 251 | ): 252 | with use_zarr_default_codec_reader(): 253 | arr_default = zarr.open(array.store, read_only=True) 254 | indexing_method(arr_default)[index] = store_values 255 | res = indexing_method(zarr.open(array.store))[index] 256 | assert np.all( 257 | res == store_values, 258 | ), res 259 | -------------------------------------------------------------------------------- /tests/test_sharding.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import Any 3 | 4 | import numpy as np 5 | import numpy.typing as npt 6 | import pytest 7 | from zarr import Array, AsyncArray 8 | from zarr.abc.store import Store 9 | from zarr.codecs import ( 10 | BloscCodec, 11 | BytesCodec, 12 | ShardingCodec, 13 | ShardingCodecIndexLocation, 14 | TransposeCodec, 15 | ) 16 | from zarr.core.buffer import default_buffer_prototype 17 | from zarr.storage import StorePath 18 | 19 | from .conftest import ArrayRequest 20 | from .test_codecs import _AsyncArrayProxy, order_from_dim 21 | 22 | 23 | @pytest.mark.parametrize("index_location", ["start", "end"]) 24 | @pytest.mark.parametrize( 25 | "array_fixture", 26 | [ 27 | ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), 28 | ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), 29 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 30 | ], 31 | indirect=["array_fixture"], 32 | ) 33 | @pytest.mark.parametrize("offset", [0, 10]) 34 | def test_sharding( 35 | store: Store, 36 | array_fixture: npt.NDArray[Any], 37 | index_location: ShardingCodecIndexLocation, 38 | offset: int, 39 | ) -> None: 40 | """ 41 | Test that we can create an array with a sharding codec, write data to that array, and get 42 | the same data out via indexing. 
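    For example, with offset=10 the 1D case creates a (138,) array with (64,)
    shards, writes data at [10:], and the leading [:10] region must read back
    as the fill value (6) because it was never written.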
43 | """ 44 | data = array_fixture 45 | spath = StorePath(store) 46 | arr = Array.create( 47 | spath, 48 | shape=tuple(s + offset for s in data.shape), 49 | chunk_shape=(64,) * data.ndim, 50 | dtype=data.dtype, 51 | fill_value=6, 52 | codecs=[ 53 | ShardingCodec( 54 | chunk_shape=(32,) * data.ndim, 55 | codecs=[ 56 | TransposeCodec(order=order_from_dim("F", data.ndim)), 57 | BytesCodec(), 58 | BloscCodec(cname="lz4"), 59 | ], 60 | index_location=index_location, 61 | ) 62 | ], 63 | ) 64 | write_region = tuple(slice(offset, None) for dim in range(data.ndim)) 65 | arr[write_region] = data 66 | 67 | if offset > 0: 68 | empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) 69 | assert np.all(arr[empty_region] == arr.metadata.fill_value) 70 | 71 | read_data = arr[write_region] 72 | assert data.shape == read_data.shape 73 | assert np.array_equal(data, read_data) 74 | 75 | 76 | @pytest.mark.parametrize("index_location", ["start", "end"]) 77 | @pytest.mark.parametrize( 78 | "array_fixture", 79 | [ 80 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 81 | ], 82 | indirect=["array_fixture"], 83 | ) 84 | def test_sharding_partial( 85 | store: Store, 86 | array_fixture: npt.NDArray[Any], 87 | index_location: ShardingCodecIndexLocation, 88 | ) -> None: 89 | data = array_fixture 90 | spath = StorePath(store) 91 | a = Array.create( 92 | spath, 93 | shape=tuple(a + 10 for a in data.shape), 94 | chunk_shape=(64, 64, 64), 95 | dtype=data.dtype, 96 | fill_value=0, 97 | codecs=[ 98 | ShardingCodec( 99 | chunk_shape=(32, 32, 32), 100 | codecs=[ 101 | TransposeCodec(order=order_from_dim("F", data.ndim)), 102 | BytesCodec(), 103 | BloscCodec(cname="lz4"), 104 | ], 105 | index_location=index_location, 106 | ) 107 | ], 108 | ) 109 | 110 | a[10:, 10:, 10:] = data 111 | 112 | read_data = a[0:10, 0:10, 0:10] 113 | assert np.all(read_data == 0) 114 | 115 | read_data = a[10:, 10:, 10:] 116 | assert data.shape == read_data.shape 117 | assert np.array_equal(data, read_data) 118 | 119 | 120 | @pytest.mark.parametrize("index_location", ["start", "end"]) 121 | @pytest.mark.parametrize( 122 | "array_fixture", 123 | [ 124 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 125 | ], 126 | indirect=["array_fixture"], 127 | ) 128 | def test_sharding_partial_readwrite( 129 | store: Store, 130 | array_fixture: npt.NDArray[Any], 131 | index_location: ShardingCodecIndexLocation, 132 | ) -> None: 133 | data = array_fixture 134 | spath = StorePath(store) 135 | a = Array.create( 136 | spath, 137 | shape=data.shape, 138 | chunk_shape=data.shape, 139 | dtype=data.dtype, 140 | fill_value=0, 141 | codecs=[ 142 | ShardingCodec( 143 | chunk_shape=(1, data.shape[1], data.shape[2]), 144 | codecs=[BytesCodec()], 145 | index_location=index_location, 146 | ) 147 | ], 148 | ) 149 | 150 | a[:] = data 151 | 152 | for x in range(data.shape[0]): 153 | read_data = a[x, :, :] 154 | assert np.array_equal(data[x], read_data) 155 | 156 | 157 | @pytest.mark.parametrize( 158 | "array_fixture", 159 | [ 160 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 161 | ], 162 | indirect=["array_fixture"], 163 | ) 164 | @pytest.mark.parametrize("index_location", ["start", "end"]) 165 | def test_sharding_partial_read( 166 | store: Store, 167 | array_fixture: npt.NDArray[Any], 168 | index_location: ShardingCodecIndexLocation, 169 | ) -> None: 170 | data = array_fixture 171 | spath = StorePath(store) 172 | a = Array.create( 173 | spath, 174 | shape=tuple(a + 10 for a in data.shape), 175 | chunk_shape=(64, 64, 64), 176 | 
dtype=data.dtype, 177 | fill_value=1, 178 | codecs=[ 179 | ShardingCodec( 180 | chunk_shape=(32, 32, 32), 181 | codecs=[ 182 | TransposeCodec(order=order_from_dim("F", data.ndim)), 183 | BytesCodec(), 184 | BloscCodec(cname="lz4"), 185 | ], 186 | index_location=index_location, 187 | ) 188 | ], 189 | ) 190 | 191 | read_data = a[0:10, 0:10, 0:10] 192 | assert np.all(read_data == 1) 193 | 194 | 195 | @pytest.mark.parametrize( 196 | "array_fixture", 197 | [ 198 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 199 | ], 200 | indirect=["array_fixture"], 201 | ) 202 | @pytest.mark.parametrize("index_location", ["start", "end"]) 203 | def test_sharding_partial_overwrite( 204 | store: Store, 205 | array_fixture: npt.NDArray[Any], 206 | index_location: ShardingCodecIndexLocation, 207 | ) -> None: 208 | data = array_fixture[:10, :10, :10] 209 | spath = StorePath(store) 210 | a = Array.create( 211 | spath, 212 | shape=tuple(a + 10 for a in data.shape), 213 | chunk_shape=(64, 64, 64), 214 | dtype=data.dtype, 215 | fill_value=1, 216 | codecs=[ 217 | ShardingCodec( 218 | chunk_shape=(32, 32, 32), 219 | codecs=[ 220 | TransposeCodec(order=order_from_dim("F", data.ndim)), 221 | BytesCodec(), 222 | BloscCodec(cname="lz4"), 223 | ], 224 | index_location=index_location, 225 | ) 226 | ], 227 | ) 228 | 229 | a[:10, :10, :10] = data 230 | 231 | read_data = a[0:10, 0:10, 0:10] 232 | assert np.array_equal(data, read_data) 233 | 234 | data = data + 10 235 | a[:10, :10, :10] = data 236 | read_data = a[0:10, 0:10, 0:10] 237 | assert np.array_equal(data, read_data) 238 | 239 | 240 | @pytest.mark.parametrize( 241 | "array_fixture", 242 | [ 243 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 244 | ], 245 | indirect=["array_fixture"], 246 | ) 247 | @pytest.mark.parametrize( 248 | "outer_index_location", 249 | ["start", "end"], 250 | ) 251 | @pytest.mark.parametrize( 252 | "inner_index_location", 253 | ["start", "end"], 254 | ) 255 | def test_nested_sharding( 256 | store: Store, 257 | array_fixture: npt.NDArray[Any], 258 | outer_index_location: ShardingCodecIndexLocation, 259 | inner_index_location: ShardingCodecIndexLocation, 260 | ) -> None: 261 | data = array_fixture 262 | spath = StorePath(store) 263 | a = Array.create( 264 | spath, 265 | shape=data.shape, 266 | chunk_shape=(64, 64, 64), 267 | dtype=data.dtype, 268 | fill_value=0, 269 | codecs=[ 270 | ShardingCodec( 271 | chunk_shape=(32, 32, 32), 272 | codecs=[ 273 | ShardingCodec( 274 | chunk_shape=(16, 16, 16), index_location=inner_index_location 275 | ) 276 | ], 277 | index_location=outer_index_location, 278 | ) 279 | ], 280 | ) 281 | 282 | a[:, :, :] = data 283 | 284 | read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] 285 | assert data.shape == read_data.shape 286 | assert np.array_equal(data, read_data) 287 | 288 | 289 | def test_open_sharding(store: Store) -> None: 290 | path = "open_sharding" 291 | spath = StorePath(store, path) 292 | a = Array.create( 293 | spath, 294 | shape=(16, 16), 295 | chunk_shape=(16, 16), 296 | dtype="int32", 297 | fill_value=0, 298 | codecs=[ 299 | ShardingCodec( 300 | chunk_shape=(8, 8), 301 | codecs=[ 302 | TransposeCodec(order=order_from_dim("F", 2)), 303 | BytesCodec(), 304 | BloscCodec(), 305 | ], 306 | ) 307 | ], 308 | ) 309 | b = Array.open(spath) 310 | assert a.metadata == b.metadata 311 | 312 | 313 | def test_write_partial_sharded_chunks(store: Store) -> None: 314 | data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) 315 | spath = StorePath(store) 316 | a = 
Array.create( 317 | spath, 318 | shape=(40, 40), 319 | chunk_shape=(20, 20), 320 | dtype=data.dtype, 321 | fill_value=1, 322 | codecs=[ 323 | ShardingCodec( 324 | chunk_shape=(10, 10), 325 | codecs=[ 326 | BytesCodec(), 327 | BloscCodec(), 328 | ], 329 | ) 330 | ], 331 | ) 332 | a[0:16, 0:16] = data 333 | assert np.array_equal(a[0:16, 0:16], data) 334 | 335 | 336 | async def test_delete_empty_shards(store: Store) -> None: 337 | if not store.supports_deletes: 338 | pytest.skip("store does not support deletes") 339 | path = "delete_empty_shards" 340 | spath = StorePath(store, path) 341 | a = await AsyncArray.create( 342 | spath, 343 | shape=(16, 16), 344 | chunk_shape=(8, 16), 345 | dtype="uint16", 346 | fill_value=1, 347 | codecs=[ShardingCodec(chunk_shape=(8, 8))], 348 | ) 349 | await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) 350 | await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) 351 | await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) 352 | # chunk (0, 0) is full 353 | # chunks (0, 1), (1, 0), (1, 1) are empty 354 | # shard (0, 0) is half-full 355 | # shard (1, 0) is empty 356 | 357 | data = np.ones((16, 16), dtype="uint16") 358 | data[:8, :8] = 0 359 | assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) 360 | assert ( 361 | await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype()) is None 362 | ) 363 | chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype()) 364 | assert chunk_bytes is not None 365 | assert len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 366 | 367 | 368 | def test_pickle() -> None: 369 | codec = ShardingCodec(chunk_shape=(8, 8)) 370 | assert pickle.loads(pickle.dumps(codec)) == codec 371 | 372 | 373 | @pytest.mark.parametrize( 374 | "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end] 375 | ) 376 | async def test_sharding_with_empty_inner_chunk( 377 | store: Store, index_location: ShardingCodecIndexLocation 378 | ) -> None: 379 | data = np.arange(0, 16 * 16, dtype="uint32").reshape((16, 16)) 380 | fill_value = 1 381 | 382 | path = f"sharding_with_empty_inner_chunk_{index_location}" 383 | spath = StorePath(store, path) 384 | a = await AsyncArray.create( 385 | spath, 386 | shape=(16, 16), 387 | chunk_shape=(8, 8), 388 | dtype="uint32", 389 | fill_value=fill_value, 390 | codecs=[ShardingCodec(chunk_shape=(4, 4), index_location=index_location)], 391 | ) 392 | data[:4, :4] = fill_value 393 | await a.setitem(..., data) 394 | print("read data") 395 | data_read = await a.getitem(...) 
396 | assert np.array_equal(data_read, data) 397 | -------------------------------------------------------------------------------- /tests/test_transpose.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | import numpy as np 4 | import pytest 5 | from zarr import Array, AsyncArray, config 6 | from zarr.abc.store import Store 7 | from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec 8 | from zarr.core.common import MemoryOrder 9 | from zarr.storage import StorePath 10 | 11 | from .test_codecs import _AsyncArrayProxy 12 | 13 | if TYPE_CHECKING: 14 | from zarr.abc.codec import Codec 15 | 16 | 17 | @pytest.mark.parametrize("input_order", ["F", "C"]) 18 | @pytest.mark.parametrize("runtime_write_order", ["C"]) 19 | @pytest.mark.parametrize("runtime_read_order", ["C"]) 20 | @pytest.mark.parametrize("with_sharding", [True, False]) 21 | async def test_transpose( 22 | *, 23 | store: Store, 24 | input_order: MemoryOrder, 25 | runtime_write_order: MemoryOrder, 26 | runtime_read_order: MemoryOrder, 27 | with_sharding: bool, 28 | ) -> None: 29 | data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) 30 | spath = StorePath(store, path="transpose") 31 | codecs_: list[Codec] = ( 32 | [ 33 | ShardingCodec( 34 | chunk_shape=(1, 16, 8), 35 | codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], 36 | ) 37 | ] 38 | if with_sharding 39 | else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] 40 | ) 41 | with config.set({"array.order": runtime_write_order}): 42 | a = await AsyncArray.create( 43 | spath, 44 | shape=data.shape, 45 | chunk_shape=(1, 32, 8), 46 | dtype=data.dtype, 47 | fill_value=0, 48 | chunk_key_encoding=("v2", "."), 49 | codecs=codecs_, 50 | ) 51 | 52 | await _AsyncArrayProxy(a)[:, :].set(data) 53 | read_data = await _AsyncArrayProxy(a)[:, :].get() 54 | assert np.array_equal(data, read_data) 55 | 56 | with config.set({"array.order": runtime_read_order}): 57 | a = await AsyncArray.open( 58 | spath, 59 | ) 60 | read_data = await _AsyncArrayProxy(a)[:, :].get() 61 | assert np.array_equal(data, read_data) 62 | 63 | if runtime_read_order == "F": 64 | assert read_data.flags["F_CONTIGUOUS"] 65 | assert not read_data.flags["C_CONTIGUOUS"] 66 | else: 67 | assert not read_data.flags["F_CONTIGUOUS"] 68 | assert read_data.flags["C_CONTIGUOUS"] 69 | 70 | 71 | @pytest.mark.parametrize("order", [[1, 2, 0], [1, 2, 3, 0], [3, 2, 4, 0, 1]]) 72 | def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: 73 | shape = [i + 3 for i in range(len(order))] 74 | data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) 75 | spath = StorePath(store, "transpose_non_self_inverse") 76 | a = Array.create( 77 | spath, 78 | shape=data.shape, 79 | chunk_shape=data.shape, 80 | dtype=data.dtype, 81 | fill_value=0, 82 | codecs=[TransposeCodec(order=order), BytesCodec()], 83 | ) 84 | a[:, :] = data 85 | read_data = a[:, :] 86 | assert np.array_equal(data, read_data) 87 | 88 | 89 | def test_transpose_invalid( 90 | store: Store, 91 | ) -> None: 92 | data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) 93 | spath = StorePath(store, "transpose_invalid") 94 | for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: 95 | with pytest.raises(ValueError, match=r".*order"): 96 | Array.create( 97 | spath, 98 | shape=data.shape, 99 | chunk_shape=(1, 32, 8), 100 | dtype=data.dtype, 101 | fill_value=0, 102 | chunk_key_encoding=("v2", "."), 103 | codecs=[TransposeCodec(order=order), BytesCodec()], 104 | ) 105 | 
-------------------------------------------------------------------------------- /tests/test_v2.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections.abc import Iterator 3 | from pathlib import Path 4 | from typing import Any, Literal 5 | 6 | import numcodecs.vlen 7 | import numpy as np 8 | import pytest 9 | import zarr 10 | import zarr.core.buffer 11 | import zarr.storage 12 | from numcodecs import Delta 13 | from numcodecs.blosc import Blosc 14 | from numcodecs.zstd import Zstd 15 | from zarr import config 16 | from zarr.abc.store import Store 17 | from zarr.core.buffer.core import default_buffer_prototype 18 | from zarr.core.sync import sync 19 | from zarr.storage import LocalStore, StorePath 20 | 21 | 22 | @pytest.fixture 23 | async def store(tmp_path) -> Iterator[StorePath]: 24 | return StorePath(await LocalStore.open(tmp_path)) 25 | 26 | 27 | def test_simple(store: StorePath) -> None: 28 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 29 | 30 | a = zarr.create_array( 31 | store / "simple_v2", 32 | zarr_format=2, 33 | shape=data.shape, 34 | chunks=(16, 16), 35 | dtype=data.dtype, 36 | fill_value=0, 37 | ) 38 | 39 | a[:, :] = data 40 | assert np.array_equal(data, a[:, :]) 41 | 42 | 43 | @pytest.mark.parametrize( 44 | ("dtype", "fill_value"), 45 | [ 46 | ("bool", False), 47 | ("int64", 0), 48 | ("float64", 0.0), 49 | ("|S1", b""), 50 | ("|U1", ""), 51 | ("object", ""), 52 | (str, ""), 53 | ], 54 | ) 55 | def test_implicit_fill_value(store: LocalStore, dtype: str, fill_value: Any) -> None: 56 | arr = zarr.create( 57 | store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype 58 | ) 59 | assert arr.metadata.fill_value is None 60 | assert arr.metadata.to_dict()["fill_value"] is None 61 | result = arr[:] 62 | numpy_dtype = np.dtype(object) if dtype is str else np.dtype(dtype) 63 | expected = np.full(arr.shape, fill_value, dtype=numpy_dtype) 64 | np.testing.assert_array_equal(result, expected) 65 | 66 | 67 | def test_codec_pipeline(tmp_path) -> None: 68 | # https://github.com/zarr-developers/zarr-python/issues/2243 69 | store = LocalStore(tmp_path) 70 | array = zarr.create( 71 | store=store, 72 | shape=(1,), 73 | dtype="i4", 74 | zarr_format=2, 75 | filters=[Delta(dtype="i4").get_config()], 76 | compressor=Blosc().get_config(), 77 | ) 78 | array[:] = 1 79 | result = array[:] 80 | expected = np.ones(1) 81 | np.testing.assert_array_equal(result, expected) 82 | 83 | 84 | @pytest.mark.parametrize( 85 | ("dtype", "expected_dtype", "fill_value", "fill_value_encoding"), 86 | [ 87 | ("|S", "|S0", b"X", "WA=="), 88 | ("|V", "|V0", b"X", "WA=="), 89 | ("|V10", "|V10", b"X", "WAAAAAAAAAAAAA=="), 90 | ], 91 | ) 92 | async def test_v2_encode_decode( 93 | dtype, expected_dtype, fill_value, fill_value_encoding, tmp_path 94 | ) -> None: 95 | with config.set( 96 | { 97 | "array.v2_default_filters.bytes": [{"id": "vlen-bytes"}], 98 | "array.v2_default_compressor.bytes": None, 99 | } 100 | ): 101 | store = zarr.storage.LocalStore(tmp_path) 102 | g = zarr.group(store=store, zarr_format=2) 103 | g.create_array( 104 | name="foo", 105 | shape=(3,), 106 | chunks=(3,), 107 | dtype=dtype, 108 | fill_value=fill_value, 109 | compressor=None, 110 | ) 111 | 112 | result = await store.get( 113 | "foo/.zarray", zarr.core.buffer.default_buffer_prototype() 114 | ) 115 | assert result is not None 116 | 117 | serialized = json.loads(result.to_bytes()) 118 | expected = { 119 | "chunks": [3], 120 | "compressor": None, 121 | "dtype": 
expected_dtype, 122 | "fill_value": fill_value_encoding, 123 | "filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None, 124 | "order": "C", 125 | "shape": [3], 126 | "zarr_format": 2, 127 | "dimension_separator": ".", 128 | } 129 | assert serialized == expected 130 | 131 | data = zarr.open_array(store=store, path="foo")[:] 132 | expected = np.full((3,), b"X", dtype=dtype) 133 | np.testing.assert_equal(data, expected) 134 | 135 | 136 | @pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]]) 137 | def test_v2_encode_decode_with_data(dtype_value, tmp_path): 138 | dtype, value = dtype_value 139 | with config.set( 140 | { 141 | "array.v2_default_filters": { 142 | "string": [{"id": "vlen-utf8"}], 143 | "bytes": [{"id": "vlen-bytes"}], 144 | }, 145 | } 146 | ): 147 | expected = np.full((3,), value, dtype=dtype) 148 | a = zarr.create( 149 | store=tmp_path, 150 | shape=(3,), 151 | zarr_format=2, 152 | dtype=dtype, 153 | ) 154 | a[:] = expected 155 | data = a[:] 156 | np.testing.assert_equal(data, expected) 157 | 158 | 159 | @pytest.mark.parametrize("dtype", [str, "str"]) 160 | async def test_create_dtype_str(dtype: Any, tmp_path) -> None: 161 | # see https://github.com/zarr-developers/zarr-python/issues/2627 for why this test 162 | # is probably wrong 163 | arr = zarr.create(store=tmp_path, shape=3, dtype=dtype, zarr_format=2) 164 | assert arr.dtype.kind == "O" 165 | assert arr.metadata.to_dict()["dtype"] == "|O" 166 | assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),) 167 | arr[:] = [b"a", b"bb", b"ccc"] 168 | result = arr[:] 169 | np.testing.assert_array_equal( 170 | result, np.array([b"a", b"bb", b"ccc"], dtype="object") 171 | ) 172 | 173 | 174 | @pytest.mark.parametrize( 175 | "filters", [[], [numcodecs.Delta(dtype="<i4")], [Zstd(level=1)]] 176 | ) 177 | @pytest.mark.parametrize("order", ["C", "F"]) 178 | def test_v2_filters_codecs( 179 | filters: Any, order: Literal["C", "F"], tmp_path 180 | ) -> None: 181 | array_fixture = [42] 182 | with config.set({"array.order": order}): 183 | arr = zarr.create( 184 | store=tmp_path, shape=1, dtype="<i4", zarr_format=2, filters=filters 185 | ) 186 | arr[:] = array_fixture 187 | result = arr[:] 188 | np.testing.assert_array_equal(result, array_fixture) 189 | 190 | 226 | @pytest.mark.parametrize("array_order", ["C", "F"]) 227 | @pytest.mark.parametrize("data_order", ["C", "F"]) 228 | @pytest.mark.parametrize("memory_order", ["C", "F"]) 229 | def test_v2_non_contiguous( 230 | array_order: Literal["C", "F"], data_order: Literal["C", "F"], 231 | memory_order: Literal["C", "F"], tmp_path 232 | ) -> None: 233 | store = LocalStore(tmp_path / "a_store") 234 | arr = zarr.create_array( 235 | store, 236 | shape=(10, 8), 237 | chunks=(3, 3), 238 | fill_value=np.nan, 239 | dtype="float64", 240 | zarr_format=2, 241 | filters=None, 242 | compressors=None, 243 | overwrite=True, 244 | order=array_order, 245 | config={"order": memory_order}, 246 | ) 247 | 248 | # Non-contiguous write 249 | a = np.arange(arr.shape[0] * arr.shape[1]).reshape(arr.shape, order=data_order) 250 | arr[6:9, 3:6] = a[6:9, 3:6] # The slice on the RHS is important 251 | np.testing.assert_array_equal(arr[6:9, 3:6], a[6:9, 3:6]) 252 | 253 | np.testing.assert_array_equal( 254 | a[6:9, 3:6], 255 | np.frombuffer( 256 | sync(store.get("2.1", default_buffer_prototype())).to_bytes(), 257 | dtype="float64", 258 | ).reshape((3, 3), order=array_order), 259 | ) 260 | if memory_order == "F": 261 | assert (arr[6:9, 3:6]).flags.f_contiguous 262 | else: 263 | assert (arr[6:9, 3:6]).flags.c_contiguous 264 | 265 | store = LocalStore(tmp_path / "other_store") 266 | arr = zarr.create_array( 267 | store, 268 | shape=(10, 8), 269 | chunks=(3, 3), 270 | fill_value=np.nan, 271 | dtype="float64", 272 | zarr_format=2, 273 | compressors=None, 274 | filters=None, 275 | overwrite=True, 276 | order=array_order, 277 | config={"order": memory_order}, 278 | ) 279 | 280 | # Contiguous write 281 | a = np.arange(9).reshape((3, 3), order=data_order) 282 | if data_order == "F": 283 | assert a.flags.f_contiguous 284 | else: 285 | assert a.flags.c_contiguous 286 | arr[6:9, 3:6] = a 287 | np.testing.assert_array_equal(arr[6:9, 3:6], a) 288 | 289 | 290 | def
test_default_compressor_deprecation_warning(): 291 | with pytest.warns(DeprecationWarning, match="default_compressor is deprecated"): 292 | zarr.storage.default_compressor = "zarr.codecs.zstd.ZstdCodec()" 293 | 294 | 295 | @pytest.mark.parametrize( 296 | "dtype_expected", 297 | [ 298 | ["b", "zstd", None], 299 | ["i", "zstd", None], 300 | ["f", "zstd", None], 301 | ["|S1", "zstd", "vlen-bytes"], 302 | ["|U1", "zstd", "vlen-utf8"], 303 | ], 304 | ) 305 | def test_default_filters_and_compressor(dtype_expected: Any) -> None: 306 | with config.set( 307 | { 308 | "array.v2_default_compressor": { 309 | "numeric": {"id": "zstd", "level": "0"}, 310 | "string": {"id": "zstd", "level": "0"}, 311 | "bytes": {"id": "zstd", "level": "0"}, 312 | }, 313 | "array.v2_default_filters": { 314 | "numeric": [], 315 | "string": [{"id": "vlen-utf8"}], 316 | "bytes": [{"id": "vlen-bytes"}], 317 | }, 318 | } 319 | ): 320 | dtype, expected_compressor, expected_filter = dtype_expected 321 | arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype) 322 | assert arr.metadata.compressor.codec_id == expected_compressor 323 | if expected_filter is not None: 324 | assert arr.metadata.filters[0].codec_id == expected_filter 325 | 326 | 327 | @pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"]) 328 | def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None: 329 | a = np.array( 330 | [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)], 331 | dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], 332 | ) 333 | array_path = tmp_path / "data.zarr" 334 | za = zarr.create( 335 | shape=(3,), 336 | store=array_path, 337 | chunks=(2,), 338 | fill_value=fill_value, 339 | zarr_format=2, 340 | dtype=a.dtype, 341 | ) 342 | if fill_value is not None: 343 | assert (np.array([fill_value] * a.shape[0], dtype=a.dtype) == za[:]).all() 344 | za[...] 
= a 345 | za = zarr.open_array(store=array_path) 346 | assert (a == za[:]).all() 347 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | import zarrs 4 | 5 | 6 | def test_version(): 7 | assert zarrs.__version__ == version("zarrs") 8 | -------------------------------------------------------------------------------- /tests/test_vlen.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | import pytest 5 | import zarr 6 | from zarr.abc.codec import Codec 7 | from zarr.abc.store import Store 8 | from zarr.codecs import ZstdCodec 9 | from zarr.core.metadata.v3 import ArrayV3Metadata, DataType 10 | from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING 11 | from zarr.storage import StorePath 12 | 13 | numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType] 14 | expected_zarr_string_dtype: np.dtype[Any] 15 | if _NUMPY_SUPPORTS_VLEN_STRING: 16 | numpy_str_dtypes.append(np.dtypes.StringDType) 17 | expected_zarr_string_dtype = np.dtypes.StringDType() 18 | else: 19 | expected_zarr_string_dtype = np.dtype("O") 20 | 21 | 22 | @pytest.mark.parametrize("store", ["local"], indirect=["store"]) 23 | @pytest.mark.parametrize("dtype", numpy_str_dtypes) 24 | @pytest.mark.parametrize("as_object_array", [False, True]) 25 | @pytest.mark.parametrize("compressor", [None, ZstdCodec()]) 26 | def test_vlen_string( 27 | store: Store, 28 | dtype: np.dtype[Any] | None, 29 | *, 30 | as_object_array: bool, 31 | compressor: Codec | None, 32 | ) -> None: 33 | strings = ["hello", "world", "this", "is", "a", "test"] 34 | data = np.array(strings, dtype=dtype).reshape((2, 3)) 35 | 36 | sp = StorePath(store, path="string") 37 | a = zarr.create_array( 38 | sp, 39 | shape=data.shape, 40 | chunks=data.shape, 41 | dtype=data.dtype, 42 | fill_value="", 43 | compressors=compressor, 44 | ) 45 | assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy 46 | 47 | # should also work if input array is an object array, provided we explicitly specified 48 | # a stringlike dtype when creating the Array 49 | if as_object_array: 50 | data = data.astype("O") 51 | 52 | a[:, :] = data 53 | assert np.array_equal(data, a[:, :]) 54 | assert a.metadata.data_type == DataType.string 55 | assert a.dtype == expected_zarr_string_dtype 56 | 57 | # test round trip 58 | b = zarr.open(sp) 59 | assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy 60 | assert np.array_equal(data, b[:, :]) 61 | assert b.metadata.data_type == DataType.string 62 | assert a.dtype == expected_zarr_string_dtype 63 | 64 | 65 | @pytest.mark.parametrize("store", ["local"], indirect=["store"]) 66 | @pytest.mark.parametrize("as_object_array", [False, True]) 67 | @pytest.mark.parametrize("compressor", [None, ZstdCodec()]) 68 | def test_vlen_bytes( 69 | store: Store, *, as_object_array: bool, compressor: Codec | None 70 | ) -> None: 71 | bstrings = [b"hello", b"world", b"this", b"is", b"a", b"test"] 72 | data = np.array(bstrings).reshape((2, 3)) 73 | assert data.dtype == "|S5" 74 | 75 | sp = StorePath(store, path="string") 76 | a = zarr.create_array( 77 | sp, 78 | shape=data.shape, 79 | chunks=data.shape, 80 | dtype=data.dtype, 81 | fill_value=b"", 82 | compressors=compressor, 83 | ) 84 | assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy 85 | 86 | # should also 
work if input array is an object array, provided we explicitly specified 87 | # a bytestring-like dtype when creating the Array 88 | if as_object_array: 89 | data = data.astype("O") 90 | a[:, :] = data 91 | assert np.array_equal(data, a[:, :]) 92 | assert a.metadata.data_type == DataType.bytes 93 | assert a.dtype == "O" 94 | 95 | # test round trip 96 | b = zarr.open(sp) 97 | assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy 98 | assert np.array_equal(data, b[:, :]) 99 | assert b.metadata.data_type == DataType.bytes 100 | assert a.dtype == "O" 101 | -------------------------------------------------------------------------------- /tests/test_zarrs_http.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import aiohttp 4 | import numpy as np 5 | import pytest 6 | import zarr 7 | from zarr.storage import FsspecStore 8 | 9 | ARR_REF = np.array( 10 | [ 11 | [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -0.6, 0.1], 12 | [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -1.6, 0.1], 13 | [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -2.6, 0.1], 14 | [np.nan, np.nan, np.nan, np.nan, -3.4, -3.5, -3.6, 0.1], 15 | [1.0, 1.0, 1.0, -4.3, -4.4, -4.5, -4.6, 1.1], 16 | [1.0, 1.0, 1.0, -5.3, -5.4, -5.5, -5.6, 1.1], 17 | [1.0, 1.0, 1.0, 1.0, 1.1, 1.1, -6.6, 1.1], 18 | [1.0, 1.0, 1.0, 1.0, -7.4, -7.5, -7.6, -7.7], 19 | ] 20 | ) 21 | 22 | URL = "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" 23 | 24 | 25 | def test_zarrs_http(): 26 | arr = zarr.open(URL) 27 | assert arr.shape == (8, 8) 28 | assert np.allclose(arr[:], ARR_REF, equal_nan=True) 29 | 30 | 31 | @pytest.mark.xfail(reason="Storage options are not supported for HTTP store") 32 | def test_zarrs_http_kwargs(): 33 | store = FsspecStore.from_url( 34 | URL, storage_options={"auth": aiohttp.BasicAuth("user", "pass")} 35 | ) 36 | arr = zarr.open(store) 37 | assert arr.shape == (8, 8) 38 | assert np.allclose(arr[:], ARR_REF, equal_nan=True) 39 | -------------------------------------------------------------------------------- /tests/test_zstd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from zarr import Array 4 | from zarr.abc.store import Store 5 | from zarr.codecs import BytesCodec, ZstdCodec 6 | from zarr.storage import StorePath 7 | 8 | 9 | @pytest.mark.parametrize("checksum", [True, False]) 10 | def test_zstd(*, store: Store, checksum: bool) -> None: 11 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 12 | 13 | a = Array.create( 14 | StorePath(store, path="zstd"), 15 | shape=data.shape, 16 | chunk_shape=(16, 16), 17 | dtype=data.dtype, 18 | fill_value=0, 19 | codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], 20 | ) 21 | 22 | a[:, :] = data 23 | assert np.array_equal(data, a[:, :]) 24 | --------------------------------------------------------------------------------