├── .github
│   └── workflows
│       ├── cd.yml
│       ├── ci.yml
│       └── version-cmp.py
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── contributing.md
│   ├── index.md
│   └── make.bat
├── hatch.toml
├── pyproject.toml
├── python
│   └── zarrs
│       ├── __init__.py
│       ├── _internal.pyi
│       ├── pipeline.py
│       ├── py.typed
│       └── utils.py
├── src
│   ├── bin
│   │   └── stub_gen.rs
│   ├── chunk_item.rs
│   ├── concurrency.rs
│   ├── lib.rs
│   ├── metadata_v2.rs
│   ├── runtime.rs
│   ├── store.rs
│   ├── store
│   │   ├── filesystem.rs
│   │   ├── http.rs
│   │   └── manager.rs
│   ├── tests.rs
│   └── utils.rs
└── tests
    ├── conftest.py
    ├── test_blosc.py
    ├── test_codecs.py
    ├── test_endian.py
    ├── test_gzip.py
    ├── test_pipeline.py
    ├── test_sharding.py
    ├── test_transpose.py
    ├── test_v2.py
    ├── test_version.py
    ├── test_vlen.py
    ├── test_zarrs_http.py
    └── test_zstd.py

/.github/workflows/cd.yml:
--------------------------------------------------------------------------------
1 | name: cd
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |     tags:
8 |       - '**'
9 |   pull_request:
10 | 
11 | env:
12 |   CARGO_TERM_COLOR: always
13 | 
14 | jobs:
15 |   build:
16 |     name: build on ${{ matrix.os }} (${{ matrix.target }}${{ matrix.os == 'linux' && format(' - {0}', matrix.manylinux == 'auto' && 'manylinux' || matrix.manylinux) || '' }})
17 |     # only run on push to tags, main branch, or explicit full build
18 |     # keep condition in sync with `build-sdist` job
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         os: [linux, macos, windows]
23 |         target: [x86_64, aarch64]
24 |         manylinux: ['2_28']
25 |         include:
26 |           # manylinux for various platforms
27 |           #- { os: linux, manylinux: '2_28', target: i686 }
28 |           - { os: linux, manylinux: '2_28', target: armv7 }
29 |           - { os: linux, manylinux: '2_28', target: ppc64le }
30 |           #- { os: linux, manylinux: '2_28', target: s390x }
31 |           # musl
32 |           - { os: linux, manylinux: musllinux_1_2, target: x86_64 }
33 |           - { os: linux, manylinux: musllinux_1_2, target: aarch64 }
34 |           - { os: linux, manylinux: musllinux_1_2, target: armv7 }
35 |           # windows
36 |           - { os: windows, target: i686, python-architecture: x86 }
37 |         exclude:
38 |           # https://github.com/rust-cross/cargo-xwin/issues/76
39 |           - os: windows
40 |             target: aarch64
41 |     runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest
42 |     steps:
43 |       - uses: actions/checkout@v4
44 |       - uses: actions/setup-python@v5
45 |         with:
46 |           python-version: '3.13'
47 |           architecture: ${{ matrix.python-architecture || 'x64' }}
48 |       - run: pip install twine
49 |       - uses: PyO3/maturin-action@v1
50 |         with:
51 |           target: ${{ matrix.target }}
52 |           manylinux: ${{ matrix.manylinux }}
53 |           args: --release --out dist --interpreter '3.11 3.12 3.13'
54 |           rust-toolchain: stable
55 |           docker-options: -e CI
56 |           # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846
57 |           maturin-version: 1.7.4
58 |           before-script-linux: |
59 |             # If we're running on RHEL/CentOS, install needed packages.
60 | if command -v yum &> /dev/null; then 61 | yum update -y && yum install -y perl-core 62 | # https://github.com/PyO3/maturin-action/discussions/152 63 | if [[ "${{ matrix.os }}" == "linux" && "${{ matrix.target }}" == "x86_64" && "${{ matrix.manylinux }}" == "2_28" ]]; then 64 | yum update -y && yum install -y clang 65 | fi 66 | fi 67 | - run: ${{ (matrix.os == 'windows' && 'dir') || 'ls -lh' }} dist/ 68 | - run: twine check --strict dist/* 69 | - uses: actions/upload-artifact@v4 70 | with: 71 | name: pypi-files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux }} 72 | path: dist 73 | 74 | build-sdist: 75 | name: build sdist 76 | # keep condition in sync with `build` job 77 | runs-on: ubuntu-latest 78 | steps: 79 | - uses: actions/checkout@v4 80 | - uses: actions/setup-python@v5 81 | with: 82 | python-version: '3.13' 83 | - uses: PyO3/maturin-action@v1 84 | with: 85 | # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846 86 | maturin-version: 1.7.4 87 | command: sdist 88 | args: --out dist 89 | rust-toolchain: stable 90 | - uses: actions/upload-artifact@v4 91 | with: 92 | name: pypi-files-sdist 93 | path: dist 94 | 95 | inspect: 96 | needs: [build, build-sdist] 97 | runs-on: ubuntu-latest 98 | steps: 99 | - uses: actions/download-artifact@v4 100 | with: 101 | pattern: pypi-files-* 102 | merge-multiple: true 103 | path: dist/ 104 | - run: ls -lh dist/ 105 | # TODO: some more checks? `twine` is already run above 106 | 107 | # If git tag is a version, verify that it matches the package metadata version (or fail job and skip `publish`) 108 | # If git tag is not a version, set output `version` to "" (also skipping `publish`) 109 | version: 110 | if: github.ref_type == 'tag' && startsWith(github.ref_name, 'v') 111 | needs: build 112 | outputs: 113 | version: ${{ steps.version.outputs.version }} 114 | is_prerelease: ${{ steps.version.outputs.is_prerelease }} 115 | runs-on: ubuntu-latest 116 | steps: 117 | - uses: actions/checkout@v4 118 | - uses: actions/setup-python@v5 119 | with: 120 | python-version: '3.13' 121 | - uses: actions/download-artifact@v4 122 | with: 123 | name: pypi-files-linux-x86_64-2_28 124 | path: dist/ 125 | - name: Install zarrs-python 126 | run: pip install packaging dist/*manylinux_2_28_x86_64.whl 127 | - name: Get zarrs-python version and tag 128 | id: version 129 | run: python .github/workflows/version-cmp.py 130 | 131 | publish: 132 | if: needs.version.outputs.version != '' 133 | runs-on: ubuntu-latest 134 | needs: [inspect, version] 135 | environment: pypi 136 | permissions: 137 | contents: write # to create a github release 138 | id-token: write # to authenticate as Trusted Publisher to pypi.org 139 | steps: 140 | - uses: actions/download-artifact@v4 141 | with: 142 | pattern: pypi-files-* 143 | merge-multiple: true 144 | path: dist/ 145 | - name: "Publishing version ${{ needs.version.outputs.version }}" 146 | uses: pypa/gh-action-pypi-publish@release/v1 147 | with: 148 | packages-dir: dist/ 149 | verbose: true 150 | - uses: ncipollo/release-action@v1 151 | with: 152 | name: ${{ needs.version.outputs.version }} 153 | prerelease: ${{ needs.version.outputs.is_prerelease }} 154 | generateReleaseNotes: true 155 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ 
github.event.pull_request.number || github.ref }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | CARGO_TERM_COLOR: always 15 | PYTEST_ADDOPTS: '--color=yes' 16 | 17 | jobs: 18 | build_and_test: 19 | name: build and test 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | rust_toolchain: ["stable"] # "nightly" 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | # Rust in GH runner images can lag behind stable releases + act does not include Rust 29 | - name: Install Rust 30 | uses: dtolnay/rust-toolchain@master 31 | with: 32 | toolchain: ${{ matrix.rust_toolchain }} 33 | components: rustfmt 34 | 35 | - name: Install rust-cache 36 | uses: Swatinem/rust-cache@v2 37 | 38 | - name: Install Python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: "3.x" 42 | 43 | - name: Install UV 44 | uses: astral-sh/setup-uv@v3 45 | with: 46 | version: "0.5.0" 47 | enable-cache: true 48 | cache-dependency-glob: | 49 | pyproject.toml 50 | Cargo.toml 51 | 52 | - name: Install python deps + Build 53 | run: | 54 | uv pip install --system -e ".[test,dev]" --verbose 55 | 56 | - name: Python Tests 57 | run: pytest -n auto 58 | 59 | - name: Rust Tests 60 | run: cargo test 61 | 62 | - name: Check formatting 63 | # see “Type hints” section in contributing.md 64 | run: | 65 | cargo run --bin stub_gen 66 | pre-commit run --all-files --show-diff-on-failure || true 67 | git diff --exit-code HEAD 68 | -------------------------------------------------------------------------------- /.github/workflows/version-cmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Can’t be an isolated script since we want to access zarrs’ metadata 3 | 4 | import importlib.metadata as im 5 | import os 6 | import sys 7 | from pathlib import Path 8 | 9 | from packaging.version import InvalidVersion, Version 10 | 11 | 12 | def set_outputs(version: Version | str) -> None: 13 | is_prerelease = version.is_prerelease if isinstance(version, Version) else False 14 | is_prerelease_json = "true" if is_prerelease else "false" 15 | print(f"{version=!s} {is_prerelease=}") 16 | with Path(os.environ["GITHUB_OUTPUT"]).open("a") as f: 17 | print(f"version={version}", file=f) 18 | print(f"is_prerelease={is_prerelease_json}", file=f) 19 | 20 | 21 | version_tag_str = os.environ["GITHUB_REF_NAME"] 22 | assert version_tag_str.startswith("v"), "should be enforced in `if:` condition" 23 | try: 24 | version_tag = Version(version_tag_str[1:]) 25 | except InvalidVersion: 26 | set_outputs("") 27 | sys.exit(0) 28 | 29 | if version_tag_str[1:] != str(version_tag): 30 | sys.exit(f"Tag version not normalized: {version_tag_str} should be v{version_tag}") 31 | 32 | if version_tag != (version_meta := Version(im.version("zarrs"))): 33 | sys.exit(f"Version mismatch: {version_tag} (tag) != {version_meta} (metadata)") 34 | 35 | set_outputs(version_meta) 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs 2 | /.idea/ 3 | /.vscode/ 4 | 5 | # Caches 6 | .DS_Store 7 | __pycache__/ 8 | /.*cache/ 9 | /.hypothesis/ 10 | 11 | # Build 12 | *.so 13 | /target/ 14 | /dist/ 15 | /docs/_build/ 16 | 17 | # Coverage 18 | /.coverage 19 | /coverage.xml 20 | 21 | # Docs 22 | docs/generated/ 23 | 24 | # Lock files 25 | Cargo.lock 26 | uv.lock 27 | -------------------------------------------------------------------------------- 
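An aside on the normalization check in `version-cmp.py` above: `packaging` canonicalizes PEP 440 versions, so a tag that parses but is not in canonical form fails the job before the metadata comparison. A small illustration (the tag is hypothetical, not from this repository):

```python
from packaging.version import Version

# "v1.0.0.RC1" parses, but its canonical form differs from the tag text,
# so the workflow would exit with "Tag version not normalized".
tag = "v1.0.0.RC1"
print(str(Version(tag[1:])))  # -> 1.0.0rc1, which != 1.0.0.RC1
```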
/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: check-added-large-files 8 | - id: check-case-conflict 9 | - id: check-toml 10 | - id: check-yaml 11 | - id: check-merge-conflict 12 | - id: detect-private-key 13 | - id: no-commit-to-branch 14 | args: ["--branch=main"] 15 | - repo: local 16 | hooks: 17 | - id: rustfmt 18 | name: rustfmt 19 | description: Check if all files follow the rustfmt style 20 | entry: cargo fmt --all -- --color always 21 | language: system 22 | pass_filenames: false 23 | - repo: https://github.com/astral-sh/ruff-pre-commit 24 | rev: v0.11.9 25 | hooks: 26 | - id: ruff 27 | args: ["--fix"] 28 | - id: ruff-format 29 | # The following can be removed once PLR0917 is out of preview 30 | - name: ruff preview rules 31 | id: ruff 32 | args: ["--preview", "--select=PLR0917"] 33 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-24.04 4 | apt_packages: 5 | - clang 6 | tools: 7 | python: "3.12" 8 | rust: "latest" 9 | jobs: 10 | post_checkout: 11 | # unshallow so version can be derived from tag 12 | - git fetch --unshallow || true 13 | sphinx: 14 | configuration: docs/conf.py 15 | fail_on_warning: true 16 | python: 17 | install: 18 | - method: pip 19 | path: . 20 | extra_requirements: 21 | - doc 22 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zarrs-python" 3 | version = "0.1.4" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "zarrs_python" 9 | crate-type = ["cdylib", "rlib"] 10 | 11 | [dependencies] 12 | pyo3 = { version = "0.23.2", features = ["abi3-py311"] } 13 | zarrs = { version = "0.20.0", features = ["async", "zlib", "pcodec", "bz2"] } 14 | rayon_iter_concurrent_limit = "0.2.0" 15 | rayon = "1.10.0" 16 | # fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path 17 | openssl = { version = "0.10", features = ["vendored"] } 18 | numpy = "0.23.0" 19 | unsafe_cell_slice = "0.2.0" 20 | serde_json = "1.0.128" 21 | pyo3-stub-gen = "0.7.0" 22 | opendal = { version = "0.53.0", features = ["services-http"] } 23 | tokio = { version = "1.41.1", features = ["rt-multi-thread"] } 24 | zarrs_opendal = "0.7.2" 25 | itertools = "0.9.0" 26 | 27 | [profile.release] 28 | lto = true 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ilan Gold, Lachlan Deakin, Philipp Angerer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # zarrs-python
2 | 
3 | [![PyPI](https://img.shields.io/pypi/v/zarrs.svg)](https://pypi.org/project/zarrs)
4 | [![Downloads](https://static.pepy.tech/badge/zarrs/month)](https://pepy.tech/project/zarrs)
5 | [![Downloads](https://static.pepy.tech/badge/zarrs)](https://pepy.tech/project/zarrs)
6 | [![Stars](https://img.shields.io/github/stars/zarrs/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/zarrs/zarrs-python/stargazers)
7 | ![CI](https://github.com/zarrs/zarrs-python/actions/workflows/ci.yml/badge.svg)
8 | ![CD](https://github.com/zarrs/zarrs-python/actions/workflows/cd.yml/badge.svg)
9 | 
10 | This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up I/O (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
11 | 
12 | To use the project, simply install our package (which depends on `zarr-python>=3.0.0`) and run:
13 | 
14 | ```python
15 | import zarr
16 | import zarrs
17 | zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
18 | ```
19 | 
20 | You can then use your `zarr` as normal (with some caveats)!
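A minimal end-to-end sketch of what that looks like (the shape, chunking, and `example.zarr` path below are illustrative assumptions, not part of the project):

```python
import numpy as np
import zarr
import zarrs  # noqa: F401

zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})

# Reads and writes on this array are now routed through the Rust pipeline.
arr = zarr.open(
    "example.zarr", mode="w", shape=(1000, 1000), chunks=(100, 100), dtype="float32"
)
arr[:] = np.arange(1_000_000, dtype="float32").reshape(1000, 1000)
assert arr[0, 999] == 999.0
```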
21 | 
22 | ## API
23 | 
24 | We export a `ZarrsCodecPipeline` class for `zarr-python` to use. It is not meant to be instantiated directly, and we do not guarantee the stability of its API beyond what `zarr-python` requires; therefore, it is not documented here.
25 | 
26 | At the moment, we only support a subset of the `zarr-python` stores:
27 | 
28 | - [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem)
29 | - [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore)
30 |   - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)
31 | 
32 | A `NotImplementedError` will be raised if a store is not supported.
33 | We intend to support more stores in the future: https://github.com/zarrs/zarrs-python/issues/44.
34 | 
35 | ### Configuration
36 | 
37 | `ZarrsCodecPipeline` options are exposed through `zarr.config`.
38 | 
39 | Standard `zarr.config` options control some functionality (see the defaults in the [config.py](https://github.com/zarr-developers/zarr-python/blob/main/src/zarr/core/config.py) of `zarr-python`):
40 | - `threading.max_workers`: the maximum number of threads used internally by the `ZarrsCodecPipeline` on the Rust side.
41 |   - Defaults to the number of threads in the global `rayon` thread pool if set to `None`, which is [typically the number of logical CPUs](https://docs.rs/rayon/latest/rayon/struct.ThreadPoolBuilder.html#method.num_threads).
42 | - `array.write_empty_chunks`: whether or not to store empty chunks.
43 |   - Defaults to false if `None`. Note that checking for emptiness has some overhead; see [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#store-empty-chunks) for more info.
44 | 
45 | The `ZarrsCodecPipeline`-specific options are:
46 | - `codec_pipeline.chunk_concurrent_maximum`: the maximum number of chunks stored/retrieved concurrently.
47 |   - Defaults to the number of logical CPUs if `None`. It is also constrained by `threading.max_workers`.
48 | - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
49 |   - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
50 | - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
51 |   - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
52 | 
53 | For example:
54 | ```python
55 | zarr.config.set({
56 |     "threading.max_workers": None,
57 |     "array.write_empty_chunks": False,
58 |     "codec_pipeline": {
59 |         "path": "zarrs.ZarrsCodecPipeline",
60 |         "validate_checksums": True,
61 |         "store_empty_chunks": False,
62 |         "chunk_concurrent_maximum": None,
63 |         "chunk_concurrent_minimum": 4,
64 |     }
65 | })
66 | ```
67 | 
68 | If a `ZarrsCodecPipeline` is pickled and then un-pickled, and in the meantime one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are fixed. This may change in the future as guidance from the `zarr` community becomes clear.
69 | 
70 | ## Concurrency
71 | 
72 | Concurrency can be classified into two types:
73 | - chunk (outer) concurrency: the number of chunks retrieved/stored concurrently.
74 |   - This is chosen automatically based on various factors, such as the chunk size and codecs.
75 |   - It is constrained between `codec_pipeline.chunk_concurrent_minimum` and `codec_pipeline.chunk_concurrent_maximum` for operations involving multiple chunks.
76 | - codec (inner) concurrency: the number of threads encoding/decoding a chunk.
77 |   - This is chosen automatically in combination with the chunk concurrency.
78 | 
79 | The product of the chunk and codec concurrency will approximately match `threading.max_workers`, as illustrated below.
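To make that budget concrete, here is a hypothetical tuning (the numbers are illustrative; the actual split is chosen internally by `zarrs` based on the chunk size and codecs):

```python
import zarr

# A thread budget of 16 split between the two levels: zarrs may then process
# roughly 4 chunks concurrently with ~4 codec threads each (4 × 4 ≈ 16).
zarr.config.set({
    "threading.max_workers": 16,
    "codec_pipeline.chunk_concurrent_minimum": 4,
    "codec_pipeline.chunk_concurrent_maximum": 8,
})
```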
80 | 
81 | Chunk concurrency is typically favored because:
82 | - parallel encoding/decoding can have a high overhead with some codecs, especially with small chunks, and
83 | - it is advantageous to retrieve/store multiple chunks concurrently, especially with high-latency stores.
84 | 
85 | `zarrs-python` will often favor codec concurrency with sharded arrays, as they are well suited to it.
86 | 
87 | ## Supported Indexing Methods
88 | 
89 | The following indexing methods will trigger a fallback to the old `zarr-python` pipeline:
90 | 
91 | 1. Any `oindex` or `vindex` integer `np.ndarray` indexing with dimensionality >= 3, i.e.,
92 | 
93 |    ```python
94 |    arr[np.array([...]), :, np.array([...])]
95 |    arr[np.array([...]), np.array([...]), np.array([...])]
96 |    arr[np.array([...]), np.array([...]), np.array([...])] = ...
97 |    arr.oindex[np.array([...]), np.array([...]), np.array([...])] = ...
98 |    ```
99 | 
100 | 2. Any `vindex` or `oindex` discontinuous integer `np.ndarray` indexing for writes in 2D
101 | 
102 |    ```python
103 |    arr[np.array([0, 5]), :] = ...
104 |    arr.oindex[np.array([0, 5]), :] = ...
105 |    ```
106 | 
107 | 3. `vindex` writes in 2D where both indexers are integer `np.ndarray` indices, i.e.,
108 | 
109 |    ```python
110 |    arr[np.array([...]), np.array([...])] = ...
111 |    ```
112 | 
113 | 4. Ellipsis indexing. We have tested some cases, but others fail even with `zarr-python`'s default codec pipeline, so for now we advise proceeding with caution here.
114 | 
115 |    ```python
116 |    arr[0:10, ..., 0:5]
117 |    ```
118 | 
119 | 
120 | Furthermore, using anything other than contiguous indexers (i.e., slices or consecutive-integer `np.ndarray`s) for numeric data will fall back to the default `zarr-python` implementation.
121 | 
122 | Please file an issue if you believe we have more holes in our coverage than we are aware of, or if you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock much wider use of the Rust pipeline for that use case (very useful for mini-batch training, perhaps!).
123 | 
124 | Further, any codecs not supported by `zarrs` will also automatically fall back to the Python implementation.
125 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 | 
6 | from importlib.metadata import distribution
7 | 
8 | # -- Project information -----------------------------------------------------
9 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
10 | 
11 | _dist = distribution("zarrs")
12 | project = _dist.name
13 | copyright = f"2024, {_dist.metadata['Author']}"
14 | author = _dist.metadata["Author"]
15 | release = _dist.version
16 | 
17 | # -- General configuration ---------------------------------------------------
18 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
19 | 
20 | extensions = [
21 |     "myst_parser",
22 |     "sphinx.ext.autosummary",
23 | ]
24 | source_suffix = [".rst", ".md"]
25 | 
26 | templates_path = ["_templates"]
27 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
28 | 
29 | 
30 | # -- Options for HTML output -------------------------------------------------
31 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
32 | 
33 | html_theme = "alabaster"
34 | # html_static_path = ["_static"]
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | ## Rust
4 | 
5 | You will need `rust` and `cargo` installed on your local system. For more info, see [the rust docs](https://doc.rust-lang.org/cargo/getting-started/installation.html).
6 | 
7 | ## Environment management
8 | 
9 | We encourage the use of [uv](https://docs.astral.sh/uv/) for environment management. To install the package for development, run
10 | 
11 | ```shell
12 | uv pip install -e ".[test,dev,doc]"
13 | ```
14 | 
15 | However, note that while this does build the Rust package, the package will not be rebuilt upon edits despite the `-e` flag. You will need to rebuild it manually using either `uv pip install -e .` or `maturin develop`, as sketched below. Note that for benchmarking/speed testing, it is advisable to build a release version of the Rust package by passing the `-r` flag to `maturin`. For more information on the `rust`-`python` bridge, see the [`PyO3` docs](https://pyo3.rs/v0.22.6/).
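A minimal sketch of that rebuild loop (the commands come from the paragraph above):

```shell
# after editing files under src/, rebuild the extension in place
maturin develop      # debug build, fast to compile
maturin develop -r   # release build, use this for benchmarking
```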
16 | 
17 | ## Testing
18 | 
19 | To run the tests (the test dependencies are installed via the `test` extra above), simply run
20 | 
21 | ```shell
22 | pytest
23 | ```
24 | 
25 | or
26 | 
27 | ```shell
28 | pytest -n auto
29 | ```
30 | 
31 | for parallelized tests. Most tests have been copied from the `zarr-python` repository, with the exception of `test_pipeline.py`, which we have written ourselves.
32 | 
33 | ## Type hints
34 | 
35 | Thanks to [`pyo3-stub-gen`][], we can generate type stubs for the `zarrs._internal` module.
36 | If the “Check formatting” CI step fails, run `cargo run --bin stub_gen`, then `pre-commit run --all-files`, and commit the changes.
37 | 
38 | Once `maturin` can be run as a `hatchling` plugin, this can be made automatic.
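For example, the full loop looks like this (a sketch; reviewing before committing is assumed, not mandated):

```shell
cargo run --bin stub_gen    # regenerate python/zarrs/_internal.pyi
pre-commit run --all-files  # re-apply formatting to the regenerated stub
git diff                    # review, then commit the changes
```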
39 | 40 | [`pyo3-stub-gen`]: https://github.com/Jij-Inc/pyo3-stub-gen 41 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | ``` 3 | 4 | ```{toctree} 5 | :hidden: true 6 | :maxdepth: 1 7 | 8 | contributing 9 | ``` 10 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /hatch.toml: -------------------------------------------------------------------------------- 1 | [envs.default] 2 | installer = "uv" 3 | features = ["dev"] 4 | 5 | [envs.hatch-test] 6 | default-args = [] 7 | features = ["test"] 8 | 9 | [envs.docs] 10 | features = ["doc"] 11 | extra-dependencies = ["setuptools"] # https://bitbucket.org/pybtex-devs/pybtex/issues/169 12 | scripts.build = "sphinx-build -M html docs docs/_build -W --keep-going {args}" 13 | scripts.clean = "git clean -fdX -- {args:docs}" 14 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=1.5,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "zarrs" 7 | requires-python = ">=3.11" 8 | authors = [ 9 | { name = "Ilan Gold" }, 10 | { name = "Lachlan Deakin" }, 11 | { name = "Philipp Angerer" }, 12 | ] 13 | license = "MIT" 14 | classifiers = [ 15 | "Programming Language :: Rust", 16 | "Programming Language :: Python :: Implementation :: CPython", 17 | "Programming Language :: Python :: Implementation :: PyPy", 18 | "Typing :: Typed", 19 | ] 20 | dynamic = ["version"] 21 | dependencies = [ 22 | 'asciitree', 23 | 'numpy>=1.24', 24 | 'fasteners', 25 | 'numcodecs[msgpack]>=0.10.0', 26 | 'fsspec>2024', 27 | 'crc32c', 28 | 'zstandard', 29 | 'typing_extensions', 30 | 'donfig', 31 | 'pytest', 32 | 'universal_pathlib>=0.2.0', 33 | "zarr>=3.0.3", 34 | ] 35 | 36 | [project.optional-dependencies] 37 | test = [ 38 | "coverage", 39 | "pytest", 40 | "pytest-cov", 41 | "msgpack", 42 | "lmdb", 43 | "s3fs", 44 | "pytest-asyncio", 45 | "moto[s3]", 46 | "flask-cors", 47 | "flask", 48 | "requests", 49 | "mypy", 50 | "hypothesis", 51 | "pytest-xdist", 52 | ] 53 | dev = ["maturin", "pip", "pre-commit"] 54 | doc = ["sphinx>=7.4.6", "myst-parser"] 55 | 56 | [tool.maturin] 57 | python-source = "python" 58 | module-name = "zarrs._internal" 59 
| features = ["pyo3/extension-module"] 60 | 61 | [tool.pytest.ini_options] 62 | minversion = "7" 63 | testpaths = ["tests"] 64 | log_cli_level = "INFO" 65 | xfail_strict = true 66 | asyncio_mode = "auto" 67 | asyncio_default_fixture_loop_scope = "function" 68 | doctest_optionflags = [ 69 | "NORMALIZE_WHITESPACE", 70 | "ELLIPSIS", 71 | "IGNORE_EXCEPTION_DETAIL", 72 | ] 73 | addopts = [ 74 | "--durations=10", 75 | "-ra", 76 | "--strict-config", 77 | "--strict-markers", 78 | "--import-mode=importlib", 79 | ] 80 | filterwarnings = [ 81 | "error:::zarr.*", 82 | "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning", 83 | "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", 84 | "ignore:Creating a zarr.buffer.gpu.*:UserWarning", 85 | "ignore:Duplicate name:UserWarning", # from ZipFile 86 | "ignore:.*not part in the Zarr format 3.*:UserWarning", 87 | ] 88 | markers = ["gpu: mark a test as requiring CuPy and GPU"] 89 | 90 | [tool.ruff] 91 | src = ["src", "tests"] 92 | 93 | [tool.ruff.format] 94 | docstring-code-format = true 95 | 96 | [tool.ruff.lint] 97 | select = [ 98 | "E", # Error detected by Pycodestyle 99 | "F", # Errors detected by Pyflakes 100 | "W", # Warning detected by Pycodestyle 101 | "UP", # pyupgrade 102 | "I", # isort 103 | "TC", # manage type checking blocks 104 | "TID251", # Banned imports 105 | "ICN", # Follow import conventions 106 | "PTH", # Pathlib instead of os.path 107 | "PLR0917", # Ban APIs with too many positional parameters 108 | "FBT", # No positional boolean parameters 109 | "PT", # Pytest style 110 | "SIM", # Simplify control flow 111 | ] 112 | ignore = [ 113 | # line too long -> we accept long comment lines; black gets rid of long code lines 114 | "E501", 115 | # module level import not at top of file -> required to circumvent circular imports for Scanpys API 116 | "E402", 117 | # E266 too many leading '#' for block comment -> Scanpy allows them for comments into sections 118 | "E262", 119 | # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation 120 | "E741", 121 | ] 122 | [tool.ruff.lint.per-file-ignores] 123 | "**/*.pyi" = ["ICN001"] 124 | [tool.ruff.lint.isort] 125 | known-first-party = ["zarrs"] 126 | -------------------------------------------------------------------------------- /python/zarrs/__init__.py: -------------------------------------------------------------------------------- 1 | from zarr.registry import register_pipeline 2 | 3 | from ._internal import __version__ 4 | from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline 5 | from .utils import CollapsedDimensionError, DiscontiguousArrayError 6 | 7 | 8 | # Need to do this redirection so people can access the pipeline as `zarrs.ZarrsCodecPipeline` instead of `zarrs.pipeline.ZarrsCodecPipeline` 9 | class ZarrsCodecPipeline(_ZarrsCodecPipeline): 10 | pass 11 | 12 | 13 | register_pipeline(ZarrsCodecPipeline) 14 | 15 | __all__ = [ 16 | "ZarrsCodecPipeline", 17 | "DiscontiguousArrayError", 18 | "CollapsedDimensionError", 19 | "__version__", 20 | ] 21 | -------------------------------------------------------------------------------- /python/zarrs/_internal.pyi: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by pyo3_stub_gen 2 | # ruff: noqa: E501, F401 3 | 4 | import builtins 5 | import typing 6 | from enum import Enum, auto 7 | 8 | import numpy.typing 9 | 10 | class Basic: 11 | def __new__(cls, byte_interface: typing.Any, chunk_spec: 
typing.Any): ... 12 | ... 13 | 14 | class CodecPipelineImpl: 15 | def __new__( 16 | cls, 17 | metadata: builtins.str, 18 | *, 19 | validate_checksums: builtins.bool | None = None, 20 | store_empty_chunks: builtins.bool | None = None, 21 | chunk_concurrent_minimum: builtins.int | None = None, 22 | chunk_concurrent_maximum: builtins.int | None = None, 23 | num_threads: builtins.int | None = None, 24 | ): ... 25 | def retrieve_chunks_and_apply_index( 26 | self, 27 | chunk_descriptions: typing.Sequence[WithSubset], 28 | value: numpy.typing.NDArray[typing.Any], 29 | ) -> None: ... 30 | def store_chunks_with_indices( 31 | self, 32 | chunk_descriptions: typing.Sequence[WithSubset], 33 | value: numpy.typing.NDArray[typing.Any], 34 | ) -> None: ... 35 | 36 | class FilesystemStoreConfig: 37 | root: builtins.str 38 | 39 | class HttpStoreConfig: 40 | endpoint: builtins.str 41 | 42 | class WithSubset: 43 | def __new__( 44 | cls, 45 | item: Basic, 46 | chunk_subset: typing.Sequence[slice], 47 | subset: typing.Sequence[slice], 48 | shape: typing.Sequence[builtins.int], 49 | ): ... 50 | ... 51 | 52 | class StoreConfig(Enum): 53 | Filesystem = auto() 54 | Http = auto() 55 | -------------------------------------------------------------------------------- /python/zarrs/pipeline.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import json 5 | import re 6 | from dataclasses import dataclass 7 | from typing import TYPE_CHECKING, TypedDict 8 | 9 | import numpy as np 10 | from zarr.abc.codec import Codec, CodecPipeline 11 | from zarr.core import BatchedCodecPipeline 12 | from zarr.core.config import config 13 | 14 | if TYPE_CHECKING: 15 | from collections.abc import Generator, Iterable, Iterator 16 | from typing import Any, Self 17 | 18 | from zarr.abc.store import ByteGetter, ByteSetter 19 | from zarr.core.array_spec import ArraySpec 20 | from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer 21 | from zarr.core.chunk_grids import ChunkGrid 22 | from zarr.core.common import ChunkCoords 23 | from zarr.core.indexing import SelectorTuple 24 | 25 | from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3 26 | from .utils import ( 27 | CollapsedDimensionError, 28 | DiscontiguousArrayError, 29 | FillValueNoneError, 30 | make_chunk_info_for_rust_with_indices, 31 | ) 32 | 33 | 34 | class UnsupportedDataTypeError(Exception): 35 | pass 36 | 37 | 38 | class UnsupportedMetadataError(Exception): 39 | pass 40 | 41 | 42 | def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | None: 43 | try: 44 | return CodecPipelineImpl( 45 | codec_metadata_json, 46 | validate_checksums=config.get("codec_pipeline.validate_checksums", None), 47 | store_empty_chunks=config.get("array.write_empty_chunks", None), 48 | chunk_concurrent_minimum=config.get( 49 | "codec_pipeline.chunk_concurrent_minimum", None 50 | ), 51 | chunk_concurrent_maximum=config.get( 52 | "codec_pipeline.chunk_concurrent_maximum", None 53 | ), 54 | num_threads=config.get("threading.max_workers", None), 55 | ) 56 | except TypeError as e: 57 | if re.match(r"codec (delta|zlib) is not supported", str(e)): 58 | return None 59 | else: 60 | raise e 61 | 62 | 63 | def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]: 64 | for codec in codecs: 65 | if codec.__class__.__name__ == "V2Codec": 66 | codec_dict = codec.to_dict() 67 | if codec_dict.get("filters", None) is not None: 68 | filters = [ 69 | 
json.dumps(filter.get_config()) 70 | for filter in codec_dict.get("filters") 71 | ] 72 | else: 73 | filters = None 74 | if codec_dict.get("compressor", None) is not None: 75 | compressor_json = codec_dict.get("compressor").get_config() 76 | compressor = json.dumps(compressor_json) 77 | else: 78 | compressor = None 79 | codecs_v3 = codec_metadata_v2_to_v3(filters, compressor) 80 | for codec in codecs_v3: 81 | yield json.loads(codec) 82 | else: 83 | yield codec.to_dict() 84 | 85 | 86 | class ZarrsCodecPipelineState(TypedDict): 87 | codec_metadata_json: str 88 | codecs: tuple[Codec, ...] 89 | 90 | 91 | @dataclass 92 | class ZarrsCodecPipeline(CodecPipeline): 93 | codecs: tuple[Codec, ...] 94 | impl: CodecPipelineImpl | None 95 | codec_metadata_json: str 96 | python_impl: BatchedCodecPipeline 97 | 98 | def __getstate__(self) -> ZarrsCodecPipelineState: 99 | return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs} 100 | 101 | def __setstate__(self, state: ZarrsCodecPipelineState): 102 | self.codecs = state["codecs"] 103 | self.codec_metadata_json = state["codec_metadata_json"] 104 | self.impl = get_codec_pipeline_impl(self.codec_metadata_json) 105 | self.python_impl = BatchedCodecPipeline.from_codecs(self.codecs) 106 | 107 | def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: 108 | raise NotImplementedError("evolve_from_array_spec") 109 | 110 | @classmethod 111 | def from_codecs(cls, codecs: Iterable[Codec]) -> Self: 112 | codec_metadata = list(codecs_to_dict(codecs)) 113 | codec_metadata_json = json.dumps(codec_metadata) 114 | # TODO: upstream zarr-python has not settled on how to deal with configs yet 115 | # Should they be checked when an array is created, or when an operation is performed? 116 | # https://github.com/zarr-developers/zarr-python/issues/2409 117 | # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567 118 | return cls( 119 | codec_metadata_json=codec_metadata_json, 120 | codecs=tuple(codecs), 121 | impl=get_codec_pipeline_impl(codec_metadata_json), 122 | python_impl=BatchedCodecPipeline.from_codecs(codecs), 123 | ) 124 | 125 | @property 126 | def supports_partial_decode(self) -> bool: 127 | return False 128 | 129 | @property 130 | def supports_partial_encode(self) -> bool: 131 | return False 132 | 133 | def __iter__(self) -> Iterator[Codec]: 134 | yield from self.codecs 135 | 136 | def validate( 137 | self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid 138 | ) -> None: 139 | raise NotImplementedError("validate") 140 | 141 | def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: 142 | raise NotImplementedError("compute_encoded_size") 143 | 144 | async def decode( 145 | self, 146 | chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], 147 | ) -> Iterable[NDBuffer | None]: 148 | raise NotImplementedError("decode") 149 | 150 | async def encode( 151 | self, 152 | chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], 153 | ) -> Iterable[Buffer | None]: 154 | raise NotImplementedError("encode") 155 | 156 | async def read( 157 | self, 158 | batch_info: Iterable[ 159 | tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 160 | ], 161 | out: NDBuffer, # type: ignore 162 | drop_axes: tuple[int, ...] 
= (), # FIXME: unused 163 | ) -> None: 164 | # FIXME: Error if array is not in host memory 165 | if not out.dtype.isnative: 166 | raise RuntimeError("Non-native byte order not supported") 167 | try: 168 | if self.impl is None: 169 | raise UnsupportedMetadataError() 170 | self._raise_error_on_unsupported_batch_dtype(batch_info) 171 | chunks_desc = make_chunk_info_for_rust_with_indices( 172 | batch_info, drop_axes, out.shape 173 | ) 174 | except ( 175 | UnsupportedMetadataError, 176 | DiscontiguousArrayError, 177 | CollapsedDimensionError, 178 | UnsupportedDataTypeError, 179 | FillValueNoneError, 180 | ): 181 | await self.python_impl.read(batch_info, out, drop_axes) 182 | return None 183 | else: 184 | out: NDArrayLike = out.as_ndarray_like() 185 | await asyncio.to_thread( 186 | self.impl.retrieve_chunks_and_apply_index, 187 | chunks_desc, 188 | out, 189 | ) 190 | return None 191 | 192 | async def write( 193 | self, 194 | batch_info: Iterable[ 195 | tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 196 | ], 197 | value: NDBuffer, # type: ignore 198 | drop_axes: tuple[int, ...] = (), 199 | ) -> None: 200 | try: 201 | if self.impl is None: 202 | raise UnsupportedMetadataError() 203 | self._raise_error_on_unsupported_batch_dtype(batch_info) 204 | chunks_desc = make_chunk_info_for_rust_with_indices( 205 | batch_info, drop_axes, value.shape 206 | ) 207 | except ( 208 | UnsupportedMetadataError, 209 | DiscontiguousArrayError, 210 | CollapsedDimensionError, 211 | UnsupportedDataTypeError, 212 | FillValueNoneError, 213 | ): 214 | await self.python_impl.write(batch_info, value, drop_axes) 215 | return None 216 | else: 217 | # FIXME: Error if array is not in host memory 218 | value_np: NDArrayLike | np.ndarray = value.as_ndarray_like() 219 | if not value_np.dtype.isnative: 220 | value_np = np.ascontiguousarray( 221 | value_np, dtype=value_np.dtype.newbyteorder("=") 222 | ) 223 | elif not value_np.flags.c_contiguous: 224 | value_np = np.ascontiguousarray(value_np) 225 | await asyncio.to_thread( 226 | self.impl.store_chunks_with_indices, chunks_desc, value_np 227 | ) 228 | return None 229 | 230 | def _raise_error_on_unsupported_batch_dtype( 231 | self, 232 | batch_info: Iterable[ 233 | tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 234 | ], 235 | ): 236 | # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm 237 | # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out 238 | if any( 239 | info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"} 240 | for (_, info, _, _, _) in batch_info 241 | ): 242 | raise UnsupportedDataTypeError() 243 | -------------------------------------------------------------------------------- /python/zarrs/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zarrs/zarrs-python/bbe36d68e28617088b8d831641432d39b69867a0/python/zarrs/py.typed -------------------------------------------------------------------------------- /python/zarrs/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import operator 4 | import os 5 | from functools import reduce 6 | from typing import TYPE_CHECKING, Any 7 | 8 | import numpy as np 9 | from zarr.core.array_spec import ArraySpec 10 | from zarr.core.indexing import SelectorTuple, is_integer 11 | from 
zarr.core.metadata.v2 import _default_fill_value 12 | 13 | from zarrs._internal import Basic, WithSubset 14 | 15 | if TYPE_CHECKING: 16 | from collections.abc import Iterable 17 | from types import EllipsisType 18 | 19 | from zarr.abc.store import ByteGetter, ByteSetter 20 | 21 | 22 | # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor 23 | def get_max_threads() -> int: 24 | return (os.cpu_count() or 1) + 4 25 | 26 | 27 | class DiscontiguousArrayError(Exception): 28 | pass 29 | 30 | 31 | class CollapsedDimensionError(Exception): 32 | pass 33 | 34 | 35 | class FillValueNoneError(Exception): 36 | pass 37 | 38 | 39 | # This is a (mostly) copy of the function from zarr.core.indexing that fixes: 40 | # DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated 41 | # TODO: Upstream this fix 42 | def make_slice_selection(selection: tuple[np.ndarray | float]) -> list[slice]: 43 | ls: list[slice] = [] 44 | for dim_selection in selection: 45 | if is_integer(dim_selection): 46 | ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) 47 | elif isinstance(dim_selection, np.ndarray): 48 | dim_selection = dim_selection.ravel() 49 | if len(dim_selection) == 1: 50 | ls.append( 51 | slice(int(dim_selection.item()), int(dim_selection.item()) + 1, 1) 52 | ) 53 | else: 54 | diff = np.diff(dim_selection) 55 | if (diff != 1).any() and (diff != 0).any(): 56 | raise DiscontiguousArrayError(diff) 57 | ls.append(slice(dim_selection[0], dim_selection[-1] + 1, 1)) 58 | else: 59 | ls.append(dim_selection) 60 | return ls 61 | 62 | 63 | def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[slice]: 64 | if isinstance(selector_tuple, slice): 65 | return [selector_tuple] 66 | if all(isinstance(s, slice) for s in selector_tuple): 67 | return list(selector_tuple) 68 | return make_slice_selection(selector_tuple) 69 | 70 | 71 | def resulting_shape_from_index( 72 | array_shape: tuple[int, ...], 73 | index_tuple: tuple[int | slice | EllipsisType | np.ndarray], 74 | drop_axes: tuple[int, ...], 75 | *, 76 | pad: bool, 77 | ) -> tuple[int, ...]: 78 | result_shape = [] 79 | advanced_index_shapes = [ 80 | idx.shape for idx in index_tuple if isinstance(idx, np.ndarray) 81 | ] 82 | basic_shape_index = 0 83 | 84 | # Broadcast all advanced indices, if any 85 | if advanced_index_shapes: 86 | result_shape += np.broadcast_shapes(*advanced_index_shapes) 87 | # Consume dimensions from array_shape 88 | basic_shape_index += len(advanced_index_shapes) 89 | 90 | # Process each remaining index in index_tuple 91 | for idx in index_tuple: 92 | if isinstance(idx, int): 93 | # Integer index reduces dimension, so skip this dimension in array_shape 94 | basic_shape_index += 1 95 | elif isinstance(idx, slice): 96 | if idx.step is not None and idx.step > 1: 97 | raise DiscontiguousArrayError( 98 | "Step size greater than 1 is not supported" 99 | ) 100 | # Slice keeps dimension, adjust size accordingly 101 | start, stop, _ = idx.indices(array_shape[basic_shape_index]) 102 | result_shape.append(stop - start) 103 | basic_shape_index += 1 104 | elif idx is Ellipsis: 105 | # Calculate number of dimensions that Ellipsis should fill 106 | num_to_fill = len(array_shape) - len(index_tuple) + 1 107 | result_shape += array_shape[ 108 | basic_shape_index : basic_shape_index + num_to_fill 109 | ] 110 | basic_shape_index += num_to_fill 111 | elif not isinstance(idx, np.ndarray): 112 | raise ValueError(f"Invalid index type: {type(idx)}") 113 | 114 | # Step 
4: Append remaining dimensions from array_shape if fewer indices were used 115 | if basic_shape_index < len(array_shape) and pad: 116 | result_shape += array_shape[basic_shape_index:] 117 | 118 | return tuple(size for idx, size in enumerate(result_shape) if idx not in drop_axes) 119 | 120 | 121 | def prod_op(x: Iterable[int]) -> int: 122 | return reduce(operator.mul, x, 1) 123 | 124 | 125 | def get_shape_for_selector( 126 | selector_tuple: SelectorTuple, 127 | shape: tuple[int, ...], 128 | *, 129 | pad: bool, 130 | drop_axes: tuple[int, ...] = (), 131 | ) -> tuple[int, ...]: 132 | if isinstance(selector_tuple, slice | np.ndarray): 133 | return resulting_shape_from_index( 134 | shape, 135 | (selector_tuple,), 136 | drop_axes, 137 | pad=pad, 138 | ) 139 | return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad) 140 | 141 | 142 | def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any: 143 | if fill_value is None: 144 | fill_value = _default_fill_value(dtype) 145 | return fill_value 146 | 147 | 148 | def make_chunk_info_for_rust_with_indices( 149 | batch_info: Iterable[ 150 | tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool] 151 | ], 152 | drop_axes: tuple[int, ...], 153 | shape: tuple[int, ...], 154 | ) -> list[WithSubset]: 155 | shape = shape if shape else (1,) # constant array 156 | chunk_info_with_indices: list[WithSubset] = [] 157 | for ( 158 | byte_getter, 159 | chunk_spec, 160 | chunk_selection, 161 | out_selection, 162 | _, 163 | ) in batch_info: 164 | if chunk_spec.fill_value is None: 165 | chunk_spec = ArraySpec( 166 | chunk_spec.shape, 167 | chunk_spec.dtype, 168 | get_implicit_fill_value(chunk_spec.dtype, chunk_spec.fill_value), 169 | chunk_spec.config, 170 | chunk_spec.prototype, 171 | ) 172 | chunk_info = Basic(byte_getter, chunk_spec) 173 | out_selection_as_slices = selector_tuple_to_slice_selection(out_selection) 174 | chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection) 175 | shape_chunk_selection_slices = get_shape_for_selector( 176 | tuple(chunk_selection_as_slices), 177 | chunk_spec.shape, 178 | pad=True, 179 | drop_axes=drop_axes, 180 | ) 181 | shape_chunk_selection = get_shape_for_selector( 182 | chunk_selection, chunk_spec.shape, pad=True, drop_axes=drop_axes 183 | ) 184 | if prod_op(shape_chunk_selection) != prod_op(shape_chunk_selection_slices): 185 | raise CollapsedDimensionError( 186 | f"{shape_chunk_selection} != {shape_chunk_selection_slices}" 187 | ) 188 | chunk_info_with_indices.append( 189 | WithSubset( 190 | chunk_info, 191 | chunk_subset=chunk_selection_as_slices, 192 | subset=out_selection_as_slices, 193 | shape=shape, 194 | ) 195 | ) 196 | return chunk_info_with_indices 197 | -------------------------------------------------------------------------------- /src/bin/stub_gen.rs: -------------------------------------------------------------------------------- 1 | use pyo3_stub_gen::Result; 2 | 3 | fn main() -> Result<()> { 4 | let stub = zarrs_python::stub_info()?; 5 | stub.generate()?; 6 | Ok(()) 7 | } 8 | -------------------------------------------------------------------------------- /src/chunk_item.rs: -------------------------------------------------------------------------------- 1 | use std::num::NonZeroU64; 2 | 3 | use pyo3::{ 4 | exceptions::{PyRuntimeError, PyValueError}, 5 | pyclass, pymethods, 6 | types::{PyAnyMethods, PyBytes, PyBytesMethods, PyInt, PySlice, PySliceMethods as _}, 7 | Bound, PyAny, PyErr, PyResult, 8 | }; 9 | use 
pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; 10 | use zarrs::{ 11 | array::{ChunkRepresentation, DataType, FillValue}, 12 | array_subset::ArraySubset, 13 | metadata::v3::MetadataV3, 14 | storage::StoreKey, 15 | }; 16 | 17 | use crate::{store::StoreConfig, utils::PyErrExt}; 18 | 19 | pub(crate) trait ChunksItem { 20 | fn store_config(&self) -> StoreConfig; 21 | fn key(&self) -> &StoreKey; 22 | fn representation(&self) -> &ChunkRepresentation; 23 | } 24 | 25 | #[derive(Clone)] 26 | #[gen_stub_pyclass] 27 | #[pyclass] 28 | pub(crate) struct Basic { 29 | store: StoreConfig, 30 | key: StoreKey, 31 | representation: ChunkRepresentation, 32 | } 33 | 34 | fn fill_value_to_bytes(dtype: &str, fill_value: &Bound<'_, PyAny>) -> PyResult> { 35 | if dtype == "string" { 36 | // Match zarr-python 2.x.x string fill value behaviour with a 0 fill value 37 | // See https://github.com/zarr-developers/zarr-python/issues/2792#issuecomment-2644362122 38 | if let Ok(fill_value_downcast) = fill_value.downcast::() { 39 | let fill_value_usize: usize = fill_value_downcast.extract()?; 40 | if fill_value_usize == 0 { 41 | return Ok(vec![]); 42 | } 43 | Err(PyErr::new::(format!( 44 | "Cannot understand non-zero integer {fill_value_usize} fill value for dtype {dtype}" 45 | )))?; 46 | } 47 | } 48 | 49 | if let Ok(fill_value_downcast) = fill_value.downcast::() { 50 | Ok(fill_value_downcast.as_bytes().to_vec()) 51 | } else if fill_value.hasattr("tobytes")? { 52 | Ok(fill_value.call_method0("tobytes")?.extract()?) 53 | } else { 54 | Err(PyErr::new::(format!( 55 | "Unsupported fill value {fill_value:?}" 56 | ))) 57 | } 58 | } 59 | 60 | #[gen_stub_pymethods] 61 | #[pymethods] 62 | impl Basic { 63 | #[new] 64 | fn new(byte_interface: &Bound<'_, PyAny>, chunk_spec: &Bound<'_, PyAny>) -> PyResult { 65 | let store: StoreConfig = byte_interface.getattr("store")?.extract()?; 66 | let path: String = byte_interface.getattr("path")?.extract()?; 67 | 68 | let chunk_shape = chunk_spec.getattr("shape")?.extract()?; 69 | let mut dtype: String = chunk_spec 70 | .getattr("dtype")? 71 | .call_method0("__str__")? 
72 | .extract()?; 73 | if dtype == "object" { 74 | // zarrs doesn't understand `object` which is the output of `np.dtype("|O").__str__()` 75 | // but maps it to "string" internally https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L288 76 | dtype = String::from("string"); 77 | } 78 | let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?; 79 | let fill_value_bytes = fill_value_to_bytes(&dtype, &fill_value)?; 80 | Ok(Self { 81 | store, 82 | key: StoreKey::new(path).map_py_err::()?, 83 | representation: get_chunk_representation(chunk_shape, &dtype, fill_value_bytes)?, 84 | }) 85 | } 86 | } 87 | 88 | #[derive(Clone)] 89 | #[gen_stub_pyclass] 90 | #[pyclass] 91 | pub(crate) struct WithSubset { 92 | pub item: Basic, 93 | pub chunk_subset: ArraySubset, 94 | pub subset: ArraySubset, 95 | } 96 | 97 | #[gen_stub_pymethods] 98 | #[pymethods] 99 | impl WithSubset { 100 | #[new] 101 | #[allow(clippy::needless_pass_by_value)] 102 | fn new( 103 | item: Basic, 104 | chunk_subset: Vec>, 105 | subset: Vec>, 106 | shape: Vec, 107 | ) -> PyResult { 108 | let chunk_subset = 109 | selection_to_array_subset(&chunk_subset, &item.representation.shape_u64())?; 110 | let subset = selection_to_array_subset(&subset, &shape)?; 111 | Ok(Self { 112 | item, 113 | chunk_subset, 114 | subset, 115 | }) 116 | } 117 | } 118 | 119 | impl ChunksItem for Basic { 120 | fn store_config(&self) -> StoreConfig { 121 | self.store.clone() 122 | } 123 | fn key(&self) -> &StoreKey { 124 | &self.key 125 | } 126 | fn representation(&self) -> &ChunkRepresentation { 127 | &self.representation 128 | } 129 | } 130 | 131 | impl ChunksItem for WithSubset { 132 | fn store_config(&self) -> StoreConfig { 133 | self.item.store.clone() 134 | } 135 | fn key(&self) -> &StoreKey { 136 | &self.item.key 137 | } 138 | fn representation(&self) -> &ChunkRepresentation { 139 | &self.item.representation 140 | } 141 | } 142 | 143 | fn get_chunk_representation( 144 | chunk_shape: Vec, 145 | dtype: &str, 146 | fill_value: Vec, 147 | ) -> PyResult { 148 | // Get the chunk representation 149 | let data_type = DataType::from_metadata( 150 | &MetadataV3::new(dtype), 151 | zarrs::config::global_config().data_type_aliases_v3(), 152 | ) 153 | .map_py_err::()?; 154 | let chunk_shape = chunk_shape 155 | .into_iter() 156 | .map(|x| NonZeroU64::new(x).expect("chunk shapes should always be non-zero")) 157 | .collect(); 158 | let chunk_representation = 159 | ChunkRepresentation::new(chunk_shape, data_type, FillValue::new(fill_value)) 160 | .map_py_err::()?; 161 | Ok(chunk_representation) 162 | } 163 | 164 | fn slice_to_range(slice: &Bound<'_, PySlice>, length: isize) -> PyResult> { 165 | let indices = slice.indices(length)?; 166 | if indices.start < 0 { 167 | Err(PyErr::new::( 168 | "slice start must be greater than or equal to 0".to_string(), 169 | )) 170 | } else if indices.stop < 0 { 171 | Err(PyErr::new::( 172 | "slice stop must be greater than or equal to 0".to_string(), 173 | )) 174 | } else if indices.step != 1 { 175 | Err(PyErr::new::( 176 | "slice step must be equal to 1".to_string(), 177 | )) 178 | } else { 179 | Ok(u64::try_from(indices.start)?..u64::try_from(indices.stop)?) 
180 | } 181 | } 182 | 183 | fn selection_to_array_subset( 184 | selection: &[Bound<'_, PySlice>], 185 | shape: &[u64], 186 | ) -> PyResult { 187 | if selection.is_empty() { 188 | Ok(ArraySubset::new_with_shape(vec![1; shape.len()])) 189 | } else { 190 | let chunk_ranges = selection 191 | .iter() 192 | .zip(shape) 193 | .map(|(selection, &shape)| slice_to_range(selection, isize::try_from(shape)?)) 194 | .collect::>>()?; 195 | Ok(ArraySubset::new_with_ranges(&chunk_ranges)) 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/concurrency.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{exceptions::PyRuntimeError, PyErr, PyResult}; 2 | use zarrs::array::{ 3 | codec::CodecOptions, concurrency::calc_concurrency_outer_inner, ArrayCodecTraits, 4 | RecommendedConcurrency, 5 | }; 6 | 7 | use crate::{chunk_item::ChunksItem, CodecPipelineImpl}; 8 | 9 | pub trait ChunkConcurrentLimitAndCodecOptions { 10 | fn get_chunk_concurrent_limit_and_codec_options( 11 | &self, 12 | codec_pipeline_impl: &CodecPipelineImpl, 13 | ) -> PyResult>; 14 | } 15 | 16 | impl ChunkConcurrentLimitAndCodecOptions for Vec 17 | where 18 | T: ChunksItem, 19 | { 20 | fn get_chunk_concurrent_limit_and_codec_options( 21 | &self, 22 | codec_pipeline_impl: &CodecPipelineImpl, 23 | ) -> PyResult> { 24 | let num_chunks = self.len(); 25 | let Some(chunk_descriptions0) = self.first() else { 26 | return Ok(None); 27 | }; 28 | let chunk_representation = chunk_descriptions0.representation(); 29 | 30 | let codec_concurrency = codec_pipeline_impl 31 | .codec_chain 32 | .recommended_concurrency(chunk_representation) 33 | .map_err(|err| PyErr::new::(err.to_string()))?; 34 | 35 | let min_concurrent_chunks = 36 | std::cmp::min(codec_pipeline_impl.chunk_concurrent_minimum, num_chunks); 37 | let max_concurrent_chunks = 38 | std::cmp::max(codec_pipeline_impl.chunk_concurrent_maximum, num_chunks); 39 | let (chunk_concurrent_limit, codec_concurrent_limit) = calc_concurrency_outer_inner( 40 | codec_pipeline_impl.num_threads, 41 | &RecommendedConcurrency::new(min_concurrent_chunks..max_concurrent_chunks), 42 | &codec_concurrency, 43 | ); 44 | let codec_options = codec_pipeline_impl 45 | .codec_options 46 | .into_builder() 47 | .concurrent_target(codec_concurrent_limit) 48 | .build(); 49 | Ok(Some((chunk_concurrent_limit, codec_options))) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic)] 2 | #![allow(clippy::module_name_repetitions)] 3 | 4 | use std::borrow::Cow; 5 | use std::collections::HashMap; 6 | use std::ptr::NonNull; 7 | use std::sync::Arc; 8 | 9 | use chunk_item::WithSubset; 10 | use itertools::Itertools; 11 | use numpy::npyffi::PyArrayObject; 12 | use numpy::{PyArrayDescrMethods, PyUntypedArray, PyUntypedArrayMethods}; 13 | use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; 14 | use pyo3::prelude::*; 15 | use pyo3_stub_gen::define_stub_info_gatherer; 16 | use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; 17 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; 18 | use rayon_iter_concurrent_limit::iter_concurrent_limit; 19 | use unsafe_cell_slice::UnsafeCellSlice; 20 | use utils::is_whole_chunk; 21 | use zarrs::array::codec::{ 22 | ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder, 23 | }; 24 | 
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![warn(clippy::pedantic)]
2 | #![allow(clippy::module_name_repetitions)]
3 | 
4 | use std::borrow::Cow;
5 | use std::collections::HashMap;
6 | use std::ptr::NonNull;
7 | use std::sync::Arc;
8 | 
9 | use chunk_item::WithSubset;
10 | use itertools::Itertools;
11 | use numpy::npyffi::PyArrayObject;
12 | use numpy::{PyArrayDescrMethods, PyUntypedArray, PyUntypedArrayMethods};
13 | use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError};
14 | use pyo3::prelude::*;
15 | use pyo3_stub_gen::define_stub_info_gatherer;
16 | use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
17 | use rayon::iter::{IntoParallelIterator, ParallelIterator};
18 | use rayon_iter_concurrent_limit::iter_concurrent_limit;
19 | use unsafe_cell_slice::UnsafeCellSlice;
20 | use utils::is_whole_chunk;
21 | use zarrs::array::codec::{
22 |     ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder,
23 | };
24 | use zarrs::array::{
25 |     copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize,
26 |     CodecChain, FillValue,
27 | };
28 | use zarrs::array_subset::ArraySubset;
29 | use zarrs::metadata::v3::MetadataV3;
30 | use zarrs::storage::StoreKey;
31 | 
32 | mod chunk_item;
33 | mod concurrency;
34 | mod metadata_v2;
35 | mod runtime;
36 | mod store;
37 | #[cfg(test)]
38 | mod tests;
39 | mod utils;
40 | 
41 | use crate::chunk_item::ChunksItem;
42 | use crate::concurrency::ChunkConcurrentLimitAndCodecOptions;
43 | use crate::metadata_v2::codec_metadata_v2_to_v3;
44 | use crate::store::StoreManager;
45 | use crate::utils::{PyErrExt as _, PyUntypedArrayExt as _};
46 | 
47 | // TODO: Use a OnceLock for store with get_or_try_init when stabilised?
48 | #[gen_stub_pyclass]
49 | #[pyclass]
50 | pub struct CodecPipelineImpl {
51 |     pub(crate) stores: StoreManager,
52 |     pub(crate) codec_chain: Arc<CodecChain>,
53 |     pub(crate) codec_options: CodecOptions,
54 |     pub(crate) chunk_concurrent_minimum: usize,
55 |     pub(crate) chunk_concurrent_maximum: usize,
56 |     pub(crate) num_threads: usize,
57 | }
58 | 
59 | impl CodecPipelineImpl {
60 |     fn retrieve_chunk_bytes<'a, I: ChunksItem>(
61 |         &self,
62 |         item: &I,
63 |         codec_chain: &CodecChain,
64 |         codec_options: &CodecOptions,
65 |     ) -> PyResult<ArrayBytes<'a>> {
66 |         let value_encoded = self.stores.get(item)?;
67 |         let value_decoded = if let Some(value_encoded) = value_encoded {
68 |             let value_encoded: Vec<u8> = value_encoded.into(); // zero-copy in this case
69 |             codec_chain
70 |                 .decode(value_encoded.into(), item.representation(), codec_options)
71 |                 .map_py_err::<PyRuntimeError>()?
72 |         } else {
73 |             let array_size = ArraySize::new(
74 |                 item.representation().data_type().size(),
75 |                 item.representation().num_elements(),
76 |             );
77 |             ArrayBytes::new_fill_value(array_size, item.representation().fill_value())
78 |         };
79 |         Ok(value_decoded)
80 |     }
81 | 
82 |     fn store_chunk_bytes<I: ChunksItem>(
83 |         &self,
84 |         item: &I,
85 |         codec_chain: &CodecChain,
86 |         value_decoded: ArrayBytes,
87 |         codec_options: &CodecOptions,
88 |     ) -> PyResult<()> {
89 |         value_decoded
90 |             .validate(
91 |                 item.representation().num_elements(),
92 |                 item.representation().data_type().size(),
93 |             )
94 |             .map_py_err::<PyValueError>()?;
95 | 
96 |         if value_decoded.is_fill_value(item.representation().fill_value()) {
97 |             self.stores.erase(item)
98 |         } else {
99 |             let value_encoded = codec_chain
100 |                 .encode(value_decoded, item.representation(), codec_options)
101 |                 .map(Cow::into_owned)
102 |                 .map_py_err::<PyRuntimeError>()?;
103 | 
104 |             // Store the encoded chunk
105 |             self.stores.set(item, value_encoded.into())
106 |         }
107 |     }
108 | 
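    // Note: because `store_chunk_bytes` erases chunks whose decoded content
    // equals the fill value, writing the fill value over a whole chunk deletes
    // its key instead of storing an encoded chunk. This is what
    // tests/test_codecs.py::test_delete_empty_chunks observes (illustrative
    // cross-reference): after writing zeros into a fill_value=1 array and then
    // restoring the data, `store.get(f"{path}/c0/0", ...)` can still be None
    // for all-fill chunks.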
109 |     fn store_chunk_subset_bytes<I: ChunksItem>(
110 |         &self,
111 |         item: &I,
112 |         codec_chain: &CodecChain,
113 |         chunk_subset_bytes: ArrayBytes,
114 |         chunk_subset: &ArraySubset,
115 |         codec_options: &CodecOptions,
116 |     ) -> PyResult<()> {
117 |         let array_shape = item.representation().shape_u64();
118 |         if !chunk_subset.inbounds_shape(&array_shape) {
119 |             return Err(PyErr::new::<PyValueError, _>(format!(
120 |                 "chunk subset ({chunk_subset}) is out of bounds for array shape ({array_shape:?})"
121 |             )));
122 |         }
123 |         let data_type_size = item.representation().data_type().size();
124 | 
125 |         if chunk_subset.start().iter().all(|&o| o == 0) && chunk_subset.shape() == array_shape {
126 |             // Fast path if the chunk subset spans the entire chunk, no read required
127 |             self.store_chunk_bytes(item, codec_chain, chunk_subset_bytes, codec_options)
128 |         } else {
129 |             // Validate the chunk subset bytes
130 |             chunk_subset_bytes
131 |                 .validate(chunk_subset.num_elements(), data_type_size)
132 |                 .map_py_err::<PyValueError>()?;
133 | 
134 |             // Retrieve the chunk
135 |             let chunk_bytes_old = self.retrieve_chunk_bytes(item, codec_chain, codec_options)?;
136 | 
137 |             // Update the chunk
138 |             let chunk_bytes_new = update_array_bytes(
139 |                 chunk_bytes_old,
140 |                 &array_shape,
141 |                 chunk_subset,
142 |                 &chunk_subset_bytes,
143 |                 data_type_size,
144 |             )
145 |             .map_py_err::<PyRuntimeError>()?;
146 | 
147 |             // Store the updated chunk
148 |             self.store_chunk_bytes(item, codec_chain, chunk_bytes_new, codec_options)
149 |         }
150 |     }
151 | 
152 |     fn py_untyped_array_to_array_object<'a>(
153 |         value: &'a Bound<'_, PyUntypedArray>,
154 |     ) -> &'a PyArrayObject {
155 |         // TODO: Upstream a PyUntypedArray.as_array_ref()?
156 |         // https://github.com/zarrs/zarrs-python/pull/80/files/75be39184905d688ac04a5f8bca08c5241c458cd#r1918365296
157 |         let array_object_ptr: NonNull<PyArrayObject> = NonNull::new(value.as_array_ptr())
158 |             .expect("bug in numpy crate: Bound<'_, PyUntypedArray>::as_array_ptr unexpectedly returned a null pointer");
159 |         let array_object: &'a PyArrayObject = unsafe {
160 |             // SAFETY: the array object pointed to by array_object_ptr is valid for 'a
161 |             array_object_ptr.as_ref()
162 |         };
163 |         array_object
164 |     }
165 | 
166 |     fn nparray_to_slice<'a>(value: &'a Bound<'_, PyUntypedArray>) -> Result<&'a [u8], PyErr> {
167 |         if !value.is_c_contiguous() {
168 |             return Err(PyErr::new::<PyValueError, _>(
169 |                 "input array must be a C contiguous array".to_string(),
170 |             ));
171 |         }
172 |         let array_object: &PyArrayObject = Self::py_untyped_array_to_array_object(value);
173 |         let array_data = array_object.data.cast::<u8>();
174 |         let array_len = value.len() * value.dtype().itemsize();
175 |         let slice = unsafe {
176 |             // SAFETY: array_data is a valid pointer to a u8 array of length array_len
177 |             debug_assert!(!array_data.is_null());
178 |             std::slice::from_raw_parts(array_data, array_len)
179 |         };
180 |         Ok(slice)
181 |     }
182 | 
183 |     fn nparray_to_unsafe_cell_slice<'a>(
184 |         value: &'a Bound<'_, PyUntypedArray>,
185 |     ) -> Result<UnsafeCellSlice<'a, u8>, PyErr> {
186 |         if !value.is_c_contiguous() {
187 |             return Err(PyErr::new::<PyValueError, _>(
188 |                 "input array must be a C contiguous array".to_string(),
189 |             ));
190 |         }
191 |         let array_object: &PyArrayObject = Self::py_untyped_array_to_array_object(value);
192 |         let array_data = array_object.data.cast::<u8>();
193 |         let array_len = value.len() * value.dtype().itemsize();
194 |         let output = unsafe {
195 |             // SAFETY: array_data is a valid pointer to a u8 array of length array_len
196 |             debug_assert!(!array_data.is_null());
197 |             std::slice::from_raw_parts_mut(array_data, array_len)
198 |         };
199 |         Ok(UnsafeCellSlice::new(output))
200 |     }
201 | }
202 | 
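// How the Python side reaches the methods below (see python/zarrs/pipeline.py
// and tests/conftest.py; the config key is what the test suite itself sets):
//
//     zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
//
// after which zarr-python routes chunk encode/decode calls through this impl.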
203 | #[gen_stub_pymethods]
204 | #[pymethods]
205 | impl CodecPipelineImpl {
206 |     #[pyo3(signature = (
207 |         metadata,
208 |         *,
209 |         validate_checksums=None,
210 |         store_empty_chunks=None,
211 |         chunk_concurrent_minimum=None,
212 |         chunk_concurrent_maximum=None,
213 |         num_threads=None,
214 |     ))]
215 |     #[new]
216 |     fn new(
217 |         metadata: &str,
218 |         validate_checksums: Option<bool>,
219 |         store_empty_chunks: Option<bool>,
220 |         chunk_concurrent_minimum: Option<usize>,
221 |         chunk_concurrent_maximum: Option<usize>,
222 |         num_threads: Option<usize>,
223 |     ) -> PyResult<Self> {
224 |         let metadata: Vec<MetadataV3> =
225 |             serde_json::from_str(metadata).map_py_err::<PyTypeError>()?;
226 |         let codec_chain =
227 |             Arc::new(CodecChain::from_metadata(&metadata).map_py_err::<PyTypeError>()?);
228 |         let mut codec_options = CodecOptionsBuilder::new();
229 |         if let Some(validate_checksums) = validate_checksums {
230 |             codec_options = codec_options.validate_checksums(validate_checksums);
231 |         }
232 |         if let Some(store_empty_chunks) = store_empty_chunks {
233 |             codec_options = codec_options.store_empty_chunks(store_empty_chunks);
234 |         }
235 |         let codec_options = codec_options.build();
236 | 
237 |         let chunk_concurrent_minimum = chunk_concurrent_minimum
238 |             .unwrap_or(zarrs::config::global_config().chunk_concurrent_minimum());
239 |         let chunk_concurrent_maximum =
240 |             chunk_concurrent_maximum.unwrap_or(rayon::current_num_threads());
241 |         let num_threads = num_threads.unwrap_or(rayon::current_num_threads());
242 | 
243 |         Ok(Self {
244 |             stores: StoreManager::default(),
245 |             codec_chain,
246 |             codec_options,
247 |             chunk_concurrent_minimum,
248 |             chunk_concurrent_maximum,
249 |             num_threads,
250 |         })
251 |     }
252 | 
253 |     fn retrieve_chunks_and_apply_index(
254 |         &self,
255 |         py: Python,
256 |         chunk_descriptions: Vec<chunk_item::WithSubset>, // FIXME: Ref / iterable?
257 |         value: &Bound<'_, PyUntypedArray>,
258 |     ) -> PyResult<()> {
259 |         // Get input array
260 |         let output = Self::nparray_to_unsafe_cell_slice(value)?;
261 |         let output_shape: Vec<u64> = value.shape_zarr()?;
262 | 
263 |         // Adjust the concurrency based on the codec chain and the first chunk description
264 |         let Some((chunk_concurrent_limit, codec_options)) =
265 |             chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)?
266 |         else {
267 |             return Ok(());
268 |         };
269 | 
270 |         // Assemble partial decoders ahead of time and in parallel
271 |         let partial_chunk_descriptions = chunk_descriptions
272 |             .iter()
273 |             .filter(|item| !(is_whole_chunk(item)))
274 |             .unique_by(|item| item.key())
275 |             .collect::<Vec<_>>();
276 |         let mut partial_decoder_cache: HashMap<StoreKey, Arc<dyn ArrayPartialDecoderTraits>> =
277 |             HashMap::new();
278 |         if !partial_chunk_descriptions.is_empty() {
279 |             let key_decoder_pairs = iter_concurrent_limit!(
280 |                 chunk_concurrent_limit,
281 |                 partial_chunk_descriptions,
282 |                 map,
283 |                 |item| {
284 |                     let input_handle = self.stores.decoder(item)?;
285 |                     let partial_decoder = self
286 |                         .codec_chain
287 |                         .clone()
288 |                         .partial_decoder(
289 |                             Arc::new(input_handle),
290 |                             item.representation(),
291 |                             &codec_options,
292 |                         )
293 |                         .map_py_err::<PyRuntimeError>()?;
294 |                     Ok((item.key().clone(), partial_decoder))
295 |                 }
296 |             )
297 |             .collect::<PyResult<Vec<_>>>()?;
298 |             partial_decoder_cache.extend(key_decoder_pairs);
299 |         }
300 | 
301 |         py.allow_threads(move || {
302 |             // FIXME: the `decode_into` methods only support fixed length data types.
303 |             // For variable length data types, need a codepath with non `_into` methods.
304 |             // Collect all the subsets and copy into value on the Python side?
305 |             let update_chunk_subset = |item: chunk_item::WithSubset| {
306 |                 let chunk_item::WithSubset {
307 |                     item,
308 |                     subset,
309 |                     chunk_subset,
310 |                 } = item;
311 |                 let mut output_view = unsafe {
312 |                     // TODO: Is the following correct?
313 |                     // can we guarantee that when this function is called from Python with arbitrary arguments?
314 |                     // SAFETY: chunks represent disjoint array subsets
315 |                     ArrayBytesFixedDisjointView::new(
316 |                         output,
317 |                         // TODO: why is data_type in `item`, it should be derived from `output`, no?
318 |                         item.representation()
319 |                             .data_type()
320 |                             .fixed_size()
321 |                             .ok_or("variable length data type not supported")
322 |                             .map_py_err::<PyRuntimeError>()?,
323 |                         &output_shape,
324 |                         subset,
325 |                     )
326 |                     .map_py_err::<PyRuntimeError>()?
327 |                 };
328 | 
329 |                 // See zarrs::array::Array::retrieve_chunk_subset_into
330 |                 if chunk_subset.start().iter().all(|&o| o == 0)
331 |                     && chunk_subset.shape() == item.representation().shape_u64()
332 |                 {
333 |                     // See zarrs::array::Array::retrieve_chunk_into
334 |                     if let Some(chunk_encoded) = self.stores.get(&item)? {
335 |                         // Decode the encoded data into the output buffer
336 |                         let chunk_encoded: Vec<u8> = chunk_encoded.into();
337 |                         self.codec_chain.decode_into(
338 |                             Cow::Owned(chunk_encoded),
339 |                             item.representation(),
340 |                             &mut output_view,
341 |                             &codec_options,
342 |                         )
343 |                     } else {
344 |                         // The chunk is missing, write the fill value
345 |                         copy_fill_value_into(
346 |                             item.representation().data_type(),
347 |                             item.representation().fill_value(),
348 |                             &mut output_view,
349 |                         )
350 |                     }
351 |                 } else {
352 |                     let key = item.key();
353 |                     let partial_decoder = partial_decoder_cache.get(key).ok_or_else(|| {
354 |                         PyRuntimeError::new_err(format!("Partial decoder not found for key: {key}"))
355 |                     })?;
356 |                     partial_decoder.partial_decode_into(
357 |                         &chunk_subset,
358 |                         &mut output_view,
359 |                         &codec_options,
360 |                     )
361 |                 }
362 |                 .map_py_err::<PyRuntimeError>()
363 |             };
364 | 
365 |             iter_concurrent_limit!(
366 |                 chunk_concurrent_limit,
367 |                 chunk_descriptions,
368 |                 try_for_each,
369 |                 update_chunk_subset
370 |             )?;
371 | 
372 |             Ok(())
373 |         })
374 |     }
375 | 
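    // Read-path summary (illustrative): a selection covering a whole chunk is
    // decoded (or fill-value-filled) straight into the output view, while a
    // sub-chunk selection goes through the cached partial decoder for that
    // chunk's store key. E.g. with (5, 5) chunks, reading `[0:5, 0:5]` takes
    // the whole-chunk branch and `[0:3, 0:3]` the partial-decode branch.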
376 |     fn store_chunks_with_indices(
377 |         &self,
378 |         py: Python,
379 |         chunk_descriptions: Vec<chunk_item::WithSubset>,
380 |         value: &Bound<'_, PyUntypedArray>,
381 |     ) -> PyResult<()> {
382 |         enum InputValue<'a> {
383 |             Array(ArrayBytes<'a>),
384 |             Constant(FillValue),
385 |         }
386 | 
387 |         // Get input array
388 |         let input_slice = Self::nparray_to_slice(value)?;
389 |         let input = if value.ndim() > 0 {
390 |             // FIXME: Handle variable length data types, convert value to bytes and offsets
391 |             InputValue::Array(ArrayBytes::new_flen(Cow::Borrowed(input_slice)))
392 |         } else {
393 |             InputValue::Constant(FillValue::new(input_slice.to_vec()))
394 |         };
395 |         let input_shape: Vec<u64> = value.shape_zarr()?;
396 | 
397 |         // Adjust the concurrency based on the codec chain and the first chunk description
398 |         let Some((chunk_concurrent_limit, codec_options)) =
399 |             chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)?
400 |         else {
401 |             return Ok(());
402 |         };
403 | 
404 |         py.allow_threads(move || {
405 |             let store_chunk = |item: chunk_item::WithSubset| match &input {
406 |                 InputValue::Array(input) => {
407 |                     let chunk_subset_bytes = input
408 |                         .extract_array_subset(
409 |                             &item.subset,
410 |                             &input_shape,
411 |                             item.item.representation().data_type(),
412 |                         )
413 |                         .map_py_err::<PyRuntimeError>()?;
414 |                     self.store_chunk_subset_bytes(
415 |                         &item,
416 |                         &self.codec_chain,
417 |                         chunk_subset_bytes,
418 |                         &item.chunk_subset,
419 |                         &codec_options,
420 |                     )
421 |                 }
422 |                 InputValue::Constant(constant_value) => {
423 |                     let chunk_subset_bytes = ArrayBytes::new_fill_value(
424 |                         ArraySize::new(
425 |                             item.representation().data_type().size(),
426 |                             item.chunk_subset.num_elements(),
427 |                         ),
428 |                         constant_value,
429 |                     );
430 | 
431 |                     self.store_chunk_subset_bytes(
432 |                         &item,
433 |                         &self.codec_chain,
434 |                         chunk_subset_bytes,
435 |                         &item.chunk_subset,
436 |                         &codec_options,
437 |                     )
438 |                 }
439 |             };
440 | 
441 |             iter_concurrent_limit!(
442 |                 chunk_concurrent_limit,
443 |                 chunk_descriptions,
444 |                 try_for_each,
445 |                 store_chunk
446 |             )?;
447 | 
448 |             Ok(())
449 |         })
450 |     }
451 | }
452 | 
453 | /// A Python module implemented in Rust.
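///
/// Assumed Python-side view (the module builds as `zarrs._internal`, see
/// `python/zarrs/_internal.pyi`, and is wrapped by the `zarrs` package):
///
///     from zarrs._internal import CodecPipelineImpl, codec_metadata_v2_to_v3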
454 | #[pymodule]
455 | fn _internal(m: &Bound<'_, PyModule>) -> PyResult<()> {
456 |     m.add("__version__", env!("CARGO_PKG_VERSION"))?;
457 |     m.add_class::<CodecPipelineImpl>()?;
458 |     m.add_class::<chunk_item::Basic>()?;
459 |     m.add_class::<chunk_item::WithSubset>()?;
460 |     m.add_function(wrap_pyfunction!(codec_metadata_v2_to_v3, m)?)?;
461 |     Ok(())
462 | }
463 | 
464 | define_stub_info_gatherer!(stub_info);
465 | 
--------------------------------------------------------------------------------
/src/metadata_v2.rs:
--------------------------------------------------------------------------------
1 | use pyo3::{exceptions::PyRuntimeError, pyfunction, PyErr, PyResult};
2 | use zarrs::metadata::{
3 |     v2::{ArrayMetadataV2Order, MetadataV2},
4 |     v3::MetadataV3,
5 | };
6 | 
7 | #[pyfunction]
8 | #[pyo3(signature = (filters=None, compressor=None))]
9 | pub fn codec_metadata_v2_to_v3(
10 |     filters: Option<Vec<String>>,
11 |     compressor: Option<String>,
12 | ) -> PyResult<Vec<String>> {
13 |     // Try and convert filters/compressor to V2 metadata
14 |     let filters = if let Some(filters) = filters {
15 |         Some(
16 |             filters
17 |                 .into_iter()
18 |                 .map(|filter| {
19 |                     serde_json::from_str::<MetadataV2>(&filter)
20 |                         .map_err(|err| PyErr::new::<PyRuntimeError, _>(err.to_string()))
21 |                 })
22 |                 .collect::<Result<Vec<_>, _>>()?,
23 |         )
24 |     } else {
25 |         None
26 |     };
27 |     let compressor = if let Some(compressor) = compressor {
28 |         Some(
29 |             serde_json::from_str::<MetadataV2>(&compressor)
30 |                 .map_err(|err| PyErr::new::<PyRuntimeError, _>(err.to_string()))?,
31 |         )
32 |     } else {
33 |         None
34 |     };
35 | 
36 |     // FIXME: The array order, dimensionality, data type, and endianness are needed to exhaustively support all Zarr V2 data that zarrs can handle.
37 |     // However, CodecPipeline.from_codecs does not supply this information, and CodecPipeline.evolve_from_array_spec is seemingly never called.
38 |     let metadata = zarrs::metadata_ext::v2_to_v3::codec_metadata_v2_to_v3(
39 |         ArrayMetadataV2Order::C,
40 |         0,                        // unused with C order
41 |         &MetadataV3::new("bool"), // FIXME
42 |         None,
43 |         &filters,
44 |         &compressor,
45 |         zarrs::config::global_config().codec_aliases_v2(),
46 |         zarrs::config::global_config().codec_aliases_v3(),
47 |     )
48 |     .map_err(|err| {
49 |         // TODO: More informative error messages from zarrs for ArrayMetadataV2ToV3ConversionError
50 |         PyErr::new::<PyRuntimeError, _>(err.to_string())
51 |     })?;
52 |     Ok(metadata
53 |         .into_iter()
54 |         .map(|metadata| serde_json::to_string(&metadata).expect("infallible")) // TODO: Add method to zarrs
55 |         .collect())
56 | }
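Note: a hedged Python sketch of the conversion above (the numcodecs-style JSON
string is illustrative, not a tested value):

    from zarrs._internal import codec_metadata_v2_to_v3

    # One JSON string per resulting V3 codec, e.g. for a V2 zstd compressor:
    v3_codecs = codec_metadata_v2_to_v3(compressor='{"id": "zstd", "level": 5}')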
--------------------------------------------------------------------------------
/src/runtime.rs:
--------------------------------------------------------------------------------
1 | use std::sync::OnceLock;
2 | use tokio::runtime::Runtime;
3 | use zarrs::storage::storage_adapter::async_to_sync::AsyncToSyncBlockOn;
4 | 
5 | static RUNTIME: OnceLock<Runtime> = OnceLock::new();
6 | 
7 | pub struct TokioBlockOn(tokio::runtime::Handle);
8 | 
9 | impl AsyncToSyncBlockOn for TokioBlockOn {
10 |     fn block_on<F: core::future::Future>(&self, future: F) -> F::Output {
11 |         self.0.block_on(future)
12 |     }
13 | }
14 | 
15 | pub fn tokio_block_on() -> TokioBlockOn {
16 |     let runtime = RUNTIME.get_or_init(|| Runtime::new().expect("Failed to create Tokio runtime"));
17 |     TokioBlockOn(runtime.handle().clone())
18 | }
--------------------------------------------------------------------------------
/src/store.rs:
--------------------------------------------------------------------------------
1 | use std::{collections::HashMap, sync::Arc};
2 | 
3 | use opendal::Builder;
4 | use pyo3::{
5 |     exceptions::{PyNotImplementedError, PyValueError},
6 |     types::{PyAnyMethods, PyStringMethods, PyTypeMethods},
7 |     Bound, FromPyObject, PyAny, PyErr, PyResult,
8 | };
9 | use pyo3_stub_gen::derive::gen_stub_pyclass_enum;
10 | use zarrs::storage::{
11 |     storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, ReadableWritableListableStorage,
12 | };
13 | 
14 | use crate::{runtime::tokio_block_on, utils::PyErrExt};
15 | 
16 | mod filesystem;
17 | mod http;
18 | mod manager;
19 | 
20 | pub use self::filesystem::FilesystemStoreConfig;
21 | pub use self::http::HttpStoreConfig;
22 | pub(crate) use self::manager::StoreManager;
23 | 
24 | #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
25 | #[gen_stub_pyclass_enum]
26 | pub enum StoreConfig {
27 |     Filesystem(FilesystemStoreConfig),
28 |     Http(HttpStoreConfig),
29 |     // TODO: Add support for more stores
30 | }
31 | 
32 | impl<'py> FromPyObject<'py> for StoreConfig {
33 |     fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult<Self> {
34 |         let name = store.get_type().name()?;
35 |         let name = name.to_str()?;
36 |         match name {
37 |             "LocalStore" => {
38 |                 let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?;
39 |                 Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root)))
40 |             }
41 |             "FsspecStore" => {
42 |                 let fs = store.getattr("fs")?;
43 |                 let fs_name = fs.get_type().name()?;
44 |                 let fs_name = fs_name.to_str()?;
45 |                 let path: String = store.getattr("path")?.extract()?;
46 |                 let storage_options: HashMap<String, Bound<'py, PyAny>> =
47 |                     fs.getattr("storage_options")?.extract()?;
48 |                 match fs_name {
49 |                     "HTTPFileSystem" => Ok(StoreConfig::Http(HttpStoreConfig::new(
50 |                         &path,
51 |                         &storage_options,
52 |                     )?)),
53 |                     _ => Err(PyErr::new::<PyNotImplementedError, _>(format!(
54 |                         "zarrs-python does not support {fs_name} (FsspecStore) stores"
55 |                     ))),
56 |                 }
57 |             }
58 |             _ => Err(PyErr::new::<PyNotImplementedError, _>(format!(
59 |                 "zarrs-python does not support {name} stores"
60 |             ))),
61 |         }
62 |     }
63 | }
64 | 
65 | impl TryFrom<&StoreConfig> for ReadableWritableListableStorage {
66 |     type Error = PyErr;
67 | 
68 |     fn try_from(value: &StoreConfig) -> Result<Self, Self::Error> {
69 |         match value {
70 |             StoreConfig::Filesystem(config) => config.try_into(),
71 |             StoreConfig::Http(config) => config.try_into(),
72 |         }
73 |     }
74 | }
75 | 
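// Store mapping sketch (follows `extract_bound` above):
//
//     zarr.storage.LocalStore(root=...)                -> StoreConfig::Filesystem
//     zarr.storage.FsspecStore with fs=HTTPFileSystem  -> StoreConfig::Http
//     any other store / filesystem                     -> PyNotImplementedError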
76 | fn opendal_builder_to_sync_store<B: Builder>(
77 |     builder: B,
78 | ) -> PyResult<ReadableWritableListableStorage> {
79 |     let operator = opendal::Operator::new(builder)
80 |         .map_py_err::<PyValueError>()?
81 |         .finish();
82 |     let store = Arc::new(zarrs_opendal::AsyncOpendalStore::new(operator));
83 |     let store = Arc::new(AsyncToSyncStorageAdapter::new(store, tokio_block_on()));
84 |     Ok(store)
85 | }
--------------------------------------------------------------------------------
/src/store/filesystem.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use pyo3::{exceptions::PyRuntimeError, pyclass, PyErr};
4 | use pyo3_stub_gen::derive::gen_stub_pyclass;
5 | use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorage};
6 | 
7 | use crate::utils::PyErrExt;
8 | 
9 | #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
10 | #[gen_stub_pyclass]
11 | #[pyclass]
12 | pub struct FilesystemStoreConfig {
13 |     #[pyo3(get, set)]
14 |     pub root: String,
15 | }
16 | 
17 | impl FilesystemStoreConfig {
18 |     pub fn new(root: String) -> Self {
19 |         Self { root }
20 |     }
21 | }
22 | 
23 | impl TryInto<ReadableWritableListableStorage> for &FilesystemStoreConfig {
24 |     type Error = PyErr;
25 | 
26 |     fn try_into(self) -> Result<ReadableWritableListableStorage, Self::Error> {
27 |         let store =
28 |             Arc::new(FilesystemStore::new(self.root.clone()).map_py_err::<PyRuntimeError>()?);
29 |         Ok(store)
30 |     }
31 | }
--------------------------------------------------------------------------------
/src/store/http.rs:
--------------------------------------------------------------------------------
1 | use std::collections::HashMap;
2 | 
3 | use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyErr, PyResult};
4 | use pyo3_stub_gen::derive::gen_stub_pyclass;
5 | use zarrs::storage::ReadableWritableListableStorage;
6 | 
7 | use super::opendal_builder_to_sync_store;
8 | 
9 | #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
10 | #[gen_stub_pyclass]
11 | #[pyclass]
12 | pub struct HttpStoreConfig {
13 |     #[pyo3(get, set)]
14 |     pub endpoint: String,
15 | }
16 | 
17 | impl HttpStoreConfig {
18 |     pub fn new(path: &str, storage_options: &HashMap<String, Bound<'_, PyAny>>) -> PyResult<Self> {
19 |         if !storage_options.is_empty() {
20 |             for storage_option in storage_options.keys() {
21 |                 match storage_option.as_str() {
22 |                     // TODO: Add support for other storage options
23 |                     "asynchronous" => {}
24 |                     _ => {
25 |                         return Err(PyValueError::new_err(format!(
26 |                             "Unsupported storage option for HTTPFileSystem: {storage_option}"
27 |                         )));
28 |                     }
29 |                 }
30 |             }
31 |         }
32 | 
33 |         Ok(Self {
34 |             endpoint: path.to_string(),
35 |         })
36 |     }
37 | }
38 | 
39 | impl TryInto<ReadableWritableListableStorage> for &HttpStoreConfig {
40 |     type Error = PyErr;
41 | 
42 |     fn try_into(self) -> Result<ReadableWritableListableStorage, Self::Error> {
43 |         let builder = opendal::services::Http::default().endpoint(&self.endpoint);
44 |         opendal_builder_to_sync_store(builder)
45 |     }
46 | }
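Note: a hedged end-to-end sketch of the HTTP path (the URL is hypothetical; see
tests/test_zarrs_http.py for the real coverage):

    import zarr
    arr = zarr.open("https://example.com/data.zarr", mode="r")  # FsspecStore + HTTPFileSystem

    # zarrs-python converts the store to an HttpStoreConfig and serves reads
    # through an opendal HTTP operator behind the async-to-sync adapter.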
--------------------------------------------------------------------------------
/src/store/manager.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 |     collections::BTreeMap,
3 |     sync::{Arc, Mutex},
4 | };
5 | 
6 | use pyo3::{exceptions::PyRuntimeError, PyResult};
7 | use zarrs::{
8 |     array::codec::StoragePartialDecoder,
9 |     storage::{Bytes, MaybeBytes, ReadableWritableListableStorage, StorageHandle},
10 | };
11 | 
12 | use crate::{chunk_item::ChunksItem, store::PyErrExt as _};
13 | 
14 | use super::StoreConfig;
15 | 
16 | #[derive(Default)]
17 | pub(crate) struct StoreManager(Mutex<BTreeMap<StoreConfig, ReadableWritableListableStorage>>);
18 | 
19 | impl StoreManager {
20 |     fn store<I: ChunksItem>(&self, item: &I) -> PyResult<ReadableWritableListableStorage> {
21 |         use std::collections::btree_map::Entry::{Occupied, Vacant};
22 |         match self
23 |             .0
24 |             .lock()
25 |             .map_py_err::<PyRuntimeError>()?
26 |             .entry(item.store_config())
27 |         {
28 |             Occupied(e) => Ok(e.get().clone()),
29 |             Vacant(e) => Ok(e.insert((&item.store_config()).try_into()?).clone()),
30 |         }
31 |     }
32 | 
33 |     pub(crate) fn get<I: ChunksItem>(&self, item: &I) -> PyResult<MaybeBytes> {
34 |         self.store(item)?
35 |             .get(item.key())
36 |             .map_py_err::<PyRuntimeError>()
37 |     }
38 | 
39 |     pub(crate) fn set<I: ChunksItem>(&self, item: &I, value: Bytes) -> PyResult<()> {
40 |         self.store(item)?
41 |             .set(item.key(), value)
42 |             .map_py_err::<PyRuntimeError>()
43 |     }
44 | 
45 |     pub(crate) fn erase<I: ChunksItem>(&self, item: &I) -> PyResult<()> {
46 |         self.store(item)?
47 |             .erase(item.key())
48 |             .map_py_err::<PyRuntimeError>()
49 |     }
50 | 
51 |     pub(crate) fn decoder<I: ChunksItem>(&self, item: &I) -> PyResult<StoragePartialDecoder> {
52 |         // Partially decode the chunk into the output buffer
53 |         let storage_handle = Arc::new(StorageHandle::new(self.store(item)?));
54 |         // NOTE: Normally a storage transformer would exist between the storage handle and the input handle
55 |         // but zarr-python does not support them nor forward them to the codec pipeline
56 |         Ok(StoragePartialDecoder::new(
57 |             storage_handle,
58 |             item.key().clone(),
59 |         ))
60 |     }
61 | }
--------------------------------------------------------------------------------
/src/tests.rs:
--------------------------------------------------------------------------------
1 | use pyo3::ffi::c_str;
2 | 
3 | use numpy::PyUntypedArray;
4 | use pyo3::{
5 |     types::{PyAnyMethods, PyModule},
6 |     Bound, PyResult, Python,
7 | };
8 | 
9 | use crate::CodecPipelineImpl;
10 | 
11 | #[test]
12 | fn test_nparray_to_unsafe_cell_slice_empty() -> PyResult<()> {
13 |     pyo3::prepare_freethreaded_python();
14 |     Python::with_gil(|py| {
15 |         let arr: Bound<'_, PyUntypedArray> = PyModule::from_code(
16 |             py,
17 |             c_str!(
18 |                 "def empty_array():
19 |                     import numpy as np
20 |                     return np.empty(0, dtype=np.uint8)"
21 |             ),
22 |             c_str!(""),
23 |             c_str!(""),
24 |         )?
25 |         .getattr("empty_array")?
26 |         .call0()?
27 |         .extract()?;
28 | 
29 |         let slice = CodecPipelineImpl::nparray_to_unsafe_cell_slice(&arr)?;
30 |         assert!(slice.is_empty());
31 |         Ok(())
32 |     })
33 | }
--------------------------------------------------------------------------------
/src/utils.rs:
--------------------------------------------------------------------------------
1 | use std::fmt::Display;
2 | 
3 | use numpy::{PyUntypedArray, PyUntypedArrayMethods};
4 | use pyo3::{Bound, PyErr, PyResult, PyTypeInfo};
5 | 
6 | use crate::{ChunksItem, WithSubset};
7 | 
8 | pub(crate) trait PyErrExt<T> {
9 |     fn map_py_err<E: PyTypeInfo>(self) -> PyResult<T>;
10 | }
11 | 
12 | impl<T, E: Display> PyErrExt<T> for Result<T, E> {
13 |     fn map_py_err<PE: PyTypeInfo>(self) -> PyResult<T> {
14 |         self.map_err(|e| PyErr::new::<PE, _>(format!("{e}")))
15 |     }
16 | }
17 | 
18 | pub(crate) trait PyUntypedArrayExt {
19 |     fn shape_zarr(&self) -> PyResult<Vec<u64>>;
20 | }
21 | 
22 | impl PyUntypedArrayExt for Bound<'_, PyUntypedArray> {
23 |     fn shape_zarr(&self) -> PyResult<Vec<u64>> {
24 |         Ok(if self.shape().is_empty() {
25 |             vec![1] // scalar value
26 |         } else {
27 |             self.shape()
28 |                 .iter()
29 |                 .map(|&i| u64::try_from(i))
30 |                 .collect::<Result<_, _>>()?
31 | }) 32 | } 33 | } 34 | 35 | pub fn is_whole_chunk(item: &WithSubset) -> bool { 36 | item.chunk_subset.start().iter().all(|&o| o == 0) 37 | && item.chunk_subset.shape() == item.representation().shape_u64() 38 | } 39 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | from typing import TYPE_CHECKING, Literal 5 | 6 | import numpy as np 7 | import numpy.typing as npt 8 | import pytest 9 | from zarr import config 10 | from zarr.core.common import ChunkCoords 11 | from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore 12 | 13 | from zarrs.utils import ( # noqa: F401 14 | CollapsedDimensionError, 15 | DiscontiguousArrayError, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from collections.abc import Iterable 20 | from typing import Any, Literal 21 | 22 | from zarr.abc.store import Store 23 | from zarr.core.common import ChunkCoords, MemoryOrder 24 | 25 | 26 | @dataclass 27 | class ArrayRequest: 28 | shape: ChunkCoords 29 | dtype: str 30 | order: MemoryOrder 31 | 32 | 33 | @pytest.fixture(autouse=True) 34 | def _setup_codec_pipeline(): 35 | config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) 36 | pass 37 | 38 | 39 | async def parse_store( 40 | store: Literal["local", "memory", "remote", "zip"], path: str 41 | ) -> LocalStore | MemoryStore | FsspecStore | ZipStore: 42 | if store == "local": 43 | return await LocalStore.open(path) 44 | if store == "memory": 45 | return await MemoryStore.open() 46 | if store == "remote": 47 | return await FsspecStore.open(url=path) 48 | if store == "zip": 49 | return await ZipStore.open(path + "/zarr.zip") 50 | raise AssertionError 51 | 52 | 53 | @pytest.fixture(params=["local"]) 54 | async def store(request: pytest.FixtureRequest, tmpdir) -> Store: 55 | param = request.param 56 | return await parse_store(param, str(tmpdir)) 57 | 58 | 59 | @pytest.fixture 60 | def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]: 61 | array_request: ArrayRequest = request.param 62 | return ( 63 | np.arange(np.prod(array_request.shape)) 64 | .reshape(array_request.shape, order=array_request.order) 65 | .astype(array_request.dtype) 66 | ) 67 | 68 | 69 | # tests that also fail with zarr-python's default codec pipeline 70 | zarr_python_default_codec_pipeline_failures = [ 71 | # ellipsis weirdness, need to report, v3 72 | "test_roundtrip[oindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 73 | "test_roundtrip[oindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 74 | "test_roundtrip[vindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 75 | "test_roundtrip[vindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 76 | "test_roundtrip[oindex-2d-across_chunks_indices_array-ellipsis-v3]", 77 | "test_roundtrip[vindex-2d-ellipsis-across_chunks_indices_array-v3]", 78 | "test_roundtrip[vindex-2d-across_chunks_indices_array-ellipsis-v3]", 79 | "test_roundtrip[vindex-2d-ellipsis-contiguous_in_chunk_array-v3]", 80 | "test_roundtrip[vindex-2d-ellipsis-discontinuous_in_chunk_array-v3]", 81 | "test_roundtrip_read_only_zarrs[oindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 82 | "test_roundtrip_read_only_zarrs[oindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 83 | "test_roundtrip_read_only_zarrs[vindex-2d-contiguous_in_chunk_array-ellipsis-v3]", 84 | "test_roundtrip_read_only_zarrs[vindex-2d-discontinuous_in_chunk_array-ellipsis-v3]", 85 | 
"test_roundtrip_read_only_zarrs[oindex-2d-across_chunks_indices_array-ellipsis-v3]", 86 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-across_chunks_indices_array-v3]", 87 | "test_roundtrip_read_only_zarrs[vindex-2d-across_chunks_indices_array-ellipsis-v3]", 88 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-contiguous_in_chunk_array-v3]", 89 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-discontinuous_in_chunk_array-v3]", 90 | # v2 91 | "test_roundtrip[oindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 92 | "test_roundtrip[oindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 93 | "test_roundtrip[vindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 94 | "test_roundtrip[vindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 95 | "test_roundtrip[oindex-2d-across_chunks_indices_array-ellipsis-v2]", 96 | "test_roundtrip[vindex-2d-ellipsis-across_chunks_indices_array-v2]", 97 | "test_roundtrip[vindex-2d-across_chunks_indices_array-ellipsis-v2]", 98 | "test_roundtrip[vindex-2d-ellipsis-contiguous_in_chunk_array-v2]", 99 | "test_roundtrip[vindex-2d-ellipsis-discontinuous_in_chunk_array-v2]", 100 | "test_roundtrip_read_only_zarrs[oindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 101 | "test_roundtrip_read_only_zarrs[oindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 102 | "test_roundtrip_read_only_zarrs[vindex-2d-contiguous_in_chunk_array-ellipsis-v2]", 103 | "test_roundtrip_read_only_zarrs[vindex-2d-discontinuous_in_chunk_array-ellipsis-v2]", 104 | "test_roundtrip_read_only_zarrs[oindex-2d-across_chunks_indices_array-ellipsis-v2]", 105 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-across_chunks_indices_array-v2]", 106 | "test_roundtrip_read_only_zarrs[vindex-2d-across_chunks_indices_array-ellipsis-v2]", 107 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-contiguous_in_chunk_array-v2]", 108 | "test_roundtrip_read_only_zarrs[vindex-2d-ellipsis-discontinuous_in_chunk_array-v2]", 109 | # need to investigate this one - it seems to fail with the default pipeline 110 | # but it makes some sense that it succeeds with ours since we fall-back to numpy indexing 111 | # in the case of a collapsed dimension 112 | # "test_roundtrip_read_only_zarrs[vindex-2d-contiguous_in_chunk_array-contiguous_in_chunk_array]", 113 | ] 114 | 115 | 116 | def pytest_collection_modifyitems( 117 | config: pytest.Config, items: Iterable[pytest.Item] 118 | ) -> None: 119 | for item in items: 120 | if item.name in zarr_python_default_codec_pipeline_failures: 121 | xfail_marker = pytest.mark.xfail( 122 | reason="This test fails with the zarr-python default codec pipeline." 
123 | ) 124 | item.add_marker(xfail_marker) 125 | -------------------------------------------------------------------------------- /tests/test_blosc.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import pytest 5 | from zarr import AsyncArray 6 | from zarr.abc.store import Store 7 | from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec 8 | from zarr.core.buffer import default_buffer_prototype 9 | from zarr.storage import StorePath 10 | 11 | 12 | @pytest.mark.parametrize("dtype", ["uint8", "uint16"]) 13 | async def test_blosc_evolve(store: Store, dtype: str) -> None: 14 | typesize = np.dtype(dtype).itemsize 15 | path = "blosc_evolve" 16 | spath = StorePath(store, path) 17 | await AsyncArray.create( 18 | spath, 19 | shape=(16, 16), 20 | chunk_shape=(16, 16), 21 | dtype=dtype, 22 | fill_value=0, 23 | codecs=[BytesCodec(), BloscCodec()], 24 | ) 25 | buf = await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype()) 26 | assert buf is not None 27 | zarr_json = json.loads(buf.to_bytes()) 28 | blosc_configuration_json = zarr_json["codecs"][1]["configuration"] 29 | assert blosc_configuration_json["typesize"] == typesize 30 | if typesize == 1: 31 | assert blosc_configuration_json["shuffle"] == "bitshuffle" 32 | else: 33 | assert blosc_configuration_json["shuffle"] == "shuffle" 34 | 35 | path2 = "blosc_evolve_sharding" 36 | spath2 = StorePath(store, path2) 37 | await AsyncArray.create( 38 | spath2, 39 | shape=(16, 16), 40 | chunk_shape=(16, 16), 41 | dtype=dtype, 42 | fill_value=0, 43 | codecs=[ 44 | ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()]) 45 | ], 46 | ) 47 | buf = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) 48 | assert buf is not None 49 | zarr_json = json.loads(buf.to_bytes()) 50 | blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1][ 51 | "configuration" 52 | ] 53 | assert blosc_configuration_json["typesize"] == typesize 54 | if typesize == 1: 55 | assert blosc_configuration_json["shuffle"] == "bitshuffle" 56 | else: 57 | assert blosc_configuration_json["shuffle"] == "shuffle" 58 | -------------------------------------------------------------------------------- /tests/test_codecs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from dataclasses import dataclass 5 | from typing import TYPE_CHECKING 6 | 7 | import numpy as np 8 | import pytest 9 | from zarr import Array, AsyncArray, config 10 | from zarr.codecs import ( 11 | BytesCodec, 12 | ShardingCodec, 13 | TransposeCodec, 14 | ) 15 | from zarr.core.buffer import default_buffer_prototype 16 | from zarr.core.indexing import Selection, morton_order_iter 17 | from zarr.storage import StorePath 18 | 19 | if TYPE_CHECKING: 20 | from zarr.abc.codec import Codec 21 | from zarr.abc.store import Store 22 | from zarr.core.buffer.core import NDArrayLike 23 | from zarr.core.common import MemoryOrder 24 | 25 | 26 | @dataclass(frozen=True) 27 | class _AsyncArrayProxy: 28 | array: AsyncArray 29 | 30 | def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy: 31 | return _AsyncArraySelectionProxy(self.array, selection) 32 | 33 | 34 | @dataclass(frozen=True) 35 | class _AsyncArraySelectionProxy: 36 | array: AsyncArray 37 | selection: Selection 38 | 39 | async def get(self) -> NDArrayLike: 40 | return await self.array.getitem(self.selection) 41 | 42 | 
async def set(self, value: np.ndarray) -> None: 43 | return await self.array.setitem(self.selection, value) 44 | 45 | 46 | def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: 47 | if order == "F": 48 | return tuple(ndim - x - 1 for x in range(ndim)) 49 | else: 50 | return tuple(range(ndim)) 51 | 52 | 53 | def test_sharding_pickle() -> None: 54 | """ 55 | Test that sharding codecs can be pickled 56 | """ 57 | pass 58 | 59 | 60 | @pytest.mark.parametrize("input_order", ["F", "C"]) 61 | @pytest.mark.parametrize("store_order", ["F", "C"]) 62 | @pytest.mark.parametrize("runtime_write_order", ["C"]) 63 | @pytest.mark.parametrize("runtime_read_order", ["C"]) 64 | @pytest.mark.parametrize("with_sharding", [True, False]) 65 | async def test_order( 66 | *, 67 | store: Store, 68 | input_order: MemoryOrder, 69 | store_order: MemoryOrder, 70 | runtime_write_order: MemoryOrder, 71 | runtime_read_order: MemoryOrder, 72 | with_sharding: bool, 73 | ) -> None: 74 | data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) 75 | path = "order" 76 | spath = StorePath(store, path=path) 77 | codecs_: list[Codec] = ( 78 | [ 79 | ShardingCodec( 80 | chunk_shape=(16, 8), 81 | codecs=[ 82 | TransposeCodec(order=order_from_dim(store_order, data.ndim)), 83 | BytesCodec(), 84 | ], 85 | ) 86 | ] 87 | if with_sharding 88 | else [ 89 | TransposeCodec(order=order_from_dim(store_order, data.ndim)), 90 | BytesCodec(), 91 | ] 92 | ) 93 | 94 | with config.set({"array.order": runtime_write_order}): 95 | a = await AsyncArray.create( 96 | spath, 97 | shape=data.shape, 98 | chunk_shape=(32, 8), 99 | dtype=data.dtype, 100 | fill_value=0, 101 | chunk_key_encoding=("v2", "."), 102 | codecs=codecs_, 103 | ) 104 | 105 | await _AsyncArrayProxy(a)[:, :].set(data) 106 | read_data = await _AsyncArrayProxy(a)[:, :].get() 107 | assert np.array_equal(data, read_data) 108 | 109 | with config.set({"array.order": runtime_read_order}): 110 | a = await AsyncArray.open( 111 | spath, 112 | ) 113 | read_data = await _AsyncArrayProxy(a)[:, :].get() 114 | assert np.array_equal(data, read_data) 115 | 116 | if runtime_read_order == "F": 117 | assert read_data.flags["F_CONTIGUOUS"] 118 | assert not read_data.flags["C_CONTIGUOUS"] 119 | else: 120 | assert not read_data.flags["F_CONTIGUOUS"] 121 | assert read_data.flags["C_CONTIGUOUS"] 122 | 123 | 124 | @pytest.mark.parametrize("input_order", ["F", "C"]) 125 | @pytest.mark.parametrize("runtime_write_order", ["C"]) 126 | @pytest.mark.parametrize("runtime_read_order", ["C"]) 127 | @pytest.mark.parametrize("with_sharding", [True, False]) 128 | def test_order_implicit( 129 | *, 130 | store: Store, 131 | input_order: MemoryOrder, 132 | runtime_write_order: MemoryOrder, 133 | runtime_read_order: MemoryOrder, 134 | with_sharding: bool, 135 | ) -> None: 136 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) 137 | path = "order_implicit" 138 | spath = StorePath(store, path) 139 | codecs_: list[Codec] | None = ( 140 | [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None 141 | ) 142 | 143 | with config.set({"array.order": runtime_write_order}): 144 | a = Array.create( 145 | spath, 146 | shape=data.shape, 147 | chunk_shape=(16, 16), 148 | dtype=data.dtype, 149 | fill_value=0, 150 | codecs=codecs_, 151 | ) 152 | 153 | a[:, :] = data 154 | 155 | with config.set({"array.order": runtime_read_order}): 156 | a = Array.open(spath) 157 | read_data = a[:, :] 158 | assert np.array_equal(data, read_data) 159 | 160 | if runtime_read_order == "F": 161 | 
if runtime_read_order == "F":
161 |
assert read_data.flags["F_CONTIGUOUS"] 162 | assert not read_data.flags["C_CONTIGUOUS"] 163 | else: 164 | assert not read_data.flags["F_CONTIGUOUS"] 165 | assert read_data.flags["C_CONTIGUOUS"] 166 | 167 | 168 | def test_open(store: Store) -> None: 169 | spath = StorePath(store) 170 | a = Array.create( 171 | spath, 172 | shape=(16, 16), 173 | chunk_shape=(16, 16), 174 | dtype="int32", 175 | fill_value=0, 176 | ) 177 | b = Array.open(spath) 178 | assert a.metadata == b.metadata 179 | 180 | 181 | def test_morton() -> None: 182 | assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] 183 | assert list(morton_order_iter((2, 2, 2))) == [ 184 | (0, 0, 0), 185 | (1, 0, 0), 186 | (0, 1, 0), 187 | (1, 1, 0), 188 | (0, 0, 1), 189 | (1, 0, 1), 190 | (0, 1, 1), 191 | (1, 1, 1), 192 | ] 193 | assert list(morton_order_iter((2, 2, 2, 2))) == [ 194 | (0, 0, 0, 0), 195 | (1, 0, 0, 0), 196 | (0, 1, 0, 0), 197 | (1, 1, 0, 0), 198 | (0, 0, 1, 0), 199 | (1, 0, 1, 0), 200 | (0, 1, 1, 0), 201 | (1, 1, 1, 0), 202 | (0, 0, 0, 1), 203 | (1, 0, 0, 1), 204 | (0, 1, 0, 1), 205 | (1, 1, 0, 1), 206 | (0, 0, 1, 1), 207 | (1, 0, 1, 1), 208 | (0, 1, 1, 1), 209 | (1, 1, 1, 1), 210 | ] 211 | 212 | 213 | def test_write_partial_chunks(store: Store) -> None: 214 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 215 | spath = StorePath(store) 216 | a = Array.create( 217 | spath, 218 | shape=data.shape, 219 | chunk_shape=(20, 20), 220 | dtype=data.dtype, 221 | fill_value=1, 222 | ) 223 | a[0:16, 0:16] = data 224 | assert np.array_equal(a[0:16, 0:16], data) 225 | 226 | 227 | async def test_delete_empty_chunks(store: Store) -> None: 228 | data = np.ones((16, 16)) 229 | path = "delete_empty_chunks" 230 | spath = StorePath(store, path) 231 | a = await AsyncArray.create( 232 | spath, 233 | shape=data.shape, 234 | chunk_shape=(32, 32), 235 | dtype=data.dtype, 236 | fill_value=1, 237 | ) 238 | await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) 239 | await _AsyncArrayProxy(a)[:16, :16].set(data) 240 | assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) 241 | assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None 242 | 243 | 244 | async def test_dimension_names(store: Store) -> None: 245 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 246 | path = "dimension_names" 247 | spath = StorePath(store, path) 248 | await AsyncArray.create( 249 | spath, 250 | shape=data.shape, 251 | chunk_shape=(16, 16), 252 | dtype=data.dtype, 253 | fill_value=0, 254 | dimension_names=("x", "y"), 255 | ) 256 | 257 | assert (await AsyncArray.open(spath)).metadata.dimension_names == ( 258 | "x", 259 | "y", 260 | ) 261 | path2 = "dimension_names2" 262 | spath2 = StorePath(store, path2) 263 | await AsyncArray.create( 264 | spath2, 265 | shape=data.shape, 266 | chunk_shape=(16, 16), 267 | dtype=data.dtype, 268 | fill_value=0, 269 | ) 270 | 271 | assert (await AsyncArray.open(spath2)).metadata.dimension_names is None 272 | zarr_json_buffer = await store.get( 273 | f"{path2}/zarr.json", prototype=default_buffer_prototype() 274 | ) 275 | assert zarr_json_buffer is not None 276 | assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) 277 | 278 | 279 | def test_invalid_metadata(store: Store) -> None: 280 | # LD: Disabled for `zarrs`. Including endianness for a single-byte data type is not invalid. 
281 | # spath2 = StorePath(store, "invalid_endian") 282 | # with pytest.raises(TypeError): 283 | # Array.create( 284 | # spath2, 285 | # shape=(16, 16), 286 | # chunk_shape=(16, 16), 287 | # dtype=np.dtype("uint8"), 288 | # fill_value=0, 289 | # codecs=[ 290 | # BytesCodec(endian="big"), 291 | # TransposeCodec(order=order_from_dim("F", 2)), 292 | # ], 293 | # ) 294 | spath3 = StorePath(store, "invalid_order") 295 | with pytest.raises(TypeError): 296 | Array.create( 297 | spath3, 298 | shape=(16, 16), 299 | chunk_shape=(16, 16), 300 | dtype=np.dtype("uint8"), 301 | fill_value=0, 302 | codecs=[ 303 | BytesCodec(), 304 | TransposeCodec(order="F"), # type: ignore[arg-type] 305 | ], 306 | ) 307 | spath4 = StorePath(store, "invalid_missing_bytes_codec") 308 | with pytest.raises(ValueError, match=r".*[Cc]odec.*required"): 309 | Array.create( 310 | spath4, 311 | shape=(16, 16), 312 | chunk_shape=(16, 16), 313 | dtype=np.dtype("uint8"), 314 | fill_value=0, 315 | codecs=[ 316 | TransposeCodec(order=order_from_dim("F", 2)), 317 | ], 318 | ) 319 | spath5 = StorePath(store, "invalid_inner_chunk_shape") 320 | with pytest.raises( 321 | ValueError, match=r".*shard.*chunk_shape.*array.*shape.*need.*same.*dimensions" 322 | ): 323 | Array.create( 324 | spath5, 325 | shape=(16, 16), 326 | chunk_shape=(16, 16), 327 | dtype=np.dtype("uint8"), 328 | fill_value=0, 329 | codecs=[ 330 | ShardingCodec(chunk_shape=(8,)), 331 | ], 332 | ) 333 | spath6 = StorePath(store, "invalid_inner_chunk_shape") 334 | with pytest.raises( 335 | ValueError, match=r".*array.*chunk_shape.*divisible.*shard.*chunk_shape" 336 | ): 337 | Array.create( 338 | spath6, 339 | shape=(16, 16), 340 | chunk_shape=(16, 16), 341 | dtype=np.dtype("uint8"), 342 | fill_value=0, 343 | codecs=[ 344 | ShardingCodec(chunk_shape=(8, 7)), 345 | ], 346 | ) 347 | # LD: Disabled for `zarrs`. Such checks do not exist. 348 | # Also this is not invalid metadata, should be a separate test. 
349 |     # spath7 = StorePath(store, "warning_inefficient_codecs")
350 |     # with pytest.warns(UserWarning):
351 |     #     Array.create(
352 |     #         spath7,
353 |     #         shape=(16, 16),
354 |     #         chunk_shape=(16, 16),
355 |     #         dtype=np.dtype("uint8"),
356 |     #         fill_value=0,
357 |     #         codecs=[
358 |     #             ShardingCodec(chunk_shape=(8, 8)),
359 |     #             GzipCodec(),
360 |     #         ],
361 |     #     )
362 | 
363 | 
364 | async def test_resize(store: Store) -> None:
365 |     data = np.zeros((16, 18), dtype="uint16")
366 |     path = "resize"
367 |     spath = StorePath(store, path)
368 |     a = await AsyncArray.create(
369 |         spath,
370 |         shape=data.shape,
371 |         chunk_shape=(10, 10),
372 |         dtype=data.dtype,
373 |         chunk_key_encoding=("v2", "."),
374 |         fill_value=1,
375 |     )
376 | 
377 |     await _AsyncArrayProxy(a)[:16, :18].set(data)
378 |     assert (
379 |         await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is not None
380 |     )
381 |     assert (
382 |         await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None
383 |     )
384 |     assert (
385 |         await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
386 |     )
387 |     assert (
388 |         await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None
389 |     )
390 | 
391 |     await a.resize((10, 12))
392 |     assert a.metadata.shape == (10, 12)
393 |     assert (
394 |         await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None
395 |     )
396 |     assert (
397 |         await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
398 |     )
399 |     assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None
400 |     assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None
401 | 
--------------------------------------------------------------------------------
/tests/test_endian.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 | 
3 | import numpy as np
4 | import pytest
5 | from zarr import AsyncArray
6 | from zarr.abc.store import Store
7 | from zarr.codecs import BytesCodec
8 | from zarr.storage import StorePath
9 | 
10 | from .test_codecs import _AsyncArrayProxy
11 | 
12 | 
13 | @pytest.mark.parametrize("endian", ["big", "little"])
14 | async def test_endian(store: Store, endian: Literal["big", "little"]) -> None:
15 |     data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
16 |     path = "endian"
17 |     spath = StorePath(store, path)
18 |     a = await AsyncArray.create(
19 |         spath,
20 |         shape=data.shape,
21 |         chunk_shape=(16, 16),
22 |         dtype=data.dtype,
23 |         fill_value=0,
24 |         chunk_key_encoding=("v2", "."),
25 |         codecs=[BytesCodec(endian=endian)],
26 |     )
27 | 
28 |     await _AsyncArrayProxy(a)[:, :].set(data)
29 |     readback_data = await _AsyncArrayProxy(a)[:, :].get()
30 |     assert np.array_equal(data, readback_data)
31 | 
32 | 
33 | @pytest.mark.parametrize("dtype_input_endian", [">u2", "<u2"])
34 | @pytest.mark.parametrize("dtype_store_endian", ["big", "little"])
35 | async def test_endian_write(
36 |     store: Store,
37 |     dtype_input_endian: Literal[">u2", "<u2"],
38 |     dtype_store_endian: Literal["big", "little"],
39 | ) -> None:
40 |     data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16))
41 |     path = "endian"
42 |     spath = StorePath(store, path)
43 |     a = await AsyncArray.create(
44 |         spath,
45 |         shape=data.shape,
46 |         chunk_shape=(16, 16),
47 |         dtype="uint16",
48 |         fill_value=0,
49 |         chunk_key_encoding=("v2", "."),
50 |         codecs=[BytesCodec(endian=dtype_store_endian)],
51 |     )
52 | 
53 |     await _AsyncArrayProxy(a)[:, :].set(data)
54 |     readback_data = await _AsyncArrayProxy(a)[:, :].get()
55 |     assert np.array_equal(data, readback_data)
56 | 
--------------------------------------------------------------------------------
/tests/test_gzip.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from zarr import Array 3 | from zarr.abc.store import Store 4 | from zarr.codecs import BytesCodec, GzipCodec 5 | from zarr.storage import StorePath 6 | 7 | 8 | def test_gzip(store: Store) -> None: 9 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 10 | 11 | a = Array.create( 12 | StorePath(store), 13 | shape=data.shape, 14 | chunk_shape=(16, 16), 15 | dtype=data.dtype, 16 | fill_value=0, 17 | codecs=[BytesCodec(), GzipCodec()], 18 | ) 19 | 20 | a[:, :] = data 21 | assert np.array_equal(data, a[:, :]) 22 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import operator 4 | import pickle 5 | import tempfile 6 | from collections.abc import Callable 7 | from contextlib import contextmanager 8 | from functools import reduce 9 | from itertools import product 10 | from pathlib import Path 11 | from types import EllipsisType 12 | 13 | import numpy as np 14 | import pytest 15 | import zarr 16 | from zarr.storage import LocalStore 17 | 18 | import zarrs # noqa: F401 19 | 20 | axis_size_ = 10 21 | chunk_size_ = axis_size_ // 2 22 | fill_value_ = 32767 23 | dimensionalities_ = list(range(1, 5)) 24 | 25 | 26 | @pytest.fixture 27 | def fill_value() -> int: 28 | return fill_value_ 29 | 30 | 31 | non_numpy_indices = [ 32 | pytest.param(slice(1, 3), id="slice_in_chunk"), 33 | pytest.param(slice(1, 7), id="slice_across_chunks"), 34 | pytest.param(2, id="int"), 35 | pytest.param(slice(None), id="full_slice"), 36 | pytest.param(Ellipsis, id="ellipsis"), 37 | ] 38 | 39 | numpy_indices = [ 40 | pytest.param(np.array([1, 2]), id="contiguous_in_chunk_array"), 41 | pytest.param(np.array([0, 3]), id="discontinuous_in_chunk_array"), 42 | pytest.param(np.array([0, 6]), id="across_chunks_indices_array"), 43 | ] 44 | 45 | all_indices = numpy_indices + non_numpy_indices 46 | 47 | indexing_method_params = [ 48 | pytest.param(lambda x: getattr(x, "oindex"), id="oindex"), 49 | pytest.param(lambda x: x, id="vindex"), 50 | ] 51 | 52 | zarr_formats = [2, 3] 53 | 54 | 55 | def pytest_generate_tests(metafunc): 56 | old_pipeline_path = zarr.config.get("codec_pipeline.path") 57 | # need to set the codec pipeline to the zarrs pipeline because the autouse fixture doesn't apply here 58 | zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) 59 | if "test_roundtrip" in metafunc.function.__name__: 60 | arrs = [] 61 | indices = [] 62 | store_values = [] 63 | indexing_methods = [] 64 | ids = [] 65 | for format in zarr_formats: 66 | for dimensionality in dimensionalities_: 67 | indexers = non_numpy_indices if dimensionality > 2 else all_indices 68 | for index_param_prod in product(indexers, repeat=dimensionality): 69 | index = tuple( 70 | index_param.values[0] for index_param in index_param_prod 71 | ) 72 | # multi-ellipsis indexing is not supported 73 | if sum(isinstance(i, EllipsisType) for i in index) > 1: 74 | continue 75 | for indexing_method_param in indexing_method_params: 76 | arr = gen_arr( 77 | fill_value_, Path(tempfile.mktemp()), dimensionality, format 78 | ) 79 | indexing_method = indexing_method_param.values[0] 80 | dimensionality_id = f"{dimensionality}d" 81 | id = "-".join( 82 | [indexing_method_param.id, dimensionality_id] 83 | + [index_param.id for index_param in index_param_prod] 84 | + [f"v{format}"] 85 | ) 86 | 
)
86 |
ids.append(id) 87 | store_values.append( 88 | gen_store_values( 89 | indexing_method, 90 | index, 91 | full_array((axis_size_,) * dimensionality), 92 | ) 93 | ) 94 | indexing_methods.append(indexing_method) 95 | indices.append(index) 96 | arrs.append(arr) 97 | # array is used as param name to prevent collision with arr fixture 98 | metafunc.parametrize( 99 | ["array", "index", "store_values", "indexing_method"], 100 | zip(arrs, indices, store_values, indexing_methods), 101 | ids=ids, 102 | ) 103 | zarr.config.set({"codec_pipeline.path": old_pipeline_path}) 104 | 105 | 106 | def full_array(shape) -> np.ndarray: 107 | return np.arange(reduce(operator.mul, shape, 1)).reshape(shape) 108 | 109 | 110 | def gen_store_values( 111 | indexing_method: Callable, 112 | index: tuple[int | slice | np.ndarray | EllipsisType, ...], 113 | full_array: np.ndarray, 114 | ) -> np.ndarray: 115 | class smoke: 116 | oindex = "oindex" 117 | 118 | def maybe_convert( 119 | i: int | np.ndarray | slice | EllipsisType, axis: int 120 | ) -> np.ndarray: 121 | if isinstance(i, np.ndarray): 122 | return i 123 | if isinstance(i, slice): 124 | return np.arange( 125 | i.start if i.start is not None else 0, 126 | i.stop if i.stop is not None else full_array.shape[axis], 127 | ) 128 | if isinstance(i, int): 129 | return np.array([i]) 130 | if isinstance(i, EllipsisType): 131 | return np.arange(full_array.shape[axis]) 132 | raise ValueError(f"Invalid index {i}") 133 | 134 | if not isinstance(index, EllipsisType) and indexing_method(smoke()) == "oindex": 135 | index: tuple[np.ndarray, ...] = tuple( 136 | maybe_convert(i, axis) for axis, i in enumerate(index) 137 | ) 138 | res = full_array[np.ix_(*index)] 139 | # squeeze out extra dims from integer indexers 140 | if all(i.shape == (1,) for i in index): 141 | res = res.squeeze() 142 | return res 143 | res = res.squeeze( 144 | axis=tuple(axis for axis, i in enumerate(index) if i.shape == (1,)) 145 | ) 146 | return res 147 | return full_array[index] 148 | 149 | 150 | def gen_arr(fill_value, tmp_path, dimensionality, format) -> zarr.Array: 151 | return zarr.create( 152 | (axis_size_,) * dimensionality, 153 | store=LocalStore(root=tmp_path / ".zarr"), 154 | chunks=(chunk_size_,) * dimensionality, 155 | dtype=np.int16, 156 | fill_value=fill_value, 157 | codecs=[zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()] 158 | if format == 3 159 | else None, 160 | zarr_format=format, 161 | ) 162 | 163 | 164 | @pytest.fixture(params=dimensionalities_) 165 | def dimensionality(request): 166 | return request.param 167 | 168 | 169 | @pytest.fixture(params=zarr_formats) 170 | def format(request): 171 | return request.param 172 | 173 | 174 | @pytest.fixture 175 | def arr(dimensionality, tmp_path, format) -> zarr.Array: 176 | return gen_arr(fill_value_, tmp_path, dimensionality, format) 177 | 178 | 179 | def test_fill_value(arr: zarr.Array): 180 | assert np.all(arr[:] == fill_value_) 181 | 182 | 183 | def test_constant(arr: zarr.Array): 184 | arr[:] = 42 185 | assert np.all(arr[:] == 42) 186 | 187 | 188 | def test_singleton(arr: zarr.Array): 189 | singleton_index = (1,) * len(arr.shape) 190 | non_singleton_index = (0,) * len(arr.shape) 191 | arr[singleton_index] = 42 192 | assert arr[singleton_index] == 42 193 | assert arr[non_singleton_index] != 42 194 | 195 | 196 | def test_full_array(arr: zarr.Array): 197 | stored_values = full_array(arr.shape) 198 | arr[:] = stored_values 199 | assert np.all(arr[:] == stored_values) 200 | 201 | 202 | def test_roundtrip( 203 | array: zarr.Array, 204 | store_values: 
np.ndarray, 205 | index: tuple[int | slice | np.ndarray | EllipsisType, ...], 206 | indexing_method: Callable, 207 | ): 208 | indexing_method(array)[index] = store_values 209 | res = indexing_method(array)[index] 210 | assert np.all( 211 | res == store_values, 212 | ), res 213 | 214 | 215 | def test_ellipsis_indexing_invalid(arr: zarr.Array): 216 | if len(arr.shape) <= 2: 217 | pytest.skip( 218 | "Ellipsis indexing works for 1D and 2D arrays in zarr-python despite a shape mismatch" 219 | ) 220 | stored_value = np.array([1, 2, 3]) 221 | with pytest.raises(ValueError): # noqa: PT011 222 | # zarrs-python error: ValueError: operands could not be broadcast together with shapes (4,) (3,) 223 | # numpy error: ValueError: could not broadcast input array from shape (3,) into shape (4,) 224 | arr[2, ...] = stored_value 225 | 226 | 227 | def test_pickle(arr: zarr.Array, tmp_path: Path): 228 | arr[:] = np.arange(reduce(operator.mul, arr.shape, 1)).reshape(arr.shape) 229 | expected = arr[:] 230 | with Path.open(tmp_path / "arr.pickle", "wb") as f: 231 | pickle.dump(arr._async_array.codec_pipeline, f) 232 | with Path.open(tmp_path / "arr.pickle", "rb") as f: 233 | object.__setattr__(arr._async_array, "codec_pipeline", pickle.load(f)) 234 | assert (arr[:] == expected).all() 235 | 236 | 237 | @contextmanager 238 | def use_zarr_default_codec_reader(): 239 | zarr.config.set( 240 | {"codec_pipeline.path": "zarr.core.codec_pipeline.BatchedCodecPipeline"} 241 | ) 242 | yield 243 | zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) 244 | 245 | 246 | def test_roundtrip_read_only_zarrs( 247 | array: zarr.Array, 248 | store_values: np.ndarray, 249 | index: tuple[int | slice | np.ndarray | EllipsisType, ...], 250 | indexing_method: Callable, 251 | ): 252 | with use_zarr_default_codec_reader(): 253 | arr_default = zarr.open(array.store, read_only=True) 254 | indexing_method(arr_default)[index] = store_values 255 | res = indexing_method(zarr.open(array.store))[index] 256 | assert np.all( 257 | res == store_values, 258 | ), res 259 | -------------------------------------------------------------------------------- /tests/test_sharding.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import Any 3 | 4 | import numpy as np 5 | import numpy.typing as npt 6 | import pytest 7 | from zarr import Array, AsyncArray 8 | from zarr.abc.store import Store 9 | from zarr.codecs import ( 10 | BloscCodec, 11 | BytesCodec, 12 | ShardingCodec, 13 | ShardingCodecIndexLocation, 14 | TransposeCodec, 15 | ) 16 | from zarr.core.buffer import default_buffer_prototype 17 | from zarr.storage import StorePath 18 | 19 | from .conftest import ArrayRequest 20 | from .test_codecs import _AsyncArrayProxy, order_from_dim 21 | 22 | 23 | @pytest.mark.parametrize("index_location", ["start", "end"]) 24 | @pytest.mark.parametrize( 25 | "array_fixture", 26 | [ 27 | ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), 28 | ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), 29 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 30 | ], 31 | indirect=["array_fixture"], 32 | ) 33 | @pytest.mark.parametrize("offset", [0, 10]) 34 | def test_sharding( 35 | store: Store, 36 | array_fixture: npt.NDArray[Any], 37 | index_location: ShardingCodecIndexLocation, 38 | offset: int, 39 | ) -> None: 40 | """ 41 | Test that we can create an array with a sharding codec, write data to that array, and get 42 | the same data out via indexing. 
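    For example, with offset=10 the 1D case creates a (138,) array with (64,)
    shards, writes data at [10:], and the leading [:10] region must read back
    as the fill value (6) because it was never written.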
43 | """ 44 | data = array_fixture 45 | spath = StorePath(store) 46 | arr = Array.create( 47 | spath, 48 | shape=tuple(s + offset for s in data.shape), 49 | chunk_shape=(64,) * data.ndim, 50 | dtype=data.dtype, 51 | fill_value=6, 52 | codecs=[ 53 | ShardingCodec( 54 | chunk_shape=(32,) * data.ndim, 55 | codecs=[ 56 | TransposeCodec(order=order_from_dim("F", data.ndim)), 57 | BytesCodec(), 58 | BloscCodec(cname="lz4"), 59 | ], 60 | index_location=index_location, 61 | ) 62 | ], 63 | ) 64 | write_region = tuple(slice(offset, None) for dim in range(data.ndim)) 65 | arr[write_region] = data 66 | 67 | if offset > 0: 68 | empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) 69 | assert np.all(arr[empty_region] == arr.metadata.fill_value) 70 | 71 | read_data = arr[write_region] 72 | assert data.shape == read_data.shape 73 | assert np.array_equal(data, read_data) 74 | 75 | 76 | @pytest.mark.parametrize("index_location", ["start", "end"]) 77 | @pytest.mark.parametrize( 78 | "array_fixture", 79 | [ 80 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 81 | ], 82 | indirect=["array_fixture"], 83 | ) 84 | def test_sharding_partial( 85 | store: Store, 86 | array_fixture: npt.NDArray[Any], 87 | index_location: ShardingCodecIndexLocation, 88 | ) -> None: 89 | data = array_fixture 90 | spath = StorePath(store) 91 | a = Array.create( 92 | spath, 93 | shape=tuple(a + 10 for a in data.shape), 94 | chunk_shape=(64, 64, 64), 95 | dtype=data.dtype, 96 | fill_value=0, 97 | codecs=[ 98 | ShardingCodec( 99 | chunk_shape=(32, 32, 32), 100 | codecs=[ 101 | TransposeCodec(order=order_from_dim("F", data.ndim)), 102 | BytesCodec(), 103 | BloscCodec(cname="lz4"), 104 | ], 105 | index_location=index_location, 106 | ) 107 | ], 108 | ) 109 | 110 | a[10:, 10:, 10:] = data 111 | 112 | read_data = a[0:10, 0:10, 0:10] 113 | assert np.all(read_data == 0) 114 | 115 | read_data = a[10:, 10:, 10:] 116 | assert data.shape == read_data.shape 117 | assert np.array_equal(data, read_data) 118 | 119 | 120 | @pytest.mark.parametrize("index_location", ["start", "end"]) 121 | @pytest.mark.parametrize( 122 | "array_fixture", 123 | [ 124 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 125 | ], 126 | indirect=["array_fixture"], 127 | ) 128 | def test_sharding_partial_readwrite( 129 | store: Store, 130 | array_fixture: npt.NDArray[Any], 131 | index_location: ShardingCodecIndexLocation, 132 | ) -> None: 133 | data = array_fixture 134 | spath = StorePath(store) 135 | a = Array.create( 136 | spath, 137 | shape=data.shape, 138 | chunk_shape=data.shape, 139 | dtype=data.dtype, 140 | fill_value=0, 141 | codecs=[ 142 | ShardingCodec( 143 | chunk_shape=(1, data.shape[1], data.shape[2]), 144 | codecs=[BytesCodec()], 145 | index_location=index_location, 146 | ) 147 | ], 148 | ) 149 | 150 | a[:] = data 151 | 152 | for x in range(data.shape[0]): 153 | read_data = a[x, :, :] 154 | assert np.array_equal(data[x], read_data) 155 | 156 | 157 | @pytest.mark.parametrize( 158 | "array_fixture", 159 | [ 160 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 161 | ], 162 | indirect=["array_fixture"], 163 | ) 164 | @pytest.mark.parametrize("index_location", ["start", "end"]) 165 | def test_sharding_partial_read( 166 | store: Store, 167 | array_fixture: npt.NDArray[Any], 168 | index_location: ShardingCodecIndexLocation, 169 | ) -> None: 170 | data = array_fixture 171 | spath = StorePath(store) 172 | a = Array.create( 173 | spath, 174 | shape=tuple(a + 10 for a in data.shape), 175 | chunk_shape=(64, 64, 64), 176 | 
dtype=data.dtype, 177 | fill_value=1, 178 | codecs=[ 179 | ShardingCodec( 180 | chunk_shape=(32, 32, 32), 181 | codecs=[ 182 | TransposeCodec(order=order_from_dim("F", data.ndim)), 183 | BytesCodec(), 184 | BloscCodec(cname="lz4"), 185 | ], 186 | index_location=index_location, 187 | ) 188 | ], 189 | ) 190 | 191 | read_data = a[0:10, 0:10, 0:10] 192 | assert np.all(read_data == 1) 193 | 194 | 195 | @pytest.mark.parametrize( 196 | "array_fixture", 197 | [ 198 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 199 | ], 200 | indirect=["array_fixture"], 201 | ) 202 | @pytest.mark.parametrize("index_location", ["start", "end"]) 203 | def test_sharding_partial_overwrite( 204 | store: Store, 205 | array_fixture: npt.NDArray[Any], 206 | index_location: ShardingCodecIndexLocation, 207 | ) -> None: 208 | data = array_fixture[:10, :10, :10] 209 | spath = StorePath(store) 210 | a = Array.create( 211 | spath, 212 | shape=tuple(a + 10 for a in data.shape), 213 | chunk_shape=(64, 64, 64), 214 | dtype=data.dtype, 215 | fill_value=1, 216 | codecs=[ 217 | ShardingCodec( 218 | chunk_shape=(32, 32, 32), 219 | codecs=[ 220 | TransposeCodec(order=order_from_dim("F", data.ndim)), 221 | BytesCodec(), 222 | BloscCodec(cname="lz4"), 223 | ], 224 | index_location=index_location, 225 | ) 226 | ], 227 | ) 228 | 229 | a[:10, :10, :10] = data 230 | 231 | read_data = a[0:10, 0:10, 0:10] 232 | assert np.array_equal(data, read_data) 233 | 234 | data = data + 10 235 | a[:10, :10, :10] = data 236 | read_data = a[0:10, 0:10, 0:10] 237 | assert np.array_equal(data, read_data) 238 | 239 | 240 | @pytest.mark.parametrize( 241 | "array_fixture", 242 | [ 243 | ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), 244 | ], 245 | indirect=["array_fixture"], 246 | ) 247 | @pytest.mark.parametrize( 248 | "outer_index_location", 249 | ["start", "end"], 250 | ) 251 | @pytest.mark.parametrize( 252 | "inner_index_location", 253 | ["start", "end"], 254 | ) 255 | def test_nested_sharding( 256 | store: Store, 257 | array_fixture: npt.NDArray[Any], 258 | outer_index_location: ShardingCodecIndexLocation, 259 | inner_index_location: ShardingCodecIndexLocation, 260 | ) -> None: 261 | data = array_fixture 262 | spath = StorePath(store) 263 | a = Array.create( 264 | spath, 265 | shape=data.shape, 266 | chunk_shape=(64, 64, 64), 267 | dtype=data.dtype, 268 | fill_value=0, 269 | codecs=[ 270 | ShardingCodec( 271 | chunk_shape=(32, 32, 32), 272 | codecs=[ 273 | ShardingCodec( 274 | chunk_shape=(16, 16, 16), index_location=inner_index_location 275 | ) 276 | ], 277 | index_location=outer_index_location, 278 | ) 279 | ], 280 | ) 281 | 282 | a[:, :, :] = data 283 | 284 | read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] 285 | assert data.shape == read_data.shape 286 | assert np.array_equal(data, read_data) 287 | 288 | 289 | def test_open_sharding(store: Store) -> None: 290 | path = "open_sharding" 291 | spath = StorePath(store, path) 292 | a = Array.create( 293 | spath, 294 | shape=(16, 16), 295 | chunk_shape=(16, 16), 296 | dtype="int32", 297 | fill_value=0, 298 | codecs=[ 299 | ShardingCodec( 300 | chunk_shape=(8, 8), 301 | codecs=[ 302 | TransposeCodec(order=order_from_dim("F", 2)), 303 | BytesCodec(), 304 | BloscCodec(), 305 | ], 306 | ) 307 | ], 308 | ) 309 | b = Array.open(spath) 310 | assert a.metadata == b.metadata 311 | 312 | 313 | def test_write_partial_sharded_chunks(store: Store) -> None: 314 | data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16)) 315 | spath = StorePath(store) 316 | a = 
Array.create( 317 | spath, 318 | shape=(40, 40), 319 | chunk_shape=(20, 20), 320 | dtype=data.dtype, 321 | fill_value=1, 322 | codecs=[ 323 | ShardingCodec( 324 | chunk_shape=(10, 10), 325 | codecs=[ 326 | BytesCodec(), 327 | BloscCodec(), 328 | ], 329 | ) 330 | ], 331 | ) 332 | a[0:16, 0:16] = data 333 | assert np.array_equal(a[0:16, 0:16], data) 334 | 335 | 336 | async def test_delete_empty_shards(store: Store) -> None: 337 | if not store.supports_deletes: 338 | pytest.skip("store does not support deletes") 339 | path = "delete_empty_shards" 340 | spath = StorePath(store, path) 341 | a = await AsyncArray.create( 342 | spath, 343 | shape=(16, 16), 344 | chunk_shape=(8, 16), 345 | dtype="uint16", 346 | fill_value=1, 347 | codecs=[ShardingCodec(chunk_shape=(8, 8))], 348 | ) 349 | await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) 350 | await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) 351 | await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) 352 | # chunk (0, 0) is full 353 | # chunks (0, 1), (1, 0), (1, 1) are empty 354 | # shard (0, 0) is half-full 355 | # shard (1, 0) is empty 356 | 357 | data = np.ones((16, 16), dtype="uint16") 358 | data[:8, :8] = 0 359 | assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) 360 | assert ( 361 | await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype()) is None 362 | ) 363 | chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype()) 364 | assert chunk_bytes is not None 365 | assert len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 366 | 367 | 368 | def test_pickle() -> None: 369 | codec = ShardingCodec(chunk_shape=(8, 8)) 370 | assert pickle.loads(pickle.dumps(codec)) == codec 371 | 372 | 373 | @pytest.mark.parametrize( 374 | "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end] 375 | ) 376 | async def test_sharding_with_empty_inner_chunk( 377 | store: Store, index_location: ShardingCodecIndexLocation 378 | ) -> None: 379 | data = np.arange(0, 16 * 16, dtype="uint32").reshape((16, 16)) 380 | fill_value = 1 381 | 382 | path = f"sharding_with_empty_inner_chunk_{index_location}" 383 | spath = StorePath(store, path) 384 | a = await AsyncArray.create( 385 | spath, 386 | shape=(16, 16), 387 | chunk_shape=(8, 8), 388 | dtype="uint32", 389 | fill_value=fill_value, 390 | codecs=[ShardingCodec(chunk_shape=(4, 4), index_location=index_location)], 391 | ) 392 | data[:4, :4] = fill_value 393 | await a.setitem(..., data) 394 | print("read data") 395 | data_read = await a.getitem(...) 
396 | assert np.array_equal(data_read, data) 397 | -------------------------------------------------------------------------------- /tests/test_transpose.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | import numpy as np 4 | import pytest 5 | from zarr import Array, AsyncArray, config 6 | from zarr.abc.store import Store 7 | from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec 8 | from zarr.core.common import MemoryOrder 9 | from zarr.storage import StorePath 10 | 11 | from .test_codecs import _AsyncArrayProxy 12 | 13 | if TYPE_CHECKING: 14 | from zarr.abc.codec import Codec 15 | 16 | 17 | @pytest.mark.parametrize("input_order", ["F", "C"]) 18 | @pytest.mark.parametrize("runtime_write_order", ["C"]) 19 | @pytest.mark.parametrize("runtime_read_order", ["C"]) 20 | @pytest.mark.parametrize("with_sharding", [True, False]) 21 | async def test_transpose( 22 | *, 23 | store: Store, 24 | input_order: MemoryOrder, 25 | runtime_write_order: MemoryOrder, 26 | runtime_read_order: MemoryOrder, 27 | with_sharding: bool, 28 | ) -> None: 29 | data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) 30 | spath = StorePath(store, path="transpose") 31 | codecs_: list[Codec] = ( 32 | [ 33 | ShardingCodec( 34 | chunk_shape=(1, 16, 8), 35 | codecs=[TransposeCodec(order=(2, 1, 0)), BytesCodec()], 36 | ) 37 | ] 38 | if with_sharding 39 | else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] 40 | ) 41 | with config.set({"array.order": runtime_write_order}): 42 | a = await AsyncArray.create( 43 | spath, 44 | shape=data.shape, 45 | chunk_shape=(1, 32, 8), 46 | dtype=data.dtype, 47 | fill_value=0, 48 | chunk_key_encoding=("v2", "."), 49 | codecs=codecs_, 50 | ) 51 | 52 | await _AsyncArrayProxy(a)[:, :].set(data) 53 | read_data = await _AsyncArrayProxy(a)[:, :].get() 54 | assert np.array_equal(data, read_data) 55 | 56 | with config.set({"array.order": runtime_read_order}): 57 | a = await AsyncArray.open( 58 | spath, 59 | ) 60 | read_data = await _AsyncArrayProxy(a)[:, :].get() 61 | assert np.array_equal(data, read_data) 62 | 63 | if runtime_read_order == "F": 64 | assert read_data.flags["F_CONTIGUOUS"] 65 | assert not read_data.flags["C_CONTIGUOUS"] 66 | else: 67 | assert not read_data.flags["F_CONTIGUOUS"] 68 | assert read_data.flags["C_CONTIGUOUS"] 69 | 70 | 71 | @pytest.mark.parametrize("order", [[1, 2, 0], [1, 2, 3, 0], [3, 2, 4, 0, 1]]) 72 | def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: 73 | shape = [i + 3 for i in range(len(order))] 74 | data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) 75 | spath = StorePath(store, "transpose_non_self_inverse") 76 | a = Array.create( 77 | spath, 78 | shape=data.shape, 79 | chunk_shape=data.shape, 80 | dtype=data.dtype, 81 | fill_value=0, 82 | codecs=[TransposeCodec(order=order), BytesCodec()], 83 | ) 84 | a[:, :] = data 85 | read_data = a[:, :] 86 | assert np.array_equal(data, read_data) 87 | 88 | 89 | def test_transpose_invalid( 90 | store: Store, 91 | ) -> None: 92 | data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8)) 93 | spath = StorePath(store, "transpose_invalid") 94 | for order in [(1, 0), (3, 2, 1), (3, 3, 1)]: 95 | with pytest.raises(ValueError, match=r".*order"): 96 | Array.create( 97 | spath, 98 | shape=data.shape, 99 | chunk_shape=(1, 32, 8), 100 | dtype=data.dtype, 101 | fill_value=0, 102 | chunk_key_encoding=("v2", "."), 103 | codecs=[TransposeCodec(order=order), BytesCodec()], 104 | ) 105 | 
-------------------------------------------------------------------------------- /tests/test_v2.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections.abc import Iterator 3 | from pathlib import Path 4 | from typing import Any, Literal 5 | 6 | import numcodecs.vlen 7 | import numpy as np 8 | import pytest 9 | import zarr 10 | import zarr.core.buffer 11 | import zarr.storage 12 | from numcodecs import Delta 13 | from numcodecs.blosc import Blosc 14 | from numcodecs.zstd import Zstd 15 | from zarr import config 16 | from zarr.abc.store import Store 17 | from zarr.core.buffer.core import default_buffer_prototype 18 | from zarr.core.sync import sync 19 | from zarr.storage import LocalStore, StorePath 20 | 21 | 22 | @pytest.fixture 23 | async def store(tmp_path) -> Iterator[StorePath]: 24 | return StorePath(await LocalStore.open(tmp_path)) 25 | 26 | 27 | def test_simple(store: StorePath) -> None: 28 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 29 | 30 | a = zarr.create_array( 31 | store / "simple_v2", 32 | zarr_format=2, 33 | shape=data.shape, 34 | chunks=(16, 16), 35 | dtype=data.dtype, 36 | fill_value=0, 37 | ) 38 | 39 | a[:, :] = data 40 | assert np.array_equal(data, a[:, :]) 41 | 42 | 43 | @pytest.mark.parametrize( 44 | ("dtype", "fill_value"), 45 | [ 46 | ("bool", False), 47 | ("int64", 0), 48 | ("float64", 0.0), 49 | ("|S1", b""), 50 | ("|U1", ""), 51 | ("object", ""), 52 | (str, ""), 53 | ], 54 | ) 55 | def test_implicit_fill_value(store: LocalStore, dtype: str, fill_value: Any) -> None: 56 | arr = zarr.create( 57 | store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype 58 | ) 59 | assert arr.metadata.fill_value is None 60 | assert arr.metadata.to_dict()["fill_value"] is None 61 | result = arr[:] 62 | numpy_dtype = np.dtype(object) if dtype is str else np.dtype(dtype) 63 | expected = np.full(arr.shape, fill_value, dtype=numpy_dtype) 64 | np.testing.assert_array_equal(result, expected) 65 | 66 | 67 | def test_codec_pipeline(tmp_path) -> None: 68 | # https://github.com/zarr-developers/zarr-python/issues/2243 69 | store = LocalStore(tmp_path) 70 | array = zarr.create( 71 | store=store, 72 | shape=(1,), 73 | dtype="i4", 74 | zarr_format=2, 75 | filters=[Delta(dtype="i4").get_config()], 76 | compressor=Blosc().get_config(), 77 | ) 78 | array[:] = 1 79 | result = array[:] 80 | expected = np.ones(1) 81 | np.testing.assert_array_equal(result, expected) 82 | 83 | 84 | @pytest.mark.parametrize( 85 | ("dtype", "expected_dtype", "fill_value", "fill_value_encoding"), 86 | [ 87 | ("|S", "|S0", b"X", "WA=="), 88 | ("|V", "|V0", b"X", "WA=="), 89 | ("|V10", "|V10", b"X", "WAAAAAAAAAAAAA=="), 90 | ], 91 | ) 92 | async def test_v2_encode_decode( 93 | dtype, expected_dtype, fill_value, fill_value_encoding, tmp_path 94 | ) -> None: 95 | with config.set( 96 | { 97 | "array.v2_default_filters.bytes": [{"id": "vlen-bytes"}], 98 | "array.v2_default_compressor.bytes": None, 99 | } 100 | ): 101 | store = zarr.storage.LocalStore(tmp_path) 102 | g = zarr.group(store=store, zarr_format=2) 103 | g.create_array( 104 | name="foo", 105 | shape=(3,), 106 | chunks=(3,), 107 | dtype=dtype, 108 | fill_value=fill_value, 109 | compressor=None, 110 | ) 111 | 112 | result = await store.get( 113 | "foo/.zarray", zarr.core.buffer.default_buffer_prototype() 114 | ) 115 | assert result is not None 116 | 117 | serialized = json.loads(result.to_bytes()) 118 | expected = { 119 | "chunks": [3], 120 | "compressor": None, 121 | "dtype": 
expected_dtype, 122 | "fill_value": fill_value_encoding, 123 | "filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None, 124 | "order": "C", 125 | "shape": [3], 126 | "zarr_format": 2, 127 | "dimension_separator": ".", 128 | } 129 | assert serialized == expected 130 | 131 | data = zarr.open_array(store=store, path="foo")[:] 132 | expected = np.full((3,), b"X", dtype=dtype) 133 | np.testing.assert_equal(data, expected) 134 | 135 | 136 | @pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]]) 137 | def test_v2_encode_decode_with_data(dtype_value, tmp_path): 138 | dtype, value = dtype_value 139 | with config.set( 140 | { 141 | "array.v2_default_filters": { 142 | "string": [{"id": "vlen-utf8"}], 143 | "bytes": [{"id": "vlen-bytes"}], 144 | }, 145 | } 146 | ): 147 | expected = np.full((3,), value, dtype=dtype) 148 | a = zarr.create( 149 | store=tmp_path, 150 | shape=(3,), 151 | zarr_format=2, 152 | dtype=dtype, 153 | ) 154 | a[:] = expected 155 | data = a[:] 156 | np.testing.assert_equal(data, expected) 157 | 158 | 159 | @pytest.mark.parametrize("dtype", [str, "str"]) 160 | async def test_create_dtype_str(dtype: Any, tmp_path) -> None: 161 | # see https://github.com/zarr-developers/zarr-python/issues/2627 for why this test 162 | # is probably wrong 163 | arr = zarr.create(store=tmp_path, shape=3, dtype=dtype, zarr_format=2) 164 | assert arr.dtype.kind == "O" 165 | assert arr.metadata.to_dict()["dtype"] == "|O" 166 | assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),) 167 | arr[:] = [b"a", b"bb", b"ccc"] 168 | result = arr[:] 169 | np.testing.assert_array_equal( 170 | result, np.array([b"a", b"bb", b"ccc"], dtype="object") 171 | ) 172 | 173 | 174 | @pytest.mark.parametrize( 175 | "filters", [[], [numcodecs.Delta(dtype="<i4")], [Zstd(level=1)]] 176 | ) 177 | @pytest.mark.parametrize("order", ["C", "F"]) 178 | def test_v2_filters_codecs( 179 | filters: Any, order: Literal["C", "F"], tmp_path 180 | ) -> None: 181 | array_fixture = [42] 182 | with config.set({"array.order": order}): 183 | arr = zarr.create( 184 | store=tmp_path, shape=1, dtype="<i4", zarr_format=2, filters=filters 185 | ) 186 | arr[:] = array_fixture 187 | result = arr[:] 188 | np.testing.assert_array_equal(result, array_fixture) 189 | 190 | 226 | @pytest.mark.parametrize("array_order", ["C", "F"]) 227 | @pytest.mark.parametrize("data_order", ["C", "F"]) 228 | @pytest.mark.parametrize("memory_order", ["C", "F"]) 229 | def test_v2_non_contiguous( 230 | array_order: Literal["C", "F"], data_order: Literal["C", "F"], 231 | memory_order: Literal["C", "F"], tmp_path 232 | ) -> None: 233 | store = LocalStore(tmp_path / "a_store") 234 | arr = zarr.create_array( 235 | store, 236 | shape=(10, 8), 237 | chunks=(3, 3), 238 | fill_value=np.nan, 239 | dtype="float64", 240 | zarr_format=2, 241 | filters=None, 242 | compressors=None, 243 | overwrite=True, 244 | order=array_order, 245 | config={"order": memory_order}, 246 | ) 247 | 248 | # Non-contiguous write 249 | a = np.arange(arr.shape[0] * arr.shape[1]).reshape(arr.shape, order=data_order) 250 | arr[6:9, 3:6] = a[6:9, 3:6] # The slice on the RHS is important 251 | np.testing.assert_array_equal(arr[6:9, 3:6], a[6:9, 3:6]) 252 | 253 | np.testing.assert_array_equal( 254 | a[6:9, 3:6], 255 | np.frombuffer( 256 | sync(store.get("2.1", default_buffer_prototype())).to_bytes(), 257 | dtype="float64", 258 | ).reshape((3, 3), order=array_order), 259 | ) 260 | if memory_order == "F": 261 | assert (arr[6:9, 3:6]).flags.f_contiguous 262 | else: 263 | assert (arr[6:9, 3:6]).flags.c_contiguous 264 | 265 | store = LocalStore(tmp_path / "other_store") 266 | arr = zarr.create_array( 267 | store, 268 | shape=(10, 8), 269 | chunks=(3, 3), 270 | fill_value=np.nan, 271 | dtype="float64", 272 | zarr_format=2, 273 | compressors=None, 274 | filters=None, 275 | overwrite=True, 276 | order=array_order, 277 | config={"order": memory_order}, 278 | ) 279 | 280 | # Contiguous write 281 | a = np.arange(9).reshape((3, 3), order=data_order) 282 | if data_order == "F": 283 | assert a.flags.f_contiguous 284 | else: 285 | assert a.flags.c_contiguous 286 | arr[6:9, 3:6] = a 287 | np.testing.assert_array_equal(arr[6:9, 3:6], a) 288 | 289 | 290 | def
test_default_compressor_deprecation_warning(): 291 | with pytest.warns(DeprecationWarning, match="default_compressor is deprecated"): 292 | zarr.storage.default_compressor = "zarr.codecs.zstd.ZstdCodec()" 293 | 294 | 295 | @pytest.mark.parametrize( 296 | "dtype_expected", 297 | [ 298 | ["b", "zstd", None], 299 | ["i", "zstd", None], 300 | ["f", "zstd", None], 301 | ["|S1", "zstd", "vlen-bytes"], 302 | ["|U1", "zstd", "vlen-utf8"], 303 | ], 304 | ) 305 | def test_default_filters_and_compressor(dtype_expected: Any) -> None: 306 | with config.set( 307 | { 308 | "array.v2_default_compressor": { 309 | "numeric": {"id": "zstd", "level": "0"}, 310 | "string": {"id": "zstd", "level": "0"}, 311 | "bytes": {"id": "zstd", "level": "0"}, 312 | }, 313 | "array.v2_default_filters": { 314 | "numeric": [], 315 | "string": [{"id": "vlen-utf8"}], 316 | "bytes": [{"id": "vlen-bytes"}], 317 | }, 318 | } 319 | ): 320 | dtype, expected_compressor, expected_filter = dtype_expected 321 | arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype) 322 | assert arr.metadata.compressor.codec_id == expected_compressor 323 | if expected_filter is not None: 324 | assert arr.metadata.filters[0].codec_id == expected_filter 325 | 326 | 327 | @pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"]) 328 | def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None: 329 | a = np.array( 330 | [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)], 331 | dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], 332 | ) 333 | array_path = tmp_path / "data.zarr" 334 | za = zarr.create( 335 | shape=(3,), 336 | store=array_path, 337 | chunks=(2,), 338 | fill_value=fill_value, 339 | zarr_format=2, 340 | dtype=a.dtype, 341 | ) 342 | if fill_value is not None: 343 | assert (np.array([fill_value] * a.shape[0], dtype=a.dtype) == za[:]).all() 344 | za[...] 
= a 345 | za = zarr.open_array(store=array_path) 346 | assert (a == za[:]).all() 347 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | import zarrs 4 | 5 | 6 | def test_version(): 7 | assert zarrs.__version__ == version("zarrs") 8 | -------------------------------------------------------------------------------- /tests/test_vlen.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy as np 4 | import pytest 5 | import zarr 6 | from zarr.abc.codec import Codec 7 | from zarr.abc.store import Store 8 | from zarr.codecs import ZstdCodec 9 | from zarr.core.metadata.v3 import ArrayV3Metadata, DataType 10 | from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING 11 | from zarr.storage import StorePath 12 | 13 | numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType] 14 | expected_zarr_string_dtype: np.dtype[Any] 15 | if _NUMPY_SUPPORTS_VLEN_STRING: 16 | numpy_str_dtypes.append(np.dtypes.StringDType) 17 | expected_zarr_string_dtype = np.dtypes.StringDType() 18 | else: 19 | expected_zarr_string_dtype = np.dtype("O") 20 | 21 | 22 | @pytest.mark.parametrize("store", ["local"], indirect=["store"]) 23 | @pytest.mark.parametrize("dtype", numpy_str_dtypes) 24 | @pytest.mark.parametrize("as_object_array", [False, True]) 25 | @pytest.mark.parametrize("compressor", [None, ZstdCodec()]) 26 | def test_vlen_string( 27 | store: Store, 28 | dtype: np.dtype[Any] | None, 29 | *, 30 | as_object_array: bool, 31 | compressor: Codec | None, 32 | ) -> None: 33 | strings = ["hello", "world", "this", "is", "a", "test"] 34 | data = np.array(strings, dtype=dtype).reshape((2, 3)) 35 | 36 | sp = StorePath(store, path="string") 37 | a = zarr.create_array( 38 | sp, 39 | shape=data.shape, 40 | chunks=data.shape, 41 | dtype=data.dtype, 42 | fill_value="", 43 | compressors=compressor, 44 | ) 45 | assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy 46 | 47 | # should also work if input array is an object array, provided we explicitly specified 48 | # a stringlike dtype when creating the Array 49 | if as_object_array: 50 | data = data.astype("O") 51 | 52 | a[:, :] = data 53 | assert np.array_equal(data, a[:, :]) 54 | assert a.metadata.data_type == DataType.string 55 | assert a.dtype == expected_zarr_string_dtype 56 | 57 | # test round trip 58 | b = zarr.open(sp) 59 | assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy 60 | assert np.array_equal(data, b[:, :]) 61 | assert b.metadata.data_type == DataType.string 62 | assert a.dtype == expected_zarr_string_dtype 63 | 64 | 65 | @pytest.mark.parametrize("store", ["local"], indirect=["store"]) 66 | @pytest.mark.parametrize("as_object_array", [False, True]) 67 | @pytest.mark.parametrize("compressor", [None, ZstdCodec()]) 68 | def test_vlen_bytes( 69 | store: Store, *, as_object_array: bool, compressor: Codec | None 70 | ) -> None: 71 | bstrings = [b"hello", b"world", b"this", b"is", b"a", b"test"] 72 | data = np.array(bstrings).reshape((2, 3)) 73 | assert data.dtype == "|S5" 74 | 75 | sp = StorePath(store, path="string") 76 | a = zarr.create_array( 77 | sp, 78 | shape=data.shape, 79 | chunks=data.shape, 80 | dtype=data.dtype, 81 | fill_value=b"", 82 | compressors=compressor, 83 | ) 84 | assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy 85 | 86 | # should also 
work if input array is an object array, provided we explicitly specified 87 | # a bytestring-like dtype when creating the Array 88 | if as_object_array: 89 | data = data.astype("O") 90 | a[:, :] = data 91 | assert np.array_equal(data, a[:, :]) 92 | assert a.metadata.data_type == DataType.bytes 93 | assert a.dtype == "O" 94 | 95 | # test round trip 96 | b = zarr.open(sp) 97 | assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy 98 | assert np.array_equal(data, b[:, :]) 99 | assert b.metadata.data_type == DataType.bytes 100 | assert a.dtype == "O" 101 | -------------------------------------------------------------------------------- /tests/test_zarrs_http.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import aiohttp 4 | import numpy as np 5 | import pytest 6 | import zarr 7 | from zarr.storage import FsspecStore 8 | 9 | ARR_REF = np.array( 10 | [ 11 | [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -0.6, 0.1], 12 | [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -1.6, 0.1], 13 | [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -2.6, 0.1], 14 | [np.nan, np.nan, np.nan, np.nan, -3.4, -3.5, -3.6, 0.1], 15 | [1.0, 1.0, 1.0, -4.3, -4.4, -4.5, -4.6, 1.1], 16 | [1.0, 1.0, 1.0, -5.3, -5.4, -5.5, -5.6, 1.1], 17 | [1.0, 1.0, 1.0, 1.0, 1.1, 1.1, -6.6, 1.1], 18 | [1.0, 1.0, 1.0, 1.0, -7.4, -7.5, -7.6, -7.7], 19 | ] 20 | ) 21 | 22 | URL = "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" 23 | 24 | 25 | def test_zarrs_http(): 26 | arr = zarr.open(URL) 27 | assert arr.shape == (8, 8) 28 | assert np.allclose(arr[:], ARR_REF, equal_nan=True) 29 | 30 | 31 | @pytest.mark.xfail(reason="Storage options are not supported for HTTP store") 32 | def test_zarrs_http_kwargs(): 33 | store = FsspecStore.from_url( 34 | URL, storage_options={"auth": aiohttp.BasicAuth("user", "pass")} 35 | ) 36 | arr = zarr.open(store) 37 | assert arr.shape == (8, 8) 38 | assert np.allclose(arr[:], ARR_REF, equal_nan=True) 39 | -------------------------------------------------------------------------------- /tests/test_zstd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from zarr import Array 4 | from zarr.abc.store import Store 5 | from zarr.codecs import BytesCodec, ZstdCodec 6 | from zarr.storage import StorePath 7 | 8 | 9 | @pytest.mark.parametrize("checksum", [True, False]) 10 | def test_zstd(*, store: Store, checksum: bool) -> None: 11 | data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) 12 | 13 | a = Array.create( 14 | StorePath(store, path="zstd"), 15 | shape=data.shape, 16 | chunk_shape=(16, 16), 17 | dtype=data.dtype, 18 | fill_value=0, 19 | codecs=[BytesCodec(), ZstdCodec(level=0, checksum=checksum)], 20 | ) 21 | 22 | a[:, :] = data 23 | assert np.array_equal(data, a[:, :]) 24 | --------------------------------------------------------------------------------