├── src └── pydantic_zarr │ ├── py.typed │ ├── __init__.py │ ├── experimental │ ├── __init__.py │ ├── README.md │ └── core.py │ └── core.py ├── tests ├── test_docs │ ├── __init__.py │ └── test_docs.py └── test_pydantic_zarr │ ├── __init__.py │ ├── test_experimental │ ├── __init__.py │ ├── test_core.py │ ├── conftest.py │ ├── test_v3.py │ └── test_v2.py │ ├── test_core.py │ ├── conftest.py │ ├── test_v3.py │ └── test_v2.py ├── docs ├── api │ ├── core.md │ ├── v2.md │ ├── v3.md │ └── experimental │ │ ├── v2.md │ │ ├── v3.md │ │ └── core.md ├── usage_zarr_v3.md ├── plugins │ └── main.py ├── index.md ├── release-notes.md ├── experimental │ ├── index.md │ └── usage.md └── usage_zarr_v2.md ├── changes ├── .gitignore └── README.md ├── .github ├── dependabot.yml └── workflows │ ├── pre-commit.yml │ ├── check_changelogs.yml │ ├── cd.yml │ └── test.yml ├── .readthedocs.yaml ├── LICENSE ├── .pre-commit-config.yaml ├── ci └── check_changelog_entries.py ├── mkdocs.yaml ├── README.md ├── .gitignore └── pyproject.toml /src/pydantic_zarr/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_docs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/pydantic_zarr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/api/core.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.core -------------------------------------------------------------------------------- /docs/api/v2.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.v2 -------------------------------------------------------------------------------- /docs/api/v3.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.v3 -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/test_core.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/api/experimental/v2.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.experimental.v2 2 | -------------------------------------------------------------------------------- /docs/api/experimental/v3.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.experimental.v3 2 | -------------------------------------------------------------------------------- /docs/api/experimental/core.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.experimental.core 2 | 
-------------------------------------------------------------------------------- /changes/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore all files in this directory... 2 | * 3 | # ...except for the gitignore itself and the README 4 | !.gitignore 5 | !README.md 6 | # And keep the actual changelog fragments 7 | !*.md 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: github-actions 5 | directory: / 6 | schedule: 7 | interval: weekly 8 | groups: 9 | actions: 10 | patterns: 11 | - '*' 12 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v6 13 | - uses: actions/setup-python@v6 14 | - uses: pre-commit/action@v3.0.1 15 | -------------------------------------------------------------------------------- /.github/workflows/check_changelogs.yml: -------------------------------------------------------------------------------- 1 | name: Check changelog entries 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | check-changelogs: 8 | name: Check changelog entries 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v6 13 | 14 | - name: Install uv 15 | uses: astral-sh/setup-uv@v7 16 | 17 | - name: Check changelog entries 18 | run: uv run --no-sync python ci/check_changelog_entries.py 19 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for MkDocs projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.12" 12 | 13 | mkdocs: 14 | configuration: mkdocs.yaml 15 | 16 | # Optionally declare the Python requirements required to build your docs 17 | python: 18 | install: 19 | - method: pip 20 | path: . 21 | extra_requirements: 22 | - docs 23 | -------------------------------------------------------------------------------- /src/pydantic_zarr/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experimental module for pydantic-zarr. 3 | 4 | This module contains refactored versions of the core modules with 5 | breaking API changes. Use with caution as the API is not yet stable. 6 | 7 | The main changes in the experimental module: 8 | - Removed generic type parameters from ArraySpec and GroupSpec 9 | - Simplified type system using concrete union types 10 | - Added BaseGroupSpec for group-only operations 11 | 12 | To use the experimental module: 13 | 14 | from pydantic_zarr.experimental import v2, v3 15 | 16 | # Use v2.ArraySpec, v2.GroupSpec, etc. instead of the main module versions 17 | """ 18 | 19 | from . 
import core, v2, v3 20 | 21 | __all__ = ["core", "v2", "v3"] 22 | -------------------------------------------------------------------------------- /tests/test_docs/test_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | 5 | import pytest 6 | from pytest_examples import CodeExample, EvalExample, find_examples 7 | 8 | SOURCES_ROOT: Path = Path(__file__).parent.parent.parent / "src/pydantic_zarr" 9 | 10 | 11 | @pytest.mark.parametrize("example", find_examples(str(SOURCES_ROOT)), ids=str) 12 | def test_docstrings(example: CodeExample, eval_example: EvalExample) -> None: 13 | eval_example.run_print_check(example) 14 | 15 | 16 | @pytest.mark.parametrize("example", find_examples("docs"), ids=str) 17 | def test_docs_examples(example: CodeExample, eval_example: EvalExample) -> None: 18 | pytest.importorskip("zarr") 19 | 20 | eval_example.run_print_check(example) 21 | -------------------------------------------------------------------------------- /changes/README.md: -------------------------------------------------------------------------------- 1 | # Changelog Entries 2 | 3 | This directory contains changelog entries for the pydantic-zarr project. 4 | 5 | ## Adding a changelog entry 6 | 7 | When making a pull request, you should add a changelog entry to this directory. 8 | The entry should be in a file named `xxxx..md` where: 9 | 10 | - `xxxx` is the pull request number 11 | - `` is one of: `feature`, `bugfix`, `doc`, `removal`, or `misc` 12 | 13 | The file should contain a short description of what you have changed, and how it impacts users of `pydantic-zarr`. 14 | 15 | ## Fragment types 16 | 17 | - `feature` - for new features 18 | - `bugfix` - for bug fixes 19 | - `doc` - for documentation changes 20 | - `removal` - for removals 21 | - `misc` - for miscellaneous changes that don't fit other categories 22 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | from typing import TYPE_CHECKING, Any 5 | 6 | if TYPE_CHECKING: 7 | from collections.abc import Mapping 8 | 9 | import numpy as np 10 | 11 | 12 | @dataclass 13 | class FakeArray: 14 | shape: tuple[int, ...] 15 | dtype: np.dtype[Any] 16 | 17 | 18 | @dataclass 19 | class FakeH5PyArray(FakeArray): 20 | attrs: Mapping[str, Any] 21 | chunks: tuple[int, ...] 22 | 23 | 24 | @dataclass 25 | class FakeDaskArray(FakeArray): 26 | chunksize: tuple[int, ...] 27 | chunks: tuple[tuple[int, ...], ...] 28 | 29 | 30 | @dataclass 31 | class FakeXarray(FakeArray): 32 | chunksizes: dict[str, tuple[int, ...]] 33 | chunks: tuple[tuple[int, ...], ...] 
| None 34 | -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish Python 🐍 distributions 📦 to PyPI 8 | runs-on: ubuntu-latest 9 | if: 10 | startsWith(github.ref, 'refs/tags') 11 | steps: 12 | - uses: actions/checkout@v6 13 | - name: Set up Python 14 | uses: actions/setup-python@v6 15 | with: 16 | python-version: "3.x" 17 | - name: Install Hatch 18 | run: | 19 | pip install hatch==1.14.1 20 | - name: Build package 21 | run: | 22 | hatch build 23 | - name: Publish distribution 📦 to PyPI 24 | uses: pypa/gh-action-pypi-publish@release/v1 25 | with: 26 | password: ${{ secrets.PYPI_API_TOKEN }} 27 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from pydantic_zarr.core import ensure_member_name, tuplify_json 6 | 7 | 8 | @pytest.mark.parametrize("data", ["/", "///", "a/b/", "a/b/vc"]) 9 | def test_parse_str_no_path(data: str) -> None: 10 | with pytest.raises(ValueError, match='Strings containing "/" are invalid.'): 11 | ensure_member_name(data) 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("input_obj", "expected_output"), 16 | [ 17 | ({"key": [1, 2, 3]}, {"key": (1, 2, 3)}), 18 | ([1, [2, 3], 4], (1, (2, 3), 4)), 19 | ({"nested": {"list": [1, 2]}}, {"nested": {"list": (1, 2)}}), 20 | ([{"a": [1, 2]}, {"b": 3}], ({"a": (1, 2)}, {"b": 3})), 21 | ([], ()), 22 | ], 23 | ) 24 | def test_tuplify_json(input_obj: object, expected_output: object) -> None: 25 | """ 26 | Test that tuplify_json converts lists to tuples, with recursion inside sequences 27 | and dictionaries. 28 | """ 29 | assert tuplify_json(input_obj) == expected_output 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2023 Howard Hughes Medical Institute 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 7 | Neither the name of HHMI nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_commit_msg: "chore: update pre-commit hooks" 3 | autoupdate_schedule: "monthly" 4 | autofix_commit_msg: "style: pre-commit fixes" 5 | autofix_prs: false 6 | default_stages: [pre-commit, pre-push] 7 | repos: 8 | - repo: https://github.com/astral-sh/ruff-pre-commit 9 | rev: v0.9.7 10 | hooks: 11 | - id: ruff 12 | args: ["--fix", "--show-fixes"] 13 | - id: ruff-format 14 | - repo: https://github.com/codespell-project/codespell 15 | rev: v2.4.1 16 | hooks: 17 | - id: codespell 18 | args: ["-L", "fo,ihs,kake,te", "-S", "fixture"] 19 | - repo: https://github.com/pre-commit/pre-commit-hooks 20 | rev: v5.0.0 21 | hooks: 22 | - id: check-yaml 23 | - id: trailing-whitespace 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: v1.15.0 26 | hooks: 27 | - id: mypy 28 | files: src 29 | additional_dependencies: 30 | - pytest 31 | - pydantic>2.0.0 32 | - numpy 33 | - zarr>=3.1.0 34 | - numcodecs 35 | - repo: https://github.com/scientific-python/cookie 36 | rev: 2025.01.22 37 | hooks: 38 | - id: sp-repo-review 39 | - repo: https://github.com/pre-commit/pygrep-hooks 40 | rev: v1.10.0 41 | hooks: 42 | - id: rst-directive-colons 43 | - id: rst-inline-touching-normal 44 | - repo: https://github.com/numpy/numpydoc 45 | rev: v1.8.0 46 | hooks: 47 | - id: numpydoc-validation 48 | -------------------------------------------------------------------------------- /ci/check_changelog_entries.py: -------------------------------------------------------------------------------- 1 | """ 2 | Check changelog entries have the correct filename structure. 
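Valid entries are named like ``1234.feature.md``: an integer pull request number,
followed by one of the types in ``VALID_CHANGELOG_TYPES``, followed by the ``.md`` suffix.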
3 | """ 4 | 5 | import sys 6 | from pathlib import Path 7 | 8 | VALID_CHANGELOG_TYPES = ["feature", "bugfix", "doc", "removal", "misc"] 9 | CHANGELOG_DIRECTORY = (Path(__file__).parent.parent / "changes").resolve() 10 | 11 | 12 | def is_int(s: str) -> bool: 13 | try: 14 | int(s) 15 | except ValueError: 16 | return False 17 | else: 18 | return True 19 | 20 | 21 | if __name__ == "__main__": 22 | print(f"Looking for changelog entries in {CHANGELOG_DIRECTORY}") 23 | entries = CHANGELOG_DIRECTORY.glob("*") 24 | entries = [e for e in entries if e.name not in [".gitignore", "README.md"]] 25 | print(f"Found {len(entries)} entries") 26 | print() 27 | 28 | bad_suffix = [e for e in entries if e.suffix != ".md"] 29 | bad_issue_no = [e for e in entries if not is_int(e.name.split(".")[0])] 30 | bad_type = [e for e in entries if e.name.split(".")[1] not in VALID_CHANGELOG_TYPES] 31 | 32 | if len(bad_suffix) or len(bad_issue_no) or len(bad_type): 33 | if len(bad_suffix): 34 | print("Changelog entries without .md suffix") 35 | print("-------------------------------------") 36 | print("\n".join([p.name for p in bad_suffix])) 37 | print() 38 | if len(bad_issue_no): 39 | print("Changelog entries without integer issue number") 40 | print("----------------------------------------------") 41 | print("\n".join([p.name for p in bad_issue_no])) 42 | print() 43 | if len(bad_type): 44 | print("Changelog entries without valid type") 45 | print("------------------------------------") 46 | print("\n".join([p.name for p in bad_type])) 47 | print(f"Valid types are: {VALID_CHANGELOG_TYPES}") 48 | print() 49 | sys.exit(1) 50 | 51 | sys.exit(0) 52 | -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- 1 | site_name: "pydantic-zarr" 2 | site_url: https://pydantic-zarr.readthedocs.io/ 3 | site_author: Davis Bennett 4 | site_description: >- 5 | Documentation for pydantic-zarr 6 | 7 | # Repository 8 | repo_name: zarr-developers/pydantic-zarr 9 | repo_url: https://github.com/zarr-developers/pydantic-zarr 10 | 11 | theme: 12 | name: material 13 | palette: 14 | # Palette toggle for light mode 15 | - scheme: default 16 | toggle: 17 | icon: material/brightness-7 18 | name: Switch to dark mode 19 | 20 | # Palette toggle for dark mode 21 | - scheme: slate 22 | toggle: 23 | icon: material/brightness-4 24 | name: Switch to light mode 25 | 26 | nav: 27 | - About: index.md 28 | - Usage (Zarr V3): usage_zarr_v3.md 29 | - Usage (Zarr V2): usage_zarr_v2.md 30 | - Experimental features: 31 | - Overview: experimental/index.md 32 | - Usage: experimental/usage.md 33 | - API: 34 | - core: api/core.md 35 | - v2: api/v2.md 36 | - v3: api/v3.md 37 | - experimental: 38 | - core: api/experimental/core.md 39 | - v2: api/experimental/v2.md 40 | - v3: api/experimental/v3.md 41 | - Release Notes: release-notes.md 42 | plugins: 43 | - mkdocstrings: 44 | handlers: 45 | python: 46 | options: 47 | docstring_style: numpy 48 | members_order: source 49 | separate_signature: true 50 | filters: ["!^_"] 51 | docstring_options: 52 | ignore_init_summary: true 53 | merge_init_into_class: true 54 | 55 | markdown_extensions: 56 | - pymdownx.highlight: 57 | anchor_linenums: true 58 | line_spans: __span 59 | pygments_lang_class: true 60 | - pymdownx.inlinehilite 61 | - pymdownx.snippets 62 | - pymdownx.superfences 63 | - pymdownx.tabbed: 64 | alternate_style: true 65 | - toc: 66 | baselevel: 2 67 | toc_depth: 4 68 | permalink: "#" 69 | 
-------------------------------------------------------------------------------- /docs/usage_zarr_v3.md: -------------------------------------------------------------------------------- 1 | # Usage (Zarr V3) 2 | 3 | ## Defining Zarr v3 hierarchies 4 | 5 | ```python 6 | from pydantic_zarr.v3 import ArraySpec, GroupSpec, NamedConfig 7 | 8 | array_attributes = {"baz": [1, 2, 3]} 9 | group_attributes = {"foo": 42, "bar": False} 10 | 11 | array_spec = ArraySpec( 12 | attributes=array_attributes, 13 | shape=[1000, 1000], 14 | dimension_names=["rows", "columns"], 15 | data_type="uint8", 16 | chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": [1000, 100]}), 17 | chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}), 18 | codecs=[NamedConfig(name="gzip", configuration={"level": 1})], 19 | storage_transformers=(), 20 | fill_value=0, 21 | ) 22 | 23 | spec = GroupSpec(attributes=group_attributes, members={"array": array_spec}) 24 | print(spec.model_dump_json(indent=2)) 25 | """ 26 | { 27 | "zarr_format": 3, 28 | "node_type": "group", 29 | "attributes": { 30 | "foo": 42, 31 | "bar": false 32 | }, 33 | "members": { 34 | "array": { 35 | "zarr_format": 3, 36 | "node_type": "array", 37 | "attributes": { 38 | "baz": [ 39 | 1, 40 | 2, 41 | 3 42 | ] 43 | }, 44 | "shape": [ 45 | 1000, 46 | 1000 47 | ], 48 | "data_type": "uint8", 49 | "chunk_grid": { 50 | "name": "regular", 51 | "configuration": { 52 | "chunk_shape": [ 53 | 1000, 54 | 100 55 | ] 56 | } 57 | }, 58 | "chunk_key_encoding": { 59 | "name": "default", 60 | "configuration": { 61 | "separator": "/" 62 | } 63 | }, 64 | "fill_value": 0, 65 | "codecs": [ 66 | { 67 | "name": "gzip", 68 | "configuration": { 69 | "level": 1 70 | } 71 | } 72 | ], 73 | "storage_transformers": [], 74 | "dimension_names": [ 75 | "rows", 76 | "columns" 77 | ] 78 | } 79 | } 80 | } 81 | """ 82 | ``` 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pydantic-zarr 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/pydantic-zarr)](https://pypi.python.org/pypi/pydantic-zarr) 4 | 5 | [Pydantic](https://docs.pydantic.dev/latest/) models for [Zarr](https://zarr.readthedocs.io/en/stable/index.html). 6 | 7 | ## Installation 8 | 9 | ```sh 10 | pip install -U pydantic-zarr 11 | # or, with zarr i/o support 12 | pip install -U "pydantic-zarr[zarr]" 13 | ``` 14 | 15 | ## Getting help 16 | 17 | - Docs: see the [documentation](https://pydantic-zarr.readthedocs.io/) for detailed information about this project. 18 | - Chat: We use [Zulip](https://ossci.zulipchat.com/#narrow/channel/423692-Zarr) for project-related chat. 
19 | 20 | ## Example 21 | 22 | ```python 23 | import zarr 24 | from pydantic_zarr import GroupSpec 25 | 26 | group = zarr.group(path='foo') 27 | array = zarr.create(store = group.store, path='foo/bar', shape=10, dtype='uint8') 28 | array.attrs.put({'metadata': 'hello'}) 29 | 30 | # this is a pydantic model 31 | spec = GroupSpec.from_zarr(group) 32 | print(spec.model_dump()) 33 | """ 34 | { 35 | 'zarr_format': 2, 36 | 'attributes': {}, 37 | 'members': { 38 | 'bar': { 39 | 'zarr_format': 2, 40 | 'attributes': {'metadata': 'hello'}, 41 | 'shape': (10,), 42 | 'chunks': (10,), 43 | 'dtype': '|u1', 44 | 'fill_value': 0, 45 | 'order': 'C', 46 | 'filters': None, 47 | 'dimension_separator': '.', 48 | 'compressor': { 49 | 'id': 'blosc', 50 | 'cname': 'lz4', 51 | 'clevel': 5, 52 | 'shuffle': 1, 53 | 'blocksize': 0, 54 | }, 55 | } 56 | }, 57 | } 58 | """ 59 | ``` 60 | 61 | ## History 62 | 63 | This project was developed at [HHMI / Janelia Research Campus](https://www.janelia.org/). It was originally written by Davis Bennett to solve problems he encountered while working on the [Cellmap Project team](https://www.janelia.org/project-team/cellmap/members). In December of 2024 this project was migrated from the [`janelia-cellmap`](https://github.com/janelia-cellmap) github organization to [`zarr-developers`](https://github.com/zarr-developers) organization. 64 | -------------------------------------------------------------------------------- /docs/plugins/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations as _annotations 2 | 3 | import logging 4 | import os 5 | import re 6 | from pathlib import Path 7 | 8 | from mkdocs.config import Config 9 | from mkdocs.structure.files import Files 10 | from mkdocs.structure.pages import Page 11 | 12 | logger = logging.getLogger("mkdocs.plugin") 13 | THIS_DIR = Path(__file__).parent 14 | DOCS_DIR = THIS_DIR.parent 15 | PROJECT_ROOT = DOCS_DIR.parent 16 | 17 | 18 | def on_pre_build(config: Config) -> None: 19 | """ 20 | Before the build starts. 21 | """ 22 | add_changelog() 23 | 24 | 25 | def on_files(files: Files, config: Config) -> Files: 26 | """ 27 | After the files are loaded, but before they are read. 28 | """ 29 | return files 30 | 31 | 32 | def on_page_markdown(markdown: str, page: Page, config: Config, files: Files) -> str: 33 | """ 34 | Called on each file after it is read and before it is converted to HTML. 
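    In this project the hook only substitutes the ``{{ version }}`` placeholder on
    ``index.md`` (via ``add_version``); every other page is returned unchanged.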
35 | """ 36 | if md := add_version(markdown, page): 37 | return md 38 | else: 39 | return markdown 40 | 41 | 42 | def add_changelog() -> None: 43 | history = (PROJECT_ROOT / "HISTORY.md").read_text() 44 | history = re.sub( 45 | r"#(\d+)", r"[#\1](https://github.com/pydantic/pydantic/issues/\1)", history 46 | ) 47 | history = re.sub( 48 | r"(\s)@([\w\-]+)", r"\1[@\2](https://github.com/\2)", history, flags=re.I 49 | ) 50 | history = re.sub("@@", "@", history) 51 | new_file = DOCS_DIR / "changelog.md" 52 | 53 | # avoid writing file unless the content has changed to avoid infinite build loop 54 | if not new_file.is_file() or new_file.read_text() != history: 55 | new_file.write_text(history) 56 | 57 | 58 | MIN_MINOR_VERSION = 7 59 | MAX_MINOR_VERSION = 11 60 | 61 | 62 | def add_version(markdown: str, page: Page) -> str | None: 63 | if page.file.src_uri != "index.md": 64 | return None 65 | 66 | version_ref = os.getenv("GITHUB_REF") 67 | if version_ref and version_ref.startswith("refs/tags/"): 68 | version = re.sub("^refs/tags/", "", version_ref.lower()) 69 | url = f"https://github.com/pydantic/pydantic/releases/tag/{version}" 70 | version_str = f"Documentation for version: [{version}]({url})" 71 | elif sha := os.getenv("GITHUB_SHA"): 72 | url = f"https://github.com/pydantic/pydantic/commit/{sha}" 73 | sha = sha[:7] 74 | version_str = f"Documentation for development version: [{sha}]({url})" 75 | else: 76 | version_str = "Documentation for development version" 77 | markdown = re.sub(r"{{ *version *}}", version_str, markdown) 78 | return markdown 79 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Test 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | workflow_dispatch: 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | test: 19 | name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, zarr-python=${{ matrix.zarr-version }} 20 | 21 | strategy: 22 | matrix: 23 | python-version: ['3.11', '3.12', '3.13'] 24 | zarr-version: ['3.0.10', '3.1.0', 'none'] 25 | os: ["ubuntu-latest"] 26 | runs-on: ${{ matrix.os }} 27 | 28 | steps: 29 | - uses: actions/checkout@v6 30 | - name: Set up Python 31 | uses: actions/setup-python@v6 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | cache: 'pip' 35 | - name: Install Hatch 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install hatch 39 | - name: Run Tests (with zarr) 40 | if: matrix.zarr-version != 'none' 41 | run: | 42 | hatch run test.py${{ matrix.python-version }}-${{ matrix.zarr-version }}:list-env 43 | hatch run test.py${{ matrix.python-version }}-${{ matrix.zarr-version }}:test-cov 44 | - name: Run Tests (without zarr) 45 | if: matrix.zarr-version == 'none' 46 | run: | 47 | hatch run test-base.py${{ matrix.python-version }}:list-env 48 | hatch run test-base.py${{ matrix.python-version }}:test-cov 49 | - name: Upload coverage 50 | uses: codecov/codecov-action@v5 51 | with: 52 | token: ${{ secrets.CODECOV_TOKEN }} 53 | verbose: true # optional (default = false) 54 | 55 | doctests: 56 | name: doctests 57 | runs-on: ubuntu-latest 
58 | steps: 59 | - uses: actions/checkout@v6 60 | with: 61 | fetch-depth: 0 # required for hatch version discovery, which is needed for numcodecs.zarr3 62 | - name: Set up Python 63 | uses: actions/setup-python@v6 64 | with: 65 | python-version: '3.11' 66 | cache: 'pip' 67 | - name: Install Hatch 68 | run: | 69 | python -m pip install --upgrade pip 70 | pip install hatch 71 | - name: Run Tests 72 | run: | 73 | hatch run docs:test 74 | 75 | test-min-reqs: 76 | runs-on: ubuntu-latest 77 | steps: 78 | - uses: actions/checkout@v6 79 | - name: Set up Python 80 | uses: actions/setup-python@v6 81 | with: 82 | python-version: '3.11' 83 | - name: Install minimum requirements 84 | run: | 85 | pip install . 86 | python -c "import pydantic_zarr.v2; import pydantic_zarr.v3" 87 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | from dataclasses import dataclass 5 | from importlib.metadata import version 6 | from importlib.util import find_spec 7 | 8 | from packaging.version import Version 9 | 10 | ZARR_AVAILABLE = find_spec("zarr") is not None 11 | 12 | if ZARR_AVAILABLE: 13 | ZARR_PYTHON_VERSION = Version(version("zarr")) 14 | else: 15 | ZARR_PYTHON_VERSION = Version("0.0.0") 16 | 17 | DTYPE_EXAMPLES_V2: tuple[DTypeExample, ...] 18 | DTYPE_EXAMPLES_V3: tuple[DTypeExample, ...] 19 | 20 | 21 | @dataclass(frozen=True, slots=True) 22 | class DTypeExample: 23 | name: object 24 | fill_value: object 25 | 26 | 27 | if ZARR_PYTHON_VERSION < Version("3.1.0"): 28 | DTYPE_EXAMPLES_V2 = ( 29 | DTypeExample("|b1", True), 30 | DTypeExample("|i1", -1), 31 | DTypeExample("|i2", -1), 32 | DTypeExample("|i4", -1), 33 | DTypeExample("|i8", -1), 34 | DTypeExample("|u1", 1), 35 | DTypeExample(" Mapping: ... 30 | 31 | 32 | @overload 33 | def tuplify_json(obj: list) -> tuple: ... 34 | 35 | 36 | def tuplify_json(obj: object) -> object: 37 | """ 38 | Recursively converts lists within a Python object to tuples. 39 | """ 40 | if isinstance(obj, list): 41 | return tuple(tuplify_json(elem) for elem in obj) 42 | elif isinstance(obj, dict): 43 | return {k: tuplify_json(v) for k, v in obj.items()} 44 | else: 45 | return obj 46 | 47 | 48 | class StrictBase(BaseModel): 49 | model_config = ConfigDict(frozen=True, extra="forbid") 50 | 51 | 52 | def parse_dtype_v2(value: npt.DTypeLike) -> str | list[tuple[Any, ...]]: 53 | """ 54 | Convert the input to a NumPy dtype and either return the ``str`` attribute of that 55 | object or, if the dtype is a structured dtype, return the fields of that dtype as a list 56 | of tuples. 57 | 58 | Parameters 59 | ---------- 60 | value : npt.DTypeLike 61 | A value that can be converted to a NumPy dtype. 62 | 63 | Returns 64 | ------- 65 | 66 | A Zarr V2-compatible encoding of the dtype. 67 | 68 | References 69 | ---------- 70 | See the [Zarr V2 specification](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding) 71 | for more details on this encoding of data types. 72 | """ 73 | # Assume that a non-string sequence represents a the Zarr V2 JSON form of a structured dtype. 74 | if isinstance(value, Sequence) and not isinstance(value, str): 75 | return [tuple(v) for v in value] 76 | else: 77 | np_dtype = np.dtype(value) 78 | if np_dtype.fields is not None: 79 | # This is a structured dtype, which must be converted to a list of tuples. 
Note that 80 | # this function recurses, because a structured dtype is parametrized by other dtypes. 81 | return [(k, parse_dtype_v2(v[0])) for k, v in np_dtype.fields.items()] 82 | else: 83 | return np_dtype.str 84 | 85 | 86 | def ensure_member_name(data: Any) -> str: 87 | """ 88 | If the input is a string, then ensure that it is a valid 89 | name for a subnode in a zarr group 90 | """ 91 | if isinstance(data, str): 92 | if "/" in data: 93 | raise ValueError( 94 | f'Strings containing "/" are invalid. Got {data}, which violates this rule.' 95 | ) 96 | if data in ("", ".", ".."): 97 | raise ValueError(f"The string {data} is not a valid member name.") 98 | return data 99 | raise TypeError(f"Expected a str, got {type(data)}.") 100 | 101 | 102 | def ensure_key_no_path(data: Any) -> Any: 103 | if isinstance(data, Mapping): 104 | [ensure_member_name(key) for key in data] 105 | return data 106 | 107 | 108 | def model_like(a: BaseModel, b: BaseModel, exclude: IncEx = None, include: IncEx = None) -> bool: 109 | """ 110 | A similarity check for a pair pydantic.BaseModel, parametrized over included or excluded fields. 111 | 112 | 113 | """ 114 | 115 | a_dict = a.model_dump(exclude=exclude, include=include) 116 | b_dict = b.model_dump(exclude=exclude, include=include) 117 | return a_dict == b_dict 118 | 119 | 120 | # TODO: expose contains_array and contains_group as public functions in zarr-python 121 | # and replace these custom implementations 122 | def maybe_node( 123 | store: StoreLike, path: str, *, zarr_format: Literal[2, 3] 124 | ) -> zarr.Array | zarr.Group | None: 125 | """ 126 | Return the array or group found at the store / path, if an array or group exists there. 127 | Otherwise return None. 128 | """ 129 | from zarr.core.sync import sync 130 | from zarr.core.sync_group import get_node 131 | from zarr.storage._common import make_store_path 132 | 133 | # convert the storelike store argument to a Zarr store 134 | spath = sync(make_store_path(store, path=path)) 135 | try: 136 | return get_node(spath.store, spath.path, zarr_format=zarr_format) 137 | except FileNotFoundError: 138 | return None 139 | 140 | 141 | def ensure_multiple(data: Sequence[T]) -> Sequence[T]: 142 | """ 143 | Ensure that there is at least one element in the sequence 144 | """ 145 | if len(data) < 1: 146 | raise ValueError("Invalid length. Expected 1 or more, got 0.") 147 | return data 148 | -------------------------------------------------------------------------------- /src/pydantic_zarr/experimental/README.md: -------------------------------------------------------------------------------- 1 | # Experimental Module 2 | 3 | This module contains refactored versions of the core pydantic-zarr modules with breaking API changes. It is provided for early testing and feedback on proposed changes. 4 | 5 | ## What's Different 6 | 7 | The experimental module removes generic type parameters from `ArraySpec` and `GroupSpec`, simplifying the type system while maintaining full functionality. 8 | 9 | ### Key Changes 10 | 11 | #### 1. 
No Generic Type Parameters 12 | 13 | **Before (main module - with generics):** 14 | ```python 15 | from pydantic_zarr.v2 import ArraySpec, GroupSpec 16 | from collections.abc import Mapping 17 | 18 | # Generic type parameters allowed complex type constraints 19 | # SpecialGroup = GroupSpec[Mapping[str, "ArraySpec | GroupSpec"]] # Not supported in current version 20 | ``` 21 | 22 | **After (experimental module - without generics):** 23 | ```python 24 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 25 | 26 | # No generics - simpler and more straightforward 27 | group = GroupSpec(attributes={}, members={}) 28 | print(group) 29 | #> zarr_format=2 attributes={} members={} 30 | ``` 31 | 32 | #### 2. New `BaseGroupSpec` Class 33 | 34 | The experimental module introduces `BaseGroupSpec` - a model of a Zarr group without members. This enables two important patterns: 35 | 36 | - **Flattened hierarchies**: In `to_flat()` output, groups appear as `BaseGroupSpec` (without recursive members) 37 | - **Partial loading**: Load a group's metadata without traversing its children 38 | 39 | **Example:** 40 | ```python 41 | from pydantic_zarr.experimental.v2 import ArraySpec, BaseGroupSpec, GroupSpec 42 | 43 | # BaseGroupSpec: just metadata 44 | base_group = BaseGroupSpec(attributes={"foo": "bar"}) 45 | 46 | # Create an array spec 47 | array_spec = ArraySpec(shape=(10,), dtype='uint8', chunks=(10,), attributes={}) 48 | 49 | # GroupSpec: metadata + hierarchy 50 | group = GroupSpec( 51 | attributes={"foo": "bar"}, 52 | members={"array": array_spec} 53 | ) 54 | 55 | # Flattened representation uses BaseGroupSpec 56 | flat = group.to_flat() 57 | # Returns: {"": BaseGroupSpec(...), "/array": ArraySpec(...)} 58 | ``` 59 | 60 | #### 3. Union Types Instead of Generics 61 | 62 | Member values are now concrete union types: 63 | 64 | **Before:** 65 | ``` 66 | members: Mapping[str, T] # T was generic 67 | ``` 68 | 69 | **After:** 70 | ``` 71 | members: dict[str, ArraySpec | GroupSpec | BaseGroupSpec] 72 | ``` 73 | 74 | This provides: 75 | - ✅ Better IDE autocomplete 76 | - ✅ Clearer error messages 77 | - ✅ No runtime type checking complexity 78 | - ✅ More explicit code 79 | 80 | #### 4. Refactored `to_zarr()` Method 81 | 82 | Both `BaseGroupSpec` and `GroupSpec` have `to_zarr()` methods: 83 | 84 | - `BaseGroupSpec.to_zarr()`: Creates a group and sets attributes (no recursion) 85 | - `GroupSpec.to_zarr()`: Calls `super().to_zarr()` then recursively writes members 86 | 87 | This eliminates code duplication while maintaining the inheritance hierarchy. 88 | 89 | ## API Stability 90 | 91 | **⚠️ WARNING:** This is an experimental module. The API may change in future releases. Do not rely on it in production code without understanding the risks. 
92 | 93 | ## Migration Guide 94 | 95 | To try the experimental module: 96 | 97 | ```python 98 | # Current (stable) 99 | from pydantic_zarr.v2 import ArraySpec, GroupSpec 100 | 101 | # Experimental (breaking changes) 102 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec, BaseGroupSpec 103 | ``` 104 | 105 | ### What Works the Same 106 | 107 | - `ArraySpec.from_array()` 108 | - `ArraySpec.from_zarr()` / `ArraySpec.to_zarr()` 109 | - `GroupSpec.from_zarr()` / `GroupSpec.to_zarr()` 110 | - `GroupSpec.to_flat()` / `GroupSpec.from_flat()` 111 | - `.like()` comparisons 112 | - All codec/compressor configurations 113 | - All Zarr v2 and v3 array properties 114 | 115 | ### What Changed 116 | 117 | - ❌ Generic type parameters no longer supported 118 | - ✅ `BaseGroupSpec` class added 119 | - ✅ Member types are now explicit unions 120 | - ✅ Cleaner separation of concerns (base group vs hierarchical group) 121 | 122 | ## Rationale 123 | 124 | The generic type parameters were: 125 | - Not validated at runtime 126 | - Complex to understand and use 127 | - Provided false confidence in type safety 128 | - Made error messages harder to read 129 | 130 | Removing them in favor of explicit union types: 131 | - Improves readability 132 | - Maintains full functionality 133 | - Reduces cognitive overhead 134 | - Enables better error messages 135 | 136 | The addition of `BaseGroupSpec`: 137 | - Clarifies intent when working with flat hierarchies 138 | - Enables efficient partial loading 139 | - Prevents accidental null checks 140 | - Improves code maintainability 141 | 142 | ## Testing 143 | 144 | The experimental module passes all the same tests as the main module, with the addition of new tests for `BaseGroupSpec` functionality. 145 | 146 | ## Feedback 147 | 148 | If you use this module and have feedback on the API changes, please open an issue on GitHub with your thoughts. 149 | -------------------------------------------------------------------------------- /src/pydantic_zarr/experimental/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping, Sequence 4 | from typing import ( 5 | TYPE_CHECKING, 6 | Any, 7 | Literal, 8 | TypeAlias, 9 | TypeVar, 10 | overload, 11 | ) 12 | 13 | import numpy as np 14 | import numpy.typing as npt 15 | from pydantic import BaseModel, ConfigDict 16 | 17 | if TYPE_CHECKING: 18 | import zarr 19 | from zarr.storage._common import StoreLike 20 | 21 | BaseAttributes = Mapping[str, object] | BaseModel 22 | 23 | IncEx: TypeAlias = set[int] | set[str] | dict[int, Any] | dict[str, Any] | None 24 | 25 | AccessMode: TypeAlias = Literal["w", "w+", "r", "a"] 26 | 27 | T = TypeVar("T") 28 | 29 | 30 | @overload 31 | def tuplify_json(obj: Mapping) -> Mapping: ... 32 | 33 | 34 | @overload 35 | def tuplify_json(obj: list) -> tuple: ... 36 | 37 | 38 | def tuplify_json(obj: object) -> object: 39 | """ 40 | Recursively converts lists within a Python object to tuples. 
41 | """ 42 | if isinstance(obj, list): 43 | return tuple(tuplify_json(elem) for elem in obj) 44 | elif isinstance(obj, dict): 45 | return {k: tuplify_json(v) for k, v in obj.items()} 46 | else: 47 | return obj 48 | 49 | 50 | class StrictBase(BaseModel): 51 | model_config = ConfigDict(frozen=True, extra="forbid") 52 | 53 | 54 | def parse_dtype_v2(value: npt.DTypeLike) -> str | list[tuple[Any, ...]]: 55 | """ 56 | Convert the input to a NumPy dtype and either return the ``str`` attribute of that 57 | object or, if the dtype is a structured dtype, return the fields of that dtype as a list 58 | of tuples. 59 | 60 | Parameters 61 | ---------- 62 | value : npt.DTypeLike 63 | A value that can be converted to a NumPy dtype. 64 | 65 | Returns 66 | ------- 67 | 68 | A Zarr V2-compatible encoding of the dtype. 69 | 70 | References 71 | ---------- 72 | See the [Zarr V2 specification](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding) 73 | for more details on this encoding of data types. 74 | """ 75 | # Assume that a non-string sequence represents a the Zarr V2 JSON form of a structured dtype. 76 | if isinstance(value, Sequence) and not isinstance(value, str): 77 | return [tuple(v) for v in value] 78 | else: 79 | np_dtype = np.dtype(value) 80 | if np_dtype.fields is not None: 81 | # This is a structured dtype, which must be converted to a list of tuples. Note that 82 | # this function recurses, because a structured dtype is parametrized by other dtypes. 83 | return [(k, parse_dtype_v2(v[0])) for k, v in np_dtype.fields.items()] 84 | else: 85 | return np_dtype.str 86 | 87 | 88 | def ensure_member_name(data: Any) -> str: 89 | """ 90 | If the input is a string, then ensure that it is a valid 91 | name for a subnode in a zarr group 92 | """ 93 | if isinstance(data, str): 94 | if "/" in data: 95 | raise ValueError( 96 | f'Strings containing "/" are invalid. Got {data}, which violates this rule.' 97 | ) 98 | if data in ("", ".", ".."): 99 | raise ValueError(f"The string {data} is not a valid member name.") 100 | return data 101 | raise TypeError(f"Expected a str, got {type(data)}.") 102 | 103 | 104 | def ensure_key_no_path(data: Any) -> Any: 105 | if isinstance(data, Mapping): 106 | [ensure_member_name(key) for key in data] 107 | return data 108 | 109 | 110 | def model_like(a: BaseModel, b: BaseModel, exclude: IncEx = None, include: IncEx = None) -> bool: 111 | """ 112 | A similarity check for a pair pydantic.BaseModel, parametrized over included or excluded fields. 113 | 114 | 115 | """ 116 | 117 | a_dict = a.model_dump(exclude=exclude, include=include) 118 | b_dict = b.model_dump(exclude=exclude, include=include) 119 | return json_eq(a_dict, b_dict) 120 | 121 | 122 | # TODO: expose contains_array and contains_group as public functions in zarr-python 123 | # and replace these custom implementations 124 | def maybe_node( 125 | store: StoreLike, path: str, *, zarr_format: Literal[2, 3] 126 | ) -> zarr.Array | zarr.Group | None: 127 | """ 128 | Return the array or group found at the store / path, if an array or group exists there. 129 | Otherwise return None. 
130 | """ 131 | from zarr.core.sync import sync 132 | from zarr.core.sync_group import get_node 133 | from zarr.storage._common import make_store_path 134 | 135 | # convert the storelike store argument to a Zarr store 136 | spath = sync(make_store_path(store, path=path)) 137 | try: 138 | return get_node(spath.store, spath.path, zarr_format=zarr_format) 139 | except FileNotFoundError: 140 | return None 141 | 142 | 143 | def ensure_multiple(data: Sequence[T]) -> Sequence[T]: 144 | """ 145 | Ensure that there is at least one element in the sequence 146 | """ 147 | if len(data) < 1: 148 | raise ValueError("Invalid length. Expected 1 or more, got 0.") 149 | return data 150 | 151 | 152 | def json_eq(a: object, b: object) -> bool: 153 | """ 154 | An equality check between python objects that recurses into dicts and sequences and ignores 155 | the difference between tuples and lists. Otherwise, it's just regular equality. Useful 156 | for comparing dicts that would become identical JSON, but where one has lists and the other 157 | has tuples. 158 | """ 159 | # treat lists & tuples as the same "sequence" type 160 | seq_types = (list, tuple) 161 | 162 | # both are sequences → compare element-wise 163 | if isinstance(a, seq_types) and isinstance(b, seq_types): 164 | return len(a) == len(b) and all(json_eq(x, y) for x, y in zip(a, b, strict=False)) 165 | 166 | # recurse into mappings 167 | if isinstance(a, Mapping) and isinstance(b, Mapping): 168 | return a.keys() == b.keys() and all(json_eq(a[k], b[k]) for k in a) 169 | 170 | # otherwise → regular equality 171 | return a == b 172 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # pydantic-zarr 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/pydantic-zarr)](https://pypi.python.org/pypi/pydantic-zarr) 4 | 5 | Type-safety for Zarr data. 6 | 7 | ## Overview 8 | 9 | `pydantic-zarr` expresses data stored in the [Zarr](https://zarr.readthedocs.io/en/stable/) format with [Pydantic](https://docs.pydantic.dev/1.10/). Specifically, `pydantic-zarr` encodes Zarr groups and arrays as [Pydantic models](https://docs.pydantic.dev/1.10/usage/models/). These models are useful for formalizing the structure of Zarr hierarchies, type-checking Zarr hierarchies, and runtime validation for Zarr-based data. 10 | 11 | ```python 12 | import zarr 13 | 14 | from pydantic_zarr.v2 import GroupSpec 15 | 16 | # create a Zarr group 17 | group = zarr.create_group(store={}, path='foo', zarr_format=2) 18 | # put an array inside the group 19 | array = zarr.create_array( 20 | store=group.store, name='foo/bar', shape=10, dtype='uint8', zarr_format=2 21 | ) 22 | array.attrs.put({'metadata': 'hello'}) 23 | 24 | # create a pydantic model to model the Zarr group 25 | spec = GroupSpec.from_zarr(group) 26 | print(spec.model_dump()) 27 | """ 28 | { 29 | 'zarr_format': 2, 30 | 'attributes': {}, 31 | 'members': { 32 | 'bar': { 33 | 'zarr_format': 2, 34 | 'attributes': {'metadata': 'hello'}, 35 | 'shape': (10,), 36 | 'chunks': (10,), 37 | 'dtype': '|u1', 38 | 'fill_value': 0, 39 | 'order': 'C', 40 | 'filters': None, 41 | 'dimension_separator': '.', 42 | 'compressor': {'id': 'zstd', 'level': 0}, 43 | } 44 | }, 45 | } 46 | """ 47 | ``` 48 | 49 | More examples can be found in the [usage guide](usage_zarr_v2.md). 
50 | 51 | ## Installation 52 | 53 | `pip install -U pydantic-zarr` 54 | 55 | ### Limitations 56 | 57 | #### No array data operations 58 | 59 | This library only provides tools to represent the _layout_ of Zarr groups and arrays, and the structure of their attributes. `pydantic-zarr` performs no type checking or runtime validation of the multidimensional array data contained _inside_ Zarr arrays, and `pydantic-zarr` does not contain any tools for efficiently reading or writing Zarr arrays. 60 | 61 | #### Supported Zarr versions 62 | 63 | This library supports [version 2](https://zarr.readthedocs.io/en/stable/spec/v2.html) of the Zarr format, with partial support for [Zarr v3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html). Progress towards complete support for Zarr v3 is tracked by [this issue](https://github.com/d-v-b/pydantic-zarr/issues/3). 64 | 65 | ## Design 66 | 67 | A Zarr group can be modeled as an object with two properties: 68 | 69 | - `attributes`: A dict-like object, with keys that are strings, values that are JSON-serializable. 70 | - `members`: A dict-like object, with keys that strings and values that are other Zarr groups, or Zarr arrays. 71 | 72 | A Zarr array can be modeled similarly, but without the `members` property (because Zarr arrays cannot contain Zarr groups or arrays), and with a set of array-specific properties like `shape`, `dtype`, etc. 73 | 74 | Note the use of the term "modeled": Zarr arrays are useful because they store N-dimensional array data, but `pydantic-zarr` does not treat that data as part of the "model" of a Zarr array. 75 | 76 | In `pydantic-zarr`, Zarr groups are modeled by the `GroupSpec` class, which is a [`Pydantic model`](https://docs.pydantic.dev/latest/concepts/models/) with two fields: 77 | 78 | - `attributes`: either a `Mapping` or a `pydantic.BaseModel`. 79 | - `members`: either a mapping with string keys and values that must be `GroupSpec` or `ArraySpec` instances, or the value `Null`. The use of nullability is explained in its own [section](#nullable-members). 80 | 81 | Zarr arrays are represented by the `ArraySpec` class, which has a similar `attributes` field, as well as fields for all the Zarr array properties (`dtype`, `shape`, `chunks`, etc). 82 | 83 | `GroupSpec` and `ArraySpec` are both [generic models](https://docs.pydantic.dev/1.10/usage/models/#generic-models). `GroupSpec` takes two type parameters, the first specializing the type of `GroupSpec.attributes`, and the second specializing the type of the _values_ of `GroupSpec.members` (the keys of `GroupSpec.members` are always strings). `ArraySpec` only takes one type parameter, which specializes the type of `ArraySpec.attributes`. 84 | 85 | Examples using this generic typing functionality can be found in the [usage guide](usage_zarr_v2.md#using-generic-types). 86 | 87 | ### Nullable `members` 88 | 89 | When a Zarr group has no members, a `GroupSpec` model of that Zarr group will have its `members` attribute set to the empty dict `{}`. But there are scenarios where the members of a Zarr group are unknown: 90 | 91 | - Some Zarr storage backends do not support directory listing, in which case it is possible to access a Zarr group and inspect its attributes, but impossible to discover its members. So the members of such a Zarr group are unknown. 92 | - Traversing a deeply nested large Zarr group on high latency storage can be slow. This can be mitigated by only partially traversing the hierarchy, e.g. only inspecting the root group and N subgroups. 
This defines a sub-hierarchy of the full hierarchy; leaf groups of this subtree by definition did not have their members checked, and so their members are unknown. 93 | - A Zarr hierarchy can be represented as a mapping `M` from paths to nodes (array or group). In this case, if `M["key"]` is a model of a Zarr group `G`, then `M["key/subkey"]` would encode a member of `G`. Since the key structure of the mapping `M` is doing the work of encoding the members of `G`, there is no value in `G` having a members attribute that claims anything about the members of `G`, and so `G.members` should be modeled as unknown. 94 | 95 | To handle these cases, `pydantic-zarr` allows the `members` attribute of a `GroupSpec` to be `Null`. 96 | 97 | ## Standardization 98 | 99 | The Zarr specifications do not define a model of the Zarr hierarchy. `pydantic-zarr` is an implementation of a particular model that can be found formalized in this [specification document](https://github.com/d-v-b/zeps/blob/zom/draft/ZEP0006.md), which has been proposed for inclusion in the Zarr specifications. You can find the discussion of that proposal in [this pull request](https://github.com/zarr-developers/zeps/pull/46). 100 | -------------------------------------------------------------------------------- /docs/release-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | 4 | 5 | ## pydantic-zarr 0.9.1 (2025-12-12) 6 | 7 | ### Features 8 | 9 | - Add `with_*` methods to the experimental `ArraySpec` and `GroupSpec` classes. 10 | These methods make it easier to create copies of existing `ArraySpec` and `GroupSpec` objects with 11 | new properties. ([#137](https://github.com/zarr-developers/pydantic-zarr/issues/137)) 12 | 13 | ### Bugfixes 14 | 15 | - Make V3 `ArraySpec` validation reject dimension names that don't match the dimensionality of the array. ([#136](https://github.com/zarr-developers/pydantic-zarr/issues/136)) 16 | - Fix a bug that prevented creation of `experimental.v3.GroupSpec` instances with typed members. ([#138](https://github.com/zarr-developers/pydantic-zarr/issues/138)) 17 | - Fix a bug where `experimental.v3.ArraySpec` instances took a default parameter for attributes. ([#138](https://github.com/zarr-developers/pydantic-zarr/issues/138)) 18 | 19 | 20 | ## pydantic-zarr 0.9.0 (2025-12-08) 21 | 22 | ### Breaking Changes 23 | 24 | This release contains breaking changes. See [Bugfixes](#090-bugfixes) 25 | 26 | ### Features 27 | 28 | - Make the zarr dependency optional to allow usage without installing zarr-python. ([#112](https://github.com/zarr-developers/pydantic-zarr/issues/112)) 29 | - Add experimental module with improved implementations of `ArraySpec` and `GroupSpec` classes. ([#120](https://github.com/zarr-developers/pydantic-zarr/issues/120)) 30 | 31 | ### Bugfixes 32 | 33 | - Remove default empty dictionary for attributes in ArraySpec and GroupSpec. This is a breaking change. 34 | To migrate your code, provide a value for the `attributes` argument when creating an `ArraySpec` or 35 | `GroupSpec`. ([#115](https://github.com/zarr-developers/pydantic-zarr/issues/115)). 36 | To replicate the previous default behaviour, pass an empty dictionary (`attributes={}`). 37 | - Fix a broken bare install by making the dependency on `packaging` explicit. ([#125](https://github.com/zarr-developers/pydantic-zarr/issues/125)) 38 | 39 | ### Improved Documentation 40 | 41 | - Update documentation URLs to point to pydantic-zarr.readthedocs.io. 
([#123](https://github.com/zarr-developers/pydantic-zarr/issues/123)) 42 | - Add `towncrier` for managing the changelog. ([#128](https://github.com/zarr-developers/pydantic-zarr/issues/128)) 43 | 44 | ### Misc 45 | 46 | - [#121](https://github.com/zarr-developers/pydantic-zarr/issues/121) 47 | 48 | ## pydantic-zarr 0.8.4 (2025-09-09) 49 | 50 | ### Bugfixes 51 | 52 | - Fix from_zarr for arrays with no dimension names. ([#108](https://github.com/zarr-developers/pydantic-zarr/issues/108)) 53 | 54 | ### Misc 55 | 56 | - Bump actions/setup-python from 5 to 6 in the actions group. ([#109](https://github.com/zarr-developers/pydantic-zarr/issues/109)) 57 | 58 | ## pydantic-zarr 0.8.3 (2025-08-28) 59 | 60 | ### Features 61 | 62 | - Correctly propagate dimension names from zarr arrays. ([#103](https://github.com/zarr-developers/pydantic-zarr/issues/103)) 63 | - Improve runtime type checking in from_flat(). ([#101](https://github.com/zarr-developers/pydantic-zarr/issues/101)) 64 | 65 | ### Bugfixes 66 | 67 | - Make typing of v2 from_flat() invariant. ([#100](https://github.com/zarr-developers/pydantic-zarr/issues/100)) 68 | 69 | ### Improved Documentation 70 | 71 | - Remove out of date disclaimer. ([#99](https://github.com/zarr-developers/pydantic-zarr/issues/99)) 72 | 73 | ### Misc 74 | 75 | - Bump actions/checkout from 4 to 5 in the actions group. ([#97](https://github.com/zarr-developers/pydantic-zarr/issues/97)) 76 | 77 | ## pydantic-zarr 0.8.2 (2025-08-14) 78 | 79 | ### Features 80 | 81 | - Disallow empty codecs and use a sane default in auto_codecs, allow codecs to be specified by strings. ([#95](https://github.com/zarr-developers/pydantic-zarr/issues/95)) 82 | 83 | ### Bugfixes 84 | 85 | - Fix GroupSpec.from_zarr typing. ([#91](https://github.com/zarr-developers/pydantic-zarr/issues/91)) 86 | 87 | ## pydantic-zarr 0.8.1 (2025-08-05) 88 | 89 | ### Features 90 | 91 | - Add a py.typed file for better type checking support. ([#87](https://github.com/zarr-developers/pydantic-zarr/issues/87)) 92 | 93 | ### Misc 94 | 95 | - Update cd workflow to use hatch. ([#85](https://github.com/zarr-developers/pydantic-zarr/issues/85)) 96 | - Remove GH actions doc action. ([#84](https://github.com/zarr-developers/pydantic-zarr/issues/84)) 97 | 98 | ## pydantic-zarr 0.8.0 (2025-07-30) 99 | 100 | ### Features 101 | 102 | - Use the JSON form of the fill value. ([#77](https://github.com/zarr-developers/pydantic-zarr/issues/77)) 103 | - Add types for order and dimension separator. ([#81](https://github.com/zarr-developers/pydantic-zarr/issues/81)) 104 | - Allow zarr Arrays in from_array(). ([#80](https://github.com/zarr-developers/pydantic-zarr/issues/80)) 105 | - Allow BaseModel in TBaseAttr. ([#78](https://github.com/zarr-developers/pydantic-zarr/issues/78)) 106 | 107 | ### Bugfixes 108 | 109 | - Fix readthedocs config. ([#83](https://github.com/zarr-developers/pydantic-zarr/issues/83)) 110 | 111 | ## pydantic-zarr 0.7.0 (2024-03-20) 112 | 113 | ### Bugfixes 114 | 115 | - Default chunks fix. ([#28](https://github.com/zarr-developers/pydantic-zarr/issues/28)) 116 | 117 | ## pydantic-zarr 0.6.0 (2024-03-03) 118 | 119 | ### Features 120 | 121 | - Add to_flat, from_flat, like, and better handling for existing arrays and groups. ([#25](https://github.com/zarr-developers/pydantic-zarr/issues/25)) 122 | 123 | ### Improved Documentation 124 | 125 | - Fix repo name in docs. 
([#26](https://github.com/zarr-developers/pydantic-zarr/issues/26)) 126 | 127 | ## pydantic-zarr 0.5.2 (2023-11-08) 128 | 129 | ### Features 130 | 131 | - Add Zarr V3 support. ([#17](https://github.com/zarr-developers/pydantic-zarr/issues/17)) 132 | 133 | ### Bugfixes 134 | 135 | - Various bugfixes. ([#18](https://github.com/zarr-developers/pydantic-zarr/issues/18)) 136 | 137 | ## pydantic-zarr 0.5.1 (2023-10-06) 138 | 139 | ### Bugfixes 140 | 141 | - Fix GroupSpec.from_zarr. ([#16](https://github.com/zarr-developers/pydantic-zarr/issues/16)) 142 | 143 | ## pydantic-zarr 0.5.0 (2023-08-22) 144 | 145 | ### Features 146 | 147 | - Rename items to members. ([#12](https://github.com/zarr-developers/pydantic-zarr/issues/12)) 148 | 149 | ### Improved Documentation 150 | 151 | - Fix doc link. ([#11](https://github.com/zarr-developers/pydantic-zarr/issues/11)) 152 | -------------------------------------------------------------------------------- /docs/experimental/index.md: -------------------------------------------------------------------------------- 1 | # experimental features 2 | 3 | ## Improved `GroupSpec` and `ArraySpec` classes 4 | 5 | We are developing a redesign of the `GroupSpec` and `ArraySpec` classes. These new classes can be found in the `experimental.v2` and `experimental.v3` modules for Zarr V2 and V3, respectively. Our 6 | design goal for these new classes is to make the models simpler, more explicit, and more expressive. 7 | 8 | Here's how we are doing that: 9 | 10 | ### Removing generic type parameters 11 | 12 | In `pydantic_zarr`, the `GroupSpec` and `ArraySpec` classes take generic type parameters. `GroupSpec[A, B]` models a Zarr group with attributes that must be instances of `A` and child nodes that must be instances of `B`. The generic type parameters offer concise class definitions but complicate type checking for pydantic, and they are also not strictly necessary for the `GroupSpec` and `ArraySpec` 13 | classes to do their jobs. So in `pydantic_zarr.experimental.v2` and `pydantic_zarr.experimental.v3` the `GroupSpec` and `ArraySpec` classes are not generic any more. They are just regular classes. 14 | 15 | Code like this: 16 | 17 | ```python 18 | from pydantic import BaseModel 19 | from pydantic_zarr.v3 import ArraySpec 20 | 21 | class AttrsType(BaseModel): 22 | a: int 23 | b: float 24 | 25 | MyArray = ArraySpec[AttrsType] 26 | print(MyArray) 27 | #> 28 | ``` 29 | 30 | becomes this: 31 | 32 | ```python 33 | from pydantic import BaseModel 34 | from pydantic_zarr.experimental.v3 import ArraySpec 35 | 36 | class AttrsType(BaseModel): 37 | a: int 38 | b: float 39 | 40 | class MyArray(ArraySpec): 41 | attributes: AttrsType 42 | 43 | print(MyArray) 44 | #> 45 | ``` 46 | 47 | ### A class hierarchy for Zarr Groups 48 | 49 | In `pydantic_zarr.v2` and `pydantic_zarr.v3`, the `members` attribute of the `GroupSpec` class is 50 | annotated as a union type with two variants: `Mapping` and `None`. `None` occurs in this union to handle the case where we want to model a Zarr group outside the context of any hierarchy, i.e. a situation where the `members` attribute would be undefined. 51 | 52 | The main place where this occurs is when we create a flattened representation of a Zarr hierarchy with the `to_flat` functions. `to_flat` takes a Zarr hierarchy (a tree) and converts it to a `key: value` data structure where the hierarchy information is encoded in the structure of the keys. 
After this transformation, it is redundant for 53 | the `GroupSpec` elements of the flattened Zarr hierarchy to carry their own representation of the hierarchy structure, as that information is completely specified by the keys. So when we flatten 54 | a `GroupSpec`, we set all the `members` attributes to `None`. 55 | 56 | But outside the context of flattening hierarchies, we need to handle the `None` variant in places where we are sure that the members are not `None`, and this is tedious. 57 | 58 | To solve this problem, instead of defining the `members` attribute as a union over two possible types, in `pydantic_zarr.experimental.v2` and `pydantic_zarr.experimental.v3` we define two classes for modelling Zarr groups. One class, `BaseGroupSpec`, 59 | narrowly models the structure of a Zarr Group. The `GroupSpec` class inherits from `BaseGroupSpec` and 60 | defines a new `members` attribute, which allows it to model Zarr groups that have information 61 | about the sub-groups and sub-arrays they contain. With this structure, `to_flat` can safely return a mapping from strings to `ArraySpec | BaseGroupSpec`, and `GroupSpec` instances don't need to handle the case where their `members` attribute is `None`. 62 | 63 | With `BaseGroupSpec`, type checkers and Pydantic can distinguish at definition-time whether a group should have members, eliminating runtime None-checks in code that knows members must exist. 64 | 65 | Ordinary `pydantic-zarr` usage should not be affected by the new class hierarchy for `GroupSpec` classes. The only time a user would create a `BaseGroupSpec` explicitly is when declaring a Zarr hierarchy in the flat form. Otherwise, `GroupSpec` is sufficient for all uses. 66 | 67 | ### More explicit modelling of Zarr groups 68 | 69 | Since `pydantic-zarr` started, the Python type system has become significantly more expressive. One very useful development has been improvements in the `TypedDict` class for modelling mappings with known keys and typed values. `TypedDict` is a perfect fit for modelling Zarr groups where the names 70 | of the members are part of the schema definition for that group. 71 | 72 | The `GroupSpec` classes defined in `pydantic_zarr.experimental` accept `TypedDict` annotations for their `members` attribute. As `pydantic` can validate values against a `TypedDict` type annotation, we get a very concise type check on the names of the members of a Zarr group. 
73 | 74 | ```python 75 | from typing_extensions import TypedDict 76 | from pydantic import BaseModel 77 | 78 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 79 | 80 | array = ArraySpec( 81 | shape=(1,), 82 | data_type='uint8', 83 | codecs=('bytes',), 84 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 85 | chunk_key_encoding = {"name": "default"}, 86 | fill_value = 0, 87 | attributes={} 88 | ) 89 | 90 | class MyMembers(TypedDict): 91 | a: ArraySpec 92 | b: ArraySpec 93 | 94 | class MyGroup(GroupSpec): 95 | members: MyMembers 96 | 97 | # validation fails: missing array named "b" 98 | try: 99 | MyGroup(members={"a": array}, attributes={}) 100 | except ValueError as e: 101 | print(e) 102 | """ 103 | 1 validation error for MyGroup 104 | members.b 105 | Field required [type=missing, input_value={'a': ArraySpec(zarr_form..., dimension_names=None)}, input_type=dict] 106 | For further information visit https://errors.pydantic.dev/2.11/v/missing 107 | """ 108 | 109 | # validation fails: extra array named "c" 110 | try: 111 | MyGroup(members={"a": array, "b": array, "c": array}, attributes={}) 112 | except ValueError as e: 113 | print(e) 114 | """ 115 | 1 validation error for MyGroup 116 | members.c 117 | Extra inputs are not permitted [type=extra_forbidden, input_value=ArraySpec(zarr_format=3, ...), dimension_names=None), input_type=ArraySpec] 118 | For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden 119 | """ 120 | 121 | # validation succeeds 122 | MyGroup(members={"a" : array, "b": array}, attributes={}) 123 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pydantic-zarr" 7 | dynamic = ["version"] 8 | description = 'Pydantic models for the Zarr file format' 9 | readme = { file = "README.md", content-type = "text/markdown" } 10 | requires-python = ">=3.11" 11 | license = "MIT" 12 | keywords = ["zarr", "pydantic"] 13 | authors = [{ name = "Davis Bennett", email = "davis.v.bennett@gmail.com" }] 14 | maintainers = [{ name = "David Stansby" }] 15 | 16 | 17 | classifiers = [ 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: Implementation :: CPython", 22 | ] 23 | dependencies = ["pydantic>2.0.0", "numpy>=1.24.0", "packaging>=21.0"] 24 | 25 | [project.urls] 26 | Documentation = "https://pydantic-zarr.readthedocs.io/" 27 | Issues = "https://github.com/zarr-developers/pydantic-zarr/issues" 28 | Source = "https://github.com/zarr-developers/pydantic-zarr" 29 | 30 | [project.optional-dependencies] 31 | zarr = ["zarr>=3.0.0"] 32 | # pytest pin is due to https://github.com/pytest-dev/pytest-cov/issues/693 33 | test-base = [ 34 | "coverage", 35 | "pytest<8.4", 36 | "pytest-cov", 37 | "pytest-examples", 38 | "xarray==2025.10.0", 39 | "dask==2025.11.0" 40 | ] 41 | test = ["pydantic-zarr[test-base,zarr]"] 42 | docs = [ 43 | "mkdocs-material", 44 | "mkdocstrings[python]", 45 | "pytest-examples", 46 | "pydantic==2.11", 47 | "zarr>=3.1.0", 48 | "towncrier", 49 | ] 50 | 51 | [dependency-groups] 52 | zarr = ["zarr>=3.0.0"] 53 | # pytest pin is due to https://github.com/pytest-dev/pytest-cov/issues/693 54 | test-base = [ 55 | "coverage", 56 | "pytest<8.4", 57 | 
"pytest-cov", 58 | "pytest-examples", 59 | "xarray==2025.10.0", 60 | "dask==2025.11.0", 61 | ] 62 | test = [ 63 | {include-group = "test-base"}, 64 | {include-group = "zarr"}, 65 | ] 66 | docs = [ 67 | "mkdocs-material", 68 | "mkdocstrings[python]", 69 | "pytest-examples", 70 | "pydantic==2.11", 71 | "zarr>=3.1.0", 72 | "towncrier", 73 | ] 74 | 75 | [tool.hatch] 76 | version.source = "vcs" 77 | build.hooks.vcs.version-file = "src/pydantic_zarr/_version.py" 78 | 79 | [tool.hatch.envs.test] 80 | features = ["test"] 81 | dependencies = [ 82 | "zarr~={matrix:zarr}", 83 | ] 84 | 85 | [tool.hatch.envs.test.scripts] 86 | test = "pytest tests/test_pydantic_zarr/" 87 | test-cov = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src tests/test_pydantic_zarr" 88 | list-env = "pip list" 89 | 90 | [[tool.hatch.envs.test.matrix]] 91 | python = ["3.11", "3.12", "3.13"] 92 | zarr = ["3.0.10", "3.1.0"] 93 | 94 | [tool.hatch.envs.test-base] 95 | features = ["test-base"] 96 | 97 | [tool.hatch.envs.test-base.scripts] 98 | test = "pytest tests/test_pydantic_zarr/" 99 | test-cov = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src tests/test_pydantic_zarr" 100 | list-env = "pip list" 101 | 102 | [[tool.hatch.envs.test-base.matrix]] 103 | python = ["3.11", "3.12", "3.13"] 104 | 105 | [tool.hatch.envs.docs] 106 | features = ['docs'] 107 | 108 | [tool.hatch.envs.docs.scripts] 109 | build = "mkdocs build --clean --strict" 110 | serve = "mkdocs serve --watch src" 111 | deploy = "mkdocs gh-deploy" 112 | test = "pytest tests/test_docs" 113 | 114 | [tool.hatch.envs.types] 115 | extra-dependencies = ["mypy>=1.0.0", "pydantic"] 116 | [tool.hatch.envs.types.scripts] 117 | check = "mypy --install-types --non-interactive {args:src/pydantic_zarr tests}" 118 | 119 | [tool.coverage.run] 120 | source_pkgs = ["pydantic_zarr", "tests"] 121 | branch = true 122 | parallel = true 123 | omit = ["src/pydantic_zarr/__about__.py"] 124 | 125 | [tool.coverage.paths] 126 | pydantic_zarr = ["src/pydantic_zarr", "*/pydantic-zarr/src/pydantic_zarr"] 127 | tests = ["tests", "*/pydantic-zarr/tests"] 128 | 129 | [tool.coverage.report] 130 | exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] 131 | 132 | [tool.ruff] 133 | line-length = 100 134 | force-exclude = true 135 | extend-exclude = [ 136 | ".bzr", 137 | ".direnv", 138 | ".eggs", 139 | ".git", 140 | ".mypy_cache", 141 | ".nox", 142 | ".pants.d", 143 | ".ruff_cache", 144 | ".venv", 145 | "__pypackages__", 146 | "_build", 147 | "buck-out", 148 | "build", 149 | "dist", 150 | "venv", 151 | "docs", 152 | ] 153 | 154 | [tool.ruff.lint] 155 | extend-select = [ 156 | "ANN", # flake8-annotations 157 | "B", # flake8-bugbear 158 | "EXE", # flake8-executable 159 | "C4", # flake8-comprehensions 160 | "FA", # flake8-future-annotations 161 | "FLY", # flynt 162 | "FURB", # refurb 163 | "G", # flake8-logging-format 164 | "I", # isort 165 | "ISC", # flake8-implicit-str-concat 166 | "LOG", # flake8-logging 167 | "PERF", # Perflint 168 | "PIE", # flake8-pie 169 | "PGH", # pygrep-hooks 170 | "PT", # flake8-pytest-style 171 | "PYI", # flake8-pyi 172 | "RET", # flake8-return 173 | "RSE", # flake8-raise 174 | "RUF", 175 | "SIM", # flake8-simplify 176 | "SLOT", # flake8-slots 177 | "TCH", # flake8-type-checking 178 | "TRY", # tryceratops 179 | "UP", # pyupgrade 180 | "W", # pycodestyle warnings 181 | ] 182 | ignore = [ 183 | "ANN401", 184 | "PT011", # TODO: apply this rule 185 | "PT012", # TODO: apply this rule 186 | "RET505", 187 | "RET506", 188 
| "RUF005", 189 | "SIM108", 190 | "TRY003", 191 | "UP038", # https://github.com/astral-sh/ruff/issues/7871 192 | # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules 193 | "W191", 194 | "E111", 195 | "E114", 196 | "E117", 197 | "D206", 198 | "D300", 199 | "Q000", 200 | "Q001", 201 | "Q002", 202 | "Q003", 203 | "COM812", 204 | "COM819", 205 | ] 206 | 207 | [tool.ruff.lint.extend-per-file-ignores] 208 | "tests/**" = ["ANN001", "ANN201", "RUF029", "SIM117", "SIM300"] 209 | 210 | [tool.mypy] 211 | python_version = "3.11" 212 | ignore_missing_imports = true 213 | namespace_packages = false 214 | warn_unreachable = true 215 | plugins = "pydantic.mypy" 216 | enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] 217 | strict = true 218 | # TODO: remove each of these and fix any errors: 219 | disallow_any_generics = false 220 | warn_return_any = false 221 | 222 | [tool.pytest.ini_options] 223 | minversion = "7" 224 | xfail_strict = true 225 | testpaths = ["tests"] 226 | log_cli_level = "INFO" 227 | addopts = ["--durations=10", "-ra", "--strict-config", "--strict-markers"] 228 | filterwarnings = [ 229 | "error", 230 | # https://github.com/zarr-developers/zarr-python/issues/2948 231 | "ignore:The `order` keyword argument has no effect for Zarr format 3 arrays:RuntimeWarning", 232 | ] 233 | 234 | [tool.repo-review] 235 | ignore = [ 236 | "PC111", # black formatting for docs, not sure if want, 237 | "PC180", # markdown formatter, no thanks 238 | "RTD100", # read the docs, no thanks, 239 | "RTD101", # read the docs, no thanks 240 | "RTD102", # read the docs, no thanks, 241 | "RTD103", # read the docs, no thanks 242 | ] 243 | 244 | [tool.numpydoc_validation] 245 | # See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks 246 | checks = [ 247 | "GL06", 248 | "GL07", 249 | # Currently broken; see https://github.com/numpy/numpydoc/issues/573 250 | # "GL09", 251 | "GL10", 252 | "SS02", 253 | "SS04", 254 | "PR02", 255 | "PR03", 256 | "PR05", 257 | "PR06", 258 | ] 259 | 260 | [tool.towncrier] 261 | directory = 'changes' 262 | filename = "docs/release-notes.md" 263 | title_format = "## {name} {version} ({project_date})" 264 | issue_format = "[#{issue}](https://github.com/zarr-developers/pydantic-zarr/issues/{issue})" 265 | start_string = "\n" 266 | -------------------------------------------------------------------------------- /docs/usage_zarr_v2.md: -------------------------------------------------------------------------------- 1 | # Usage (Zarr V2) 2 | 3 | ## Reading and writing a zarr hierarchy 4 | 5 | ### Reading 6 | 7 | The `GroupSpec` and `ArraySpec` classes represent Zarr v2 groups and arrays, respectively. To create an instance of a `GroupSpec` or `ArraySpec` from an existing Zarr group or array, pass the Zarr group / array to the `.from_zarr` method defined on the `GroupSpec` / `ArraySpec` classes. This will result in a `pydantic-zarr` model of the Zarr object. 8 | 9 | > By default `GroupSpec.from_zarr(zarr_group)` will traverse the entire hierarchy under `zarr_group`. This can be extremely slow if used on an extensive Zarr group on high latency storage. To limit the depth of traversal to a specific depth, use the `depth` keyword argument, e.g. `GroupSpec.from_zarr(zarr_group, depth=1)` 10 | 11 | Note that `from_zarr` will _not_ read the data inside an array. 
12 | 13 | ### Writing 14 | 15 | To write a hierarchy to some zarr-compatible storage backend, `GroupSpec` and `ArraySpec` have `to_zarr` methods that take a Zarr store and a path and return a Zarr array or group created in the store at the given path. 16 | 17 | Note that `to_zarr` will _not_ write any array data. You have to do this separately. 18 | 19 | ```python 20 | from zarr import create_array, create_group 21 | 22 | from pydantic_zarr.v2 import GroupSpec 23 | 24 | # create an in-memory Zarr group + array with attributes 25 | grp = create_group(store={}, path='foo', zarr_format=2) 26 | grp.attrs.put({'group_metadata': 10}) 27 | arr = create_array( 28 | name='foo/bar', store=grp.store, shape=(10,), dtype="f8", compressors=None, zarr_format=2 29 | ) 30 | arr.attrs.put({'array_metadata': True}) 31 | 32 | spec = GroupSpec.from_zarr(grp) 33 | print(spec.model_dump()) 34 | """ 35 | { 36 | 'zarr_format': 2, 37 | 'attributes': {'group_metadata': 10}, 38 | 'members': { 39 | 'bar': { 40 | 'zarr_format': 2, 41 | 'attributes': {'array_metadata': True}, 42 | 'shape': (10,), 43 | 'chunks': (10,), 44 | 'dtype': ' {'a': 100, 'b': 'metadata'} 69 | 70 | print(dict(group2['bar'].attrs)) 71 | #> {'array_metadata': True} 72 | ``` 73 | 74 | ### Creating from an array 75 | 76 | The `ArraySpec` class has a `from_array` static method that takes an array-like object and returns an `ArraySpec` with `shape` and `dtype` fields matching those of the array-like object. 77 | 78 | ```python 79 | import numpy as np 80 | 81 | from pydantic_zarr.v2 import ArraySpec 82 | 83 | print(ArraySpec.from_array(np.arange(10)).model_dump()) 84 | """ 85 | { 86 | 'zarr_format': 2, 87 | 'attributes': {}, 88 | 'shape': (10,), 89 | 'chunks': (10,), 90 | 'dtype': ' False 260 | 261 | # Returns True, because we exclude shape. 262 | print(arr_a.like(arr_b, exclude={'shape'})) 263 | #> True 264 | 265 | # `ArraySpec.like` will convert a zarr.Array to ArraySpec 266 | store = zarr.storage.MemoryStore() 267 | # This is a zarr.Array 268 | arr_a_stored = arr_a.to_zarr(store, path='arr_a') 269 | 270 | # arr_a is like the zarr.Array version of itself 271 | print(arr_a.like(arr_a_stored)) 272 | #> True 273 | 274 | # Returns False, because of mismatched shape 275 | print(arr_b.like(arr_a_stored)) 276 | #> False 277 | 278 | # Returns True, because we exclude shape. 279 | print(arr_b.like(arr_a_stored, exclude={'shape'})) 280 | #> True 281 | 282 | # The same thing, but for groups 283 | g_a = GroupSpec(attributes={'foo': 10}, members={'a': arr_a, 'b': arr_b}) 284 | g_b = GroupSpec(attributes={'foo': 11}, members={'a': arr_a, 'b': arr_b}) 285 | 286 | # g_a is like itself 287 | print(g_a.like(g_a)) 288 | #> True 289 | 290 | # Returns False, because of mismatched attributes 291 | print(g_a.like(g_b)) 292 | #> False 293 | 294 | # Returns True, because we ignore attributes 295 | print(g_a.like(g_b, exclude={'attributes'})) 296 | #> True 297 | 298 | # g_a is like its zarr.Group counterpart 299 | print(g_a.like(g_a.to_zarr(store, path='g_a'))) 300 | #> True 301 | ``` 302 | 303 | ## Creating from an array 304 | 305 | The `ArraySpec` class has a `from_array` static method that takes an array-like object and returns an `ArraySpec` with `shape` and `dtype` fields matching those of the array-like object. 
306 | 307 | ```python 308 | import numpy as np 309 | 310 | from pydantic_zarr.v2 import ArraySpec 311 | 312 | print(ArraySpec.from_array(np.arange(10)).model_dump()) 313 | """ 314 | { 315 | 'zarr_format': 2, 316 | 'attributes': {}, 317 | 'shape': (10,), 318 | 'chunks': (10,), 319 | 'dtype': ' None: 34 | """ 35 | Test that the `dimension_names` attribute is rejected if any of the following are true: 36 | - it is a sequence with length different from the number of dimensions of the array 37 | - it is a sequence containing values other than strings or `None`. 38 | - it is neither a valid sequence nor the value `None`. 39 | """ 40 | base_array: AnyArraySpec = ArraySpec( 41 | shape=(1, 2, 3), 42 | data_type="int8", 43 | codecs=({"name": "bytes"},), 44 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1, 2, 3)}}, 45 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 46 | fill_value=0, 47 | attributes={}, 48 | ) 49 | with pytest.raises(ValidationError): 50 | ArraySpec(**(base_array.model_dump() | {"dimension_names": invalid_dimension_names})) 51 | 52 | 53 | def test_serialize_deserialize() -> None: 54 | array_attributes = {"foo": 42, "bar": "apples", "baz": [1, 2, 3, 4]} 55 | 56 | group_attributes = {"group": True} 57 | 58 | array_spec = ArraySpec( 59 | attributes=array_attributes, 60 | shape=[1000, 1000], 61 | dimension_names=["rows", "columns"], 62 | data_type="float64", 63 | chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": [1000, 100]}), 64 | chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}), 65 | codecs=[NamedConfig(name="GZip", configuration={"level": 1})], 66 | fill_value="NaN", 67 | storage_transformers=[], 68 | ) 69 | 70 | GroupSpec(attributes=group_attributes, members={"array": array_spec}) 71 | 72 | 73 | def test_from_array() -> None: 74 | array = np.arange(10) 75 | array_spec = ArraySpec.from_array(array) 76 | 77 | assert array_spec == ArraySpec( 78 | zarr_format=3, 79 | node_type="array", 80 | attributes={}, 81 | shape=(10,), 82 | data_type="int64", 83 | chunk_grid=RegularChunking( 84 | name="regular", configuration=RegularChunkingConfig(chunk_shape=(10,)) 85 | ), 86 | chunk_key_encoding=DefaultChunkKeyEncoding( 87 | name="default", configuration=DefaultChunkKeyEncodingConfig(separator="/") 88 | ), 89 | fill_value=0, 90 | codecs=auto_codecs(array), 91 | storage_transformers=(), 92 | dimension_names=None, 93 | ) 94 | # check that we can write this array to zarr 95 | # TODO: fix type of the store argument in to_zarr 96 | if not ZARR_AVAILABLE: 97 | return 98 | array_spec.to_zarr(store={}, path="") # type: ignore[arg-type] 99 | 100 | 101 | def test_arrayspec_no_empty_codecs() -> None: 102 | """ 103 | Ensure that it is not possible to create an ArraySpec with no codecs 104 | """ 105 | 106 | with pytest.raises( 107 | ValidationError, match="Value error, Invalid length. Expected 1 or more, got 0." 
108 | ): 109 | ArraySpec( 110 | shape=(1,), 111 | data_type="uint8", 112 | codecs=[], 113 | attributes={}, 114 | fill_value=0, 115 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 116 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 117 | ) 118 | 119 | 120 | @pytest.mark.filterwarnings("ignore:The dtype:UserWarning") 121 | @pytest.mark.filterwarnings("ignore:The data type:FutureWarning") 122 | @pytest.mark.filterwarnings("ignore:The codec:UserWarning") 123 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 124 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 125 | """ 126 | Test that deserializing an ArraySpec from a zarr python store works as expected. 127 | """ 128 | zarr = pytest.importorskip("zarr") 129 | store = {} 130 | 131 | data_type = dtype_example.name 132 | 133 | if data_type == "variable_length_bytes": 134 | pytest.skip( 135 | reason="Bug in zarr python: see https://github.com/zarr-developers/zarr-python/issues/3263" 136 | ) 137 | 138 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=3) 139 | 140 | arr_spec = ArraySpec.from_zarr(arr) 141 | assert arr_spec.model_dump() == json.loads( 142 | store["zarr.json"].to_bytes(), object_hook=tuplify_json 143 | ) 144 | 145 | 146 | @pytest.mark.parametrize("path", ["", "foo"]) 147 | @pytest.mark.parametrize("overwrite", [True, False]) 148 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 149 | @pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True, "order": "F"}]) 150 | @pytest.mark.filterwarnings("ignore:The codec `vlen-utf8`:UserWarning") 151 | @pytest.mark.filterwarnings("ignore:The codec `vlen-bytes`:UserWarning") 152 | @pytest.mark.filterwarnings("ignore:The data type :FutureWarning") 153 | def test_arrayspec_to_zarr( 154 | path: str, overwrite: bool, config: dict[str, object], dtype_example: DTypeExample 155 | ) -> None: 156 | """ 157 | Test that serializing an ArraySpec to a zarr python store works as expected. 158 | """ 159 | data_type = dtype_example.name 160 | fill_value = dtype_example.fill_value 161 | 162 | codecs = ({"name": "bytes", "configuration": {}},) 163 | if data_type == "variable_length_bytes": 164 | codecs = ({"name": "vlen-bytes"},) 165 | 166 | elif data_type in ("str", "string"): 167 | codecs = ({"name": "vlen-utf8"},) 168 | 169 | store = {} 170 | 171 | arr_spec = ArraySpec( 172 | attributes={}, 173 | shape=(10,), 174 | data_type=data_type, 175 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (10,)}}, 176 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 177 | codecs=codecs, 178 | fill_value=fill_value, 179 | dimension_names=("x",), 180 | ) 181 | if not ZARR_AVAILABLE: 182 | return 183 | arr = arr_spec.to_zarr(store=store, path=path, overwrite=overwrite, config=config) 184 | assert arr._async_array.metadata == arr._async_array.metadata 185 | for key, value in config.items(): 186 | assert asdict(arr._async_array._config)[key] == value 187 | 188 | 189 | def get_flat_example() -> tuple[dict[str, AnyArraySpec | AnyGroupSpec], AnyGroupSpec]: 190 | """ 191 | Get example data for testing to_flat and from_flat. 192 | 193 | The returned value is a tuple with two elements: a flattened dict representation of a hierarchy, 194 | and the root group, with all of its members (i.e., the non-flat version of that hierarchy). 195 | """ 196 | named_nodes: tuple[AnyArraySpec | AnyGroupSpec, ...] 
= ( 197 | GroupSpec(attributes={"name": ""}, members=None), 198 | ArraySpec.from_array(np.arange(10), attributes={"name": "/a1"}), 199 | GroupSpec(attributes={"name": "/g1"}, members=None), 200 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/a2"}), 201 | GroupSpec(attributes={"name": "/g1/g2"}, members=None), 202 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/g2/a3"}), 203 | ) 204 | 205 | members_flat: dict[str, AnyArraySpec | AnyGroupSpec] = { 206 | a.attributes["name"]: a for a in named_nodes 207 | } 208 | g2 = members_flat["/g1/g2"].model_copy(update={"members": {"a3": members_flat["/g1/g2/a3"]}}) 209 | g1 = members_flat["/g1"].model_copy( 210 | update={"members": {"a2": members_flat["/g1/a2"], "g2": g2}} 211 | ) 212 | root = members_flat[""].model_copy(update={"members": {"g1": g1, "a1": members_flat["/a1"]}}) 213 | return members_flat, root 214 | 215 | 216 | class TestGroupSpec: 217 | @staticmethod 218 | def test_to_flat() -> None: 219 | """ 220 | Test that the to_flat method generates a flat representation of the hierarchy 221 | """ 222 | 223 | members_flat, root = get_flat_example() 224 | observed = root.to_flat() 225 | assert observed == members_flat 226 | 227 | @staticmethod 228 | def test_from_flat() -> None: 229 | """ 230 | Test that the from_flat method generates a `GroupSpec` from a flat representation of the 231 | hierarchy 232 | """ 233 | members_flat, root = get_flat_example() 234 | assert GroupSpec.from_flat(members_flat) == root 235 | 236 | @staticmethod 237 | def test_from_zarr_depth() -> None: 238 | zarr = pytest.importorskip("zarr") 239 | codecs = ({"name": "bytes", "configuration": {}},) 240 | tree: dict[str, AnyGroupSpec | AnyArraySpec] = { 241 | "": GroupSpec(members=None, attributes={"level": 0, "type": "group"}), 242 | "/1": GroupSpec(members=None, attributes={"level": 1, "type": "group"}), 243 | "/1/2": GroupSpec(members=None, attributes={"level": 2, "type": "group"}), 244 | "/1/2/1": GroupSpec(members=None, attributes={"level": 3, "type": "group"}), 245 | "/1/2/2": ArraySpec.from_array( 246 | np.arange(20), attributes={"level": 3, "type": "array"}, codecs=codecs 247 | ), 248 | } 249 | store = zarr.storage.MemoryStore() 250 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 251 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 252 | assert group_in_0 == tree[""] 253 | 254 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 255 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 256 | assert group_in_1.members is not None 257 | assert group_in_1.members["1"] == tree["/1"] 258 | 259 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 260 | assert group_in_2.members is not None 261 | assert group_in_2.members["1"].members["2"] == tree["/1/2"] 262 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 263 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 264 | 265 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 266 | assert group_in_3.members is not None 267 | assert group_in_3.members["1"].members["2"].members["1"] == tree["/1/2/1"] 268 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 269 | assert group_in_3.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 270 | assert group_in_3.members["1"].members["2"].attributes 
== tree["/1/2"].attributes # type: ignore[attr-defined] 271 | 272 | 273 | def test_mix_v3_v2_fails() -> None: 274 | from pydantic_zarr.v2 import ArraySpec as ArraySpecv2 275 | 276 | members_flat = {"/a": ArraySpecv2.from_array(np.ones(1))} 277 | with pytest.raises( 278 | ValueError, 279 | match=re.escape( 280 | "Value at '/a' is not a v3 ArraySpec or GroupSpec (got type(value)=)" 281 | ), 282 | ): 283 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 284 | 285 | 286 | @pytest.mark.parametrize( 287 | ("args", "kwargs", "expected_names"), 288 | [ 289 | ((1,), {"dimension_names": ["x"]}, ("x",)), 290 | ((1,), {}, None), 291 | ], 292 | ) 293 | def test_dim_names_from_zarr_array( 294 | args: tuple, kwargs: dict, expected_names: tuple[str, ...] | None 295 | ) -> None: 296 | zarr = pytest.importorskip("zarr") 297 | 298 | arr = zarr.zeros(*args, **kwargs) 299 | spec: AnyArraySpec = ArraySpec.from_zarr(arr) 300 | assert spec.dimension_names == expected_names 301 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/test_v3.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import re 5 | from dataclasses import asdict 6 | 7 | import numpy as np 8 | import pytest 9 | from pydantic import ValidationError 10 | from typing_extensions import TypedDict 11 | 12 | from pydantic_zarr.experimental.core import json_eq 13 | from pydantic_zarr.experimental.v3 import ( 14 | ArraySpec, 15 | BaseGroupSpec, 16 | DefaultChunkKeyEncoding, 17 | DefaultChunkKeyEncodingConfig, 18 | GroupSpec, 19 | NamedConfig, 20 | RegularChunking, 21 | RegularChunkingConfig, 22 | auto_codecs, 23 | ) 24 | 25 | from ..conftest import DTYPE_EXAMPLES_V3, ZARR_AVAILABLE, DTypeExample 26 | 27 | 28 | @pytest.mark.parametrize("invalid_dimension_names", [[], "hi", ["1", 2, None]], ids=str) 29 | def test_dimension_names_validation(invalid_dimension_names: object) -> None: 30 | """ 31 | Test that the `dimension_names` attribute is rejected if any of the following are true: 32 | - it is a sequence with length different from the number of dimensions of the array 33 | - it is a sequence containing values other than strings or `None`. 34 | - it is neither a valid sequence nor the value `None`. 
35 | """ 36 | base_array = ArraySpec( 37 | shape=(1, 2, 3), 38 | data_type="int8", 39 | codecs=({"name": "bytes"},), 40 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1, 2, 3)}}, 41 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 42 | fill_value=0, 43 | attributes={}, 44 | ) 45 | with pytest.raises(ValidationError): 46 | ArraySpec(**(base_array.model_dump() | {"dimension_names": invalid_dimension_names})) 47 | 48 | 49 | def test_serialize_deserialize() -> None: 50 | array_attributes = {"foo": 42, "bar": "apples", "baz": [1, 2, 3, 4]} 51 | 52 | group_attributes = {"group": True} 53 | 54 | array_spec = ArraySpec( 55 | attributes=array_attributes, 56 | shape=(1000, 1000), 57 | dimension_names=("rows", "columns"), 58 | data_type="float64", 59 | chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": (1000, 100)}), 60 | chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}), 61 | codecs=(NamedConfig(name="GZip", configuration={"level": 1}),), 62 | fill_value="NaN", 63 | storage_transformers=(), 64 | ) 65 | 66 | GroupSpec(attributes=group_attributes, members={"array": array_spec}) 67 | 68 | 69 | def test_from_array() -> None: 70 | array = np.arange(10) 71 | array_spec = ArraySpec.from_array(array) 72 | 73 | assert array_spec == ArraySpec( 74 | zarr_format=3, 75 | node_type="array", 76 | attributes={}, 77 | shape=(10,), 78 | data_type="int64", 79 | chunk_grid=RegularChunking( 80 | name="regular", configuration=RegularChunkingConfig(chunk_shape=(10,)) 81 | ), 82 | chunk_key_encoding=DefaultChunkKeyEncoding( 83 | name="default", configuration=DefaultChunkKeyEncodingConfig(separator="/") 84 | ), 85 | fill_value=0, 86 | codecs=auto_codecs(array), 87 | storage_transformers=(), 88 | dimension_names=None, 89 | ) 90 | # check that we can write this array to zarr 91 | # TODO: fix type of the store argument in to_zarr 92 | if not ZARR_AVAILABLE: 93 | return 94 | array_spec.to_zarr(store={}, path="") # type: ignore[arg-type] 95 | 96 | 97 | def test_arrayspec_no_empty_codecs() -> None: 98 | """ 99 | Ensure that it is not possible to create an ArraySpec with no codecs 100 | """ 101 | 102 | with pytest.raises( 103 | ValidationError, match="Value error, Invalid length. Expected 1 or more, got 0." 104 | ): 105 | ArraySpec( 106 | shape=(1,), 107 | data_type="uint8", 108 | codecs=[], 109 | attributes={}, 110 | fill_value=0, 111 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 112 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 113 | ) 114 | 115 | 116 | @pytest.mark.filterwarnings("ignore:The dtype:UserWarning") 117 | @pytest.mark.filterwarnings("ignore:The data type:FutureWarning") 118 | @pytest.mark.filterwarnings("ignore:The codec:UserWarning") 119 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 120 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 121 | """ 122 | Test that deserializing an ArraySpec from a zarr python store works as expected. 
123 | """ 124 | zarr = pytest.importorskip("zarr") 125 | store = {} 126 | 127 | data_type = dtype_example.name 128 | 129 | if data_type == "variable_length_bytes": 130 | pytest.skip( 131 | reason="Bug in zarr python: see https://github.com/zarr-developers/zarr-python/issues/3263" 132 | ) 133 | 134 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=3) 135 | 136 | arr_spec = ArraySpec.from_zarr(arr) 137 | assert json_eq(arr_spec.model_dump(), json.loads(store["zarr.json"].to_bytes())) 138 | 139 | 140 | @pytest.mark.parametrize("path", ["", "foo"]) 141 | @pytest.mark.parametrize("overwrite", [True, False]) 142 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 143 | @pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True, "order": "F"}]) 144 | @pytest.mark.filterwarnings("ignore:The codec `vlen-utf8`:UserWarning") 145 | @pytest.mark.filterwarnings("ignore:The codec `vlen-bytes`:UserWarning") 146 | @pytest.mark.filterwarnings("ignore:The data type :FutureWarning") 147 | def test_arrayspec_to_zarr( 148 | path: str, overwrite: bool, config: dict[str, object], dtype_example: DTypeExample 149 | ) -> None: 150 | """ 151 | Test that serializing an ArraySpec to a zarr python store works as expected. 152 | """ 153 | data_type = dtype_example.name 154 | fill_value = dtype_example.fill_value 155 | 156 | codecs = ({"name": "bytes", "configuration": {}},) 157 | if data_type == "variable_length_bytes": 158 | codecs = ({"name": "vlen-bytes"},) 159 | 160 | elif data_type in ("str", "string"): 161 | codecs = ({"name": "vlen-utf8"},) 162 | 163 | store = {} 164 | 165 | arr_spec = ArraySpec( 166 | attributes={}, 167 | shape=(10,), 168 | data_type=data_type, 169 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (10,)}}, 170 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 171 | codecs=codecs, 172 | fill_value=fill_value, 173 | dimension_names=("x",), 174 | ) 175 | if not ZARR_AVAILABLE: 176 | return 177 | arr = arr_spec.to_zarr(store=store, path=path, overwrite=overwrite, config=config) 178 | assert arr._async_array.metadata == arr._async_array.metadata 179 | for key, value in config.items(): 180 | assert asdict(arr._async_array._config)[key] == value 181 | 182 | 183 | def get_flat_example() -> tuple[dict[str, ArraySpec | GroupSpec], GroupSpec]: 184 | """ 185 | Get example data for testing to_flat and from_flat. 186 | 187 | The returned value is a tuple with two elements: a flattened dict representation of a hierarchy, 188 | and the root group, with all of its members (i.e., the non-flat version of that hierarchy). 189 | """ 190 | named_nodes: tuple[ArraySpec | BaseGroupSpec, ...] 
= ( 191 | BaseGroupSpec(attributes={"name": ""}), 192 | ArraySpec.from_array(np.arange(10), attributes={"name": "/a1"}), 193 | BaseGroupSpec(attributes={"name": "/g1"}), 194 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/a2"}), 195 | BaseGroupSpec(attributes={"name": "/g1/g2"}), 196 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/g2/a3"}), 197 | ) 198 | 199 | members_flat: dict[str, ArraySpec | BaseGroupSpec] = { 200 | a.attributes["name"]: a for a in named_nodes 201 | } 202 | g2 = GroupSpec( 203 | attributes=members_flat["/g1/g2"].attributes, members={"a3": members_flat["/g1/g2/a3"]} 204 | ) 205 | g1 = GroupSpec( 206 | attributes=members_flat["/g1"].attributes, members={"a2": members_flat["/g1/a2"], "g2": g2} 207 | ) 208 | root = GroupSpec( 209 | attributes=members_flat[""].attributes, members={"g1": g1, "a1": members_flat["/a1"]} 210 | ) 211 | return members_flat, root 212 | 213 | 214 | class TestGroupSpec: 215 | @staticmethod 216 | def test_to_flat() -> None: 217 | """ 218 | Test that the to_flat method generates a flat representation of the hierarchy 219 | """ 220 | 221 | members_flat, root = get_flat_example() 222 | observed = root.to_flat() 223 | assert observed == members_flat 224 | 225 | @staticmethod 226 | def test_from_flat() -> None: 227 | """ 228 | Test that the from_flat method generates a `GroupSpec` from a flat representation of the 229 | hierarchy 230 | """ 231 | members_flat, root = get_flat_example() 232 | assert GroupSpec.from_flat(members_flat).attributes == root.attributes 233 | 234 | @staticmethod 235 | def test_from_zarr_depth() -> None: 236 | zarr = pytest.importorskip("zarr") 237 | codecs = ({"name": "bytes", "configuration": {}},) 238 | tree: dict[str, BaseGroupSpec | ArraySpec] = { 239 | "": BaseGroupSpec(attributes={"level": 0, "type": "group"}), 240 | "/1": BaseGroupSpec(attributes={"level": 1, "type": "group"}), 241 | "/1/2": BaseGroupSpec(attributes={"level": 2, "type": "group"}), 242 | "/1/2/1": BaseGroupSpec(attributes={"level": 3, "type": "group"}), 243 | "/1/2/2": ArraySpec.from_array( 244 | np.arange(20), attributes={"level": 3, "type": "array"}, codecs=codecs 245 | ), 246 | } 247 | store = zarr.storage.MemoryStore() 248 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 249 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 250 | assert group_in_0.attributes == tree[""].attributes 251 | 252 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 253 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 254 | assert group_in_1.members is not None 255 | assert group_in_1.members["1"].attributes == tree["/1"].attributes 256 | 257 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 258 | assert group_in_2.members is not None 259 | assert group_in_2.members["1"].members["2"].attributes == tree["/1/2"].attributes 260 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 261 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 262 | 263 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 264 | assert group_in_3.members is not None 265 | assert ( 266 | group_in_3.members["1"].members["2"].members["1"].attributes 267 | == tree["/1/2/1"].attributes 268 | ) 269 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 270 | assert group_in_3.members["1"].attributes 
== tree["/1"].attributes # type: ignore[attr-defined] 271 | assert group_in_3.members["1"].members["2"].attributes == tree["/1/2"].attributes # type: ignore[attr-defined] 272 | 273 | 274 | def test_mix_v3_v2_fails() -> None: 275 | from pydantic_zarr.v2 import ArraySpec as ArraySpecv2 276 | 277 | members_flat = {"/a": ArraySpecv2.from_array(np.ones(1))} 278 | with pytest.raises( 279 | ValueError, 280 | match=re.escape( 281 | "Value at '/a' is not a v3 ArraySpec or BaseGroupSpec (got type(value)=)" 282 | ), 283 | ): 284 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 285 | 286 | 287 | @pytest.mark.parametrize( 288 | ("args", "kwargs", "expected_names"), 289 | [ 290 | ((1,), {"dimension_names": ["x"]}, ("x",)), 291 | ((1,), {}, None), 292 | ], 293 | ) 294 | def test_dim_names_from_zarr_array( 295 | args: tuple, kwargs: dict, expected_names: tuple[str, ...] | None 296 | ) -> None: 297 | zarr = pytest.importorskip("zarr") 298 | 299 | arr = zarr.zeros(*args, **kwargs) 300 | spec: ArraySpec = ArraySpec.from_zarr(arr) 301 | assert spec.dimension_names == expected_names 302 | 303 | 304 | @pytest.mark.skipif(not ZARR_AVAILABLE, reason="zarr-python is not installed") 305 | def test_typed_members() -> None: 306 | """ 307 | Test GroupSpec creation with typed members 308 | """ 309 | 310 | array1d = ArraySpec( 311 | shape=(1,), 312 | data_type="uint8", 313 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 314 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 315 | fill_value=0, 316 | codecs=({"name": "bytes"},), 317 | attributes={}, 318 | ) 319 | 320 | class DatasetMembers(TypedDict): 321 | x: ArraySpec 322 | y: ArraySpec 323 | 324 | class DatasetGroup(GroupSpec): 325 | members: DatasetMembers 326 | 327 | class ExpectedMembers(TypedDict): 328 | r10m: DatasetGroup 329 | r20m: DatasetGroup 330 | 331 | class ExpectedGroup(GroupSpec): 332 | members: ExpectedMembers 333 | 334 | flat = { 335 | "": BaseGroupSpec(attributes={}), 336 | "/r10m": BaseGroupSpec(attributes={}), 337 | "/r20m": BaseGroupSpec(attributes={}), 338 | "/r10m/x": array1d, 339 | "/r10m/y": array1d, 340 | "/r20m/x": array1d, 341 | "/r20m/y": array1d, 342 | } 343 | 344 | zg = GroupSpec.from_flat(flat).to_zarr({}, path="") 345 | ExpectedGroup.from_zarr(zg) 346 | 347 | 348 | def test_arrayspec_with_methods() -> None: 349 | """ 350 | Test that ArraySpec with_* methods create new validated copies 351 | """ 352 | original = ArraySpec.from_array(np.arange(10), attributes={"foo": "bar"}) 353 | 354 | # Test with_attributes 355 | new_attrs = original.with_attributes({"baz": "qux"}) 356 | assert new_attrs.attributes == {"baz": "qux"} 357 | assert original.attributes == {"foo": "bar"} # Original unchanged 358 | assert new_attrs is not original 359 | 360 | # Test with_shape 361 | new_shape = original.with_shape((20,)) 362 | assert new_shape.shape == (20,) 363 | assert original.shape == (10,) 364 | 365 | # Test with_data_type 366 | new_dtype = original.with_data_type("float32") 367 | assert new_dtype.data_type == "float32" 368 | assert original.data_type == "int64" 369 | 370 | # Test with_chunk_grid 371 | new_grid = original.with_chunk_grid({"name": "regular", "configuration": {"chunk_shape": (5,)}}) 372 | assert new_grid.chunk_grid["configuration"]["chunk_shape"] == (5,) # type: ignore[index] 373 | assert original.chunk_grid["configuration"]["chunk_shape"] == (10,) # type: ignore[index] 374 | 375 | # Test with_chunk_key_encoding 376 | new_encoding = original.with_chunk_key_encoding( 377 | 
{"name": "default", "configuration": {"separator": "."}} 378 | ) 379 | assert new_encoding.chunk_key_encoding["configuration"]["separator"] == "." # type: ignore[index] 380 | assert original.chunk_key_encoding["configuration"]["separator"] == "/" # type: ignore[index] 381 | 382 | # Test with_fill_value 383 | new_fill = original.with_fill_value(999) 384 | assert new_fill.fill_value == 999 385 | assert original.fill_value == 0 386 | 387 | # Test with_codecs 388 | new_codecs = original.with_codecs(({"name": "gzip", "configuration": {"level": 1}},)) 389 | assert len(new_codecs.codecs) == 1 390 | assert new_codecs.codecs[0]["name"] == "gzip" # type: ignore[index] 391 | 392 | # Test with_storage_transformers 393 | new_transformers = original.with_storage_transformers(({"name": "test", "configuration": {}},)) 394 | assert len(new_transformers.storage_transformers) == 1 395 | assert original.storage_transformers == () 396 | 397 | # Test with_dimension_names 398 | new_dims = original.with_dimension_names(("x",)) 399 | assert new_dims.dimension_names == ("x",) 400 | assert original.dimension_names is None 401 | 402 | 403 | def test_arrayspec_with_methods_validation() -> None: 404 | """ 405 | Test that ArraySpec with_* methods trigger validation 406 | """ 407 | spec = ArraySpec.from_array(np.arange(10), attributes={}) 408 | 409 | # Test that validation fails when dimension_names length doesn't match shape 410 | with pytest.raises(ValidationError, match="Invalid `dimension names` attribute"): 411 | spec.with_dimension_names(("x", "y")) # 2 names for 1D array 412 | 413 | # Test that validation fails with empty codecs 414 | with pytest.raises(ValidationError, match="Invalid length. Expected 1 or more, got 0"): 415 | spec.with_codecs(()) 416 | 417 | 418 | def test_groupspec_with_methods() -> None: 419 | """ 420 | Test that GroupSpec with_* methods create new validated copies 421 | """ 422 | array_spec = ArraySpec.from_array(np.arange(10), attributes={}) 423 | original = GroupSpec(attributes={"group": "attr"}, members={"arr": array_spec}) 424 | 425 | # Test with_attributes 426 | new_attrs = original.with_attributes({"new": "attr"}) 427 | assert new_attrs.attributes == {"new": "attr"} 428 | assert original.attributes == {"group": "attr"} # Original unchanged 429 | assert new_attrs is not original 430 | 431 | # Test with_members 432 | new_array = ArraySpec.from_array(np.arange(5), attributes={}) 433 | new_members = original.with_members({"new_arr": new_array}) 434 | assert "new_arr" in new_members.members 435 | assert "arr" not in new_members.members # Replacement, not merge 436 | assert "arr" in original.members # Original unchanged 437 | 438 | 439 | def test_groupspec_with_members_validation() -> None: 440 | """ 441 | Test that GroupSpec with_members triggers validation 442 | """ 443 | spec = GroupSpec(attributes={}, members={}) 444 | 445 | # Test that validation fails with invalid member names 446 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid'): 447 | spec.with_members({"a/b": ArraySpec.from_array(np.arange(10), attributes={})}) 448 | -------------------------------------------------------------------------------- /docs/experimental/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | This page demonstrates how to use the experimental `ArraySpec` and `GroupSpec` models for Zarr V2 and V3. 4 | 5 | ## Creating an `ArraySpec` 6 | 7 | The `ArraySpec` model represents Zarr array metadata. 
8 | 9 | === "Zarr V2" 10 | 11 | ```python 12 | from pydantic_zarr.experimental.v2 import ArraySpec 13 | 14 | # Create a simple array specification 15 | array = ArraySpec( 16 | shape=(1000, 1000), 17 | dtype='uint8', 18 | chunks=(100, 100), 19 | attributes={'description': 'my array', 'units': 'meters'} 20 | ) 21 | 22 | # Get the model as a JSON string 23 | spec_json = array.model_dump_json(indent=2) 24 | print(spec_json) 25 | """ 26 | { 27 | "zarr_format": 2, 28 | "attributes": { 29 | "description": "my array", 30 | "units": "meters" 31 | }, 32 | "shape": [ 33 | 1000, 34 | 1000 35 | ], 36 | "chunks": [ 37 | 100, 38 | 100 39 | ], 40 | "dtype": "|u1", 41 | "fill_value": 0, 42 | "order": "C", 43 | "filters": null, 44 | "dimension_separator": "/", 45 | "compressor": null 46 | } 47 | """ 48 | ``` 49 | 50 | === "Zarr V3" 51 | 52 | ```python 53 | from pydantic_zarr.experimental.v3 import ArraySpec 54 | 55 | # Create a simple array specification 56 | array = ArraySpec( 57 | shape=(1000, 1000), 58 | data_type='uint8', 59 | chunk_grid={ 60 | 'name': 'regular', 61 | 'configuration': {'chunk_shape': (100, 100)} 62 | }, 63 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 64 | codecs=[{'name': 'bytes'}], 65 | fill_value=0, 66 | attributes={'description': 'my array', 'units': 'meters'} 67 | ) 68 | 69 | # Get the model as JSON string 70 | spec_json = array.model_dump_json(indent=2) 71 | print(spec_json) 72 | """ 73 | { 74 | "zarr_format": 3, 75 | "node_type": "array", 76 | "attributes": { 77 | "description": "my array", 78 | "units": "meters" 79 | }, 80 | "shape": [ 81 | 1000, 82 | 1000 83 | ], 84 | "data_type": "uint8", 85 | "chunk_grid": { 86 | "name": "regular", 87 | "configuration": { 88 | "chunk_shape": [ 89 | 100, 90 | 100 91 | ] 92 | } 93 | }, 94 | "chunk_key_encoding": { 95 | "name": "default", 96 | "configuration": { 97 | "separator": "/" 98 | } 99 | }, 100 | "fill_value": 0, 101 | "codecs": [ 102 | { 103 | "name": "bytes" 104 | } 105 | ], 106 | "storage_transformers": [], 107 | "dimension_names": null 108 | } 109 | """ 110 | ``` 111 | 112 | ## Creating a Group Specification 113 | 114 | The `GroupSpec` model represents a Zarr group that can contain arrays and other groups as members. 
115 | 116 | === "Zarr V2" 117 | 118 | ```python 119 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 120 | 121 | # Create ArraySpec for group members 122 | data_array = ArraySpec( 123 | shape=(1000, 1000), 124 | dtype='float32', 125 | chunks=(100, 100), 126 | attributes={'description': 'image data'} 127 | ) 128 | 129 | metadata_array = ArraySpec( 130 | shape=(1000,), 131 | dtype='uint32', 132 | chunks=(100,), 133 | attributes={'description': 'pixel metadata'} 134 | ) 135 | 136 | # Create a group containing these arrays 137 | group = GroupSpec( 138 | attributes={ 139 | 'name': 'experiment_001', 140 | 'date': '2024-11-23', 141 | 'version': 1 142 | }, 143 | members={ 144 | 'image': data_array, 145 | 'metadata': metadata_array 146 | } 147 | ) 148 | ``` 149 | 150 | === "Zarr V3" 151 | 152 | ```python 153 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 154 | 155 | # Create ArraySpec for group members 156 | data_array = ArraySpec( 157 | shape=(1000, 1000), 158 | data_type='float32', 159 | chunk_grid={ 160 | 'name': 'regular', 161 | 'configuration': {'chunk_shape': (100, 100)} 162 | }, 163 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 164 | codecs=[{'name': 'bytes'}], 165 | fill_value=0, 166 | attributes={'description': 'image data'} 167 | ) 168 | 169 | metadata_array = ArraySpec( 170 | shape=(1000,), 171 | data_type='uint32', 172 | chunk_grid={ 173 | 'name': 'regular', 174 | 'configuration': {'chunk_shape': (100,)} 175 | }, 176 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 177 | codecs=[{'name': 'bytes'}], 178 | fill_value=0, 179 | attributes={'description': 'pixel metadata'} 180 | ) 181 | 182 | # Create a GroupSpec containing these arrays 183 | group = GroupSpec( 184 | attributes={ 185 | 'name': 'experiment_001', 186 | 'date': '2024-11-23', 187 | 'version': 1 188 | }, 189 | members={ 190 | 'image': data_array, 191 | 'metadata': metadata_array 192 | } 193 | ) 194 | ``` 195 | 196 | ## Nested Groups 197 | 198 | You can create hierarchical structures by nesting groups within groups. 
199 | 200 | === "Zarr V2" 201 | 202 | ```python 203 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 204 | 205 | # Create a multi-level hierarchy 206 | raw_data = ArraySpec( 207 | shape=(512, 512), 208 | dtype='uint8', 209 | chunks=(64, 64), 210 | attributes={} 211 | ) 212 | 213 | processed_data = ArraySpec( 214 | shape=(512, 512), 215 | dtype='float32', 216 | chunks=(64, 64), 217 | attributes={} 218 | ) 219 | 220 | # Create sub-groups 221 | raw_group = GroupSpec( 222 | attributes={'processing_level': 'raw'}, 223 | members={'data': raw_data} 224 | ) 225 | 226 | processed_group = GroupSpec( 227 | attributes={'processing_level': 'processed'}, 228 | members={'data': processed_data} 229 | ) 230 | 231 | # Create root group containing sub-groups 232 | root = GroupSpec( 233 | attributes={'project': 'imaging_study'}, 234 | members={ 235 | 'raw': raw_group, 236 | 'processed': processed_group 237 | } 238 | ) 239 | ``` 240 | 241 | === "Zarr V3" 242 | 243 | ```python 244 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 245 | 246 | # Create a multi-level hierarchy 247 | raw_data = ArraySpec( 248 | shape=(512, 512), 249 | data_type='uint8', 250 | chunk_grid={ 251 | 'name': 'regular', 252 | 'configuration': {'chunk_shape': (64, 64)} 253 | }, 254 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 255 | codecs=[{'name': 'bytes'}], 256 | fill_value=0, 257 | attributes={} 258 | ) 259 | 260 | processed_data = ArraySpec( 261 | shape=(512, 512), 262 | data_type='float32', 263 | chunk_grid={ 264 | 'name': 'regular', 265 | 'configuration': {'chunk_shape': (64, 64)} 266 | }, 267 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 268 | codecs=[{'name': 'bytes'}], 269 | fill_value=0, 270 | attributes={} 271 | ) 272 | 273 | # Create sub-groups 274 | raw_group = GroupSpec( 275 | attributes={'processing_level': 'raw'}, 276 | members={'data': raw_data} 277 | ) 278 | 279 | processed_group = GroupSpec( 280 | attributes={'processing_level': 'processed'}, 281 | members={'data': processed_data} 282 | ) 283 | 284 | # Create root group containing sub-groups 285 | root = GroupSpec( 286 | attributes={'project': 'imaging_study'}, 287 | members={ 288 | 'raw': raw_group, 289 | 'processed': processed_group 290 | } 291 | ) 292 | ``` 293 | 294 | ## Working with Flattened Hierarchies 295 | 296 | The `to_flat()` method converts a hierarchical group structure into a flat dictionary representation. In the dict form, instances of `GroupSpec` are converted to instances of `BaseGroupSpec`, which models a Zarr group without any members. We use a different type because in the flat representation, the hierarchy structure is fully encoded by the keys of the dict. 
297 | 298 | === "Zarr V2" 299 | 300 | ```python 301 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 302 | 303 | # Create a group hierarchy 304 | array = ArraySpec( 305 | shape=(100,), 306 | dtype='float32', 307 | chunks=(10,), 308 | attributes={} 309 | ) 310 | 311 | subgroup = GroupSpec( 312 | attributes={'level': 1}, 313 | members={'data': array} 314 | ) 315 | 316 | root = GroupSpec( 317 | attributes={'level': 0}, 318 | members={'sub': subgroup} 319 | ) 320 | 321 | # Convert to flat representation 322 | flat = root.to_flat() 323 | print(flat) 324 | """ 325 | { 326 | '': BaseGroupSpec(zarr_format=2, attributes={'level': 0}), 327 | '/sub': BaseGroupSpec(zarr_format=2, attributes={'level': 1}), 328 | '/sub/data': ArraySpec( 329 | zarr_format=2, 330 | attributes={}, 331 | shape=(100,), 332 | chunks=(10,), 333 | dtype=' False 424 | 425 | # True because we are ignoring attributes 426 | print(array1.like(array2, exclude={'attributes'})) 427 | #> True 428 | 429 | # Create two groups 430 | group1 = GroupSpec( 431 | attributes={'version': 1}, 432 | members={'data': array1} 433 | ) 434 | 435 | group2 = GroupSpec( 436 | attributes={'version': 2}, 437 | members={'data': array1} 438 | ) 439 | 440 | # False because of differing attributes 441 | print(group1.like(group2)) 442 | #> False 443 | 444 | # True because we are ignoring attributes 445 | print(group1.like(group2, exclude={'attributes'})) 446 | #> True 447 | ``` 448 | 449 | === "Zarr V3" 450 | 451 | ```python 452 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 453 | 454 | # Create two similar arrays 455 | array1 = ArraySpec( 456 | shape=(100, 100), 457 | data_type='uint8', 458 | chunk_grid={ 459 | 'name': 'regular', 460 | 'configuration': {'chunk_shape': (10, 10)} 461 | }, 462 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 463 | codecs=[{'name': 'bytes'}], 464 | fill_value=0, 465 | attributes={'name': 'array1'} 466 | ) 467 | 468 | array2 = ArraySpec( 469 | shape=(100, 100), 470 | data_type='uint8', 471 | chunk_grid={ 472 | 'name': 'regular', 473 | 'configuration': {'chunk_shape': (10, 10)} 474 | }, 475 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 476 | codecs=[{'name': 'bytes'}], 477 | fill_value=0, 478 | attributes={'name': 'array2'} 479 | ) 480 | 481 | # False because of differing attributes 482 | print(array1.like(array2)) 483 | #> False 484 | 485 | # True because we are ignoring attributes 486 | print(array1.like(array2, exclude={'attributes'})) 487 | #> True 488 | 489 | # Create two groups 490 | group1 = GroupSpec( 491 | attributes={'version': 1}, 492 | members={'data': array1} 493 | ) 494 | 495 | group2 = GroupSpec( 496 | attributes={'version': 2}, 497 | members={'data': array1} 498 | ) 499 | 500 | # False because of differing attributes 501 | print(group1.like(group2)) 502 | #> False 503 | 504 | # True because we are ignoring attributes 505 | print(group1.like(group2, exclude={'attributes'})) 506 | #> True 507 | ``` 508 | 509 | ## Type-safe Group Members with TypedDict 510 | 511 | Define strict schemas for group members using `TypedDict` to enable runtime validation. 
512 | 513 | === "Zarr V2" 514 | 515 | ```python 516 | from typing_extensions import TypedDict 517 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 518 | 519 | # Define the expected structure of group members 520 | class TimeseriesMembers(TypedDict): 521 | timestamps: ArraySpec 522 | values: ArraySpec 523 | 524 | # Create ArraySpec 525 | timestamps = ArraySpec( 526 | shape=(10000,), 527 | dtype='float64', 528 | chunks=(1000,), 529 | attributes={'units': 'seconds since epoch'} 530 | ) 531 | 532 | values = ArraySpec( 533 | shape=(10000,), 534 | dtype='float32', 535 | chunks=(1000,), 536 | attributes={'units': 'meters'} 537 | ) 538 | 539 | # Define a custom GroupSpec with typed members 540 | class TimeseriesGroup(GroupSpec): 541 | members: TimeseriesMembers 542 | 543 | # This succeeds - all required members present 544 | ts_group = TimeseriesGroup( 545 | attributes={'sensor': 'accelerometer'}, 546 | members={'timestamps': timestamps, 'values': values} 547 | ) 548 | 549 | # This fails because the required member 'values' is missing 550 | try: 551 | ts_group = TimeseriesGroup( 552 | attributes={'sensor': 'accelerometer'}, 553 | members={'timestamps': timestamps} 554 | ) 555 | except ValueError as e: 556 | print(e) 557 | """ 558 | 1 validation error for TimeseriesGroup 559 | members.values 560 | Field required [type=missing, input_value={'timestamps': ArraySpec(...r='/', compressor=None)}, input_type=dict] 561 | For further information visit https://errors.pydantic.dev/2.11/v/missing 562 | """ 563 | ``` 564 | 565 | === "Zarr V3" 566 | 567 | ```python 568 | from typing_extensions import TypedDict 569 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 570 | 571 | # Define the expected structure of group members 572 | class TimeseriesMembers(TypedDict): 573 | timestamps: ArraySpec 574 | values: ArraySpec 575 | 576 | # Create ArraySpec 577 | timestamps = ArraySpec( 578 | shape=(10000,), 579 | data_type='float64', 580 | chunk_grid={ 581 | 'name': 'regular', 582 | 'configuration': {'chunk_shape': (1000,)} 583 | }, 584 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 585 | codecs=[{'name': 'bytes'}], 586 | fill_value=0, 587 | attributes={'units': 'seconds since epoch'} 588 | ) 589 | 590 | values = ArraySpec( 591 | shape=(10000,), 592 | data_type='float32', 593 | chunk_grid={ 594 | 'name': 'regular', 595 | 'configuration': {'chunk_shape': (1000,)} 596 | }, 597 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 598 | codecs=[{'name': 'bytes'}], 599 | fill_value=0, 600 | attributes={'units': 'meters'} 601 | ) 602 | 603 | # Define a custom GroupSpec with typed members 604 | class TimeseriesGroup(GroupSpec): 605 | members: TimeseriesMembers 606 | 607 | # This succeeds - all required members present 608 | ts_group = TimeseriesGroup( 609 | attributes={'sensor': 'accelerometer'}, 610 | members={'timestamps': timestamps, 'values': values} 611 | ) 612 | 613 | # This fails because the required member 'values' is missing 614 | try: 615 | ts_group = TimeseriesGroup( 616 | attributes={'sensor': 'accelerometer'}, 617 | members={'timestamps': timestamps} 618 | ) 619 | except ValueError as e: 620 | print(e) 621 | """ 622 | 1 validation error for TimeseriesGroup 623 | members.values 624 | Field required [type=missing, input_value={'timestamps': ArraySpec(..., dimension_names=None)}, input_type=dict] 625 | For further information visit https://errors.pydantic.dev/2.11/v/missing 626 | """ 627 | ``` 628 | 
-------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_v2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testts for pydantic_zarr.v2. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import json 8 | import re 9 | from contextlib import suppress 10 | from typing import TYPE_CHECKING, Any 11 | 12 | import pytest 13 | from pydantic import ValidationError 14 | 15 | from pydantic_zarr.core import tuplify_json 16 | 17 | from .conftest import DTYPE_EXAMPLES_V2, ZARR_PYTHON_VERSION, DTypeExample 18 | 19 | if TYPE_CHECKING: 20 | from typing import Literal 21 | 22 | import sys 23 | from dataclasses import dataclass 24 | from typing import TYPE_CHECKING, Literal 25 | 26 | if TYPE_CHECKING: 27 | from numcodecs.abc import Codec 28 | 29 | import numpy as np 30 | import numpy.typing as npt 31 | from packaging.version import Version 32 | 33 | from pydantic_zarr.v2 import ( 34 | ArraySpec, 35 | GroupSpec, 36 | auto_attributes, 37 | auto_chunks, 38 | auto_compresser, 39 | auto_dimension_separator, 40 | auto_fill_value, 41 | auto_filters, 42 | auto_order, 43 | from_flat, 44 | from_zarr, 45 | to_flat, 46 | to_zarr, 47 | ) 48 | 49 | if sys.version_info < (3, 12): 50 | from typing_extensions import TypedDict 51 | else: 52 | from typing import TypedDict 53 | 54 | try: 55 | import numcodecs 56 | except ImportError: 57 | numcodecs = None 58 | 59 | with suppress(ImportError): 60 | from zarr.errors import ContainsArrayError, ContainsGroupError 61 | 62 | ArrayMemoryOrder = Literal["C", "F"] 63 | DimensionSeparator = Literal[".", "/"] 64 | 65 | 66 | @pytest.fixture(params=("C", "F"), ids=["C", "F"]) 67 | def memory_order(request: pytest.FixtureRequest) -> ArrayMemoryOrder: 68 | """ 69 | Fixture that returns either "C" or "F" 70 | """ 71 | if request.param == "C": 72 | return "C" 73 | elif request.param == "F": 74 | return "F" 75 | msg = f"Invalid array memory order requested. Got {request.param}, expected one of (C, F)." 76 | raise ValueError(msg) 77 | 78 | 79 | @pytest.fixture(params=("/", "."), ids=["/", "."]) 80 | def dimension_separator(request: pytest.FixtureRequest) -> DimensionSeparator: 81 | """ 82 | Fixture that returns either "." or "/" 83 | """ 84 | if request.param == ".": 85 | return "." 86 | elif request.param == "/": 87 | return "/" 88 | msg = f"Invalid dimension separator requested. Got {request.param}, expected one of (., /)." 89 | raise ValueError(msg) 90 | 91 | 92 | @pytest.mark.parametrize("chunks", [(1,), (1, 2), ((1, 2, 3))]) 93 | @pytest.mark.parametrize("dtype", ["bool", "uint8", "float64"]) 94 | @pytest.mark.parametrize("compressor", [None, "LZMA", "GZip"]) 95 | @pytest.mark.parametrize( 96 | "filters", [(None,), ("delta",), ("scale_offset",), ("delta", "scale_offset")] 97 | ) 98 | def test_array_spec( 99 | chunks: tuple[int, ...], 100 | memory_order: ArrayMemoryOrder, 101 | dtype: str, 102 | dimension_separator: DimensionSeparator, 103 | compressor: str | None, 104 | filters: tuple[str, ...] 
| None, 105 | ) -> None: 106 | zarr = pytest.importorskip("zarr") 107 | numcodecs = pytest.importorskip("numcodecs") 108 | 109 | if compressor is not None: 110 | compressor = getattr(numcodecs, compressor)() 111 | 112 | store = zarr.storage.MemoryStore() 113 | _filters: list[Codec] | None 114 | if filters is not None: 115 | _filters = [] 116 | for filter in filters: 117 | if filter == "delta": 118 | _filters.append(numcodecs.Delta(dtype)) 119 | if filter == "scale_offset": 120 | _filters.append(numcodecs.FixedScaleOffset(0, 1.0, dtype=dtype)) 121 | else: 122 | _filters = filters 123 | 124 | array = zarr.create( 125 | (100,) * len(chunks), 126 | path="foo", 127 | store=store, 128 | chunks=chunks, 129 | dtype=dtype, 130 | order=memory_order, 131 | dimension_separator=dimension_separator, 132 | compressor=compressor, 133 | filters=_filters, 134 | zarr_format=2, 135 | ) 136 | attributes = {"foo": [100, 200, 300], "bar": "hello"} 137 | array.attrs.put(attributes) 138 | spec = ArraySpec.from_zarr(array) 139 | 140 | assert spec.zarr_format == array.metadata.zarr_format 141 | assert spec.dtype == array.dtype 142 | assert spec.attributes == array.attrs.asdict() 143 | assert spec.chunks == array.chunks 144 | 145 | assert spec.dimension_separator == array.metadata.dimension_separator 146 | assert spec.shape == array.shape 147 | assert spec.fill_value == array.fill_value 148 | # this is a sign that nullability is being misused in zarr-python 149 | # the correct approach would be to use an empty list to express "no filters". 150 | if len(array.filters): 151 | assert spec.filters == [f.get_config() for f in array.filters] 152 | else: 153 | assert spec.filters is None 154 | 155 | if len(array.compressors): 156 | assert spec.compressor == array.compressors[0].get_config() 157 | else: 158 | assert spec.compressor is None 159 | 160 | assert spec.order == array.order 161 | 162 | array2 = spec.to_zarr(store, "foo2") 163 | 164 | assert spec.zarr_format == array2.metadata.zarr_format 165 | assert spec.dtype == array2.dtype 166 | assert spec.attributes == array2.attrs 167 | assert spec.chunks == array2.chunks 168 | 169 | if len(array2.compressors): 170 | assert spec.compressor == array2.compressors[0].get_config() 171 | else: 172 | assert spec.compressor is None 173 | 174 | if len(array2.filters): 175 | assert spec.filters == [f.get_config() for f in array2.filters] 176 | else: 177 | assert spec.filters is None 178 | 179 | assert spec.dimension_separator == array2.metadata.dimension_separator 180 | assert spec.shape == array2.shape 181 | assert spec.fill_value == array2.fill_value 182 | 183 | # test serialization 184 | store = zarr.storage.MemoryStore() 185 | stored = spec.to_zarr(store, path="foo") 186 | assert ArraySpec.from_zarr(stored) == spec 187 | 188 | # test that to_zarr is idempotent 189 | assert spec.to_zarr(store, path="foo") == stored 190 | 191 | # test that to_zarr raises if the extant array is different 192 | spec_2 = spec.model_copy(update={"attributes": {"baz": 10}}) 193 | with pytest.raises(ContainsArrayError): 194 | spec_2.to_zarr(store, path="foo") 195 | 196 | # test that we can overwrite the dissimilar array 197 | stored_2 = spec_2.to_zarr(store, path="foo", overwrite=True) 198 | assert ArraySpec.from_zarr(stored_2) == spec_2 199 | 200 | assert spec_2.to_zarr(store, path="foo").read_only is False 201 | 202 | 203 | @dataclass 204 | class FakeArray: 205 | shape: tuple[int, ...] 
206 | dtype: np.dtype[Any] 207 | 208 | 209 | @dataclass 210 | class WithAttrs: 211 | attrs: dict[str, Any] 212 | 213 | 214 | @dataclass 215 | class WithChunksize: 216 | chunksize: tuple[int, ...] 217 | 218 | 219 | @dataclass 220 | class FakeDaskArray(FakeArray, WithChunksize): ... 221 | 222 | 223 | @dataclass 224 | class FakeXarray(FakeDaskArray, WithAttrs): ... 225 | 226 | 227 | @pytest.mark.parametrize( 228 | "array", 229 | [ 230 | np.zeros((100), dtype="uint8"), 231 | FakeArray(shape=(11,), dtype=np.dtype("float64")), 232 | FakeDaskArray(shape=(22,), dtype=np.dtype("uint8"), chunksize=(11,)), 233 | FakeXarray(shape=(22,), dtype=np.dtype("uint8"), chunksize=(11,), attrs={"foo": "bar"}), 234 | ], 235 | ) 236 | @pytest.mark.parametrize("chunks", ["omit", "auto", (10,)]) 237 | @pytest.mark.parametrize("attributes", ["omit", "auto", {"foo": 10}]) 238 | @pytest.mark.parametrize("fill_value", ["omit", "auto", 15]) 239 | @pytest.mark.parametrize("order", ["omit", "auto", "F"]) 240 | @pytest.mark.parametrize("filters", ["omit", "auto", []]) 241 | @pytest.mark.parametrize("dimension_separator", ["omit", "auto", "."]) 242 | @pytest.mark.parametrize("compressor", ["omit", "auto", {"id": "gzip", "level": 1}]) 243 | def test_array_spec_from_array( 244 | *, 245 | array: npt.NDArray[Any], 246 | chunks: str | tuple[int, ...], 247 | attributes: str | dict[str, object], 248 | fill_value: object, 249 | order: str, 250 | filters: str | list[Codec], 251 | dimension_separator: str, 252 | compressor: str | dict[str, object], 253 | ) -> None: 254 | auto_options = ("omit", "auto") 255 | kwargs_out: dict[str, object] = {} 256 | 257 | kwargs_out["chunks"] = chunks 258 | kwargs_out["attributes"] = attributes 259 | kwargs_out["fill_value"] = fill_value 260 | kwargs_out["order"] = order 261 | kwargs_out["filters"] = filters 262 | kwargs_out["dimension_separator"] = dimension_separator 263 | kwargs_out["compressor"] = compressor 264 | 265 | # remove all the keyword arguments that should be defaulted 266 | kwargs_out = dict(filter(lambda kvp: kvp[1] != "omit", kwargs_out.items())) 267 | 268 | spec = ArraySpec.from_array(array, **kwargs_out) 269 | # arrayspec should round-trip from_array with no arguments 270 | assert spec.from_array(spec) == spec 271 | 272 | assert spec.dtype == array.dtype.str 273 | assert np.dtype(spec.dtype) == array.dtype 274 | 275 | assert spec.shape == array.shape 276 | 277 | if chunks in auto_options: 278 | assert spec.chunks == auto_chunks(array) 279 | else: 280 | assert spec.chunks == chunks 281 | 282 | if attributes in auto_options: 283 | assert spec.attributes == auto_attributes(array) 284 | else: 285 | assert spec.attributes == attributes 286 | 287 | if fill_value in auto_options: 288 | assert spec.fill_value == auto_fill_value(array) 289 | else: 290 | assert spec.fill_value == fill_value 291 | 292 | if order in auto_options: 293 | assert spec.order == auto_order(array) 294 | else: 295 | assert spec.order == order 296 | 297 | if filters in auto_options: 298 | assert spec.filters == auto_filters(array) 299 | else: 300 | assert spec.filters is None 301 | 302 | if dimension_separator in auto_options: 303 | assert spec.dimension_separator == auto_dimension_separator(array) 304 | else: 305 | assert spec.dimension_separator == dimension_separator 306 | 307 | if compressor in auto_options: 308 | assert spec.compressor == auto_compresser(array) 309 | else: 310 | assert spec.compressor == compressor 311 | 312 | 313 | @pytest.mark.parametrize("chunks", [(1,), (1, 2), ((1, 2, 3))]) 314 | 
@pytest.mark.parametrize("dtype", ["bool", "uint8", np.dtype("uint8"), "float64"]) 315 | @pytest.mark.parametrize("dimension_separator", [".", "/"]) 316 | @pytest.mark.parametrize( 317 | "compressor", 318 | [{"id": "lzma", "format": 1, "check": -1, "preset": None, "filters": None}, "GZip"], 319 | ) 320 | @pytest.mark.parametrize("filters", [(), ("delta",), ("scale_offset",), ("delta", "scale_offset")]) 321 | def test_serialize_deserialize_groupspec( 322 | chunks: tuple[int, ...], 323 | memory_order: ArrayMemoryOrder, 324 | dtype: str, 325 | dimension_separator: Literal[".", "/"], 326 | compressor: Any, 327 | filters: tuple[str, ...] | None, 328 | ) -> None: 329 | zarr = pytest.importorskip("zarr") 330 | numcodecs = pytest.importorskip("numcodecs") 331 | if isinstance(compressor, str): 332 | compressor = getattr(numcodecs, compressor)() 333 | 334 | _filters: list[Codec] | None 335 | if filters is not None: 336 | _filters = [] 337 | for filter in filters: 338 | if filter == "delta": 339 | _filters.append(numcodecs.Delta(dtype)) 340 | if filter == "scale_offset": 341 | _filters.append(numcodecs.FixedScaleOffset(0, 1.0, dtype=dtype)) 342 | else: 343 | _filters = filters 344 | 345 | class RootAttrs(TypedDict): 346 | foo: int 347 | bar: list[int] 348 | 349 | class SubGroupAttrs(TypedDict): 350 | a: str 351 | b: float 352 | 353 | SubGroup = GroupSpec[SubGroupAttrs, Any] 354 | 355 | class ArrayAttrs(TypedDict): 356 | scale: list[float] 357 | 358 | store = zarr.storage.MemoryStore() 359 | 360 | spec = GroupSpec[RootAttrs, ArraySpec | SubGroup]( 361 | attributes=RootAttrs(foo=10, bar=[0, 1, 2]), 362 | members={ 363 | "s0": ArraySpec[ArrayAttrs]( 364 | shape=(10,) * len(chunks), 365 | chunks=chunks, 366 | dtype=dtype, 367 | filters=_filters, 368 | compressor=compressor, 369 | order=memory_order, 370 | dimension_separator=dimension_separator, 371 | attributes=ArrayAttrs(scale=[1.0]), 372 | ), 373 | "s1": ArraySpec[ArrayAttrs]( 374 | shape=(5,) * len(chunks), 375 | chunks=chunks, 376 | dtype=dtype, 377 | filters=_filters, 378 | compressor=compressor, 379 | order=memory_order, 380 | dimension_separator=dimension_separator, 381 | attributes=ArrayAttrs(scale=[2.0]), 382 | ), 383 | "subgroup": SubGroup(attributes=SubGroupAttrs(a="foo", b=1.0)), 384 | }, 385 | ) 386 | # check that the model round-trips dict representation 387 | assert spec == GroupSpec(**spec.model_dump()) 388 | 389 | # materialize a zarr group, based on the spec 390 | group = to_zarr(spec, store, "/group_a") 391 | 392 | # parse the spec from that group 393 | observed = from_zarr(group) 394 | assert observed == spec 395 | 396 | # assert that we get the same group twice 397 | assert to_zarr(spec, store, "/group_a", overwrite=True) == group 398 | 399 | # check that we can't call to_zarr targeting the original group with a different spec 400 | spec_2 = spec.model_copy(update={"attributes": RootAttrs(foo=99, bar=[0, 1, 2])}) 401 | with pytest.raises(ContainsGroupError): 402 | _ = to_zarr(spec_2, store, "/group_a") 403 | 404 | # check that we can't call to_zarr with the original spec if the group has changed 405 | group.attrs["foo"] = 100 406 | with pytest.raises(ContainsGroupError): 407 | _ = to_zarr(spec, store, "/group_a") 408 | group.attrs["foo"] = 10 409 | 410 | # materialize again with overwrite 411 | group2 = to_zarr(spec, store, "/group_a", overwrite=True) 412 | assert group2 == group 413 | 414 | # again with class methods 415 | group3 = spec.to_zarr(store, "/group_b") 416 | observed = spec.from_zarr(group3) 417 | assert observed == spec 
418 | 419 | 420 | @pytest.mark.parametrize("base", range(1, 5)) 421 | def test_shape_chunks(base: int) -> None: 422 | """ 423 | Test that the length of the chunks and the shape match 424 | """ 425 | with pytest.raises(ValidationError): 426 | ArraySpec(shape=(1,) * base, chunks=(1,) * (base + 1), dtype="uint8", attributes={}) 427 | with pytest.raises(ValidationError): 428 | ArraySpec(shape=(1,) * (base + 1), chunks=(1,) * base, dtype="uint8", attributes={}) 429 | 430 | 431 | def test_validation() -> None: 432 | """ 433 | Test that specialized GroupSpec and ArraySpec instances cannot be serialized from 434 | the wrong inputs without a ValidationError. 435 | """ 436 | zarr = pytest.importorskip("zarr") 437 | 438 | class GroupAttrsA(TypedDict): 439 | group_a: bool 440 | 441 | class GroupAttrsB(TypedDict): 442 | group_b: bool 443 | 444 | class ArrayAttrsA(TypedDict): 445 | array_a: bool 446 | 447 | class ArrayAttrsB(TypedDict): 448 | array_b: bool 449 | 450 | ArrayA = ArraySpec[ArrayAttrsA] 451 | ArrayB = ArraySpec[ArrayAttrsB] 452 | GroupA = GroupSpec[GroupAttrsA, ArrayA] 453 | GroupB = GroupSpec[GroupAttrsB, ArrayB] 454 | 455 | store = zarr.storage.MemoryStore 456 | 457 | specA = GroupA( 458 | attributes=GroupAttrsA(group_a=True), 459 | members={ 460 | "a": ArrayA( 461 | attributes=ArrayAttrsA(array_a=True), 462 | shape=(100,), 463 | dtype="uint8", 464 | chunks=(10,), 465 | ) 466 | }, 467 | ) 468 | 469 | specB = GroupB( 470 | attributes=GroupAttrsB(group_b=True), 471 | members={ 472 | "a": ArrayB( 473 | attributes=ArrayAttrsB(array_b=True), 474 | shape=(100,), 475 | dtype="uint8", 476 | chunks=(10,), 477 | ) 478 | }, 479 | ) 480 | 481 | # check that we cannot create a specialized GroupSpec with the wrong attributes 482 | with pytest.raises(ValidationError): 483 | GroupB( 484 | attributes=GroupAttrsA(group_a=True), 485 | members={}, 486 | ) 487 | 488 | store = zarr.storage.MemoryStore() 489 | groupAMat = specA.to_zarr(store, path="group_a") 490 | groupBMat = specB.to_zarr(store, path="group_b") 491 | 492 | GroupA.from_zarr(groupAMat) 493 | GroupB.from_zarr(groupBMat) 494 | 495 | ArrayA.from_zarr(groupAMat["a"]) 496 | ArrayB.from_zarr(groupBMat["a"]) 497 | 498 | with pytest.raises(ValidationError): 499 | ArrayA.from_zarr(groupBMat["a"]) 500 | 501 | with pytest.raises(ValidationError): 502 | ArrayB.from_zarr(groupAMat["a"]) 503 | 504 | with pytest.raises(ValidationError): 505 | GroupB.from_zarr(groupAMat) 506 | 507 | with pytest.raises(ValidationError): 508 | GroupA.from_zarr(groupBMat) 509 | 510 | 511 | @pytest.mark.parametrize("shape", [(1,), (2, 2), (3, 4, 5)]) 512 | @pytest.mark.parametrize("dtype", [None, "uint8", "float32"]) 513 | def test_from_array(shape: tuple[int, ...], dtype: str | None) -> None: 514 | template = np.zeros(shape=shape, dtype=dtype) 515 | spec = ArraySpec.from_array(template) # type: ignore[var-annotated] 516 | 517 | assert spec.shape == template.shape 518 | assert np.dtype(spec.dtype) == np.dtype(template.dtype) 519 | assert spec.chunks == template.shape 520 | assert spec.attributes == {} 521 | 522 | chunks = template.ndim * (1,) 523 | attrs = {"foo": 100} 524 | spec2 = ArraySpec.from_array(template, chunks=chunks, attributes=attrs) 525 | assert spec2.chunks == chunks 526 | assert spec2.attributes == attrs 527 | 528 | 529 | @pytest.mark.parametrize("data", ["/", "a/b/c"]) 530 | def test_member_name(data: str) -> None: 531 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid.'): 532 | GroupSpec(attributes={}, members={data: 
GroupSpec(attributes={}, members={})}) 533 | 534 | 535 | @pytest.mark.parametrize( 536 | ("data", "expected"), 537 | [ 538 | ( 539 | ArraySpec.from_array(np.arange(10)), 540 | {"": ArraySpec.from_array(np.arange(10))}, 541 | ), 542 | ( 543 | GroupSpec( 544 | attributes={"foo": 10}, 545 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 546 | ), 547 | { 548 | "": GroupSpec(attributes={"foo": 10}, members=None), 549 | "/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 550 | }, 551 | ), 552 | ( 553 | GroupSpec( 554 | attributes={}, 555 | members={ 556 | "a": GroupSpec( 557 | attributes={"foo": 10}, 558 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 559 | ), 560 | "b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 561 | }, 562 | ), 563 | { 564 | "": GroupSpec(attributes={}, members=None), 565 | "/a": GroupSpec(attributes={"foo": 10}, members=None), 566 | "/a/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 567 | "/b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 568 | }, 569 | ), 570 | ], 571 | ) 572 | def test_flatten_unflatten( 573 | data: ArraySpec | GroupSpec, expected: dict[str, ArraySpec | GroupSpec] 574 | ) -> None: 575 | flattened = to_flat(data) 576 | assert flattened == expected 577 | assert from_flat(flattened) == data 578 | 579 | 580 | # todo: parametrize 581 | def test_array_like() -> None: 582 | a = ArraySpec.from_array(np.arange(10)) # type: ignore[var-annotated] 583 | assert a.like(a) 584 | 585 | b = a.model_copy(update={"dtype": "uint8"}) 586 | assert not a.like(b) 587 | assert a.like(b, exclude={"dtype"}) 588 | assert a.like(b, include={"shape"}) 589 | 590 | c = a.model_copy(update={"shape": (100, 100)}) 591 | assert not a.like(c) 592 | assert a.like(c, exclude={"shape"}) 593 | assert a.like(c, include={"dtype"}) 594 | 595 | 596 | def test_array_like_with_zarr() -> None: 597 | zarr = pytest.importorskip("zarr") 598 | arr = ArraySpec(shape=(1,), dtype="uint8", chunks=(1,), attributes={}) 599 | store = zarr.storage.MemoryStore() 600 | arr_stored = arr.to_zarr(store, path="arr") 601 | assert arr.like(arr_stored) 602 | 603 | 604 | # todo: parametrize 605 | def test_group_like() -> None: 606 | tree: dict[str, GroupSpec | ArraySpec] = { 607 | "": GroupSpec(attributes={"path": ""}, members=None), 608 | "/a": GroupSpec(attributes={"path": "/a"}, members=None), 609 | "/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/b"}), 610 | "/a/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/a/b"}), 611 | } 612 | group = GroupSpec.from_flat(tree) # type: ignore[var-annotated] 613 | assert group.like(group) 614 | assert not group.like(group.model_copy(update={"attributes": None})) 615 | assert group.like(group.model_copy(update={"attributes": None}), exclude={"attributes"}) 616 | assert group.like(group.model_copy(update={"attributes": None}), include={"members"}) 617 | 618 | 619 | # todo: parametrize 620 | def test_from_zarr_depth() -> None: 621 | zarr = pytest.importorskip("zarr") 622 | tree: dict[str, GroupSpec | ArraySpec] = { 623 | "": GroupSpec(members=None, attributes={"level": 0, "type": "group"}), 624 | "/1": GroupSpec(members=None, attributes={"level": 1, "type": "group"}), 625 | "/1/2": GroupSpec(members=None, attributes={"level": 2, "type": "group"}), 626 | "/1/2/1": GroupSpec(members=None, attributes={"level": 3, "type": "group"}), 627 | "/1/2/2": ArraySpec.from_array(np.arange(20), attributes={"level": 3, "type": "array"}), 628 | } 629 | 630 
| store = zarr.storage.MemoryStore() 631 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 632 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 633 | assert group_in_0 == tree[""] 634 | 635 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 636 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 637 | assert group_in_1.members is not None 638 | assert group_in_1.members["1"] == tree["/1"] 639 | 640 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 641 | assert group_in_2.members is not None 642 | assert group_in_2.members["1"].members["2"] == tree["/1/2"] 643 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 644 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 645 | 646 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 647 | assert group_in_3.members is not None 648 | assert group_in_3.members["1"].members["2"].members["1"] == tree["/1/2/1"] 649 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 650 | assert group_in_3.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 651 | assert group_in_3.members["1"].members["2"].attributes == tree["/1/2"].attributes # type: ignore[attr-defined] 652 | 653 | 654 | @pytest.mark.parametrize(("dtype_example"), DTYPE_EXAMPLES_V2, ids=str) 655 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 656 | """ 657 | Test that deserializing an ArraySpec from a zarr python store works as expected. 658 | """ 659 | zarr = pytest.importorskip("zarr") 660 | store = {} 661 | data_type = dtype_example.name 662 | if ZARR_PYTHON_VERSION >= Version("3.1.0") and data_type == "|O": 663 | pytest.skip(reason="Data type inference with an object dtype will fail in zarr>=3.1.0") 664 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=2) 665 | 666 | arr_spec = ArraySpec.from_zarr(arr) 667 | 668 | observed = {"attributes": arr.attrs.asdict()} | json.loads( 669 | store[".zarray"].to_bytes(), object_hook=tuplify_json 670 | ) 671 | if observed["filters"] is not None: 672 | observed["filters"] = list(observed["filters"]) 673 | # this covers the case of the structured data type, which would otherwise be deserialized as a 674 | # tuple of tuples, but is stored on the arrayspec as a list of tuples. 675 | if isinstance(observed["dtype"], tuple): 676 | observed["dtype"] = list(observed["dtype"]) 677 | 678 | assert arr_spec.model_dump() == observed 679 | 680 | 681 | def test_mix_v3_v2_fails() -> None: 682 | from pydantic_zarr.v3 import ArraySpec as ArraySpecv3 683 | 684 | members_flat = {"/a": ArraySpecv3.from_array(np.ones(1))} 685 | with pytest.raises( 686 | ValueError, 687 | match=re.escape( 688 | "Value at '/a' is not a v2 ArraySpec or GroupSpec (got type(value)=)" 689 | ), 690 | ): 691 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 692 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/test_v2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testts for pydantic_zarr.v2. 
3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import json 8 | import re 9 | import sys 10 | from collections.abc import Mapping # noqa: TC003 11 | from contextlib import suppress 12 | from typing import TYPE_CHECKING, Any 13 | 14 | import dask.array as da 15 | import pytest 16 | import xarray as xr 17 | from pydantic import ValidationError 18 | 19 | from pydantic_zarr.core import tuplify_json 20 | from pydantic_zarr.experimental.core import json_eq 21 | 22 | from ..conftest import DTYPE_EXAMPLES_V2, ZARR_AVAILABLE, ZARR_PYTHON_VERSION, DTypeExample 23 | 24 | if TYPE_CHECKING: 25 | from numcodecs.abc import Codec 26 | 27 | import numpy as np 28 | import numpy.typing as npt 29 | from packaging.version import Version 30 | 31 | from pydantic_zarr.experimental.v2 import ( 32 | DIMENSION_SEPARATOR, 33 | MEMORY_ORDER, 34 | ArraySpec, 35 | BaseGroupSpec, 36 | CodecDict, 37 | DimensionSeparator, 38 | GroupSpec, 39 | MemoryOrder, 40 | auto_attributes, 41 | auto_chunks, 42 | auto_compresser, 43 | auto_dimension_separator, 44 | auto_fill_value, 45 | auto_filters, 46 | auto_order, 47 | from_flat, 48 | from_zarr, 49 | to_flat, 50 | to_zarr, 51 | ) 52 | 53 | if sys.version_info < (3, 12): 54 | from typing_extensions import TypedDict 55 | else: 56 | from typing import TypedDict 57 | 58 | try: 59 | import numcodecs 60 | except ImportError: 61 | numcodecs = None 62 | 63 | with suppress(ImportError): 64 | from zarr.errors import ContainsArrayError, ContainsGroupError 65 | 66 | 67 | @pytest.mark.parametrize(("chunks", "shape"), [((1,), (10,)), ((1, 2, 3), (4, 5, 6))]) 68 | @pytest.mark.parametrize("dtype", ["bool", "float64", "|u1", np.float32]) 69 | @pytest.mark.parametrize("compressor", [None, {"id": "gzip", "level": 1}]) 70 | @pytest.mark.parametrize( 71 | "filters", 72 | [ 73 | None, 74 | (), 75 | ({"id": "delta", "dtype": "uint8"},), 76 | ({"id": "delta", "dtype": "uint8"}, {"id": "gzip", "level": 1}), 77 | ], 78 | ) 79 | @pytest.mark.parametrize("dimension_separator", DIMENSION_SEPARATOR) 80 | @pytest.mark.parametrize("memory_order", MEMORY_ORDER) 81 | @pytest.mark.parametrize("attributes", [{}, {"a": [100]}, {"b": ("e", "f")}]) 82 | def test_array_spec( 83 | chunks: tuple[int, ...], 84 | shape: tuple[int, ...], 85 | memory_order: MemoryOrder, 86 | dtype: str, 87 | dimension_separator: DimensionSeparator, 88 | compressor: str | CodecDict, 89 | filters: tuple[str, ...] 
| None, 90 | attributes: dict[str, object], 91 | ) -> None: 92 | zarr = pytest.importorskip("zarr") 93 | import numcodecs 94 | 95 | if filters is not None: 96 | _filters = tuple(numcodecs.get_codec(f) for f in filters) 97 | else: 98 | _filters = None 99 | store = {} 100 | 101 | array = zarr.create_array( 102 | shape=shape, 103 | store=store, 104 | chunks=chunks, 105 | dtype=dtype, 106 | order=memory_order, 107 | chunk_key_encoding={"name": "v2", "configuration": {"separator": dimension_separator}}, 108 | compressors=compressor, 109 | filters=_filters, 110 | zarr_format=2, 111 | attributes=attributes, 112 | ) 113 | 114 | spec = ArraySpec.from_zarr(array) 115 | 116 | assert json_eq( 117 | spec.model_dump(), {**json.loads(store[".zarray"].to_bytes()), "attributes": attributes} 118 | ) 119 | 120 | 121 | @pytest.mark.parametrize("overwrite", [True, False]) 122 | @pytest.mark.parametrize("path", ["", "foo"]) 123 | @pytest.mark.parametrize("config", [None, {}, {"order": "C", "write_empty_chunks": True}]) 124 | def test_arrayspec_to_zarr(overwrite: bool, path: str, config: dict[str, object] | None) -> None: 125 | """ 126 | Test serializing an arrayspec to zarr and back again 127 | """ 128 | zarr = pytest.importorskip("zarr") 129 | from zarr.core.array_spec import ArrayConfig 130 | 131 | spec = ArraySpec( 132 | shape=(10,), 133 | dtype="uint8", 134 | chunks=(1,), 135 | attributes={"a": 10}, 136 | ) 137 | 138 | # test serialization 139 | store = zarr.storage.MemoryStore() 140 | stored = spec.to_zarr(store, path=path, config=config) # type: ignore[arg-type] 141 | 142 | if config not in (None, {}): 143 | assert stored._async_array._config == ArrayConfig( 144 | order=config["order"], write_empty_chunks=config["write_empty_chunks"] 145 | ) 146 | 147 | assert json_eq(ArraySpec.from_zarr(stored).model_dump(), spec.model_dump()) 148 | 149 | # test that to_zarr is idempotent when the arrays match 150 | assert json_eq(spec.to_zarr(store, path=path).metadata.to_dict(), stored.metadata.to_dict()) 151 | 152 | # test that to_zarr raises if the extant array is different 153 | # unless overwrite is True 154 | spec_2 = spec.model_copy(update={"attributes": {"baz": 10}}) 155 | if not overwrite: 156 | with pytest.raises(ContainsArrayError): 157 | spec_2.to_zarr(store, path=path, overwrite=overwrite) 158 | else: 159 | arr_2 = spec_2.to_zarr(store, path=path, overwrite=overwrite) 160 | assert json_eq(arr_2.attrs.asdict(), spec_2.attributes) 161 | 162 | 163 | @pytest.mark.parametrize( 164 | "array", 165 | [ 166 | np.zeros((100), dtype="uint8"), 167 | xr.DataArray(np.arange(10), attrs={"foo": 10}), 168 | xr.DataArray(da.arange(10), attrs={"foo": 10}), 169 | da.arange(10), 170 | ], 171 | ) 172 | @pytest.mark.parametrize("chunks", ["omit", "auto", (10,)]) 173 | @pytest.mark.parametrize("attributes", ["omit", "auto", {"foo": 10}]) 174 | @pytest.mark.parametrize("fill_value", ["omit", "auto", 15]) 175 | @pytest.mark.parametrize("order", ["omit", "auto", "F"]) 176 | @pytest.mark.parametrize("filters", ["omit", "auto", []]) 177 | @pytest.mark.parametrize("dimension_separator", ["omit", "auto", "."]) 178 | @pytest.mark.parametrize("compressor", ["omit", "auto", {"id": "gzip", "level": 1}]) 179 | def test_array_spec_from_array( 180 | *, 181 | array: npt.NDArray[Any], 182 | chunks: str | tuple[int, ...], 183 | attributes: str | dict[str, object], 184 | fill_value: object, 185 | order: str, 186 | filters: str | list[Codec], 187 | dimension_separator: str, 188 | compressor: str | dict[str, object], 189 | ) -> None: 190 | 
auto_options = ("omit", "auto") 191 | kwargs_out: dict[str, object] = {} 192 | 193 | kwargs_out["chunks"] = chunks 194 | kwargs_out["attributes"] = attributes 195 | kwargs_out["fill_value"] = fill_value 196 | kwargs_out["order"] = order 197 | kwargs_out["filters"] = filters 198 | kwargs_out["dimension_separator"] = dimension_separator 199 | kwargs_out["compressor"] = compressor 200 | 201 | # remove all the keyword arguments that should be defaulted 202 | kwargs_out = dict(filter(lambda kvp: kvp[1] != "omit", kwargs_out.items())) 203 | 204 | spec = ArraySpec.from_array(array, **kwargs_out) 205 | # arrayspec should round-trip from_array with no arguments 206 | assert spec.from_array(spec) == spec 207 | 208 | assert spec.dtype == array.dtype.str 209 | assert np.dtype(spec.dtype) == array.dtype 210 | 211 | assert spec.shape == array.shape 212 | 213 | if chunks in auto_options: 214 | assert spec.chunks == auto_chunks(array) 215 | else: 216 | assert spec.chunks == chunks 217 | 218 | if attributes in auto_options: 219 | assert spec.attributes == auto_attributes(array) 220 | else: 221 | assert spec.attributes == attributes 222 | 223 | if fill_value in auto_options: 224 | assert spec.fill_value == auto_fill_value(array) 225 | else: 226 | assert spec.fill_value == fill_value 227 | 228 | if order in auto_options: 229 | assert spec.order == auto_order(array) 230 | else: 231 | assert spec.order == order 232 | 233 | if filters in auto_options: 234 | assert spec.filters == auto_filters(array) 235 | else: 236 | assert spec.filters is None 237 | 238 | if dimension_separator in auto_options: 239 | assert spec.dimension_separator == auto_dimension_separator(array) 240 | else: 241 | assert spec.dimension_separator == dimension_separator 242 | 243 | if compressor in auto_options: 244 | assert spec.compressor == auto_compresser(array) 245 | else: 246 | assert spec.compressor == compressor 247 | 248 | 249 | def test_serialize_deserialize_groupspec() -> None: 250 | zarr = pytest.importorskip("zarr") 251 | 252 | class RootAttrs(TypedDict): 253 | foo: int 254 | bar: list[int] 255 | 256 | class SubGroupAttrs(TypedDict): 257 | a: str 258 | b: float 259 | 260 | class SubGroup(GroupSpec): 261 | attributes: SubGroupAttrs 262 | 263 | class ArrayAttrs(TypedDict): 264 | scale: list[float] 265 | 266 | class MemberArray(ArraySpec): 267 | attributes: ArrayAttrs 268 | 269 | class RootGroup(GroupSpec): 270 | attributes: RootAttrs 271 | members: Mapping[str, MemberArray | SubGroup] 272 | 273 | store = zarr.storage.MemoryStore() 274 | 275 | spec = RootGroup( 276 | attributes=RootAttrs(foo=10, bar=[0, 1, 2]), 277 | members={ 278 | "s0": MemberArray( 279 | shape=(10,), 280 | chunks=(1,), 281 | dtype="uint8", 282 | filters=None, 283 | compressor=None, 284 | order="C", 285 | dimension_separator="/", 286 | attributes=ArrayAttrs(scale=[1.0]), 287 | ), 288 | "s1": MemberArray( 289 | shape=(10,), 290 | chunks=(1,), 291 | dtype="uint8", 292 | filters=None, 293 | compressor=None, 294 | order="C", 295 | dimension_separator="/", 296 | attributes=ArrayAttrs(scale=[2.0]), 297 | ), 298 | "subgroup": SubGroup(attributes=SubGroupAttrs(a="foo", b=1.0), members={}), 299 | }, 300 | ) 301 | # check that the model round-trips dict representation 302 | assert spec.model_dump() == GroupSpec(**spec.model_dump()).model_dump() 303 | 304 | # materialize a zarr group, based on the spec 305 | group = to_zarr(spec, store, "/group_a") 306 | 307 | # parse the spec from that group 308 | observed = from_zarr(group) 309 | assert json_eq(observed.model_dump(), 
spec.model_dump()) 310 | 311 | # assert that we get the same group twice 312 | assert to_zarr(spec, store, "/group_a", overwrite=True) == group 313 | 314 | # check that we can't call to_zarr targeting the original group with a different spec 315 | spec_2 = spec.model_copy(update={"attributes": RootAttrs(foo=99, bar=[0, 1, 2])}) 316 | with pytest.raises(ContainsGroupError): 317 | _ = to_zarr(spec_2, store, "/group_a") 318 | 319 | # check that we can't call to_zarr with the original spec if the group has changed 320 | group.attrs["foo"] = 100 321 | with pytest.raises(ContainsGroupError): 322 | _ = to_zarr(spec, store, "/group_a") 323 | group.attrs["foo"] = 10 324 | 325 | # materialize again with overwrite 326 | group2 = to_zarr(spec, store, "/group_a", overwrite=True) 327 | assert group2 == group 328 | 329 | # again with class methods 330 | group3 = spec.to_zarr(store, "/group_b") 331 | observed = spec.from_zarr(group3) 332 | assert observed == spec 333 | 334 | 335 | @pytest.mark.parametrize("base", range(1, 5)) 336 | def test_shape_chunks(base: int) -> None: 337 | """ 338 | Test that the length of the chunks and the shape match 339 | """ 340 | with pytest.raises(ValidationError): 341 | ArraySpec(shape=(1,) * base, chunks=(1,) * (base + 1), dtype="uint8", attributes={}) 342 | with pytest.raises(ValidationError): 343 | ArraySpec(shape=(1,) * (base + 1), chunks=(1,) * base, dtype="uint8", attributes={}) 344 | 345 | 346 | def test_validation() -> None: 347 | """ 348 | Test that specialized GroupSpec and ArraySpec instances cannot be serialized from 349 | the wrong inputs without a ValidationError. 350 | """ 351 | zarr = pytest.importorskip("zarr") 352 | 353 | class GroupAttrsA(TypedDict): 354 | group_a: bool 355 | 356 | class GroupAttrsB(TypedDict): 357 | group_b: bool 358 | 359 | class ArrayAttrsA(TypedDict): 360 | array_a: bool 361 | 362 | class ArrayAttrsB(TypedDict): 363 | array_b: bool 364 | 365 | class ArrayA(ArraySpec): 366 | attributes: ArrayAttrsA 367 | 368 | class ArrayB(ArraySpec): 369 | attributes: ArrayAttrsB 370 | 371 | class GroupA(GroupSpec): 372 | attributes: GroupAttrsA 373 | members: Mapping[str, ArrayA] 374 | 375 | class GroupB(GroupSpec): 376 | attributes: GroupAttrsB 377 | members: Mapping[str, ArrayB] 378 | 379 | store = zarr.storage.MemoryStore 380 | 381 | specA = GroupA( 382 | attributes=GroupAttrsA(group_a=True), 383 | members={ 384 | "a": ArrayA( 385 | attributes=ArrayAttrsA(array_a=True), 386 | shape=(100,), 387 | dtype="uint8", 388 | chunks=(10,), 389 | ) 390 | }, 391 | ) 392 | 393 | specB = GroupB( 394 | attributes=GroupAttrsB(group_b=True), 395 | members={ 396 | "a": ArrayB( 397 | attributes=ArrayAttrsB(array_b=True), 398 | shape=(100,), 399 | dtype="uint8", 400 | chunks=(10,), 401 | ) 402 | }, 403 | ) 404 | 405 | # check that we cannot create a specialized GroupSpec with the wrong attributes 406 | with pytest.raises(ValidationError): 407 | GroupB( 408 | attributes=GroupAttrsA(group_a=True), 409 | members={}, 410 | ) 411 | 412 | store = zarr.storage.MemoryStore() 413 | groupAMat = specA.to_zarr(store, path="group_a") 414 | groupBMat = specB.to_zarr(store, path="group_b") 415 | 416 | GroupA.from_zarr(groupAMat) 417 | GroupB.from_zarr(groupBMat) 418 | 419 | ArrayA.from_zarr(groupAMat["a"]) 420 | ArrayB.from_zarr(groupBMat["a"]) 421 | 422 | with pytest.raises(ValidationError): 423 | ArrayA.from_zarr(groupBMat["a"]) 424 | 425 | with pytest.raises(ValidationError): 426 | ArrayB.from_zarr(groupAMat["a"]) 427 | 428 | with pytest.raises(ValidationError): 429 | 
GroupB.from_zarr(groupAMat) 430 | 431 | with pytest.raises(ValidationError): 432 | GroupA.from_zarr(groupBMat) 433 | 434 | 435 | @pytest.mark.parametrize("data", ["/", "a/b/c"]) 436 | def test_member_name(data: str) -> None: 437 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid.'): 438 | GroupSpec(attributes={}, members={data: GroupSpec(attributes={}, members={})}) 439 | 440 | 441 | @pytest.mark.parametrize( 442 | ("data", "expected"), 443 | [ 444 | ( 445 | ArraySpec.from_array(np.arange(10)), 446 | {"": ArraySpec.from_array(np.arange(10))}, 447 | ), 448 | ( 449 | GroupSpec( 450 | attributes={"foo": 10}, 451 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 452 | ), 453 | { 454 | "": BaseGroupSpec(attributes={"foo": 10}), 455 | "/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 456 | }, 457 | ), 458 | ( 459 | GroupSpec( 460 | attributes={}, 461 | members={ 462 | "a": GroupSpec( 463 | attributes={"foo": 10}, 464 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 465 | ), 466 | "b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 467 | }, 468 | ), 469 | { 470 | "": BaseGroupSpec(attributes={}), 471 | "/a": BaseGroupSpec(attributes={"foo": 10}), 472 | "/a/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 473 | "/b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 474 | }, 475 | ), 476 | ], 477 | ) 478 | def test_flatten_unflatten( 479 | data: ArraySpec | GroupSpec, expected: dict[str, ArraySpec | GroupSpec] 480 | ) -> None: 481 | flattened = to_flat(data) 482 | assert flattened == expected 483 | assert from_flat(flattened) == data 484 | 485 | 486 | # todo: parametrize 487 | def test_array_like() -> None: 488 | a = ArraySpec.from_array(np.arange(10)) # type: ignore[var-annotated] 489 | assert a.like(a) 490 | 491 | b = a.model_copy(update={"dtype": "uint8"}) 492 | assert not a.like(b) 493 | assert a.like(b, exclude={"dtype"}) 494 | assert a.like(b, include={"shape"}) 495 | 496 | c = a.model_copy(update={"shape": (100, 100)}) 497 | assert not a.like(c) 498 | assert a.like(c, exclude={"shape"}) 499 | assert a.like(c, include={"dtype"}) 500 | 501 | 502 | def test_array_like_with_zarr() -> None: 503 | zarr = pytest.importorskip("zarr") 504 | arr = ArraySpec(shape=(1,), dtype="uint8", chunks=(1,), attributes={}) 505 | store = zarr.storage.MemoryStore() 506 | arr_stored = arr.to_zarr(store, path="arr") 507 | assert arr.like(arr_stored) 508 | 509 | dissimilar_arr = arr.model_copy(update={"attributes": {"a": 10}}).to_zarr(store, path="arr_2") 510 | assert not arr.like(dissimilar_arr) 511 | assert arr.like(dissimilar_arr, exclude={"attributes"}) 512 | 513 | 514 | # todo: parametrize 515 | def test_group_like() -> None: 516 | tree: dict[str, BaseGroupSpec | ArraySpec] = { 517 | "": BaseGroupSpec(attributes={"path": ""}), 518 | "/a": BaseGroupSpec(attributes={"path": "/a"}), 519 | "/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/b"}), 520 | "/a/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/a/b"}), 521 | } 522 | group = GroupSpec.from_flat(tree) # type: ignore[var-annotated] 523 | assert group.like(group) 524 | assert not group.like(group.model_copy(update={"attributes": {}})) 525 | assert group.like(group.model_copy(update={"attributes": {}}), exclude={"attributes"}) 526 | assert group.like(group.model_copy(update={"attributes": {}}), include={"members"}) 527 | 528 | 529 | # todo: parametrize 530 | def test_from_zarr_depth() -> None: 531 | 
zarr = pytest.importorskip("zarr") 532 | tree: dict[str, BaseGroupSpec | ArraySpec] = { 533 | "": BaseGroupSpec(attributes={"level": 0, "type": "group"}), 534 | "/1": BaseGroupSpec(attributes={"level": 1, "type": "group"}), 535 | "/1/2": BaseGroupSpec(attributes={"level": 2, "type": "group"}), 536 | "/1/2/1": BaseGroupSpec(attributes={"level": 3, "type": "group"}), 537 | "/1/2/2": ArraySpec.from_array(np.arange(20), attributes={"level": 3, "type": "array"}), 538 | } 539 | 540 | store = zarr.storage.MemoryStore() 541 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 542 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 543 | assert group_in_0.attributes == tree[""].attributes 544 | 545 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 546 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 547 | assert group_in_1.members["1"].attributes == tree["/1"].attributes 548 | 549 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 550 | assert group_in_2.members["1"].members["2"].attributes == tree["/1/2"].attributes 551 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 552 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 553 | 554 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 555 | assert group_in_3.members["1"].members["2"].members["1"].attributes == tree["/1/2/1"].attributes 556 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 557 | assert group_in_3.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 558 | assert group_in_3.members["1"].members["2"].attributes == tree["/1/2"].attributes # type: ignore[attr-defined] 559 | 560 | 561 | @pytest.mark.parametrize(("dtype_example"), DTYPE_EXAMPLES_V2, ids=str) 562 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 563 | """ 564 | Test that deserializing an ArraySpec from a zarr python store works as expected. 565 | """ 566 | zarr = pytest.importorskip("zarr") 567 | store = {} 568 | data_type = dtype_example.name 569 | if ZARR_PYTHON_VERSION >= Version("3.1.0") and data_type == "|O": 570 | pytest.skip(reason="Data type inference with an object dtype will fail in zarr>=3.1.0") 571 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=2) 572 | 573 | arr_spec = ArraySpec.from_zarr(arr) 574 | 575 | observed = {"attributes": arr.attrs.asdict()} | json.loads( 576 | store[".zarray"].to_bytes(), object_hook=tuplify_json 577 | ) 578 | if observed["filters"] is not None: 579 | observed["filters"] = list(observed["filters"]) 580 | # this covers the case of the structured data type, which would otherwise be deserialized as a 581 | # tuple of tuples, but is stored on the arrayspec as a list of tuples. 
582 | if isinstance(observed["dtype"], tuple): 583 | observed["dtype"] = list(observed["dtype"]) 584 | 585 | assert json_eq(arr_spec.model_dump(), observed) 586 | 587 | 588 | def test_mix_v3_v2_fails() -> None: 589 | from pydantic_zarr.v3 import ArraySpec as ArraySpecv3 590 | 591 | members_flat = {"/a": ArraySpecv3.from_array(np.ones(1))} 592 | with pytest.raises( 593 | ValueError, 594 | match=re.escape( 595 | "Value at '/a' is not a v2 ArraySpec or GroupSpec (got type(value)=)" 596 | ), 597 | ): 598 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 599 | 600 | 601 | @pytest.mark.skipif(not ZARR_AVAILABLE, reason="zarr-python is not installed") 602 | def test_typed_members() -> None: 603 | """ 604 | Test GroupSpec creation with typed members 605 | """ 606 | array1d = ArraySpec( 607 | shape=(1,), 608 | dtype="uint8", 609 | chunks=(1,), 610 | fill_value=0, 611 | compressor=None, 612 | attributes={}, 613 | ) 614 | 615 | class DatasetMembers(TypedDict): 616 | x: ArraySpec 617 | y: ArraySpec 618 | 619 | class DatasetGroup(GroupSpec): 620 | members: DatasetMembers 621 | 622 | class ExpectedMembers(TypedDict): 623 | r10m: DatasetGroup 624 | r20m: DatasetGroup 625 | 626 | class ExpectedGroup(GroupSpec): 627 | members: ExpectedMembers 628 | 629 | flat = { 630 | "": BaseGroupSpec(attributes={}), 631 | "/r10m": BaseGroupSpec(attributes={}), 632 | "/r20m": BaseGroupSpec(attributes={}), 633 | "/r10m/x": array1d, 634 | "/r10m/y": array1d, 635 | "/r20m/x": array1d, 636 | "/r20m/y": array1d, 637 | } 638 | 639 | zg = GroupSpec.from_flat(flat).to_zarr({}, path="") 640 | ExpectedGroup.from_zarr(zg) 641 | 642 | 643 | def test_arrayspec_with_methods() -> None: 644 | """ 645 | Test that ArraySpec with_* methods create new validated copies 646 | """ 647 | original = ArraySpec.from_array(np.arange(10), attributes={"foo": "bar"}) 648 | 649 | # Test with_attributes 650 | new_attrs = original.with_attributes({"baz": "qux"}) 651 | assert new_attrs.attributes == {"baz": "qux"} 652 | assert original.attributes == {"foo": "bar"} # Original unchanged 653 | assert new_attrs is not original 654 | 655 | # Test with_shape 656 | new_shape = original.with_shape((20,)) 657 | assert new_shape.shape == (20,) 658 | assert original.shape == (10,) 659 | 660 | # Test with_chunks 661 | new_chunks = original.with_chunks((5,)) 662 | assert new_chunks.chunks == (5,) 663 | assert original.chunks == (10,) 664 | 665 | # Test with_dtype 666 | new_dtype = original.with_dtype("float32") 667 | assert new_dtype.dtype == " None: 697 | """ 698 | Test that ArraySpec with_* methods trigger validation 699 | """ 700 | spec = ArraySpec(shape=(10,), chunks=(5,), dtype="uint8", attributes={}) 701 | 702 | # Test that validation fails when shape and chunks have mismatched lengths 703 | with pytest.raises(ValidationError): 704 | spec.with_shape((10, 10)) # Shape has 2 dims but chunks still has 1 705 | 706 | 707 | def test_groupspec_with_methods() -> None: 708 | """ 709 | Test that GroupSpec with_* methods create new validated copies 710 | """ 711 | array_spec = ArraySpec.from_array(np.arange(10), attributes={}) 712 | original = GroupSpec(attributes={"group": "attr"}, members={"arr": array_spec}) 713 | 714 | # Test with_attributes 715 | new_attrs = original.with_attributes({"new": "attr"}) 716 | assert new_attrs.attributes == {"new": "attr"} 717 | assert original.attributes == {"group": "attr"} # Original unchanged 718 | assert new_attrs is not original 719 | 720 | # Test with_members 721 | new_array = ArraySpec.from_array(np.arange(5), 
attributes={}) 722 | new_members = original.with_members({"new_arr": new_array}) 723 | assert "new_arr" in new_members.members 724 | assert "arr" not in new_members.members # Replacement, not merge 725 | assert "arr" in original.members # Original unchanged 726 | 727 | 728 | def test_groupspec_with_members_validation() -> None: 729 | """ 730 | Test that GroupSpec with_members triggers validation 731 | """ 732 | spec = GroupSpec(attributes={}, members={}) 733 | 734 | # Test that validation fails with invalid member names 735 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid'): 736 | spec.with_members({"a/b": ArraySpec.from_array(np.arange(10), attributes={})}) 737 | --------------------------------------------------------------------------------