├── src └── pydantic_zarr │ ├── py.typed │ ├── __init__.py │ ├── experimental │ ├── __init__.py │ ├── README.md │ └── core.py │ └── core.py ├── tests ├── test_docs │ ├── __init__.py │ └── test_docs.py └── test_pydantic_zarr │ ├── __init__.py │ ├── test_experimental │ ├── __init__.py │ ├── test_core.py │ ├── conftest.py │ ├── test_v3.py │ └── test_v2.py │ ├── test_core.py │ ├── conftest.py │ ├── test_v3.py │ └── test_v2.py ├── docs ├── api │ ├── core.md │ ├── v2.md │ ├── v3.md │ └── experimental │ │ ├── v2.md │ │ ├── v3.md │ │ └── core.md ├── usage_zarr_v3.md ├── plugins │ └── main.py ├── index.md ├── release-notes.md ├── experimental │ ├── index.md │ └── usage.md └── usage_zarr_v2.md ├── changes ├── .gitignore └── README.md ├── .github ├── dependabot.yml └── workflows │ ├── pre-commit.yml │ ├── check_changelogs.yml │ ├── cd.yml │ └── test.yml ├── .readthedocs.yaml ├── LICENSE ├── .pre-commit-config.yaml ├── ci └── check_changelog_entries.py ├── mkdocs.yaml ├── README.md ├── .gitignore └── pyproject.toml /src/pydantic_zarr/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_docs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/pydantic_zarr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/api/core.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.core -------------------------------------------------------------------------------- /docs/api/v2.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.v2 -------------------------------------------------------------------------------- /docs/api/v3.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.v3 -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/test_core.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/api/experimental/v2.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.experimental.v2 2 | -------------------------------------------------------------------------------- /docs/api/experimental/v3.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.experimental.v3 2 | -------------------------------------------------------------------------------- /docs/api/experimental/core.md: -------------------------------------------------------------------------------- 1 | ::: pydantic_zarr.experimental.core 2 | 
-------------------------------------------------------------------------------- /changes/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore all files in this directory... 2 | * 3 | # ...except for the gitignore itself and the README 4 | !.gitignore 5 | !README.md 6 | # And keep the actual changelog fragments 7 | !*.md 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | - package-ecosystem: github-actions 5 | directory: / 6 | schedule: 7 | interval: weekly 8 | groups: 9 | actions: 10 | patterns: 11 | - '*' 12 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v6 13 | - uses: actions/setup-python@v6 14 | - uses: pre-commit/action@v3.0.1 15 | -------------------------------------------------------------------------------- /.github/workflows/check_changelogs.yml: -------------------------------------------------------------------------------- 1 | name: Check changelog entries 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | check-changelogs: 8 | name: Check changelog entries 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v6 13 | 14 | - name: Install uv 15 | uses: astral-sh/setup-uv@v7 16 | 17 | - name: Check changelog entries 18 | run: uv run --no-sync python ci/check_changelog_entries.py 19 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for MkDocs projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.12" 12 | 13 | mkdocs: 14 | configuration: mkdocs.yaml 15 | 16 | # Optionally declare the Python requirements required to build your docs 17 | python: 18 | install: 19 | - method: pip 20 | path: . 21 | extra_requirements: 22 | - docs 23 | -------------------------------------------------------------------------------- /src/pydantic_zarr/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experimental module for pydantic-zarr. 3 | 4 | This module contains refactored versions of the core modules with 5 | breaking API changes. Use with caution as the API is not yet stable. 6 | 7 | The main changes in the experimental module: 8 | - Removed generic type parameters from ArraySpec and GroupSpec 9 | - Simplified type system using concrete union types 10 | - Added BaseGroupSpec for group-only operations 11 | 12 | To use the experimental module: 13 | 14 | from pydantic_zarr.experimental import v2, v3 15 | 16 | # Use v2.ArraySpec, v2.GroupSpec, etc. instead of the main module versions 17 | """ 18 | 19 | from . 
import core, v2, v3 20 | 21 | __all__ = ["core", "v2", "v3"] 22 | -------------------------------------------------------------------------------- /tests/test_docs/test_docs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | 5 | import pytest 6 | from pytest_examples import CodeExample, EvalExample, find_examples 7 | 8 | SOURCES_ROOT: Path = Path(__file__).parent.parent.parent / "src/pydantic_zarr" 9 | 10 | 11 | @pytest.mark.parametrize("example", find_examples(str(SOURCES_ROOT)), ids=str) 12 | def test_docstrings(example: CodeExample, eval_example: EvalExample) -> None: 13 | eval_example.run_print_check(example) 14 | 15 | 16 | @pytest.mark.parametrize("example", find_examples("docs"), ids=str) 17 | def test_docs_examples(example: CodeExample, eval_example: EvalExample) -> None: 18 | pytest.importorskip("zarr") 19 | 20 | eval_example.run_print_check(example) 21 | -------------------------------------------------------------------------------- /changes/README.md: -------------------------------------------------------------------------------- 1 | # Changelog Entries 2 | 3 | This directory contains changelog entries for the pydantic-zarr project. 4 | 5 | ## Adding a changelog entry 6 | 7 | When making a pull request, you should add a changelog entry to this directory. 8 | The entry should be in a file named `xxxx..md` where: 9 | 10 | - `xxxx` is the pull request number 11 | - `` is one of: `feature`, `bugfix`, `doc`, `removal`, or `misc` 12 | 13 | The file should contain a short description of what you have changed, and how it impacts users of `pydantic-zarr`. 14 | 15 | ## Fragment types 16 | 17 | - `feature` - for new features 18 | - `bugfix` - for bug fixes 19 | - `doc` - for documentation changes 20 | - `removal` - for removals 21 | - `misc` - for miscellaneous changes that don't fit other categories 22 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | from typing import TYPE_CHECKING, Any 5 | 6 | if TYPE_CHECKING: 7 | from collections.abc import Mapping 8 | 9 | import numpy as np 10 | 11 | 12 | @dataclass 13 | class FakeArray: 14 | shape: tuple[int, ...] 15 | dtype: np.dtype[Any] 16 | 17 | 18 | @dataclass 19 | class FakeH5PyArray(FakeArray): 20 | attrs: Mapping[str, Any] 21 | chunks: tuple[int, ...] 22 | 23 | 24 | @dataclass 25 | class FakeDaskArray(FakeArray): 26 | chunksize: tuple[int, ...] 27 | chunks: tuple[tuple[int, ...], ...] 28 | 29 | 30 | @dataclass 31 | class FakeXarray(FakeArray): 32 | chunksizes: dict[str, tuple[int, ...]] 33 | chunks: tuple[tuple[int, ...], ...] 
| None 34 | -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish Python 🐍 distributions 📦 to PyPI 8 | runs-on: ubuntu-latest 9 | if: 10 | startsWith(github.ref, 'refs/tags') 11 | steps: 12 | - uses: actions/checkout@v6 13 | - name: Set up Python 14 | uses: actions/setup-python@v6 15 | with: 16 | python-version: "3.x" 17 | - name: Install Hatch 18 | run: | 19 | pip install hatch==1.14.1 20 | - name: Build package 21 | run: | 22 | hatch build 23 | - name: Publish distribution 📦 to PyPI 24 | uses: pypa/gh-action-pypi-publish@release/v1 25 | with: 26 | password: ${{ secrets.PYPI_API_TOKEN }} 27 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from pydantic_zarr.core import ensure_member_name, tuplify_json 6 | 7 | 8 | @pytest.mark.parametrize("data", ["/", "///", "a/b/", "a/b/vc"]) 9 | def test_parse_str_no_path(data: str) -> None: 10 | with pytest.raises(ValueError, match='Strings containing "/" are invalid.'): 11 | ensure_member_name(data) 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("input_obj", "expected_output"), 16 | [ 17 | ({"key": [1, 2, 3]}, {"key": (1, 2, 3)}), 18 | ([1, [2, 3], 4], (1, (2, 3), 4)), 19 | ({"nested": {"list": [1, 2]}}, {"nested": {"list": (1, 2)}}), 20 | ([{"a": [1, 2]}, {"b": 3}], ({"a": (1, 2)}, {"b": 3})), 21 | ([], ()), 22 | ], 23 | ) 24 | def test_tuplify_json(input_obj: object, expected_output: object) -> None: 25 | """ 26 | Test that tuplify_json converts lists to tuples, with recursion inside sequences 27 | and dictionaries. 28 | """ 29 | assert tuplify_json(input_obj) == expected_output 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2023 Howard Hughes Medical Institute 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 7 | Neither the name of HHMI nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_commit_msg: "chore: update pre-commit hooks" 3 | autoupdate_schedule: "monthly" 4 | autofix_commit_msg: "style: pre-commit fixes" 5 | autofix_prs: false 6 | default_stages: [pre-commit, pre-push] 7 | repos: 8 | - repo: https://github.com/astral-sh/ruff-pre-commit 9 | rev: v0.9.7 10 | hooks: 11 | - id: ruff 12 | args: ["--fix", "--show-fixes"] 13 | - id: ruff-format 14 | - repo: https://github.com/codespell-project/codespell 15 | rev: v2.4.1 16 | hooks: 17 | - id: codespell 18 | args: ["-L", "fo,ihs,kake,te", "-S", "fixture"] 19 | - repo: https://github.com/pre-commit/pre-commit-hooks 20 | rev: v5.0.0 21 | hooks: 22 | - id: check-yaml 23 | - id: trailing-whitespace 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: v1.15.0 26 | hooks: 27 | - id: mypy 28 | files: src 29 | additional_dependencies: 30 | - pytest 31 | - pydantic>2.0.0 32 | - numpy 33 | - zarr>=3.1.0 34 | - numcodecs 35 | - repo: https://github.com/scientific-python/cookie 36 | rev: 2025.01.22 37 | hooks: 38 | - id: sp-repo-review 39 | - repo: https://github.com/pre-commit/pygrep-hooks 40 | rev: v1.10.0 41 | hooks: 42 | - id: rst-directive-colons 43 | - id: rst-inline-touching-normal 44 | - repo: https://github.com/numpy/numpydoc 45 | rev: v1.8.0 46 | hooks: 47 | - id: numpydoc-validation 48 | -------------------------------------------------------------------------------- /ci/check_changelog_entries.py: -------------------------------------------------------------------------------- 1 | """ 2 | Check changelog entries have the correct filename structure. 
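Valid entries are named like ``1234.feature.md``: an integer pull request number,
followed by one of the types in ``VALID_CHANGELOG_TYPES``, followed by the ``.md`` suffix.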
3 | """ 4 | 5 | import sys 6 | from pathlib import Path 7 | 8 | VALID_CHANGELOG_TYPES = ["feature", "bugfix", "doc", "removal", "misc"] 9 | CHANGELOG_DIRECTORY = (Path(__file__).parent.parent / "changes").resolve() 10 | 11 | 12 | def is_int(s: str) -> bool: 13 | try: 14 | int(s) 15 | except ValueError: 16 | return False 17 | else: 18 | return True 19 | 20 | 21 | if __name__ == "__main__": 22 | print(f"Looking for changelog entries in {CHANGELOG_DIRECTORY}") 23 | entries = CHANGELOG_DIRECTORY.glob("*") 24 | entries = [e for e in entries if e.name not in [".gitignore", "README.md"]] 25 | print(f"Found {len(entries)} entries") 26 | print() 27 | 28 | bad_suffix = [e for e in entries if e.suffix != ".md"] 29 | bad_issue_no = [e for e in entries if not is_int(e.name.split(".")[0])] 30 | bad_type = [e for e in entries if e.name.split(".")[1] not in VALID_CHANGELOG_TYPES] 31 | 32 | if len(bad_suffix) or len(bad_issue_no) or len(bad_type): 33 | if len(bad_suffix): 34 | print("Changelog entries without .md suffix") 35 | print("-------------------------------------") 36 | print("\n".join([p.name for p in bad_suffix])) 37 | print() 38 | if len(bad_issue_no): 39 | print("Changelog entries without integer issue number") 40 | print("----------------------------------------------") 41 | print("\n".join([p.name for p in bad_issue_no])) 42 | print() 43 | if len(bad_type): 44 | print("Changelog entries without valid type") 45 | print("------------------------------------") 46 | print("\n".join([p.name for p in bad_type])) 47 | print(f"Valid types are: {VALID_CHANGELOG_TYPES}") 48 | print() 49 | sys.exit(1) 50 | 51 | sys.exit(0) 52 | -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- 1 | site_name: "pydantic-zarr" 2 | site_url: https://pydantic-zarr.readthedocs.io/ 3 | site_author: Davis Bennett 4 | site_description: >- 5 | Documentation for pydantic-zarr 6 | 7 | # Repository 8 | repo_name: zarr-developers/pydantic-zarr 9 | repo_url: https://github.com/zarr-developers/pydantic-zarr 10 | 11 | theme: 12 | name: material 13 | palette: 14 | # Palette toggle for light mode 15 | - scheme: default 16 | toggle: 17 | icon: material/brightness-7 18 | name: Switch to dark mode 19 | 20 | # Palette toggle for dark mode 21 | - scheme: slate 22 | toggle: 23 | icon: material/brightness-4 24 | name: Switch to light mode 25 | 26 | nav: 27 | - About: index.md 28 | - Usage (Zarr V3): usage_zarr_v3.md 29 | - Usage (Zarr V2): usage_zarr_v2.md 30 | - Experimental features: 31 | - Overview: experimental/index.md 32 | - Usage: experimental/usage.md 33 | - API: 34 | - core: api/core.md 35 | - v2: api/v2.md 36 | - v3: api/v3.md 37 | - experimental: 38 | - core: api/experimental/core.md 39 | - v2: api/experimental/v2.md 40 | - v3: api/experimental/v3.md 41 | - Release Notes: release-notes.md 42 | plugins: 43 | - mkdocstrings: 44 | handlers: 45 | python: 46 | options: 47 | docstring_style: numpy 48 | members_order: source 49 | separate_signature: true 50 | filters: ["!^_"] 51 | docstring_options: 52 | ignore_init_summary: true 53 | merge_init_into_class: true 54 | 55 | markdown_extensions: 56 | - pymdownx.highlight: 57 | anchor_linenums: true 58 | line_spans: __span 59 | pygments_lang_class: true 60 | - pymdownx.inlinehilite 61 | - pymdownx.snippets 62 | - pymdownx.superfences 63 | - pymdownx.tabbed: 64 | alternate_style: true 65 | - toc: 66 | baselevel: 2 67 | toc_depth: 4 68 | permalink: "#" 69 | 
-------------------------------------------------------------------------------- /docs/usage_zarr_v3.md: -------------------------------------------------------------------------------- 1 | # Usage (Zarr V3) 2 | 3 | ## Defining Zarr v3 hierarchies 4 | 5 | ```python 6 | from pydantic_zarr.v3 import ArraySpec, GroupSpec, NamedConfig 7 | 8 | array_attributes = {"baz": [1, 2, 3]} 9 | group_attributes = {"foo": 42, "bar": False} 10 | 11 | array_spec = ArraySpec( 12 | attributes=array_attributes, 13 | shape=[1000, 1000], 14 | dimension_names=["rows", "columns"], 15 | data_type="uint8", 16 | chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": [1000, 100]}), 17 | chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}), 18 | codecs=[NamedConfig(name="gzip", configuration={"level": 1})], 19 | storage_transformers=(), 20 | fill_value=0, 21 | ) 22 | 23 | spec = GroupSpec(attributes=group_attributes, members={"array": array_spec}) 24 | print(spec.model_dump_json(indent=2)) 25 | """ 26 | { 27 | "zarr_format": 3, 28 | "node_type": "group", 29 | "attributes": { 30 | "foo": 42, 31 | "bar": false 32 | }, 33 | "members": { 34 | "array": { 35 | "zarr_format": 3, 36 | "node_type": "array", 37 | "attributes": { 38 | "baz": [ 39 | 1, 40 | 2, 41 | 3 42 | ] 43 | }, 44 | "shape": [ 45 | 1000, 46 | 1000 47 | ], 48 | "data_type": "uint8", 49 | "chunk_grid": { 50 | "name": "regular", 51 | "configuration": { 52 | "chunk_shape": [ 53 | 1000, 54 | 100 55 | ] 56 | } 57 | }, 58 | "chunk_key_encoding": { 59 | "name": "default", 60 | "configuration": { 61 | "separator": "/" 62 | } 63 | }, 64 | "fill_value": 0, 65 | "codecs": [ 66 | { 67 | "name": "gzip", 68 | "configuration": { 69 | "level": 1 70 | } 71 | } 72 | ], 73 | "storage_transformers": [], 74 | "dimension_names": [ 75 | "rows", 76 | "columns" 77 | ] 78 | } 79 | } 80 | } 81 | """ 82 | ``` 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pydantic-zarr 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/pydantic-zarr)](https://pypi.python.org/pypi/pydantic-zarr) 4 | 5 | [Pydantic](https://docs.pydantic.dev/latest/) models for [Zarr](https://zarr.readthedocs.io/en/stable/index.html). 6 | 7 | ## Installation 8 | 9 | ```sh 10 | pip install -U pydantic-zarr 11 | # or, with zarr i/o support 12 | pip install -U "pydantic-zarr[zarr]" 13 | ``` 14 | 15 | ## Getting help 16 | 17 | - Docs: see the [documentation](https://pydantic-zarr.readthedocs.io/) for detailed information about this project. 18 | - Chat: We use [Zulip](https://ossci.zulipchat.com/#narrow/channel/423692-Zarr) for project-related chat. 
19 | 20 | ## Example 21 | 22 | ```python 23 | import zarr 24 | from pydantic_zarr import GroupSpec 25 | 26 | group = zarr.group(path='foo') 27 | array = zarr.create(store = group.store, path='foo/bar', shape=10, dtype='uint8') 28 | array.attrs.put({'metadata': 'hello'}) 29 | 30 | # this is a pydantic model 31 | spec = GroupSpec.from_zarr(group) 32 | print(spec.model_dump()) 33 | """ 34 | { 35 | 'zarr_format': 2, 36 | 'attributes': {}, 37 | 'members': { 38 | 'bar': { 39 | 'zarr_format': 2, 40 | 'attributes': {'metadata': 'hello'}, 41 | 'shape': (10,), 42 | 'chunks': (10,), 43 | 'dtype': '|u1', 44 | 'fill_value': 0, 45 | 'order': 'C', 46 | 'filters': None, 47 | 'dimension_separator': '.', 48 | 'compressor': { 49 | 'id': 'blosc', 50 | 'cname': 'lz4', 51 | 'clevel': 5, 52 | 'shuffle': 1, 53 | 'blocksize': 0, 54 | }, 55 | } 56 | }, 57 | } 58 | """ 59 | ``` 60 | 61 | ## History 62 | 63 | This project was developed at [HHMI / Janelia Research Campus](https://www.janelia.org/). It was originally written by Davis Bennett to solve problems he encountered while working on the [Cellmap Project team](https://www.janelia.org/project-team/cellmap/members). In December of 2024 this project was migrated from the [`janelia-cellmap`](https://github.com/janelia-cellmap) github organization to [`zarr-developers`](https://github.com/zarr-developers) organization. 64 | -------------------------------------------------------------------------------- /docs/plugins/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations as _annotations 2 | 3 | import logging 4 | import os 5 | import re 6 | from pathlib import Path 7 | 8 | from mkdocs.config import Config 9 | from mkdocs.structure.files import Files 10 | from mkdocs.structure.pages import Page 11 | 12 | logger = logging.getLogger("mkdocs.plugin") 13 | THIS_DIR = Path(__file__).parent 14 | DOCS_DIR = THIS_DIR.parent 15 | PROJECT_ROOT = DOCS_DIR.parent 16 | 17 | 18 | def on_pre_build(config: Config) -> None: 19 | """ 20 | Before the build starts. 21 | """ 22 | add_changelog() 23 | 24 | 25 | def on_files(files: Files, config: Config) -> Files: 26 | """ 27 | After the files are loaded, but before they are read. 28 | """ 29 | return files 30 | 31 | 32 | def on_page_markdown(markdown: str, page: Page, config: Config, files: Files) -> str: 33 | """ 34 | Called on each file after it is read and before it is converted to HTML. 
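    In this project the hook only substitutes the ``{{ version }}`` placeholder on
    ``index.md`` (via ``add_version``); every other page is returned unchanged.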
35 | """ 36 | if md := add_version(markdown, page): 37 | return md 38 | else: 39 | return markdown 40 | 41 | 42 | def add_changelog() -> None: 43 | history = (PROJECT_ROOT / "HISTORY.md").read_text() 44 | history = re.sub( 45 | r"#(\d+)", r"[#\1](https://github.com/pydantic/pydantic/issues/\1)", history 46 | ) 47 | history = re.sub( 48 | r"(\s)@([\w\-]+)", r"\1[@\2](https://github.com/\2)", history, flags=re.I 49 | ) 50 | history = re.sub("@@", "@", history) 51 | new_file = DOCS_DIR / "changelog.md" 52 | 53 | # avoid writing file unless the content has changed to avoid infinite build loop 54 | if not new_file.is_file() or new_file.read_text() != history: 55 | new_file.write_text(history) 56 | 57 | 58 | MIN_MINOR_VERSION = 7 59 | MAX_MINOR_VERSION = 11 60 | 61 | 62 | def add_version(markdown: str, page: Page) -> str | None: 63 | if page.file.src_uri != "index.md": 64 | return None 65 | 66 | version_ref = os.getenv("GITHUB_REF") 67 | if version_ref and version_ref.startswith("refs/tags/"): 68 | version = re.sub("^refs/tags/", "", version_ref.lower()) 69 | url = f"https://github.com/pydantic/pydantic/releases/tag/{version}" 70 | version_str = f"Documentation for version: [{version}]({url})" 71 | elif sha := os.getenv("GITHUB_SHA"): 72 | url = f"https://github.com/pydantic/pydantic/commit/{sha}" 73 | sha = sha[:7] 74 | version_str = f"Documentation for development version: [{sha}]({url})" 75 | else: 76 | version_str = "Documentation for development version" 77 | markdown = re.sub(r"{{ *version *}}", version_str, markdown) 78 | return markdown 79 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Test 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | workflow_dispatch: 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | test: 19 | name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, zarr-python=${{ matrix.zarr-version }} 20 | 21 | strategy: 22 | matrix: 23 | python-version: ['3.11', '3.12', '3.13'] 24 | zarr-version: ['3.0.10', '3.1.0', 'none'] 25 | os: ["ubuntu-latest"] 26 | runs-on: ${{ matrix.os }} 27 | 28 | steps: 29 | - uses: actions/checkout@v6 30 | - name: Set up Python 31 | uses: actions/setup-python@v6 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | cache: 'pip' 35 | - name: Install Hatch 36 | run: | 37 | python -m pip install --upgrade pip 38 | pip install hatch 39 | - name: Run Tests (with zarr) 40 | if: matrix.zarr-version != 'none' 41 | run: | 42 | hatch run test.py${{ matrix.python-version }}-${{ matrix.zarr-version }}:list-env 43 | hatch run test.py${{ matrix.python-version }}-${{ matrix.zarr-version }}:test-cov 44 | - name: Run Tests (without zarr) 45 | if: matrix.zarr-version == 'none' 46 | run: | 47 | hatch run test-base.py${{ matrix.python-version }}:list-env 48 | hatch run test-base.py${{ matrix.python-version }}:test-cov 49 | - name: Upload coverage 50 | uses: codecov/codecov-action@v5 51 | with: 52 | token: ${{ secrets.CODECOV_TOKEN }} 53 | verbose: true # optional (default = false) 54 | 55 | doctests: 56 | name: doctests 57 | runs-on: ubuntu-latest 
58 | steps: 59 | - uses: actions/checkout@v6 60 | with: 61 | fetch-depth: 0 # required for hatch version discovery, which is needed for numcodecs.zarr3 62 | - name: Set up Python 63 | uses: actions/setup-python@v6 64 | with: 65 | python-version: '3.11' 66 | cache: 'pip' 67 | - name: Install Hatch 68 | run: | 69 | python -m pip install --upgrade pip 70 | pip install hatch 71 | - name: Run Tests 72 | run: | 73 | hatch run docs:test 74 | 75 | test-min-reqs: 76 | runs-on: ubuntu-latest 77 | steps: 78 | - uses: actions/checkout@v6 79 | - name: Set up Python 80 | uses: actions/setup-python@v6 81 | with: 82 | python-version: '3.11' 83 | - name: Install minimum requirements 84 | run: | 85 | pip install . 86 | python -c "import pydantic_zarr.v2; import pydantic_zarr.v3" 87 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | from dataclasses import dataclass 5 | from importlib.metadata import version 6 | from importlib.util import find_spec 7 | 8 | from packaging.version import Version 9 | 10 | ZARR_AVAILABLE = find_spec("zarr") is not None 11 | 12 | if ZARR_AVAILABLE: 13 | ZARR_PYTHON_VERSION = Version(version("zarr")) 14 | else: 15 | ZARR_PYTHON_VERSION = Version("0.0.0") 16 | 17 | DTYPE_EXAMPLES_V2: tuple[DTypeExample, ...] 18 | DTYPE_EXAMPLES_V3: tuple[DTypeExample, ...] 19 | 20 | 21 | @dataclass(frozen=True, slots=True) 22 | class DTypeExample: 23 | name: object 24 | fill_value: object 25 | 26 | 27 | if ZARR_PYTHON_VERSION < Version("3.1.0"): 28 | DTYPE_EXAMPLES_V2 = ( 29 | DTypeExample("|b1", True), 30 | DTypeExample("|i1", -1), 31 | DTypeExample("|i2", -1), 32 | DTypeExample("|i4", -1), 33 | DTypeExample("|i8", -1), 34 | DTypeExample("|u1", 1), 35 | DTypeExample(" Mapping: ... 30 | 31 | 32 | @overload 33 | def tuplify_json(obj: list) -> tuple: ... 34 | 35 | 36 | def tuplify_json(obj: object) -> object: 37 | """ 38 | Recursively converts lists within a Python object to tuples. 39 | """ 40 | if isinstance(obj, list): 41 | return tuple(tuplify_json(elem) for elem in obj) 42 | elif isinstance(obj, dict): 43 | return {k: tuplify_json(v) for k, v in obj.items()} 44 | else: 45 | return obj 46 | 47 | 48 | class StrictBase(BaseModel): 49 | model_config = ConfigDict(frozen=True, extra="forbid") 50 | 51 | 52 | def parse_dtype_v2(value: npt.DTypeLike) -> str | list[tuple[Any, ...]]: 53 | """ 54 | Convert the input to a NumPy dtype and either return the ``str`` attribute of that 55 | object or, if the dtype is a structured dtype, return the fields of that dtype as a list 56 | of tuples. 57 | 58 | Parameters 59 | ---------- 60 | value : npt.DTypeLike 61 | A value that can be converted to a NumPy dtype. 62 | 63 | Returns 64 | ------- 65 | 66 | A Zarr V2-compatible encoding of the dtype. 67 | 68 | References 69 | ---------- 70 | See the [Zarr V2 specification](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding) 71 | for more details on this encoding of data types. 72 | """ 73 | # Assume that a non-string sequence represents a the Zarr V2 JSON form of a structured dtype. 74 | if isinstance(value, Sequence) and not isinstance(value, str): 75 | return [tuple(v) for v in value] 76 | else: 77 | np_dtype = np.dtype(value) 78 | if np_dtype.fields is not None: 79 | # This is a structured dtype, which must be converted to a list of tuples. 
Note that 80 | # this function recurses, because a structured dtype is parametrized by other dtypes. 81 | return [(k, parse_dtype_v2(v[0])) for k, v in np_dtype.fields.items()] 82 | else: 83 | return np_dtype.str 84 | 85 | 86 | def ensure_member_name(data: Any) -> str: 87 | """ 88 | If the input is a string, then ensure that it is a valid 89 | name for a subnode in a zarr group 90 | """ 91 | if isinstance(data, str): 92 | if "/" in data: 93 | raise ValueError( 94 | f'Strings containing "/" are invalid. Got {data}, which violates this rule.' 95 | ) 96 | if data in ("", ".", ".."): 97 | raise ValueError(f"The string {data} is not a valid member name.") 98 | return data 99 | raise TypeError(f"Expected a str, got {type(data)}.") 100 | 101 | 102 | def ensure_key_no_path(data: Any) -> Any: 103 | if isinstance(data, Mapping): 104 | [ensure_member_name(key) for key in data] 105 | return data 106 | 107 | 108 | def model_like(a: BaseModel, b: BaseModel, exclude: IncEx = None, include: IncEx = None) -> bool: 109 | """ 110 | A similarity check for a pair pydantic.BaseModel, parametrized over included or excluded fields. 111 | 112 | 113 | """ 114 | 115 | a_dict = a.model_dump(exclude=exclude, include=include) 116 | b_dict = b.model_dump(exclude=exclude, include=include) 117 | return a_dict == b_dict 118 | 119 | 120 | # TODO: expose contains_array and contains_group as public functions in zarr-python 121 | # and replace these custom implementations 122 | def maybe_node( 123 | store: StoreLike, path: str, *, zarr_format: Literal[2, 3] 124 | ) -> zarr.Array | zarr.Group | None: 125 | """ 126 | Return the array or group found at the store / path, if an array or group exists there. 127 | Otherwise return None. 128 | """ 129 | from zarr.core.sync import sync 130 | from zarr.core.sync_group import get_node 131 | from zarr.storage._common import make_store_path 132 | 133 | # convert the storelike store argument to a Zarr store 134 | spath = sync(make_store_path(store, path=path)) 135 | try: 136 | return get_node(spath.store, spath.path, zarr_format=zarr_format) 137 | except FileNotFoundError: 138 | return None 139 | 140 | 141 | def ensure_multiple(data: Sequence[T]) -> Sequence[T]: 142 | """ 143 | Ensure that there is at least one element in the sequence 144 | """ 145 | if len(data) < 1: 146 | raise ValueError("Invalid length. Expected 1 or more, got 0.") 147 | return data 148 | -------------------------------------------------------------------------------- /src/pydantic_zarr/experimental/README.md: -------------------------------------------------------------------------------- 1 | # Experimental Module 2 | 3 | This module contains refactored versions of the core pydantic-zarr modules with breaking API changes. It is provided for early testing and feedback on proposed changes. 4 | 5 | ## What's Different 6 | 7 | The experimental module removes generic type parameters from `ArraySpec` and `GroupSpec`, simplifying the type system while maintaining full functionality. 8 | 9 | ### Key Changes 10 | 11 | #### 1. 
No Generic Type Parameters 12 | 13 | **Before (main module - with generics):** 14 | ```python 15 | from pydantic_zarr.v2 import ArraySpec, GroupSpec 16 | from collections.abc import Mapping 17 | 18 | # Generic type parameters allowed complex type constraints 19 | # SpecialGroup = GroupSpec[Mapping[str, "ArraySpec | GroupSpec"]] # Not supported in current version 20 | ``` 21 | 22 | **After (experimental module - without generics):** 23 | ```python 24 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 25 | 26 | # No generics - simpler and more straightforward 27 | group = GroupSpec(attributes={}, members={}) 28 | print(group) 29 | #> zarr_format=2 attributes={} members={} 30 | ``` 31 | 32 | #### 2. New `BaseGroupSpec` Class 33 | 34 | The experimental module introduces `BaseGroupSpec` - a model of a Zarr group without members. This enables two important patterns: 35 | 36 | - **Flattened hierarchies**: In `to_flat()` output, groups appear as `BaseGroupSpec` (without recursive members) 37 | - **Partial loading**: Load a group's metadata without traversing its children 38 | 39 | **Example:** 40 | ```python 41 | from pydantic_zarr.experimental.v2 import ArraySpec, BaseGroupSpec, GroupSpec 42 | 43 | # BaseGroupSpec: just metadata 44 | base_group = BaseGroupSpec(attributes={"foo": "bar"}) 45 | 46 | # Create an array spec 47 | array_spec = ArraySpec(shape=(10,), dtype='uint8', chunks=(10,), attributes={}) 48 | 49 | # GroupSpec: metadata + hierarchy 50 | group = GroupSpec( 51 | attributes={"foo": "bar"}, 52 | members={"array": array_spec} 53 | ) 54 | 55 | # Flattened representation uses BaseGroupSpec 56 | flat = group.to_flat() 57 | # Returns: {"": BaseGroupSpec(...), "/array": ArraySpec(...)} 58 | ``` 59 | 60 | #### 3. Union Types Instead of Generics 61 | 62 | Member values are now concrete union types: 63 | 64 | **Before:** 65 | ``` 66 | members: Mapping[str, T] # T was generic 67 | ``` 68 | 69 | **After:** 70 | ``` 71 | members: dict[str, ArraySpec | GroupSpec | BaseGroupSpec] 72 | ``` 73 | 74 | This provides: 75 | - ✅ Better IDE autocomplete 76 | - ✅ Clearer error messages 77 | - ✅ No runtime type checking complexity 78 | - ✅ More explicit code 79 | 80 | #### 4. Refactored `to_zarr()` Method 81 | 82 | Both `BaseGroupSpec` and `GroupSpec` have `to_zarr()` methods: 83 | 84 | - `BaseGroupSpec.to_zarr()`: Creates a group and sets attributes (no recursion) 85 | - `GroupSpec.to_zarr()`: Calls `super().to_zarr()` then recursively writes members 86 | 87 | This eliminates code duplication while maintaining the inheritance hierarchy. 88 | 89 | ## API Stability 90 | 91 | **⚠️ WARNING:** This is an experimental module. The API may change in future releases. Do not rely on it in production code without understanding the risks. 
92 | 93 | ## Migration Guide 94 | 95 | To try the experimental module: 96 | 97 | ```python 98 | # Current (stable) 99 | from pydantic_zarr.v2 import ArraySpec, GroupSpec 100 | 101 | # Experimental (breaking changes) 102 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec, BaseGroupSpec 103 | ``` 104 | 105 | ### What Works the Same 106 | 107 | - `ArraySpec.from_array()` 108 | - `ArraySpec.from_zarr()` / `ArraySpec.to_zarr()` 109 | - `GroupSpec.from_zarr()` / `GroupSpec.to_zarr()` 110 | - `GroupSpec.to_flat()` / `GroupSpec.from_flat()` 111 | - `.like()` comparisons 112 | - All codec/compressor configurations 113 | - All Zarr v2 and v3 array properties 114 | 115 | ### What Changed 116 | 117 | - ❌ Generic type parameters no longer supported 118 | - ✅ `BaseGroupSpec` class added 119 | - ✅ Member types are now explicit unions 120 | - ✅ Cleaner separation of concerns (base group vs hierarchical group) 121 | 122 | ## Rationale 123 | 124 | The generic type parameters were: 125 | - Not validated at runtime 126 | - Complex to understand and use 127 | - Provided false confidence in type safety 128 | - Made error messages harder to read 129 | 130 | Removing them in favor of explicit union types: 131 | - Improves readability 132 | - Maintains full functionality 133 | - Reduces cognitive overhead 134 | - Enables better error messages 135 | 136 | The addition of `BaseGroupSpec`: 137 | - Clarifies intent when working with flat hierarchies 138 | - Enables efficient partial loading 139 | - Prevents accidental null checks 140 | - Improves code maintainability 141 | 142 | ## Testing 143 | 144 | The experimental module passes all the same tests as the main module, with the addition of new tests for `BaseGroupSpec` functionality. 145 | 146 | ## Feedback 147 | 148 | If you use this module and have feedback on the API changes, please open an issue on GitHub with your thoughts. 149 | -------------------------------------------------------------------------------- /src/pydantic_zarr/experimental/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping, Sequence 4 | from typing import ( 5 | TYPE_CHECKING, 6 | Any, 7 | Literal, 8 | TypeAlias, 9 | TypeVar, 10 | overload, 11 | ) 12 | 13 | import numpy as np 14 | import numpy.typing as npt 15 | from pydantic import BaseModel, ConfigDict 16 | 17 | if TYPE_CHECKING: 18 | import zarr 19 | from zarr.storage._common import StoreLike 20 | 21 | BaseAttributes = Mapping[str, object] | BaseModel 22 | 23 | IncEx: TypeAlias = set[int] | set[str] | dict[int, Any] | dict[str, Any] | None 24 | 25 | AccessMode: TypeAlias = Literal["w", "w+", "r", "a"] 26 | 27 | T = TypeVar("T") 28 | 29 | 30 | @overload 31 | def tuplify_json(obj: Mapping) -> Mapping: ... 32 | 33 | 34 | @overload 35 | def tuplify_json(obj: list) -> tuple: ... 36 | 37 | 38 | def tuplify_json(obj: object) -> object: 39 | """ 40 | Recursively converts lists within a Python object to tuples. 
41 | """ 42 | if isinstance(obj, list): 43 | return tuple(tuplify_json(elem) for elem in obj) 44 | elif isinstance(obj, dict): 45 | return {k: tuplify_json(v) for k, v in obj.items()} 46 | else: 47 | return obj 48 | 49 | 50 | class StrictBase(BaseModel): 51 | model_config = ConfigDict(frozen=True, extra="forbid") 52 | 53 | 54 | def parse_dtype_v2(value: npt.DTypeLike) -> str | list[tuple[Any, ...]]: 55 | """ 56 | Convert the input to a NumPy dtype and either return the ``str`` attribute of that 57 | object or, if the dtype is a structured dtype, return the fields of that dtype as a list 58 | of tuples. 59 | 60 | Parameters 61 | ---------- 62 | value : npt.DTypeLike 63 | A value that can be converted to a NumPy dtype. 64 | 65 | Returns 66 | ------- 67 | 68 | A Zarr V2-compatible encoding of the dtype. 69 | 70 | References 71 | ---------- 72 | See the [Zarr V2 specification](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding) 73 | for more details on this encoding of data types. 74 | """ 75 | # Assume that a non-string sequence represents a the Zarr V2 JSON form of a structured dtype. 76 | if isinstance(value, Sequence) and not isinstance(value, str): 77 | return [tuple(v) for v in value] 78 | else: 79 | np_dtype = np.dtype(value) 80 | if np_dtype.fields is not None: 81 | # This is a structured dtype, which must be converted to a list of tuples. Note that 82 | # this function recurses, because a structured dtype is parametrized by other dtypes. 83 | return [(k, parse_dtype_v2(v[0])) for k, v in np_dtype.fields.items()] 84 | else: 85 | return np_dtype.str 86 | 87 | 88 | def ensure_member_name(data: Any) -> str: 89 | """ 90 | If the input is a string, then ensure that it is a valid 91 | name for a subnode in a zarr group 92 | """ 93 | if isinstance(data, str): 94 | if "/" in data: 95 | raise ValueError( 96 | f'Strings containing "/" are invalid. Got {data}, which violates this rule.' 97 | ) 98 | if data in ("", ".", ".."): 99 | raise ValueError(f"The string {data} is not a valid member name.") 100 | return data 101 | raise TypeError(f"Expected a str, got {type(data)}.") 102 | 103 | 104 | def ensure_key_no_path(data: Any) -> Any: 105 | if isinstance(data, Mapping): 106 | [ensure_member_name(key) for key in data] 107 | return data 108 | 109 | 110 | def model_like(a: BaseModel, b: BaseModel, exclude: IncEx = None, include: IncEx = None) -> bool: 111 | """ 112 | A similarity check for a pair pydantic.BaseModel, parametrized over included or excluded fields. 113 | 114 | 115 | """ 116 | 117 | a_dict = a.model_dump(exclude=exclude, include=include) 118 | b_dict = b.model_dump(exclude=exclude, include=include) 119 | return json_eq(a_dict, b_dict) 120 | 121 | 122 | # TODO: expose contains_array and contains_group as public functions in zarr-python 123 | # and replace these custom implementations 124 | def maybe_node( 125 | store: StoreLike, path: str, *, zarr_format: Literal[2, 3] 126 | ) -> zarr.Array | zarr.Group | None: 127 | """ 128 | Return the array or group found at the store / path, if an array or group exists there. 129 | Otherwise return None. 
130 | """ 131 | from zarr.core.sync import sync 132 | from zarr.core.sync_group import get_node 133 | from zarr.storage._common import make_store_path 134 | 135 | # convert the storelike store argument to a Zarr store 136 | spath = sync(make_store_path(store, path=path)) 137 | try: 138 | return get_node(spath.store, spath.path, zarr_format=zarr_format) 139 | except FileNotFoundError: 140 | return None 141 | 142 | 143 | def ensure_multiple(data: Sequence[T]) -> Sequence[T]: 144 | """ 145 | Ensure that there is at least one element in the sequence 146 | """ 147 | if len(data) < 1: 148 | raise ValueError("Invalid length. Expected 1 or more, got 0.") 149 | return data 150 | 151 | 152 | def json_eq(a: object, b: object) -> bool: 153 | """ 154 | An equality check between python objects that recurses into dicts and sequences and ignores 155 | the difference between tuples and lists. Otherwise, it's just regular equality. Useful 156 | for comparing dicts that would become identical JSON, but where one has lists and the other 157 | has tuples. 158 | """ 159 | # treat lists & tuples as the same "sequence" type 160 | seq_types = (list, tuple) 161 | 162 | # both are sequences → compare element-wise 163 | if isinstance(a, seq_types) and isinstance(b, seq_types): 164 | return len(a) == len(b) and all(json_eq(x, y) for x, y in zip(a, b, strict=False)) 165 | 166 | # recurse into mappings 167 | if isinstance(a, Mapping) and isinstance(b, Mapping): 168 | return a.keys() == b.keys() and all(json_eq(a[k], b[k]) for k in a) 169 | 170 | # otherwise → regular equality 171 | return a == b 172 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # pydantic-zarr 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/pydantic-zarr)](https://pypi.python.org/pypi/pydantic-zarr) 4 | 5 | Type-safety for Zarr data. 6 | 7 | ## Overview 8 | 9 | `pydantic-zarr` expresses data stored in the [Zarr](https://zarr.readthedocs.io/en/stable/) format with [Pydantic](https://docs.pydantic.dev/1.10/). Specifically, `pydantic-zarr` encodes Zarr groups and arrays as [Pydantic models](https://docs.pydantic.dev/1.10/usage/models/). These models are useful for formalizing the structure of Zarr hierarchies, type-checking Zarr hierarchies, and runtime validation for Zarr-based data. 10 | 11 | ```python 12 | import zarr 13 | 14 | from pydantic_zarr.v2 import GroupSpec 15 | 16 | # create a Zarr group 17 | group = zarr.create_group(store={}, path='foo', zarr_format=2) 18 | # put an array inside the group 19 | array = zarr.create_array( 20 | store=group.store, name='foo/bar', shape=10, dtype='uint8', zarr_format=2 21 | ) 22 | array.attrs.put({'metadata': 'hello'}) 23 | 24 | # create a pydantic model to model the Zarr group 25 | spec = GroupSpec.from_zarr(group) 26 | print(spec.model_dump()) 27 | """ 28 | { 29 | 'zarr_format': 2, 30 | 'attributes': {}, 31 | 'members': { 32 | 'bar': { 33 | 'zarr_format': 2, 34 | 'attributes': {'metadata': 'hello'}, 35 | 'shape': (10,), 36 | 'chunks': (10,), 37 | 'dtype': '|u1', 38 | 'fill_value': 0, 39 | 'order': 'C', 40 | 'filters': None, 41 | 'dimension_separator': '.', 42 | 'compressor': {'id': 'zstd', 'level': 0}, 43 | } 44 | }, 45 | } 46 | """ 47 | ``` 48 | 49 | More examples can be found in the [usage guide](usage_zarr_v2.md). 
50 | 51 | ## Installation 52 | 53 | `pip install -U pydantic-zarr` 54 | 55 | ### Limitations 56 | 57 | #### No array data operations 58 | 59 | This library only provides tools to represent the _layout_ of Zarr groups and arrays, and the structure of their attributes. `pydantic-zarr` performs no type checking or runtime validation of the multidimensional array data contained _inside_ Zarr arrays, and `pydantic-zarr` does not contain any tools for efficiently reading or writing Zarr arrays. 60 | 61 | #### Supported Zarr versions 62 | 63 | This library supports [version 2](https://zarr.readthedocs.io/en/stable/spec/v2.html) of the Zarr format, with partial support for [Zarr v3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html). Progress towards complete support for Zarr v3 is tracked by [this issue](https://github.com/d-v-b/pydantic-zarr/issues/3). 64 | 65 | ## Design 66 | 67 | A Zarr group can be modeled as an object with two properties: 68 | 69 | - `attributes`: A dict-like object, with keys that are strings, values that are JSON-serializable. 70 | - `members`: A dict-like object, with keys that strings and values that are other Zarr groups, or Zarr arrays. 71 | 72 | A Zarr array can be modeled similarly, but without the `members` property (because Zarr arrays cannot contain Zarr groups or arrays), and with a set of array-specific properties like `shape`, `dtype`, etc. 73 | 74 | Note the use of the term "modeled": Zarr arrays are useful because they store N-dimensional array data, but `pydantic-zarr` does not treat that data as part of the "model" of a Zarr array. 75 | 76 | In `pydantic-zarr`, Zarr groups are modeled by the `GroupSpec` class, which is a [`Pydantic model`](https://docs.pydantic.dev/latest/concepts/models/) with two fields: 77 | 78 | - `attributes`: either a `Mapping` or a `pydantic.BaseModel`. 79 | - `members`: either a mapping with string keys and values that must be `GroupSpec` or `ArraySpec` instances, or the value `Null`. The use of nullability is explained in its own [section](#nullable-members). 80 | 81 | Zarr arrays are represented by the `ArraySpec` class, which has a similar `attributes` field, as well as fields for all the Zarr array properties (`dtype`, `shape`, `chunks`, etc). 82 | 83 | `GroupSpec` and `ArraySpec` are both [generic models](https://docs.pydantic.dev/1.10/usage/models/#generic-models). `GroupSpec` takes two type parameters, the first specializing the type of `GroupSpec.attributes`, and the second specializing the type of the _values_ of `GroupSpec.members` (the keys of `GroupSpec.members` are always strings). `ArraySpec` only takes one type parameter, which specializes the type of `ArraySpec.attributes`. 84 | 85 | Examples using this generic typing functionality can be found in the [usage guide](usage_zarr_v2.md#using-generic-types). 86 | 87 | ### Nullable `members` 88 | 89 | When a Zarr group has no members, a `GroupSpec` model of that Zarr group will have its `members` attribute set to the empty dict `{}`. But there are scenarios where the members of a Zarr group are unknown: 90 | 91 | - Some Zarr storage backends do not support directory listing, in which case it is possible to access a Zarr group and inspect its attributes, but impossible to discover its members. So the members of such a Zarr group are unknown. 92 | - Traversing a deeply nested large Zarr group on high latency storage can be slow. This can be mitigated by only partially traversing the hierarchy, e.g. only inspecting the root group and N subgroups. 
This defines a sub-hierarchy of the full hierarchy; leaf groups of this subtree by definition did not have their members checked, and so their members are unknown. 93 | - A Zarr hierarchy can be represented as a mapping `M` from paths to nodes (array or group). In this case, if `M["key"]` is a model of a Zarr group `G`, then `M["key/subkey"]` would encode a member of `G`. Since the key structure of the mapping `M` is doing the work of encoding the members of `G`, there is no value in `G` having a members attribute that claims anything about the members of `G`, and so `G.members` should be modeled as unknown. 94 | 95 | To handle these cases, `pydantic-zarr` allows the `members` attribute of a `GroupSpec` to be `Null`. 96 | 97 | ## Standardization 98 | 99 | The Zarr specifications do not define a model of the Zarr hierarchy. `pydantic-zarr` is an implementation of a particular model that can be found formalized in this [specification document](https://github.com/d-v-b/zeps/blob/zom/draft/ZEP0006.md), which has been proposed for inclusion in the Zarr specifications. You can find the discussion of that proposal in [this pull request](https://github.com/zarr-developers/zeps/pull/46). 100 | -------------------------------------------------------------------------------- /docs/release-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | 4 | 5 | ## pydantic-zarr 0.9.1 (2025-12-12) 6 | 7 | ### Features 8 | 9 | - Add `with_*` methods to the experimental `ArraySpec` and `GroupSpec` classes. 10 | These methods make it easier to create copies of existing `ArraySpec` and `GroupSpec` objects with 11 | new properties. ([#137](https://github.com/zarr-developers/pydantic-zarr/issues/137)) 12 | 13 | ### Bugfixes 14 | 15 | - Make V3 `ArraySpec` validation reject dimension names that don't match the dimensionality of the array. ([#136](https://github.com/zarr-developers/pydantic-zarr/issues/136)) 16 | - Fix a bug that prevented creation of `experimental.v3.GroupSpec` instances with typed members. ([#138](https://github.com/zarr-developers/pydantic-zarr/issues/138)) 17 | - Fix a bug where `experimental.v3.ArraySpec` instances took a default parameter for attributes. ([#138](https://github.com/zarr-developers/pydantic-zarr/issues/138)) 18 | 19 | 20 | ## pydantic-zarr 0.9.0 (2025-12-08) 21 | 22 | ### Breaking Changes 23 | 24 | This release contains breaking changes. See [Bugfixes](#090-bugfixes) 25 | 26 | ### Features 27 | 28 | - Make the zarr dependency optional to allow usage without installing zarr-python. ([#112](https://github.com/zarr-developers/pydantic-zarr/issues/112)) 29 | - Add experimental module with improved implementations of `ArraySpec` and `GroupSpec` classes. ([#120](https://github.com/zarr-developers/pydantic-zarr/issues/120)) 30 | 31 | ### Bugfixes 32 | 33 | - Remove default empty dictionary for attributes in ArraySpec and GroupSpec. This is a breaking change. 34 | To migrate your code, provide a value for the `attributes` argument when creating an `ArraySpec` or 35 | `GroupSpec`. ([#115](https://github.com/zarr-developers/pydantic-zarr/issues/115)). 36 | To replicate the previous default behaviour, pass an empty dictionary (`attributes={}`). 37 | - Fix a broken bare install by making the dependency on `packaging` explicit. ([#125](https://github.com/zarr-developers/pydantic-zarr/issues/125)) 38 | 39 | ### Improved Documentation 40 | 41 | - Update documentation URLs to point to pydantic-zarr.readthedocs.io. 
([#123](https://github.com/zarr-developers/pydantic-zarr/issues/123)) 42 | - Add `towncrier` for managing the changelog. ([#128](https://github.com/zarr-developers/pydantic-zarr/issues/128)) 43 | 44 | ### Misc 45 | 46 | - [#121](https://github.com/zarr-developers/pydantic-zarr/issues/121) 47 | 48 | ## pydantic-zarr 0.8.4 (2025-09-09) 49 | 50 | ### Bugfixes 51 | 52 | - Fix from_zarr for arrays with no dimension names. ([#108](https://github.com/zarr-developers/pydantic-zarr/issues/108)) 53 | 54 | ### Misc 55 | 56 | - Bump actions/setup-python from 5 to 6 in the actions group. ([#109](https://github.com/zarr-developers/pydantic-zarr/issues/109)) 57 | 58 | ## pydantic-zarr 0.8.3 (2025-08-28) 59 | 60 | ### Features 61 | 62 | - Correctly propagate dimension names from zarr arrays. ([#103](https://github.com/zarr-developers/pydantic-zarr/issues/103)) 63 | - Improve runtime type checking in from_flat(). ([#101](https://github.com/zarr-developers/pydantic-zarr/issues/101)) 64 | 65 | ### Bugfixes 66 | 67 | - Make typing of v2 from_flat() invariant. ([#100](https://github.com/zarr-developers/pydantic-zarr/issues/100)) 68 | 69 | ### Improved Documentation 70 | 71 | - Remove out of date disclaimer. ([#99](https://github.com/zarr-developers/pydantic-zarr/issues/99)) 72 | 73 | ### Misc 74 | 75 | - Bump actions/checkout from 4 to 5 in the actions group. ([#97](https://github.com/zarr-developers/pydantic-zarr/issues/97)) 76 | 77 | ## pydantic-zarr 0.8.2 (2025-08-14) 78 | 79 | ### Features 80 | 81 | - Disallow empty codecs and use a sane default in auto_codecs, allow codecs to be specified by strings. ([#95](https://github.com/zarr-developers/pydantic-zarr/issues/95)) 82 | 83 | ### Bugfixes 84 | 85 | - Fix GroupSpec.from_zarr typing. ([#91](https://github.com/zarr-developers/pydantic-zarr/issues/91)) 86 | 87 | ## pydantic-zarr 0.8.1 (2025-08-05) 88 | 89 | ### Features 90 | 91 | - Add a py.typed file for better type checking support. ([#87](https://github.com/zarr-developers/pydantic-zarr/issues/87)) 92 | 93 | ### Misc 94 | 95 | - Update cd workflow to use hatch. ([#85](https://github.com/zarr-developers/pydantic-zarr/issues/85)) 96 | - Remove GH actions doc action. ([#84](https://github.com/zarr-developers/pydantic-zarr/issues/84)) 97 | 98 | ## pydantic-zarr 0.8.0 (2025-07-30) 99 | 100 | ### Features 101 | 102 | - Use the JSON form of the fill value. ([#77](https://github.com/zarr-developers/pydantic-zarr/issues/77)) 103 | - Add types for order and dimension separator. ([#81](https://github.com/zarr-developers/pydantic-zarr/issues/81)) 104 | - Allow zarr Arrays in from_array(). ([#80](https://github.com/zarr-developers/pydantic-zarr/issues/80)) 105 | - Allow BaseModel in TBaseAttr. ([#78](https://github.com/zarr-developers/pydantic-zarr/issues/78)) 106 | 107 | ### Bugfixes 108 | 109 | - Fix readthedocs config. ([#83](https://github.com/zarr-developers/pydantic-zarr/issues/83)) 110 | 111 | ## pydantic-zarr 0.7.0 (2024-03-20) 112 | 113 | ### Bugfixes 114 | 115 | - Default chunks fix. ([#28](https://github.com/zarr-developers/pydantic-zarr/issues/28)) 116 | 117 | ## pydantic-zarr 0.6.0 (2024-03-03) 118 | 119 | ### Features 120 | 121 | - Add to_flat, from_flat, like, and better handling for existing arrays and groups. ([#25](https://github.com/zarr-developers/pydantic-zarr/issues/25)) 122 | 123 | ### Improved Documentation 124 | 125 | - Fix repo name in docs. 
([#26](https://github.com/zarr-developers/pydantic-zarr/issues/26)) 126 | 127 | ## pydantic-zarr 0.5.2 (2023-11-08) 128 | 129 | ### Features 130 | 131 | - Add Zarr V3 support. ([#17](https://github.com/zarr-developers/pydantic-zarr/issues/17)) 132 | 133 | ### Bugfixes 134 | 135 | - Various bugfixes. ([#18](https://github.com/zarr-developers/pydantic-zarr/issues/18)) 136 | 137 | ## pydantic-zarr 0.5.1 (2023-10-06) 138 | 139 | ### Bugfixes 140 | 141 | - Fix GroupSpec.from_zarr. ([#16](https://github.com/zarr-developers/pydantic-zarr/issues/16)) 142 | 143 | ## pydantic-zarr 0.5.0 (2023-08-22) 144 | 145 | ### Features 146 | 147 | - Rename items to members. ([#12](https://github.com/zarr-developers/pydantic-zarr/issues/12)) 148 | 149 | ### Improved Documentation 150 | 151 | - Fix doc link. ([#11](https://github.com/zarr-developers/pydantic-zarr/issues/11)) 152 | -------------------------------------------------------------------------------- /docs/experimental/index.md: -------------------------------------------------------------------------------- 1 | # experimental features 2 | 3 | ## Improved `GroupSpec` and `ArraySpec` classes 4 | 5 | We are developing a redesign of the `GroupSpec` and `ArraySpec` classes. These new classes can be found in the `experimental.v2` and `experimental.v3` modules for Zarr V2 and V3, respectively. Our 6 | design goal for these new classes is to make the models simpler, more explicit, and more expressive. 7 | 8 | Here's how we are doing that: 9 | 10 | ### Removing generic type parameters 11 | 12 | In `pydantic_zarr`, the `GroupSpec` and `ArraySpec` classes take generic type parameters. `GroupSpec[A, B]` models a Zarr group with attributes that must be instances of `A` and child nodes that must be instances of `B`. The generic type parameters offer concise class definitions but complicate type checking for pydantic, and they are also not strictly necessary for the `GroupSpec` and `ArraySpec` 13 | classes to do their jobs. So in `pydantic_zarr.experimental.v2` and `pydantic_zarr.experimental.v3` the `GroupSpec` and `ArraySpec` classes are not generic any more. They are just regular classes. 14 | 15 | Code like this: 16 | 17 | ```python 18 | from pydantic import BaseModel 19 | from pydantic_zarr.v3 import ArraySpec 20 | 21 | class AttrsType(BaseModel): 22 | a: int 23 | b: float 24 | 25 | MyArray = ArraySpec[AttrsType] 26 | print(MyArray) 27 | #> 28 | ``` 29 | 30 | becomes this: 31 | 32 | ```python 33 | from pydantic import BaseModel 34 | from pydantic_zarr.experimental.v3 import ArraySpec 35 | 36 | class AttrsType(BaseModel): 37 | a: int 38 | b: float 39 | 40 | class MyArray(ArraySpec): 41 | attributes: AttrsType 42 | 43 | print(MyArray) 44 | #> 45 | ``` 46 | 47 | ### A class hierarchy for Zarr Groups 48 | 49 | In `pydantic_zarr.v2` and `pydantic_zarr.v3`, the `members` attribute of the `GroupSpec` class is 50 | annotated as a union type with two variants: `Mapping` and `None`. `None` occurs in this union to handle the case where we want to model a Zarr group outside the context of any hierarchy, i.e. a situation where the `members` attribute would be undefined. 51 | 52 | The main place where this occurs is when we create a flattened representation of a Zarr hierarchy with the `to_flat` functions. `to_flat` takes a Zarr hierarchy (a tree) and converts it to a `key: value` data structure where the hierarchy information is encoded in the structure of the keys. 
After this transformation, it is redundant for 53 | the `GroupSpec` elements of the flattened Zarr hierarchy to carry their own representation of the hierarchy structure, as that information is completely specified by the keys. So when we flatten 54 | a `GroupSpec`, we set all the `members` attributes to `None`. 55 | 56 | But outside the context of flattening hierarchies, we need to handle the `None` variant in places where we are sure that the members are not `None`, and this is tedious. 57 | 58 | To solve this problem, instead of defining the `members` attribute as a union over two possible types, in `pydantic_zarr.experimental.v2` and `pydantic_zarr.experimental.v3` we define two classes for modelling Zarr groups. One class, `BaseGroupSpec`, 59 | narrowly models the structure of a Zarr Group. The `GroupSpec` class inherits from `BaseGroupSpec` and 60 | defines a new `members` attribute, which allows it to model Zarr groups that have information 61 | about the sub-groups and sub-arrays they contain. With this structure, `to_flat` can safely return a mapping from strings to `ArraySpec | BaseGroupSpec`, and `GroupSpec` instances don't need to handle the case where their `members` attribute is `None`. 62 | 63 | With `BaseGroupSpec`, type checkers and Pydantic can distinguish at definition-time whether a group should have members, eliminating runtime None-checks in code that knows members must exist. 64 | 65 | Ordinary `pydantic-zarr` usage should not be affected by the new class hierarchy for `GroupSpec` classes. The only time a user would create a `BaseGroupSpec` explicitly is when declaring a Zarr hierarchy in the flat form. Otherwise, `GroupSpec` is sufficient for all uses. 66 | 67 | ### More explicit modelling of Zarr groups 68 | 69 | Since `pydantic-zarr` started, the Python type system has become significantly more expressive. One very useful development has been improvements in the `TypedDict` class for modelling mappings with known keys and typed values. `TypedDict` is a perfect fit for modelling Zarr groups where the names 70 | of the members are part of the schema definition for that group. 71 | 72 | The `GroupSpec` classes defined in `pydantic_zarr.experimental` accept `TypedDict` annotations for their `members` attribute. As `pydantic` can validate values against a `TypedDict` type annotation, we get a very concise type check on the names of the members of a Zarr group. 
73 | 74 | ```python 75 | from typing_extensions import TypedDict 76 | from pydantic import BaseModel 77 | 78 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 79 | 80 | array = ArraySpec( 81 | shape=(1,), 82 | data_type='uint8', 83 | codecs=('bytes',), 84 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 85 | chunk_key_encoding = {"name": "default"}, 86 | fill_value = 0, 87 | attributes={} 88 | ) 89 | 90 | class MyMembers(TypedDict): 91 | a: ArraySpec 92 | b: ArraySpec 93 | 94 | class MyGroup(GroupSpec): 95 | members: MyMembers 96 | 97 | # validation fails: missing array named "b" 98 | try: 99 | MyGroup(members={"a": array}, attributes={}) 100 | except ValueError as e: 101 | print(e) 102 | """ 103 | 1 validation error for MyGroup 104 | members.b 105 | Field required [type=missing, input_value={'a': ArraySpec(zarr_form..., dimension_names=None)}, input_type=dict] 106 | For further information visit https://errors.pydantic.dev/2.11/v/missing 107 | """ 108 | 109 | # validation fails: extra array named "c" 110 | try: 111 | MyGroup(members={"a": array, "b": array, "c": array}, attributes={}) 112 | except ValueError as e: 113 | print(e) 114 | """ 115 | 1 validation error for MyGroup 116 | members.c 117 | Extra inputs are not permitted [type=extra_forbidden, input_value=ArraySpec(zarr_format=3, ...), dimension_names=None), input_type=ArraySpec] 118 | For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden 119 | """ 120 | 121 | # validation succeeds 122 | MyGroup(members={"a" : array, "b": array}, attributes={}) 123 | ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling", "hatch-vcs"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pydantic-zarr" 7 | dynamic = ["version"] 8 | description = 'Pydantic models for the Zarr file format' 9 | readme = { file = "README.md", content-type = "text/markdown" } 10 | requires-python = ">=3.11" 11 | license = "MIT" 12 | keywords = ["zarr", "pydantic"] 13 | authors = [{ name = "Davis Bennett", email = "davis.v.bennett@gmail.com" }] 14 | maintainers = [{ name = "David Stansby" }] 15 | 16 | 17 | classifiers = [ 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: Implementation :: CPython", 22 | ] 23 | dependencies = ["pydantic>2.0.0", "numpy>=1.24.0", "packaging>=21.0"] 24 | 25 | [project.urls] 26 | Documentation = "https://pydantic-zarr.readthedocs.io/" 27 | Issues = "https://github.com/zarr-developers/pydantic-zarr/issues" 28 | Source = "https://github.com/zarr-developers/pydantic-zarr" 29 | 30 | [project.optional-dependencies] 31 | zarr = ["zarr>=3.0.0"] 32 | # pytest pin is due to https://github.com/pytest-dev/pytest-cov/issues/693 33 | test-base = [ 34 | "coverage", 35 | "pytest<8.4", 36 | "pytest-cov", 37 | "pytest-examples", 38 | "xarray==2025.10.0", 39 | "dask==2025.11.0" 40 | ] 41 | test = ["pydantic-zarr[test-base,zarr]"] 42 | docs = [ 43 | "mkdocs-material", 44 | "mkdocstrings[python]", 45 | "pytest-examples", 46 | "pydantic==2.11", 47 | "zarr>=3.1.0", 48 | "towncrier", 49 | ] 50 | 51 | [dependency-groups] 52 | zarr = ["zarr>=3.0.0"] 53 | # pytest pin is due to https://github.com/pytest-dev/pytest-cov/issues/693 54 | test-base = [ 55 | "coverage", 56 | "pytest<8.4", 57 | 
"pytest-cov", 58 | "pytest-examples", 59 | "xarray==2025.10.0", 60 | "dask==2025.11.0", 61 | ] 62 | test = [ 63 | {include-group = "test-base"}, 64 | {include-group = "zarr"}, 65 | ] 66 | docs = [ 67 | "mkdocs-material", 68 | "mkdocstrings[python]", 69 | "pytest-examples", 70 | "pydantic==2.11", 71 | "zarr>=3.1.0", 72 | "towncrier", 73 | ] 74 | 75 | [tool.hatch] 76 | version.source = "vcs" 77 | build.hooks.vcs.version-file = "src/pydantic_zarr/_version.py" 78 | 79 | [tool.hatch.envs.test] 80 | features = ["test"] 81 | dependencies = [ 82 | "zarr~={matrix:zarr}", 83 | ] 84 | 85 | [tool.hatch.envs.test.scripts] 86 | test = "pytest tests/test_pydantic_zarr/" 87 | test-cov = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src tests/test_pydantic_zarr" 88 | list-env = "pip list" 89 | 90 | [[tool.hatch.envs.test.matrix]] 91 | python = ["3.11", "3.12", "3.13"] 92 | zarr = ["3.0.10", "3.1.0"] 93 | 94 | [tool.hatch.envs.test-base] 95 | features = ["test-base"] 96 | 97 | [tool.hatch.envs.test-base.scripts] 98 | test = "pytest tests/test_pydantic_zarr/" 99 | test-cov = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src tests/test_pydantic_zarr" 100 | list-env = "pip list" 101 | 102 | [[tool.hatch.envs.test-base.matrix]] 103 | python = ["3.11", "3.12", "3.13"] 104 | 105 | [tool.hatch.envs.docs] 106 | features = ['docs'] 107 | 108 | [tool.hatch.envs.docs.scripts] 109 | build = "mkdocs build --clean --strict" 110 | serve = "mkdocs serve --watch src" 111 | deploy = "mkdocs gh-deploy" 112 | test = "pytest tests/test_docs" 113 | 114 | [tool.hatch.envs.types] 115 | extra-dependencies = ["mypy>=1.0.0", "pydantic"] 116 | [tool.hatch.envs.types.scripts] 117 | check = "mypy --install-types --non-interactive {args:src/pydantic_zarr tests}" 118 | 119 | [tool.coverage.run] 120 | source_pkgs = ["pydantic_zarr", "tests"] 121 | branch = true 122 | parallel = true 123 | omit = ["src/pydantic_zarr/__about__.py"] 124 | 125 | [tool.coverage.paths] 126 | pydantic_zarr = ["src/pydantic_zarr", "*/pydantic-zarr/src/pydantic_zarr"] 127 | tests = ["tests", "*/pydantic-zarr/tests"] 128 | 129 | [tool.coverage.report] 130 | exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] 131 | 132 | [tool.ruff] 133 | line-length = 100 134 | force-exclude = true 135 | extend-exclude = [ 136 | ".bzr", 137 | ".direnv", 138 | ".eggs", 139 | ".git", 140 | ".mypy_cache", 141 | ".nox", 142 | ".pants.d", 143 | ".ruff_cache", 144 | ".venv", 145 | "__pypackages__", 146 | "_build", 147 | "buck-out", 148 | "build", 149 | "dist", 150 | "venv", 151 | "docs", 152 | ] 153 | 154 | [tool.ruff.lint] 155 | extend-select = [ 156 | "ANN", # flake8-annotations 157 | "B", # flake8-bugbear 158 | "EXE", # flake8-executable 159 | "C4", # flake8-comprehensions 160 | "FA", # flake8-future-annotations 161 | "FLY", # flynt 162 | "FURB", # refurb 163 | "G", # flake8-logging-format 164 | "I", # isort 165 | "ISC", # flake8-implicit-str-concat 166 | "LOG", # flake8-logging 167 | "PERF", # Perflint 168 | "PIE", # flake8-pie 169 | "PGH", # pygrep-hooks 170 | "PT", # flake8-pytest-style 171 | "PYI", # flake8-pyi 172 | "RET", # flake8-return 173 | "RSE", # flake8-raise 174 | "RUF", 175 | "SIM", # flake8-simplify 176 | "SLOT", # flake8-slots 177 | "TCH", # flake8-type-checking 178 | "TRY", # tryceratops 179 | "UP", # pyupgrade 180 | "W", # pycodestyle warnings 181 | ] 182 | ignore = [ 183 | "ANN401", 184 | "PT011", # TODO: apply this rule 185 | "PT012", # TODO: apply this rule 186 | "RET505", 187 | "RET506", 188 
| "RUF005", 189 | "SIM108", 190 | "TRY003", 191 | "UP038", # https://github.com/astral-sh/ruff/issues/7871 192 | # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules 193 | "W191", 194 | "E111", 195 | "E114", 196 | "E117", 197 | "D206", 198 | "D300", 199 | "Q000", 200 | "Q001", 201 | "Q002", 202 | "Q003", 203 | "COM812", 204 | "COM819", 205 | ] 206 | 207 | [tool.ruff.lint.extend-per-file-ignores] 208 | "tests/**" = ["ANN001", "ANN201", "RUF029", "SIM117", "SIM300"] 209 | 210 | [tool.mypy] 211 | python_version = "3.11" 212 | ignore_missing_imports = true 213 | namespace_packages = false 214 | warn_unreachable = true 215 | plugins = "pydantic.mypy" 216 | enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] 217 | strict = true 218 | # TODO: remove each of these and fix any errors: 219 | disallow_any_generics = false 220 | warn_return_any = false 221 | 222 | [tool.pytest.ini_options] 223 | minversion = "7" 224 | xfail_strict = true 225 | testpaths = ["tests"] 226 | log_cli_level = "INFO" 227 | addopts = ["--durations=10", "-ra", "--strict-config", "--strict-markers"] 228 | filterwarnings = [ 229 | "error", 230 | # https://github.com/zarr-developers/zarr-python/issues/2948 231 | "ignore:The `order` keyword argument has no effect for Zarr format 3 arrays:RuntimeWarning", 232 | ] 233 | 234 | [tool.repo-review] 235 | ignore = [ 236 | "PC111", # black formatting for docs, not sure if want, 237 | "PC180", # markdown formatter, no thanks 238 | "RTD100", # read the docs, no thanks, 239 | "RTD101", # read the docs, no thanks 240 | "RTD102", # read the docs, no thanks, 241 | "RTD103", # read the docs, no thanks 242 | ] 243 | 244 | [tool.numpydoc_validation] 245 | # See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks 246 | checks = [ 247 | "GL06", 248 | "GL07", 249 | # Currently broken; see https://github.com/numpy/numpydoc/issues/573 250 | # "GL09", 251 | "GL10", 252 | "SS02", 253 | "SS04", 254 | "PR02", 255 | "PR03", 256 | "PR05", 257 | "PR06", 258 | ] 259 | 260 | [tool.towncrier] 261 | directory = 'changes' 262 | filename = "docs/release-notes.md" 263 | title_format = "## {name} {version} ({project_date})" 264 | issue_format = "[#{issue}](https://github.com/zarr-developers/pydantic-zarr/issues/{issue})" 265 | start_string = "\n" 266 | -------------------------------------------------------------------------------- /docs/usage_zarr_v2.md: -------------------------------------------------------------------------------- 1 | # Usage (Zarr V2) 2 | 3 | ## Reading and writing a zarr hierarchy 4 | 5 | ### Reading 6 | 7 | The `GroupSpec` and `ArraySpec` classes represent Zarr v2 groups and arrays, respectively. To create an instance of a `GroupSpec` or `ArraySpec` from an existing Zarr group or array, pass the Zarr group / array to the `.from_zarr` method defined on the `GroupSpec` / `ArraySpec` classes. This will result in a `pydantic-zarr` model of the Zarr object. 8 | 9 | > By default `GroupSpec.from_zarr(zarr_group)` will traverse the entire hierarchy under `zarr_group`. This can be extremely slow if used on an extensive Zarr group on high latency storage. To limit the depth of traversal to a specific depth, use the `depth` keyword argument, e.g. `GroupSpec.from_zarr(zarr_group, depth=1)` 10 | 11 | Note that `from_zarr` will _not_ read the data inside an array. 
12 | 13 | ### Writing 14 | 15 | To write a hierarchy to some zarr-compatible storage backend, `GroupSpec` and `ArraySpec` have `to_zarr` methods that take a Zarr store and a path and return a Zarr array or group created in the store at the given path. 16 | 17 | Note that `to_zarr` will _not_ write any array data. You have to do this separately. 18 | 19 | ```python 20 | from zarr import create_array, create_group 21 | 22 | from pydantic_zarr.v2 import GroupSpec 23 | 24 | # create an in-memory Zarr group + array with attributes 25 | grp = create_group(store={}, path='foo', zarr_format=2) 26 | grp.attrs.put({'group_metadata': 10}) 27 | arr = create_array( 28 | name='foo/bar', store=grp.store, shape=(10,), dtype="f8", compressors=None, zarr_format=2 29 | ) 30 | arr.attrs.put({'array_metadata': True}) 31 | 32 | spec = GroupSpec.from_zarr(grp) 33 | print(spec.model_dump()) 34 | """ 35 | { 36 | 'zarr_format': 2, 37 | 'attributes': {'group_metadata': 10}, 38 | 'members': { 39 | 'bar': { 40 | 'zarr_format': 2, 41 | 'attributes': {'array_metadata': True}, 42 | 'shape': (10,), 43 | 'chunks': (10,), 44 | 'dtype': ' {'a': 100, 'b': 'metadata'} 69 | 70 | print(dict(group2['bar'].attrs)) 71 | #> {'array_metadata': True} 72 | ``` 73 | 74 | ### Creating from an array 75 | 76 | The `ArraySpec` class has a `from_array` static method that takes an array-like object and returns an `ArraySpec` with `shape` and `dtype` fields matching those of the array-like object. 77 | 78 | ```python 79 | import numpy as np 80 | 81 | from pydantic_zarr.v2 import ArraySpec 82 | 83 | print(ArraySpec.from_array(np.arange(10)).model_dump()) 84 | """ 85 | { 86 | 'zarr_format': 2, 87 | 'attributes': {}, 88 | 'shape': (10,), 89 | 'chunks': (10,), 90 | 'dtype': ' False 260 | 261 | # Returns True, because we exclude shape. 262 | print(arr_a.like(arr_b, exclude={'shape'})) 263 | #> True 264 | 265 | # `ArraySpec.like` will convert a zarr.Array to ArraySpec 266 | store = zarr.storage.MemoryStore() 267 | # This is a zarr.Array 268 | arr_a_stored = arr_a.to_zarr(store, path='arr_a') 269 | 270 | # arr_a is like the zarr.Array version of itself 271 | print(arr_a.like(arr_a_stored)) 272 | #> True 273 | 274 | # Returns False, because of mismatched shape 275 | print(arr_b.like(arr_a_stored)) 276 | #> False 277 | 278 | # Returns True, because we exclude shape. 279 | print(arr_b.like(arr_a_stored, exclude={'shape'})) 280 | #> True 281 | 282 | # The same thing, but for groups 283 | g_a = GroupSpec(attributes={'foo': 10}, members={'a': arr_a, 'b': arr_b}) 284 | g_b = GroupSpec(attributes={'foo': 11}, members={'a': arr_a, 'b': arr_b}) 285 | 286 | # g_a is like itself 287 | print(g_a.like(g_a)) 288 | #> True 289 | 290 | # Returns False, because of mismatched attributes 291 | print(g_a.like(g_b)) 292 | #> False 293 | 294 | # Returns True, because we ignore attributes 295 | print(g_a.like(g_b, exclude={'attributes'})) 296 | #> True 297 | 298 | # g_a is like its zarr.Group counterpart 299 | print(g_a.like(g_a.to_zarr(store, path='g_a'))) 300 | #> True 301 | ``` 302 | 303 | ## Creating from an array 304 | 305 | The `ArraySpec` class has a `from_array` static method that takes an array-like object and returns an `ArraySpec` with `shape` and `dtype` fields matching those of the array-like object. 
306 | 307 | ```python 308 | import numpy as np 309 | 310 | from pydantic_zarr.v2 import ArraySpec 311 | 312 | print(ArraySpec.from_array(np.arange(10)).model_dump()) 313 | """ 314 | { 315 | 'zarr_format': 2, 316 | 'attributes': {}, 317 | 'shape': (10,), 318 | 'chunks': (10,), 319 | 'dtype': ' None: 34 | """ 35 | Test that the `dimension_names` attribute is rejected if any of the following are true: 36 | - it is a sequence with length different from the number of dimensions of the array 37 | - it is a sequence containing values other than strings or `None`. 38 | - it is neither a valid sequence nor the value `None`. 39 | """ 40 | base_array: AnyArraySpec = ArraySpec( 41 | shape=(1, 2, 3), 42 | data_type="int8", 43 | codecs=({"name": "bytes"},), 44 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1, 2, 3)}}, 45 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 46 | fill_value=0, 47 | attributes={}, 48 | ) 49 | with pytest.raises(ValidationError): 50 | ArraySpec(**(base_array.model_dump() | {"dimension_names": invalid_dimension_names})) 51 | 52 | 53 | def test_serialize_deserialize() -> None: 54 | array_attributes = {"foo": 42, "bar": "apples", "baz": [1, 2, 3, 4]} 55 | 56 | group_attributes = {"group": True} 57 | 58 | array_spec = ArraySpec( 59 | attributes=array_attributes, 60 | shape=[1000, 1000], 61 | dimension_names=["rows", "columns"], 62 | data_type="float64", 63 | chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": [1000, 100]}), 64 | chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}), 65 | codecs=[NamedConfig(name="GZip", configuration={"level": 1})], 66 | fill_value="NaN", 67 | storage_transformers=[], 68 | ) 69 | 70 | GroupSpec(attributes=group_attributes, members={"array": array_spec}) 71 | 72 | 73 | def test_from_array() -> None: 74 | array = np.arange(10) 75 | array_spec = ArraySpec.from_array(array) 76 | 77 | assert array_spec == ArraySpec( 78 | zarr_format=3, 79 | node_type="array", 80 | attributes={}, 81 | shape=(10,), 82 | data_type="int64", 83 | chunk_grid=RegularChunking( 84 | name="regular", configuration=RegularChunkingConfig(chunk_shape=(10,)) 85 | ), 86 | chunk_key_encoding=DefaultChunkKeyEncoding( 87 | name="default", configuration=DefaultChunkKeyEncodingConfig(separator="/") 88 | ), 89 | fill_value=0, 90 | codecs=auto_codecs(array), 91 | storage_transformers=(), 92 | dimension_names=None, 93 | ) 94 | # check that we can write this array to zarr 95 | # TODO: fix type of the store argument in to_zarr 96 | if not ZARR_AVAILABLE: 97 | return 98 | array_spec.to_zarr(store={}, path="") # type: ignore[arg-type] 99 | 100 | 101 | def test_arrayspec_no_empty_codecs() -> None: 102 | """ 103 | Ensure that it is not possible to create an ArraySpec with no codecs 104 | """ 105 | 106 | with pytest.raises( 107 | ValidationError, match="Value error, Invalid length. Expected 1 or more, got 0." 
108 | ): 109 | ArraySpec( 110 | shape=(1,), 111 | data_type="uint8", 112 | codecs=[], 113 | attributes={}, 114 | fill_value=0, 115 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 116 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 117 | ) 118 | 119 | 120 | @pytest.mark.filterwarnings("ignore:The dtype:UserWarning") 121 | @pytest.mark.filterwarnings("ignore:The data type:FutureWarning") 122 | @pytest.mark.filterwarnings("ignore:The codec:UserWarning") 123 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 124 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 125 | """ 126 | Test that deserializing an ArraySpec from a zarr python store works as expected. 127 | """ 128 | zarr = pytest.importorskip("zarr") 129 | store = {} 130 | 131 | data_type = dtype_example.name 132 | 133 | if data_type == "variable_length_bytes": 134 | pytest.skip( 135 | reason="Bug in zarr python: see https://github.com/zarr-developers/zarr-python/issues/3263" 136 | ) 137 | 138 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=3) 139 | 140 | arr_spec = ArraySpec.from_zarr(arr) 141 | assert arr_spec.model_dump() == json.loads( 142 | store["zarr.json"].to_bytes(), object_hook=tuplify_json 143 | ) 144 | 145 | 146 | @pytest.mark.parametrize("path", ["", "foo"]) 147 | @pytest.mark.parametrize("overwrite", [True, False]) 148 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 149 | @pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True, "order": "F"}]) 150 | @pytest.mark.filterwarnings("ignore:The codec `vlen-utf8`:UserWarning") 151 | @pytest.mark.filterwarnings("ignore:The codec `vlen-bytes`:UserWarning") 152 | @pytest.mark.filterwarnings("ignore:The data type :FutureWarning") 153 | def test_arrayspec_to_zarr( 154 | path: str, overwrite: bool, config: dict[str, object], dtype_example: DTypeExample 155 | ) -> None: 156 | """ 157 | Test that serializing an ArraySpec to a zarr python store works as expected. 158 | """ 159 | data_type = dtype_example.name 160 | fill_value = dtype_example.fill_value 161 | 162 | codecs = ({"name": "bytes", "configuration": {}},) 163 | if data_type == "variable_length_bytes": 164 | codecs = ({"name": "vlen-bytes"},) 165 | 166 | elif data_type in ("str", "string"): 167 | codecs = ({"name": "vlen-utf8"},) 168 | 169 | store = {} 170 | 171 | arr_spec = ArraySpec( 172 | attributes={}, 173 | shape=(10,), 174 | data_type=data_type, 175 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (10,)}}, 176 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 177 | codecs=codecs, 178 | fill_value=fill_value, 179 | dimension_names=("x",), 180 | ) 181 | if not ZARR_AVAILABLE: 182 | return 183 | arr = arr_spec.to_zarr(store=store, path=path, overwrite=overwrite, config=config) 184 | assert arr._async_array.metadata == arr._async_array.metadata 185 | for key, value in config.items(): 186 | assert asdict(arr._async_array._config)[key] == value 187 | 188 | 189 | def get_flat_example() -> tuple[dict[str, AnyArraySpec | AnyGroupSpec], AnyGroupSpec]: 190 | """ 191 | Get example data for testing to_flat and from_flat. 192 | 193 | The returned value is a tuple with two elements: a flattened dict representation of a hierarchy, 194 | and the root group, with all of its members (i.e., the non-flat version of that hierarchy). 195 | """ 196 | named_nodes: tuple[AnyArraySpec | AnyGroupSpec, ...] 
= ( 197 | GroupSpec(attributes={"name": ""}, members=None), 198 | ArraySpec.from_array(np.arange(10), attributes={"name": "/a1"}), 199 | GroupSpec(attributes={"name": "/g1"}, members=None), 200 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/a2"}), 201 | GroupSpec(attributes={"name": "/g1/g2"}, members=None), 202 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/g2/a3"}), 203 | ) 204 | 205 | members_flat: dict[str, AnyArraySpec | AnyGroupSpec] = { 206 | a.attributes["name"]: a for a in named_nodes 207 | } 208 | g2 = members_flat["/g1/g2"].model_copy(update={"members": {"a3": members_flat["/g1/g2/a3"]}}) 209 | g1 = members_flat["/g1"].model_copy( 210 | update={"members": {"a2": members_flat["/g1/a2"], "g2": g2}} 211 | ) 212 | root = members_flat[""].model_copy(update={"members": {"g1": g1, "a1": members_flat["/a1"]}}) 213 | return members_flat, root 214 | 215 | 216 | class TestGroupSpec: 217 | @staticmethod 218 | def test_to_flat() -> None: 219 | """ 220 | Test that the to_flat method generates a flat representation of the hierarchy 221 | """ 222 | 223 | members_flat, root = get_flat_example() 224 | observed = root.to_flat() 225 | assert observed == members_flat 226 | 227 | @staticmethod 228 | def test_from_flat() -> None: 229 | """ 230 | Test that the from_flat method generates a `GroupSpec` from a flat representation of the 231 | hierarchy 232 | """ 233 | members_flat, root = get_flat_example() 234 | assert GroupSpec.from_flat(members_flat) == root 235 | 236 | @staticmethod 237 | def test_from_zarr_depth() -> None: 238 | zarr = pytest.importorskip("zarr") 239 | codecs = ({"name": "bytes", "configuration": {}},) 240 | tree: dict[str, AnyGroupSpec | AnyArraySpec] = { 241 | "": GroupSpec(members=None, attributes={"level": 0, "type": "group"}), 242 | "/1": GroupSpec(members=None, attributes={"level": 1, "type": "group"}), 243 | "/1/2": GroupSpec(members=None, attributes={"level": 2, "type": "group"}), 244 | "/1/2/1": GroupSpec(members=None, attributes={"level": 3, "type": "group"}), 245 | "/1/2/2": ArraySpec.from_array( 246 | np.arange(20), attributes={"level": 3, "type": "array"}, codecs=codecs 247 | ), 248 | } 249 | store = zarr.storage.MemoryStore() 250 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 251 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 252 | assert group_in_0 == tree[""] 253 | 254 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 255 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 256 | assert group_in_1.members is not None 257 | assert group_in_1.members["1"] == tree["/1"] 258 | 259 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 260 | assert group_in_2.members is not None 261 | assert group_in_2.members["1"].members["2"] == tree["/1/2"] 262 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 263 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 264 | 265 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 266 | assert group_in_3.members is not None 267 | assert group_in_3.members["1"].members["2"].members["1"] == tree["/1/2/1"] 268 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 269 | assert group_in_3.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 270 | assert group_in_3.members["1"].members["2"].attributes 
== tree["/1/2"].attributes # type: ignore[attr-defined] 271 | 272 | 273 | def test_mix_v3_v2_fails() -> None: 274 | from pydantic_zarr.v2 import ArraySpec as ArraySpecv2 275 | 276 | members_flat = {"/a": ArraySpecv2.from_array(np.ones(1))} 277 | with pytest.raises( 278 | ValueError, 279 | match=re.escape( 280 | "Value at '/a' is not a v3 ArraySpec or GroupSpec (got type(value)=)" 281 | ), 282 | ): 283 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 284 | 285 | 286 | @pytest.mark.parametrize( 287 | ("args", "kwargs", "expected_names"), 288 | [ 289 | ((1,), {"dimension_names": ["x"]}, ("x",)), 290 | ((1,), {}, None), 291 | ], 292 | ) 293 | def test_dim_names_from_zarr_array( 294 | args: tuple, kwargs: dict, expected_names: tuple[str, ...] | None 295 | ) -> None: 296 | zarr = pytest.importorskip("zarr") 297 | 298 | arr = zarr.zeros(*args, **kwargs) 299 | spec: AnyArraySpec = ArraySpec.from_zarr(arr) 300 | assert spec.dimension_names == expected_names 301 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/test_v3.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import re 5 | from dataclasses import asdict 6 | 7 | import numpy as np 8 | import pytest 9 | from pydantic import ValidationError 10 | from typing_extensions import TypedDict 11 | 12 | from pydantic_zarr.experimental.core import json_eq 13 | from pydantic_zarr.experimental.v3 import ( 14 | ArraySpec, 15 | BaseGroupSpec, 16 | DefaultChunkKeyEncoding, 17 | DefaultChunkKeyEncodingConfig, 18 | GroupSpec, 19 | NamedConfig, 20 | RegularChunking, 21 | RegularChunkingConfig, 22 | auto_codecs, 23 | ) 24 | 25 | from ..conftest import DTYPE_EXAMPLES_V3, ZARR_AVAILABLE, DTypeExample 26 | 27 | 28 | @pytest.mark.parametrize("invalid_dimension_names", [[], "hi", ["1", 2, None]], ids=str) 29 | def test_dimension_names_validation(invalid_dimension_names: object) -> None: 30 | """ 31 | Test that the `dimension_names` attribute is rejected if any of the following are true: 32 | - it is a sequence with length different from the number of dimensions of the array 33 | - it is a sequence containing values other than strings or `None`. 34 | - it is neither a valid sequence nor the value `None`. 
35 | """ 36 | base_array = ArraySpec( 37 | shape=(1, 2, 3), 38 | data_type="int8", 39 | codecs=({"name": "bytes"},), 40 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1, 2, 3)}}, 41 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 42 | fill_value=0, 43 | attributes={}, 44 | ) 45 | with pytest.raises(ValidationError): 46 | ArraySpec(**(base_array.model_dump() | {"dimension_names": invalid_dimension_names})) 47 | 48 | 49 | def test_serialize_deserialize() -> None: 50 | array_attributes = {"foo": 42, "bar": "apples", "baz": [1, 2, 3, 4]} 51 | 52 | group_attributes = {"group": True} 53 | 54 | array_spec = ArraySpec( 55 | attributes=array_attributes, 56 | shape=(1000, 1000), 57 | dimension_names=("rows", "columns"), 58 | data_type="float64", 59 | chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": (1000, 100)}), 60 | chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}), 61 | codecs=(NamedConfig(name="GZip", configuration={"level": 1}),), 62 | fill_value="NaN", 63 | storage_transformers=(), 64 | ) 65 | 66 | GroupSpec(attributes=group_attributes, members={"array": array_spec}) 67 | 68 | 69 | def test_from_array() -> None: 70 | array = np.arange(10) 71 | array_spec = ArraySpec.from_array(array) 72 | 73 | assert array_spec == ArraySpec( 74 | zarr_format=3, 75 | node_type="array", 76 | attributes={}, 77 | shape=(10,), 78 | data_type="int64", 79 | chunk_grid=RegularChunking( 80 | name="regular", configuration=RegularChunkingConfig(chunk_shape=(10,)) 81 | ), 82 | chunk_key_encoding=DefaultChunkKeyEncoding( 83 | name="default", configuration=DefaultChunkKeyEncodingConfig(separator="/") 84 | ), 85 | fill_value=0, 86 | codecs=auto_codecs(array), 87 | storage_transformers=(), 88 | dimension_names=None, 89 | ) 90 | # check that we can write this array to zarr 91 | # TODO: fix type of the store argument in to_zarr 92 | if not ZARR_AVAILABLE: 93 | return 94 | array_spec.to_zarr(store={}, path="") # type: ignore[arg-type] 95 | 96 | 97 | def test_arrayspec_no_empty_codecs() -> None: 98 | """ 99 | Ensure that it is not possible to create an ArraySpec with no codecs 100 | """ 101 | 102 | with pytest.raises( 103 | ValidationError, match="Value error, Invalid length. Expected 1 or more, got 0." 104 | ): 105 | ArraySpec( 106 | shape=(1,), 107 | data_type="uint8", 108 | codecs=[], 109 | attributes={}, 110 | fill_value=0, 111 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 112 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 113 | ) 114 | 115 | 116 | @pytest.mark.filterwarnings("ignore:The dtype:UserWarning") 117 | @pytest.mark.filterwarnings("ignore:The data type:FutureWarning") 118 | @pytest.mark.filterwarnings("ignore:The codec:UserWarning") 119 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 120 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 121 | """ 122 | Test that deserializing an ArraySpec from a zarr python store works as expected. 
123 | """ 124 | zarr = pytest.importorskip("zarr") 125 | store = {} 126 | 127 | data_type = dtype_example.name 128 | 129 | if data_type == "variable_length_bytes": 130 | pytest.skip( 131 | reason="Bug in zarr python: see https://github.com/zarr-developers/zarr-python/issues/3263" 132 | ) 133 | 134 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=3) 135 | 136 | arr_spec = ArraySpec.from_zarr(arr) 137 | assert json_eq(arr_spec.model_dump(), json.loads(store["zarr.json"].to_bytes())) 138 | 139 | 140 | @pytest.mark.parametrize("path", ["", "foo"]) 141 | @pytest.mark.parametrize("overwrite", [True, False]) 142 | @pytest.mark.parametrize("dtype_example", DTYPE_EXAMPLES_V3, ids=str) 143 | @pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True, "order": "F"}]) 144 | @pytest.mark.filterwarnings("ignore:The codec `vlen-utf8`:UserWarning") 145 | @pytest.mark.filterwarnings("ignore:The codec `vlen-bytes`:UserWarning") 146 | @pytest.mark.filterwarnings("ignore:The data type :FutureWarning") 147 | def test_arrayspec_to_zarr( 148 | path: str, overwrite: bool, config: dict[str, object], dtype_example: DTypeExample 149 | ) -> None: 150 | """ 151 | Test that serializing an ArraySpec to a zarr python store works as expected. 152 | """ 153 | data_type = dtype_example.name 154 | fill_value = dtype_example.fill_value 155 | 156 | codecs = ({"name": "bytes", "configuration": {}},) 157 | if data_type == "variable_length_bytes": 158 | codecs = ({"name": "vlen-bytes"},) 159 | 160 | elif data_type in ("str", "string"): 161 | codecs = ({"name": "vlen-utf8"},) 162 | 163 | store = {} 164 | 165 | arr_spec = ArraySpec( 166 | attributes={}, 167 | shape=(10,), 168 | data_type=data_type, 169 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (10,)}}, 170 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 171 | codecs=codecs, 172 | fill_value=fill_value, 173 | dimension_names=("x",), 174 | ) 175 | if not ZARR_AVAILABLE: 176 | return 177 | arr = arr_spec.to_zarr(store=store, path=path, overwrite=overwrite, config=config) 178 | assert arr._async_array.metadata == arr._async_array.metadata 179 | for key, value in config.items(): 180 | assert asdict(arr._async_array._config)[key] == value 181 | 182 | 183 | def get_flat_example() -> tuple[dict[str, ArraySpec | GroupSpec], GroupSpec]: 184 | """ 185 | Get example data for testing to_flat and from_flat. 186 | 187 | The returned value is a tuple with two elements: a flattened dict representation of a hierarchy, 188 | and the root group, with all of its members (i.e., the non-flat version of that hierarchy). 189 | """ 190 | named_nodes: tuple[ArraySpec | BaseGroupSpec, ...] 
= ( 191 | BaseGroupSpec(attributes={"name": ""}), 192 | ArraySpec.from_array(np.arange(10), attributes={"name": "/a1"}), 193 | BaseGroupSpec(attributes={"name": "/g1"}), 194 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/a2"}), 195 | BaseGroupSpec(attributes={"name": "/g1/g2"}), 196 | ArraySpec.from_array(np.arange(10), attributes={"name": "/g1/g2/a3"}), 197 | ) 198 | 199 | members_flat: dict[str, ArraySpec | BaseGroupSpec] = { 200 | a.attributes["name"]: a for a in named_nodes 201 | } 202 | g2 = GroupSpec( 203 | attributes=members_flat["/g1/g2"].attributes, members={"a3": members_flat["/g1/g2/a3"]} 204 | ) 205 | g1 = GroupSpec( 206 | attributes=members_flat["/g1"].attributes, members={"a2": members_flat["/g1/a2"], "g2": g2} 207 | ) 208 | root = GroupSpec( 209 | attributes=members_flat[""].attributes, members={"g1": g1, "a1": members_flat["/a1"]} 210 | ) 211 | return members_flat, root 212 | 213 | 214 | class TestGroupSpec: 215 | @staticmethod 216 | def test_to_flat() -> None: 217 | """ 218 | Test that the to_flat method generates a flat representation of the hierarchy 219 | """ 220 | 221 | members_flat, root = get_flat_example() 222 | observed = root.to_flat() 223 | assert observed == members_flat 224 | 225 | @staticmethod 226 | def test_from_flat() -> None: 227 | """ 228 | Test that the from_flat method generates a `GroupSpec` from a flat representation of the 229 | hierarchy 230 | """ 231 | members_flat, root = get_flat_example() 232 | assert GroupSpec.from_flat(members_flat).attributes == root.attributes 233 | 234 | @staticmethod 235 | def test_from_zarr_depth() -> None: 236 | zarr = pytest.importorskip("zarr") 237 | codecs = ({"name": "bytes", "configuration": {}},) 238 | tree: dict[str, BaseGroupSpec | ArraySpec] = { 239 | "": BaseGroupSpec(attributes={"level": 0, "type": "group"}), 240 | "/1": BaseGroupSpec(attributes={"level": 1, "type": "group"}), 241 | "/1/2": BaseGroupSpec(attributes={"level": 2, "type": "group"}), 242 | "/1/2/1": BaseGroupSpec(attributes={"level": 3, "type": "group"}), 243 | "/1/2/2": ArraySpec.from_array( 244 | np.arange(20), attributes={"level": 3, "type": "array"}, codecs=codecs 245 | ), 246 | } 247 | store = zarr.storage.MemoryStore() 248 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 249 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 250 | assert group_in_0.attributes == tree[""].attributes 251 | 252 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 253 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 254 | assert group_in_1.members is not None 255 | assert group_in_1.members["1"].attributes == tree["/1"].attributes 256 | 257 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 258 | assert group_in_2.members is not None 259 | assert group_in_2.members["1"].members["2"].attributes == tree["/1/2"].attributes 260 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 261 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 262 | 263 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 264 | assert group_in_3.members is not None 265 | assert ( 266 | group_in_3.members["1"].members["2"].members["1"].attributes 267 | == tree["/1/2/1"].attributes 268 | ) 269 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 270 | assert group_in_3.members["1"].attributes 
== tree["/1"].attributes # type: ignore[attr-defined] 271 | assert group_in_3.members["1"].members["2"].attributes == tree["/1/2"].attributes # type: ignore[attr-defined] 272 | 273 | 274 | def test_mix_v3_v2_fails() -> None: 275 | from pydantic_zarr.v2 import ArraySpec as ArraySpecv2 276 | 277 | members_flat = {"/a": ArraySpecv2.from_array(np.ones(1))} 278 | with pytest.raises( 279 | ValueError, 280 | match=re.escape( 281 | "Value at '/a' is not a v3 ArraySpec or BaseGroupSpec (got type(value)=)" 282 | ), 283 | ): 284 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 285 | 286 | 287 | @pytest.mark.parametrize( 288 | ("args", "kwargs", "expected_names"), 289 | [ 290 | ((1,), {"dimension_names": ["x"]}, ("x",)), 291 | ((1,), {}, None), 292 | ], 293 | ) 294 | def test_dim_names_from_zarr_array( 295 | args: tuple, kwargs: dict, expected_names: tuple[str, ...] | None 296 | ) -> None: 297 | zarr = pytest.importorskip("zarr") 298 | 299 | arr = zarr.zeros(*args, **kwargs) 300 | spec: ArraySpec = ArraySpec.from_zarr(arr) 301 | assert spec.dimension_names == expected_names 302 | 303 | 304 | @pytest.mark.skipif(not ZARR_AVAILABLE, reason="zarr-python is not installed") 305 | def test_typed_members() -> None: 306 | """ 307 | Test GroupSpec creation with typed members 308 | """ 309 | 310 | array1d = ArraySpec( 311 | shape=(1,), 312 | data_type="uint8", 313 | chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}}, 314 | chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, 315 | fill_value=0, 316 | codecs=({"name": "bytes"},), 317 | attributes={}, 318 | ) 319 | 320 | class DatasetMembers(TypedDict): 321 | x: ArraySpec 322 | y: ArraySpec 323 | 324 | class DatasetGroup(GroupSpec): 325 | members: DatasetMembers 326 | 327 | class ExpectedMembers(TypedDict): 328 | r10m: DatasetGroup 329 | r20m: DatasetGroup 330 | 331 | class ExpectedGroup(GroupSpec): 332 | members: ExpectedMembers 333 | 334 | flat = { 335 | "": BaseGroupSpec(attributes={}), 336 | "/r10m": BaseGroupSpec(attributes={}), 337 | "/r20m": BaseGroupSpec(attributes={}), 338 | "/r10m/x": array1d, 339 | "/r10m/y": array1d, 340 | "/r20m/x": array1d, 341 | "/r20m/y": array1d, 342 | } 343 | 344 | zg = GroupSpec.from_flat(flat).to_zarr({}, path="") 345 | ExpectedGroup.from_zarr(zg) 346 | 347 | 348 | def test_arrayspec_with_methods() -> None: 349 | """ 350 | Test that ArraySpec with_* methods create new validated copies 351 | """ 352 | original = ArraySpec.from_array(np.arange(10), attributes={"foo": "bar"}) 353 | 354 | # Test with_attributes 355 | new_attrs = original.with_attributes({"baz": "qux"}) 356 | assert new_attrs.attributes == {"baz": "qux"} 357 | assert original.attributes == {"foo": "bar"} # Original unchanged 358 | assert new_attrs is not original 359 | 360 | # Test with_shape 361 | new_shape = original.with_shape((20,)) 362 | assert new_shape.shape == (20,) 363 | assert original.shape == (10,) 364 | 365 | # Test with_data_type 366 | new_dtype = original.with_data_type("float32") 367 | assert new_dtype.data_type == "float32" 368 | assert original.data_type == "int64" 369 | 370 | # Test with_chunk_grid 371 | new_grid = original.with_chunk_grid({"name": "regular", "configuration": {"chunk_shape": (5,)}}) 372 | assert new_grid.chunk_grid["configuration"]["chunk_shape"] == (5,) # type: ignore[index] 373 | assert original.chunk_grid["configuration"]["chunk_shape"] == (10,) # type: ignore[index] 374 | 375 | # Test with_chunk_key_encoding 376 | new_encoding = original.with_chunk_key_encoding( 377 | 
{"name": "default", "configuration": {"separator": "."}} 378 | ) 379 | assert new_encoding.chunk_key_encoding["configuration"]["separator"] == "." # type: ignore[index] 380 | assert original.chunk_key_encoding["configuration"]["separator"] == "/" # type: ignore[index] 381 | 382 | # Test with_fill_value 383 | new_fill = original.with_fill_value(999) 384 | assert new_fill.fill_value == 999 385 | assert original.fill_value == 0 386 | 387 | # Test with_codecs 388 | new_codecs = original.with_codecs(({"name": "gzip", "configuration": {"level": 1}},)) 389 | assert len(new_codecs.codecs) == 1 390 | assert new_codecs.codecs[0]["name"] == "gzip" # type: ignore[index] 391 | 392 | # Test with_storage_transformers 393 | new_transformers = original.with_storage_transformers(({"name": "test", "configuration": {}},)) 394 | assert len(new_transformers.storage_transformers) == 1 395 | assert original.storage_transformers == () 396 | 397 | # Test with_dimension_names 398 | new_dims = original.with_dimension_names(("x",)) 399 | assert new_dims.dimension_names == ("x",) 400 | assert original.dimension_names is None 401 | 402 | 403 | def test_arrayspec_with_methods_validation() -> None: 404 | """ 405 | Test that ArraySpec with_* methods trigger validation 406 | """ 407 | spec = ArraySpec.from_array(np.arange(10), attributes={}) 408 | 409 | # Test that validation fails when dimension_names length doesn't match shape 410 | with pytest.raises(ValidationError, match="Invalid `dimension names` attribute"): 411 | spec.with_dimension_names(("x", "y")) # 2 names for 1D array 412 | 413 | # Test that validation fails with empty codecs 414 | with pytest.raises(ValidationError, match="Invalid length. Expected 1 or more, got 0"): 415 | spec.with_codecs(()) 416 | 417 | 418 | def test_groupspec_with_methods() -> None: 419 | """ 420 | Test that GroupSpec with_* methods create new validated copies 421 | """ 422 | array_spec = ArraySpec.from_array(np.arange(10), attributes={}) 423 | original = GroupSpec(attributes={"group": "attr"}, members={"arr": array_spec}) 424 | 425 | # Test with_attributes 426 | new_attrs = original.with_attributes({"new": "attr"}) 427 | assert new_attrs.attributes == {"new": "attr"} 428 | assert original.attributes == {"group": "attr"} # Original unchanged 429 | assert new_attrs is not original 430 | 431 | # Test with_members 432 | new_array = ArraySpec.from_array(np.arange(5), attributes={}) 433 | new_members = original.with_members({"new_arr": new_array}) 434 | assert "new_arr" in new_members.members 435 | assert "arr" not in new_members.members # Replacement, not merge 436 | assert "arr" in original.members # Original unchanged 437 | 438 | 439 | def test_groupspec_with_members_validation() -> None: 440 | """ 441 | Test that GroupSpec with_members triggers validation 442 | """ 443 | spec = GroupSpec(attributes={}, members={}) 444 | 445 | # Test that validation fails with invalid member names 446 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid'): 447 | spec.with_members({"a/b": ArraySpec.from_array(np.arange(10), attributes={})}) 448 | -------------------------------------------------------------------------------- /docs/experimental/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | This page demonstrates how to use the experimental `ArraySpec` and `GroupSpec` models for Zarr V2 and V3. 4 | 5 | ## Creating an `ArraySpec` 6 | 7 | The `ArraySpec` model represents Zarr array metadata. 
8 | 9 | === "Zarr V2" 10 | 11 | ```python 12 | from pydantic_zarr.experimental.v2 import ArraySpec 13 | 14 | # Create a simple array specification 15 | array = ArraySpec( 16 | shape=(1000, 1000), 17 | dtype='uint8', 18 | chunks=(100, 100), 19 | attributes={'description': 'my array', 'units': 'meters'} 20 | ) 21 | 22 | # Get the model as a JSON string 23 | spec_json = array.model_dump_json(indent=2) 24 | print(spec_json) 25 | """ 26 | { 27 | "zarr_format": 2, 28 | "attributes": { 29 | "description": "my array", 30 | "units": "meters" 31 | }, 32 | "shape": [ 33 | 1000, 34 | 1000 35 | ], 36 | "chunks": [ 37 | 100, 38 | 100 39 | ], 40 | "dtype": "|u1", 41 | "fill_value": 0, 42 | "order": "C", 43 | "filters": null, 44 | "dimension_separator": "/", 45 | "compressor": null 46 | } 47 | """ 48 | ``` 49 | 50 | === "Zarr V3" 51 | 52 | ```python 53 | from pydantic_zarr.experimental.v3 import ArraySpec 54 | 55 | # Create a simple array specification 56 | array = ArraySpec( 57 | shape=(1000, 1000), 58 | data_type='uint8', 59 | chunk_grid={ 60 | 'name': 'regular', 61 | 'configuration': {'chunk_shape': (100, 100)} 62 | }, 63 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 64 | codecs=[{'name': 'bytes'}], 65 | fill_value=0, 66 | attributes={'description': 'my array', 'units': 'meters'} 67 | ) 68 | 69 | # Get the model as JSON string 70 | spec_json = array.model_dump_json(indent=2) 71 | print(spec_json) 72 | """ 73 | { 74 | "zarr_format": 3, 75 | "node_type": "array", 76 | "attributes": { 77 | "description": "my array", 78 | "units": "meters" 79 | }, 80 | "shape": [ 81 | 1000, 82 | 1000 83 | ], 84 | "data_type": "uint8", 85 | "chunk_grid": { 86 | "name": "regular", 87 | "configuration": { 88 | "chunk_shape": [ 89 | 100, 90 | 100 91 | ] 92 | } 93 | }, 94 | "chunk_key_encoding": { 95 | "name": "default", 96 | "configuration": { 97 | "separator": "/" 98 | } 99 | }, 100 | "fill_value": 0, 101 | "codecs": [ 102 | { 103 | "name": "bytes" 104 | } 105 | ], 106 | "storage_transformers": [], 107 | "dimension_names": null 108 | } 109 | """ 110 | ``` 111 | 112 | ## Creating a Group Specification 113 | 114 | The `GroupSpec` model represents a Zarr group that can contain arrays and other groups as members. 
115 | 116 | === "Zarr V2" 117 | 118 | ```python 119 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 120 | 121 | # Create ArraySpec for group members 122 | data_array = ArraySpec( 123 | shape=(1000, 1000), 124 | dtype='float32', 125 | chunks=(100, 100), 126 | attributes={'description': 'image data'} 127 | ) 128 | 129 | metadata_array = ArraySpec( 130 | shape=(1000,), 131 | dtype='uint32', 132 | chunks=(100,), 133 | attributes={'description': 'pixel metadata'} 134 | ) 135 | 136 | # Create a group containing these arrays 137 | group = GroupSpec( 138 | attributes={ 139 | 'name': 'experiment_001', 140 | 'date': '2024-11-23', 141 | 'version': 1 142 | }, 143 | members={ 144 | 'image': data_array, 145 | 'metadata': metadata_array 146 | } 147 | ) 148 | ``` 149 | 150 | === "Zarr V3" 151 | 152 | ```python 153 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 154 | 155 | # Create ArraySpec for group members 156 | data_array = ArraySpec( 157 | shape=(1000, 1000), 158 | data_type='float32', 159 | chunk_grid={ 160 | 'name': 'regular', 161 | 'configuration': {'chunk_shape': (100, 100)} 162 | }, 163 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 164 | codecs=[{'name': 'bytes'}], 165 | fill_value=0, 166 | attributes={'description': 'image data'} 167 | ) 168 | 169 | metadata_array = ArraySpec( 170 | shape=(1000,), 171 | data_type='uint32', 172 | chunk_grid={ 173 | 'name': 'regular', 174 | 'configuration': {'chunk_shape': (100,)} 175 | }, 176 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 177 | codecs=[{'name': 'bytes'}], 178 | fill_value=0, 179 | attributes={'description': 'pixel metadata'} 180 | ) 181 | 182 | # Create a GroupSpec containing these arrays 183 | group = GroupSpec( 184 | attributes={ 185 | 'name': 'experiment_001', 186 | 'date': '2024-11-23', 187 | 'version': 1 188 | }, 189 | members={ 190 | 'image': data_array, 191 | 'metadata': metadata_array 192 | } 193 | ) 194 | ``` 195 | 196 | ## Nested Groups 197 | 198 | You can create hierarchical structures by nesting groups within groups. 
199 | 200 | === "Zarr V2" 201 | 202 | ```python 203 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 204 | 205 | # Create a multi-level hierarchy 206 | raw_data = ArraySpec( 207 | shape=(512, 512), 208 | dtype='uint8', 209 | chunks=(64, 64), 210 | attributes={} 211 | ) 212 | 213 | processed_data = ArraySpec( 214 | shape=(512, 512), 215 | dtype='float32', 216 | chunks=(64, 64), 217 | attributes={} 218 | ) 219 | 220 | # Create sub-groups 221 | raw_group = GroupSpec( 222 | attributes={'processing_level': 'raw'}, 223 | members={'data': raw_data} 224 | ) 225 | 226 | processed_group = GroupSpec( 227 | attributes={'processing_level': 'processed'}, 228 | members={'data': processed_data} 229 | ) 230 | 231 | # Create root group containing sub-groups 232 | root = GroupSpec( 233 | attributes={'project': 'imaging_study'}, 234 | members={ 235 | 'raw': raw_group, 236 | 'processed': processed_group 237 | } 238 | ) 239 | ``` 240 | 241 | === "Zarr V3" 242 | 243 | ```python 244 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 245 | 246 | # Create a multi-level hierarchy 247 | raw_data = ArraySpec( 248 | shape=(512, 512), 249 | data_type='uint8', 250 | chunk_grid={ 251 | 'name': 'regular', 252 | 'configuration': {'chunk_shape': (64, 64)} 253 | }, 254 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 255 | codecs=[{'name': 'bytes'}], 256 | fill_value=0, 257 | attributes={} 258 | ) 259 | 260 | processed_data = ArraySpec( 261 | shape=(512, 512), 262 | data_type='float32', 263 | chunk_grid={ 264 | 'name': 'regular', 265 | 'configuration': {'chunk_shape': (64, 64)} 266 | }, 267 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 268 | codecs=[{'name': 'bytes'}], 269 | fill_value=0, 270 | attributes={} 271 | ) 272 | 273 | # Create sub-groups 274 | raw_group = GroupSpec( 275 | attributes={'processing_level': 'raw'}, 276 | members={'data': raw_data} 277 | ) 278 | 279 | processed_group = GroupSpec( 280 | attributes={'processing_level': 'processed'}, 281 | members={'data': processed_data} 282 | ) 283 | 284 | # Create root group containing sub-groups 285 | root = GroupSpec( 286 | attributes={'project': 'imaging_study'}, 287 | members={ 288 | 'raw': raw_group, 289 | 'processed': processed_group 290 | } 291 | ) 292 | ``` 293 | 294 | ## Working with Flattened Hierarchies 295 | 296 | The `to_flat()` method converts a hierarchical group structure into a flat dictionary representation. In the dict form, instances of `GroupSpec` are converted to instances of `BaseGroupSpec`, which models a Zarr group without any members. We use a different type because in the flat representation, the hierarchy structure is fully encoded by the keys of the dict. 
297 | 298 | === "Zarr V2" 299 | 300 | ```python 301 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 302 | 303 | # Create a group hierarchy 304 | array = ArraySpec( 305 | shape=(100,), 306 | dtype='float32', 307 | chunks=(10,), 308 | attributes={} 309 | ) 310 | 311 | subgroup = GroupSpec( 312 | attributes={'level': 1}, 313 | members={'data': array} 314 | ) 315 | 316 | root = GroupSpec( 317 | attributes={'level': 0}, 318 | members={'sub': subgroup} 319 | ) 320 | 321 | # Convert to flat representation 322 | flat = root.to_flat() 323 | print(flat) 324 | """ 325 | { 326 | '': BaseGroupSpec(zarr_format=2, attributes={'level': 0}), 327 | '/sub': BaseGroupSpec(zarr_format=2, attributes={'level': 1}), 328 | '/sub/data': ArraySpec( 329 | zarr_format=2, 330 | attributes={}, 331 | shape=(100,), 332 | chunks=(10,), 333 | dtype=' False 424 | 425 | # True because we are ignoring attributes 426 | print(array1.like(array2, exclude={'attributes'})) 427 | #> True 428 | 429 | # Create two groups 430 | group1 = GroupSpec( 431 | attributes={'version': 1}, 432 | members={'data': array1} 433 | ) 434 | 435 | group2 = GroupSpec( 436 | attributes={'version': 2}, 437 | members={'data': array1} 438 | ) 439 | 440 | # False because of differing attributes 441 | print(group1.like(group2)) 442 | #> False 443 | 444 | # True because we are ignoring attributes 445 | print(group1.like(group2, exclude={'attributes'})) 446 | #> True 447 | ``` 448 | 449 | === "Zarr V3" 450 | 451 | ```python 452 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 453 | 454 | # Create two similar arrays 455 | array1 = ArraySpec( 456 | shape=(100, 100), 457 | data_type='uint8', 458 | chunk_grid={ 459 | 'name': 'regular', 460 | 'configuration': {'chunk_shape': (10, 10)} 461 | }, 462 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 463 | codecs=[{'name': 'bytes'}], 464 | fill_value=0, 465 | attributes={'name': 'array1'} 466 | ) 467 | 468 | array2 = ArraySpec( 469 | shape=(100, 100), 470 | data_type='uint8', 471 | chunk_grid={ 472 | 'name': 'regular', 473 | 'configuration': {'chunk_shape': (10, 10)} 474 | }, 475 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 476 | codecs=[{'name': 'bytes'}], 477 | fill_value=0, 478 | attributes={'name': 'array2'} 479 | ) 480 | 481 | # False because of differing attributes 482 | print(array1.like(array2)) 483 | #> False 484 | 485 | # True because we are ignoring attributes 486 | print(array1.like(array2, exclude={'attributes'})) 487 | #> True 488 | 489 | # Create two groups 490 | group1 = GroupSpec( 491 | attributes={'version': 1}, 492 | members={'data': array1} 493 | ) 494 | 495 | group2 = GroupSpec( 496 | attributes={'version': 2}, 497 | members={'data': array1} 498 | ) 499 | 500 | # False because of differing attributes 501 | print(group1.like(group2)) 502 | #> False 503 | 504 | # True because we are ignoring attributes 505 | print(group1.like(group2, exclude={'attributes'})) 506 | #> True 507 | ``` 508 | 509 | ## Type-safe Group Members with TypedDict 510 | 511 | Define strict schemas for group members using `TypedDict` to enable runtime validation. 
512 | 513 | === "Zarr V2" 514 | 515 | ```python 516 | from typing_extensions import TypedDict 517 | from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec 518 | 519 | # Define the expected structure of group members 520 | class TimeseriesMembers(TypedDict): 521 | timestamps: ArraySpec 522 | values: ArraySpec 523 | 524 | # Create ArraySpec 525 | timestamps = ArraySpec( 526 | shape=(10000,), 527 | dtype='float64', 528 | chunks=(1000,), 529 | attributes={'units': 'seconds since epoch'} 530 | ) 531 | 532 | values = ArraySpec( 533 | shape=(10000,), 534 | dtype='float32', 535 | chunks=(1000,), 536 | attributes={'units': 'meters'} 537 | ) 538 | 539 | # Define a custom GroupSpec with typed members 540 | class TimeseriesGroup(GroupSpec): 541 | members: TimeseriesMembers 542 | 543 | # This succeeds - all required members present 544 | ts_group = TimeseriesGroup( 545 | attributes={'sensor': 'accelerometer'}, 546 | members={'timestamps': timestamps, 'values': values} 547 | ) 548 | 549 | # This fails because the required member 'values' is missing 550 | try: 551 | ts_group = TimeseriesGroup( 552 | attributes={'sensor': 'accelerometer'}, 553 | members={'timestamps': timestamps} 554 | ) 555 | except ValueError as e: 556 | print(e) 557 | """ 558 | 1 validation error for TimeseriesGroup 559 | members.values 560 | Field required [type=missing, input_value={'timestamps': ArraySpec(...r='/', compressor=None)}, input_type=dict] 561 | For further information visit https://errors.pydantic.dev/2.11/v/missing 562 | """ 563 | ``` 564 | 565 | === "Zarr V3" 566 | 567 | ```python 568 | from typing_extensions import TypedDict 569 | from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec 570 | 571 | # Define the expected structure of group members 572 | class TimeseriesMembers(TypedDict): 573 | timestamps: ArraySpec 574 | values: ArraySpec 575 | 576 | # Create ArraySpec 577 | timestamps = ArraySpec( 578 | shape=(10000,), 579 | data_type='float64', 580 | chunk_grid={ 581 | 'name': 'regular', 582 | 'configuration': {'chunk_shape': (1000,)} 583 | }, 584 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 585 | codecs=[{'name': 'bytes'}], 586 | fill_value=0, 587 | attributes={'units': 'seconds since epoch'} 588 | ) 589 | 590 | values = ArraySpec( 591 | shape=(10000,), 592 | data_type='float32', 593 | chunk_grid={ 594 | 'name': 'regular', 595 | 'configuration': {'chunk_shape': (1000,)} 596 | }, 597 | chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}}, 598 | codecs=[{'name': 'bytes'}], 599 | fill_value=0, 600 | attributes={'units': 'meters'} 601 | ) 602 | 603 | # Define a custom GroupSpec with typed members 604 | class TimeseriesGroup(GroupSpec): 605 | members: TimeseriesMembers 606 | 607 | # This succeeds - all required members present 608 | ts_group = TimeseriesGroup( 609 | attributes={'sensor': 'accelerometer'}, 610 | members={'timestamps': timestamps, 'values': values} 611 | ) 612 | 613 | # This fails because the required member 'values' is missing 614 | try: 615 | ts_group = TimeseriesGroup( 616 | attributes={'sensor': 'accelerometer'}, 617 | members={'timestamps': timestamps} 618 | ) 619 | except ValueError as e: 620 | print(e) 621 | """ 622 | 1 validation error for TimeseriesGroup 623 | members.values 624 | Field required [type=missing, input_value={'timestamps': ArraySpec(..., dimension_names=None)}, input_type=dict] 625 | For further information visit https://errors.pydantic.dev/2.11/v/missing 626 | """ 627 | ``` 628 | 
-------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_v2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testts for pydantic_zarr.v2. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import json 8 | import re 9 | from contextlib import suppress 10 | from typing import TYPE_CHECKING, Any 11 | 12 | import pytest 13 | from pydantic import ValidationError 14 | 15 | from pydantic_zarr.core import tuplify_json 16 | 17 | from .conftest import DTYPE_EXAMPLES_V2, ZARR_PYTHON_VERSION, DTypeExample 18 | 19 | if TYPE_CHECKING: 20 | from typing import Literal 21 | 22 | import sys 23 | from dataclasses import dataclass 24 | from typing import TYPE_CHECKING, Literal 25 | 26 | if TYPE_CHECKING: 27 | from numcodecs.abc import Codec 28 | 29 | import numpy as np 30 | import numpy.typing as npt 31 | from packaging.version import Version 32 | 33 | from pydantic_zarr.v2 import ( 34 | ArraySpec, 35 | GroupSpec, 36 | auto_attributes, 37 | auto_chunks, 38 | auto_compresser, 39 | auto_dimension_separator, 40 | auto_fill_value, 41 | auto_filters, 42 | auto_order, 43 | from_flat, 44 | from_zarr, 45 | to_flat, 46 | to_zarr, 47 | ) 48 | 49 | if sys.version_info < (3, 12): 50 | from typing_extensions import TypedDict 51 | else: 52 | from typing import TypedDict 53 | 54 | try: 55 | import numcodecs 56 | except ImportError: 57 | numcodecs = None 58 | 59 | with suppress(ImportError): 60 | from zarr.errors import ContainsArrayError, ContainsGroupError 61 | 62 | ArrayMemoryOrder = Literal["C", "F"] 63 | DimensionSeparator = Literal[".", "/"] 64 | 65 | 66 | @pytest.fixture(params=("C", "F"), ids=["C", "F"]) 67 | def memory_order(request: pytest.FixtureRequest) -> ArrayMemoryOrder: 68 | """ 69 | Fixture that returns either "C" or "F" 70 | """ 71 | if request.param == "C": 72 | return "C" 73 | elif request.param == "F": 74 | return "F" 75 | msg = f"Invalid array memory order requested. Got {request.param}, expected one of (C, F)." 76 | raise ValueError(msg) 77 | 78 | 79 | @pytest.fixture(params=("/", "."), ids=["/", "."]) 80 | def dimension_separator(request: pytest.FixtureRequest) -> DimensionSeparator: 81 | """ 82 | Fixture that returns either "." or "/" 83 | """ 84 | if request.param == ".": 85 | return "." 86 | elif request.param == "/": 87 | return "/" 88 | msg = f"Invalid dimension separator requested. Got {request.param}, expected one of (., /)." 89 | raise ValueError(msg) 90 | 91 | 92 | @pytest.mark.parametrize("chunks", [(1,), (1, 2), ((1, 2, 3))]) 93 | @pytest.mark.parametrize("dtype", ["bool", "uint8", "float64"]) 94 | @pytest.mark.parametrize("compressor", [None, "LZMA", "GZip"]) 95 | @pytest.mark.parametrize( 96 | "filters", [(None,), ("delta",), ("scale_offset",), ("delta", "scale_offset")] 97 | ) 98 | def test_array_spec( 99 | chunks: tuple[int, ...], 100 | memory_order: ArrayMemoryOrder, 101 | dtype: str, 102 | dimension_separator: DimensionSeparator, 103 | compressor: str | None, 104 | filters: tuple[str, ...] 
| None, 105 | ) -> None: 106 | zarr = pytest.importorskip("zarr") 107 | numcodecs = pytest.importorskip("numcodecs") 108 | 109 | if compressor is not None: 110 | compressor = getattr(numcodecs, compressor)() 111 | 112 | store = zarr.storage.MemoryStore() 113 | _filters: list[Codec] | None 114 | if filters is not None: 115 | _filters = [] 116 | for filter in filters: 117 | if filter == "delta": 118 | _filters.append(numcodecs.Delta(dtype)) 119 | if filter == "scale_offset": 120 | _filters.append(numcodecs.FixedScaleOffset(0, 1.0, dtype=dtype)) 121 | else: 122 | _filters = filters 123 | 124 | array = zarr.create( 125 | (100,) * len(chunks), 126 | path="foo", 127 | store=store, 128 | chunks=chunks, 129 | dtype=dtype, 130 | order=memory_order, 131 | dimension_separator=dimension_separator, 132 | compressor=compressor, 133 | filters=_filters, 134 | zarr_format=2, 135 | ) 136 | attributes = {"foo": [100, 200, 300], "bar": "hello"} 137 | array.attrs.put(attributes) 138 | spec = ArraySpec.from_zarr(array) 139 | 140 | assert spec.zarr_format == array.metadata.zarr_format 141 | assert spec.dtype == array.dtype 142 | assert spec.attributes == array.attrs.asdict() 143 | assert spec.chunks == array.chunks 144 | 145 | assert spec.dimension_separator == array.metadata.dimension_separator 146 | assert spec.shape == array.shape 147 | assert spec.fill_value == array.fill_value 148 | # this is a sign that nullability is being misused in zarr-python 149 | # the correct approach would be to use an empty list to express "no filters". 150 | if len(array.filters): 151 | assert spec.filters == [f.get_config() for f in array.filters] 152 | else: 153 | assert spec.filters is None 154 | 155 | if len(array.compressors): 156 | assert spec.compressor == array.compressors[0].get_config() 157 | else: 158 | assert spec.compressor is None 159 | 160 | assert spec.order == array.order 161 | 162 | array2 = spec.to_zarr(store, "foo2") 163 | 164 | assert spec.zarr_format == array2.metadata.zarr_format 165 | assert spec.dtype == array2.dtype 166 | assert spec.attributes == array2.attrs 167 | assert spec.chunks == array2.chunks 168 | 169 | if len(array2.compressors): 170 | assert spec.compressor == array2.compressors[0].get_config() 171 | else: 172 | assert spec.compressor is None 173 | 174 | if len(array2.filters): 175 | assert spec.filters == [f.get_config() for f in array2.filters] 176 | else: 177 | assert spec.filters is None 178 | 179 | assert spec.dimension_separator == array2.metadata.dimension_separator 180 | assert spec.shape == array2.shape 181 | assert spec.fill_value == array2.fill_value 182 | 183 | # test serialization 184 | store = zarr.storage.MemoryStore() 185 | stored = spec.to_zarr(store, path="foo") 186 | assert ArraySpec.from_zarr(stored) == spec 187 | 188 | # test that to_zarr is idempotent 189 | assert spec.to_zarr(store, path="foo") == stored 190 | 191 | # test that to_zarr raises if the extant array is different 192 | spec_2 = spec.model_copy(update={"attributes": {"baz": 10}}) 193 | with pytest.raises(ContainsArrayError): 194 | spec_2.to_zarr(store, path="foo") 195 | 196 | # test that we can overwrite the dissimilar array 197 | stored_2 = spec_2.to_zarr(store, path="foo", overwrite=True) 198 | assert ArraySpec.from_zarr(stored_2) == spec_2 199 | 200 | assert spec_2.to_zarr(store, path="foo").read_only is False 201 | 202 | 203 | @dataclass 204 | class FakeArray: 205 | shape: tuple[int, ...] 
206 | dtype: np.dtype[Any] 207 | 208 | 209 | @dataclass 210 | class WithAttrs: 211 | attrs: dict[str, Any] 212 | 213 | 214 | @dataclass 215 | class WithChunksize: 216 | chunksize: tuple[int, ...] 217 | 218 | 219 | @dataclass 220 | class FakeDaskArray(FakeArray, WithChunksize): ... 221 | 222 | 223 | @dataclass 224 | class FakeXarray(FakeDaskArray, WithAttrs): ... 225 | 226 | 227 | @pytest.mark.parametrize( 228 | "array", 229 | [ 230 | np.zeros((100), dtype="uint8"), 231 | FakeArray(shape=(11,), dtype=np.dtype("float64")), 232 | FakeDaskArray(shape=(22,), dtype=np.dtype("uint8"), chunksize=(11,)), 233 | FakeXarray(shape=(22,), dtype=np.dtype("uint8"), chunksize=(11,), attrs={"foo": "bar"}), 234 | ], 235 | ) 236 | @pytest.mark.parametrize("chunks", ["omit", "auto", (10,)]) 237 | @pytest.mark.parametrize("attributes", ["omit", "auto", {"foo": 10}]) 238 | @pytest.mark.parametrize("fill_value", ["omit", "auto", 15]) 239 | @pytest.mark.parametrize("order", ["omit", "auto", "F"]) 240 | @pytest.mark.parametrize("filters", ["omit", "auto", []]) 241 | @pytest.mark.parametrize("dimension_separator", ["omit", "auto", "."]) 242 | @pytest.mark.parametrize("compressor", ["omit", "auto", {"id": "gzip", "level": 1}]) 243 | def test_array_spec_from_array( 244 | *, 245 | array: npt.NDArray[Any], 246 | chunks: str | tuple[int, ...], 247 | attributes: str | dict[str, object], 248 | fill_value: object, 249 | order: str, 250 | filters: str | list[Codec], 251 | dimension_separator: str, 252 | compressor: str | dict[str, object], 253 | ) -> None: 254 | auto_options = ("omit", "auto") 255 | kwargs_out: dict[str, object] = {} 256 | 257 | kwargs_out["chunks"] = chunks 258 | kwargs_out["attributes"] = attributes 259 | kwargs_out["fill_value"] = fill_value 260 | kwargs_out["order"] = order 261 | kwargs_out["filters"] = filters 262 | kwargs_out["dimension_separator"] = dimension_separator 263 | kwargs_out["compressor"] = compressor 264 | 265 | # remove all the keyword arguments that should be defaulted 266 | kwargs_out = dict(filter(lambda kvp: kvp[1] != "omit", kwargs_out.items())) 267 | 268 | spec = ArraySpec.from_array(array, **kwargs_out) 269 | # arrayspec should round-trip from_array with no arguments 270 | assert spec.from_array(spec) == spec 271 | 272 | assert spec.dtype == array.dtype.str 273 | assert np.dtype(spec.dtype) == array.dtype 274 | 275 | assert spec.shape == array.shape 276 | 277 | if chunks in auto_options: 278 | assert spec.chunks == auto_chunks(array) 279 | else: 280 | assert spec.chunks == chunks 281 | 282 | if attributes in auto_options: 283 | assert spec.attributes == auto_attributes(array) 284 | else: 285 | assert spec.attributes == attributes 286 | 287 | if fill_value in auto_options: 288 | assert spec.fill_value == auto_fill_value(array) 289 | else: 290 | assert spec.fill_value == fill_value 291 | 292 | if order in auto_options: 293 | assert spec.order == auto_order(array) 294 | else: 295 | assert spec.order == order 296 | 297 | if filters in auto_options: 298 | assert spec.filters == auto_filters(array) 299 | else: 300 | assert spec.filters is None 301 | 302 | if dimension_separator in auto_options: 303 | assert spec.dimension_separator == auto_dimension_separator(array) 304 | else: 305 | assert spec.dimension_separator == dimension_separator 306 | 307 | if compressor in auto_options: 308 | assert spec.compressor == auto_compresser(array) 309 | else: 310 | assert spec.compressor == compressor 311 | 312 | 313 | @pytest.mark.parametrize("chunks", [(1,), (1, 2), ((1, 2, 3))]) 314 | 
@pytest.mark.parametrize("dtype", ["bool", "uint8", np.dtype("uint8"), "float64"]) 315 | @pytest.mark.parametrize("dimension_separator", [".", "/"]) 316 | @pytest.mark.parametrize( 317 | "compressor", 318 | [{"id": "lzma", "format": 1, "check": -1, "preset": None, "filters": None}, "GZip"], 319 | ) 320 | @pytest.mark.parametrize("filters", [(), ("delta",), ("scale_offset",), ("delta", "scale_offset")]) 321 | def test_serialize_deserialize_groupspec( 322 | chunks: tuple[int, ...], 323 | memory_order: ArrayMemoryOrder, 324 | dtype: str, 325 | dimension_separator: Literal[".", "/"], 326 | compressor: Any, 327 | filters: tuple[str, ...] | None, 328 | ) -> None: 329 | zarr = pytest.importorskip("zarr") 330 | numcodecs = pytest.importorskip("numcodecs") 331 | if isinstance(compressor, str): 332 | compressor = getattr(numcodecs, compressor)() 333 | 334 | _filters: list[Codec] | None 335 | if filters is not None: 336 | _filters = [] 337 | for filter in filters: 338 | if filter == "delta": 339 | _filters.append(numcodecs.Delta(dtype)) 340 | if filter == "scale_offset": 341 | _filters.append(numcodecs.FixedScaleOffset(0, 1.0, dtype=dtype)) 342 | else: 343 | _filters = filters 344 | 345 | class RootAttrs(TypedDict): 346 | foo: int 347 | bar: list[int] 348 | 349 | class SubGroupAttrs(TypedDict): 350 | a: str 351 | b: float 352 | 353 | SubGroup = GroupSpec[SubGroupAttrs, Any] 354 | 355 | class ArrayAttrs(TypedDict): 356 | scale: list[float] 357 | 358 | store = zarr.storage.MemoryStore() 359 | 360 | spec = GroupSpec[RootAttrs, ArraySpec | SubGroup]( 361 | attributes=RootAttrs(foo=10, bar=[0, 1, 2]), 362 | members={ 363 | "s0": ArraySpec[ArrayAttrs]( 364 | shape=(10,) * len(chunks), 365 | chunks=chunks, 366 | dtype=dtype, 367 | filters=_filters, 368 | compressor=compressor, 369 | order=memory_order, 370 | dimension_separator=dimension_separator, 371 | attributes=ArrayAttrs(scale=[1.0]), 372 | ), 373 | "s1": ArraySpec[ArrayAttrs]( 374 | shape=(5,) * len(chunks), 375 | chunks=chunks, 376 | dtype=dtype, 377 | filters=_filters, 378 | compressor=compressor, 379 | order=memory_order, 380 | dimension_separator=dimension_separator, 381 | attributes=ArrayAttrs(scale=[2.0]), 382 | ), 383 | "subgroup": SubGroup(attributes=SubGroupAttrs(a="foo", b=1.0)), 384 | }, 385 | ) 386 | # check that the model round-trips dict representation 387 | assert spec == GroupSpec(**spec.model_dump()) 388 | 389 | # materialize a zarr group, based on the spec 390 | group = to_zarr(spec, store, "/group_a") 391 | 392 | # parse the spec from that group 393 | observed = from_zarr(group) 394 | assert observed == spec 395 | 396 | # assert that we get the same group twice 397 | assert to_zarr(spec, store, "/group_a", overwrite=True) == group 398 | 399 | # check that we can't call to_zarr targeting the original group with a different spec 400 | spec_2 = spec.model_copy(update={"attributes": RootAttrs(foo=99, bar=[0, 1, 2])}) 401 | with pytest.raises(ContainsGroupError): 402 | _ = to_zarr(spec_2, store, "/group_a") 403 | 404 | # check that we can't call to_zarr with the original spec if the group has changed 405 | group.attrs["foo"] = 100 406 | with pytest.raises(ContainsGroupError): 407 | _ = to_zarr(spec, store, "/group_a") 408 | group.attrs["foo"] = 10 409 | 410 | # materialize again with overwrite 411 | group2 = to_zarr(spec, store, "/group_a", overwrite=True) 412 | assert group2 == group 413 | 414 | # again with class methods 415 | group3 = spec.to_zarr(store, "/group_b") 416 | observed = spec.from_zarr(group3) 417 | assert observed == spec 
418 | 419 | 420 | @pytest.mark.parametrize("base", range(1, 5)) 421 | def test_shape_chunks(base: int) -> None: 422 | """ 423 | Test that the length of the chunks and the shape match 424 | """ 425 | with pytest.raises(ValidationError): 426 | ArraySpec(shape=(1,) * base, chunks=(1,) * (base + 1), dtype="uint8", attributes={}) 427 | with pytest.raises(ValidationError): 428 | ArraySpec(shape=(1,) * (base + 1), chunks=(1,) * base, dtype="uint8", attributes={}) 429 | 430 | 431 | def test_validation() -> None: 432 | """ 433 | Test that specialized GroupSpec and ArraySpec instances cannot be serialized from 434 | the wrong inputs without a ValidationError. 435 | """ 436 | zarr = pytest.importorskip("zarr") 437 | 438 | class GroupAttrsA(TypedDict): 439 | group_a: bool 440 | 441 | class GroupAttrsB(TypedDict): 442 | group_b: bool 443 | 444 | class ArrayAttrsA(TypedDict): 445 | array_a: bool 446 | 447 | class ArrayAttrsB(TypedDict): 448 | array_b: bool 449 | 450 | ArrayA = ArraySpec[ArrayAttrsA] 451 | ArrayB = ArraySpec[ArrayAttrsB] 452 | GroupA = GroupSpec[GroupAttrsA, ArrayA] 453 | GroupB = GroupSpec[GroupAttrsB, ArrayB] 454 | 455 | store = zarr.storage.MemoryStore 456 | 457 | specA = GroupA( 458 | attributes=GroupAttrsA(group_a=True), 459 | members={ 460 | "a": ArrayA( 461 | attributes=ArrayAttrsA(array_a=True), 462 | shape=(100,), 463 | dtype="uint8", 464 | chunks=(10,), 465 | ) 466 | }, 467 | ) 468 | 469 | specB = GroupB( 470 | attributes=GroupAttrsB(group_b=True), 471 | members={ 472 | "a": ArrayB( 473 | attributes=ArrayAttrsB(array_b=True), 474 | shape=(100,), 475 | dtype="uint8", 476 | chunks=(10,), 477 | ) 478 | }, 479 | ) 480 | 481 | # check that we cannot create a specialized GroupSpec with the wrong attributes 482 | with pytest.raises(ValidationError): 483 | GroupB( 484 | attributes=GroupAttrsA(group_a=True), 485 | members={}, 486 | ) 487 | 488 | store = zarr.storage.MemoryStore() 489 | groupAMat = specA.to_zarr(store, path="group_a") 490 | groupBMat = specB.to_zarr(store, path="group_b") 491 | 492 | GroupA.from_zarr(groupAMat) 493 | GroupB.from_zarr(groupBMat) 494 | 495 | ArrayA.from_zarr(groupAMat["a"]) 496 | ArrayB.from_zarr(groupBMat["a"]) 497 | 498 | with pytest.raises(ValidationError): 499 | ArrayA.from_zarr(groupBMat["a"]) 500 | 501 | with pytest.raises(ValidationError): 502 | ArrayB.from_zarr(groupAMat["a"]) 503 | 504 | with pytest.raises(ValidationError): 505 | GroupB.from_zarr(groupAMat) 506 | 507 | with pytest.raises(ValidationError): 508 | GroupA.from_zarr(groupBMat) 509 | 510 | 511 | @pytest.mark.parametrize("shape", [(1,), (2, 2), (3, 4, 5)]) 512 | @pytest.mark.parametrize("dtype", [None, "uint8", "float32"]) 513 | def test_from_array(shape: tuple[int, ...], dtype: str | None) -> None: 514 | template = np.zeros(shape=shape, dtype=dtype) 515 | spec = ArraySpec.from_array(template) # type: ignore[var-annotated] 516 | 517 | assert spec.shape == template.shape 518 | assert np.dtype(spec.dtype) == np.dtype(template.dtype) 519 | assert spec.chunks == template.shape 520 | assert spec.attributes == {} 521 | 522 | chunks = template.ndim * (1,) 523 | attrs = {"foo": 100} 524 | spec2 = ArraySpec.from_array(template, chunks=chunks, attributes=attrs) 525 | assert spec2.chunks == chunks 526 | assert spec2.attributes == attrs 527 | 528 | 529 | @pytest.mark.parametrize("data", ["/", "a/b/c"]) 530 | def test_member_name(data: str) -> None: 531 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid.'): 532 | GroupSpec(attributes={}, members={data: 
GroupSpec(attributes={}, members={})}) 533 | 534 | 535 | @pytest.mark.parametrize( 536 | ("data", "expected"), 537 | [ 538 | ( 539 | ArraySpec.from_array(np.arange(10)), 540 | {"": ArraySpec.from_array(np.arange(10))}, 541 | ), 542 | ( 543 | GroupSpec( 544 | attributes={"foo": 10}, 545 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 546 | ), 547 | { 548 | "": GroupSpec(attributes={"foo": 10}, members=None), 549 | "/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 550 | }, 551 | ), 552 | ( 553 | GroupSpec( 554 | attributes={}, 555 | members={ 556 | "a": GroupSpec( 557 | attributes={"foo": 10}, 558 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 559 | ), 560 | "b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 561 | }, 562 | ), 563 | { 564 | "": GroupSpec(attributes={}, members=None), 565 | "/a": GroupSpec(attributes={"foo": 10}, members=None), 566 | "/a/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 567 | "/b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 568 | }, 569 | ), 570 | ], 571 | ) 572 | def test_flatten_unflatten( 573 | data: ArraySpec | GroupSpec, expected: dict[str, ArraySpec | GroupSpec] 574 | ) -> None: 575 | flattened = to_flat(data) 576 | assert flattened == expected 577 | assert from_flat(flattened) == data 578 | 579 | 580 | # todo: parametrize 581 | def test_array_like() -> None: 582 | a = ArraySpec.from_array(np.arange(10)) # type: ignore[var-annotated] 583 | assert a.like(a) 584 | 585 | b = a.model_copy(update={"dtype": "uint8"}) 586 | assert not a.like(b) 587 | assert a.like(b, exclude={"dtype"}) 588 | assert a.like(b, include={"shape"}) 589 | 590 | c = a.model_copy(update={"shape": (100, 100)}) 591 | assert not a.like(c) 592 | assert a.like(c, exclude={"shape"}) 593 | assert a.like(c, include={"dtype"}) 594 | 595 | 596 | def test_array_like_with_zarr() -> None: 597 | zarr = pytest.importorskip("zarr") 598 | arr = ArraySpec(shape=(1,), dtype="uint8", chunks=(1,), attributes={}) 599 | store = zarr.storage.MemoryStore() 600 | arr_stored = arr.to_zarr(store, path="arr") 601 | assert arr.like(arr_stored) 602 | 603 | 604 | # todo: parametrize 605 | def test_group_like() -> None: 606 | tree: dict[str, GroupSpec | ArraySpec] = { 607 | "": GroupSpec(attributes={"path": ""}, members=None), 608 | "/a": GroupSpec(attributes={"path": "/a"}, members=None), 609 | "/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/b"}), 610 | "/a/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/a/b"}), 611 | } 612 | group = GroupSpec.from_flat(tree) # type: ignore[var-annotated] 613 | assert group.like(group) 614 | assert not group.like(group.model_copy(update={"attributes": None})) 615 | assert group.like(group.model_copy(update={"attributes": None}), exclude={"attributes"}) 616 | assert group.like(group.model_copy(update={"attributes": None}), include={"members"}) 617 | 618 | 619 | # todo: parametrize 620 | def test_from_zarr_depth() -> None: 621 | zarr = pytest.importorskip("zarr") 622 | tree: dict[str, GroupSpec | ArraySpec] = { 623 | "": GroupSpec(members=None, attributes={"level": 0, "type": "group"}), 624 | "/1": GroupSpec(members=None, attributes={"level": 1, "type": "group"}), 625 | "/1/2": GroupSpec(members=None, attributes={"level": 2, "type": "group"}), 626 | "/1/2/1": GroupSpec(members=None, attributes={"level": 3, "type": "group"}), 627 | "/1/2/2": ArraySpec.from_array(np.arange(20), attributes={"level": 3, "type": "array"}), 628 | } 629 | 630 
| store = zarr.storage.MemoryStore() 631 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 632 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 633 | assert group_in_0 == tree[""] 634 | 635 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 636 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 637 | assert group_in_1.members is not None 638 | assert group_in_1.members["1"] == tree["/1"] 639 | 640 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 641 | assert group_in_2.members is not None 642 | assert group_in_2.members["1"].members["2"] == tree["/1/2"] 643 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 644 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 645 | 646 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 647 | assert group_in_3.members is not None 648 | assert group_in_3.members["1"].members["2"].members["1"] == tree["/1/2/1"] 649 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 650 | assert group_in_3.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 651 | assert group_in_3.members["1"].members["2"].attributes == tree["/1/2"].attributes # type: ignore[attr-defined] 652 | 653 | 654 | @pytest.mark.parametrize(("dtype_example"), DTYPE_EXAMPLES_V2, ids=str) 655 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 656 | """ 657 | Test that deserializing an ArraySpec from a zarr python store works as expected. 658 | """ 659 | zarr = pytest.importorskip("zarr") 660 | store = {} 661 | data_type = dtype_example.name 662 | if ZARR_PYTHON_VERSION >= Version("3.1.0") and data_type == "|O": 663 | pytest.skip(reason="Data type inference with an object dtype will fail in zarr>=3.1.0") 664 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=2) 665 | 666 | arr_spec = ArraySpec.from_zarr(arr) 667 | 668 | observed = {"attributes": arr.attrs.asdict()} | json.loads( 669 | store[".zarray"].to_bytes(), object_hook=tuplify_json 670 | ) 671 | if observed["filters"] is not None: 672 | observed["filters"] = list(observed["filters"]) 673 | # this covers the case of the structured data type, which would otherwise be deserialized as a 674 | # tuple of tuples, but is stored on the arrayspec as a list of tuples. 675 | if isinstance(observed["dtype"], tuple): 676 | observed["dtype"] = list(observed["dtype"]) 677 | 678 | assert arr_spec.model_dump() == observed 679 | 680 | 681 | def test_mix_v3_v2_fails() -> None: 682 | from pydantic_zarr.v3 import ArraySpec as ArraySpecv3 683 | 684 | members_flat = {"/a": ArraySpecv3.from_array(np.ones(1))} 685 | with pytest.raises( 686 | ValueError, 687 | match=re.escape( 688 | "Value at '/a' is not a v2 ArraySpec or GroupSpec (got type(value)=)" 689 | ), 690 | ): 691 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 692 | -------------------------------------------------------------------------------- /tests/test_pydantic_zarr/test_experimental/test_v2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Testts for pydantic_zarr.v2. 
3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import json 8 | import re 9 | import sys 10 | from collections.abc import Mapping # noqa: TC003 11 | from contextlib import suppress 12 | from typing import TYPE_CHECKING, Any 13 | 14 | import dask.array as da 15 | import pytest 16 | import xarray as xr 17 | from pydantic import ValidationError 18 | 19 | from pydantic_zarr.core import tuplify_json 20 | from pydantic_zarr.experimental.core import json_eq 21 | 22 | from ..conftest import DTYPE_EXAMPLES_V2, ZARR_AVAILABLE, ZARR_PYTHON_VERSION, DTypeExample 23 | 24 | if TYPE_CHECKING: 25 | from numcodecs.abc import Codec 26 | 27 | import numpy as np 28 | import numpy.typing as npt 29 | from packaging.version import Version 30 | 31 | from pydantic_zarr.experimental.v2 import ( 32 | DIMENSION_SEPARATOR, 33 | MEMORY_ORDER, 34 | ArraySpec, 35 | BaseGroupSpec, 36 | CodecDict, 37 | DimensionSeparator, 38 | GroupSpec, 39 | MemoryOrder, 40 | auto_attributes, 41 | auto_chunks, 42 | auto_compresser, 43 | auto_dimension_separator, 44 | auto_fill_value, 45 | auto_filters, 46 | auto_order, 47 | from_flat, 48 | from_zarr, 49 | to_flat, 50 | to_zarr, 51 | ) 52 | 53 | if sys.version_info < (3, 12): 54 | from typing_extensions import TypedDict 55 | else: 56 | from typing import TypedDict 57 | 58 | try: 59 | import numcodecs 60 | except ImportError: 61 | numcodecs = None 62 | 63 | with suppress(ImportError): 64 | from zarr.errors import ContainsArrayError, ContainsGroupError 65 | 66 | 67 | @pytest.mark.parametrize(("chunks", "shape"), [((1,), (10,)), ((1, 2, 3), (4, 5, 6))]) 68 | @pytest.mark.parametrize("dtype", ["bool", "float64", "|u1", np.float32]) 69 | @pytest.mark.parametrize("compressor", [None, {"id": "gzip", "level": 1}]) 70 | @pytest.mark.parametrize( 71 | "filters", 72 | [ 73 | None, 74 | (), 75 | ({"id": "delta", "dtype": "uint8"},), 76 | ({"id": "delta", "dtype": "uint8"}, {"id": "gzip", "level": 1}), 77 | ], 78 | ) 79 | @pytest.mark.parametrize("dimension_separator", DIMENSION_SEPARATOR) 80 | @pytest.mark.parametrize("memory_order", MEMORY_ORDER) 81 | @pytest.mark.parametrize("attributes", [{}, {"a": [100]}, {"b": ("e", "f")}]) 82 | def test_array_spec( 83 | chunks: tuple[int, ...], 84 | shape: tuple[int, ...], 85 | memory_order: MemoryOrder, 86 | dtype: str, 87 | dimension_separator: DimensionSeparator, 88 | compressor: str | CodecDict, 89 | filters: tuple[str, ...] 
| None, 90 | attributes: dict[str, object], 91 | ) -> None: 92 | zarr = pytest.importorskip("zarr") 93 | import numcodecs 94 | 95 | if filters is not None: 96 | _filters = tuple(numcodecs.get_codec(f) for f in filters) 97 | else: 98 | _filters = None 99 | store = {} 100 | 101 | array = zarr.create_array( 102 | shape=shape, 103 | store=store, 104 | chunks=chunks, 105 | dtype=dtype, 106 | order=memory_order, 107 | chunk_key_encoding={"name": "v2", "configuration": {"separator": dimension_separator}}, 108 | compressors=compressor, 109 | filters=_filters, 110 | zarr_format=2, 111 | attributes=attributes, 112 | ) 113 | 114 | spec = ArraySpec.from_zarr(array) 115 | 116 | assert json_eq( 117 | spec.model_dump(), {**json.loads(store[".zarray"].to_bytes()), "attributes": attributes} 118 | ) 119 | 120 | 121 | @pytest.mark.parametrize("overwrite", [True, False]) 122 | @pytest.mark.parametrize("path", ["", "foo"]) 123 | @pytest.mark.parametrize("config", [None, {}, {"order": "C", "write_empty_chunks": True}]) 124 | def test_arrayspec_to_zarr(overwrite: bool, path: str, config: dict[str, object] | None) -> None: 125 | """ 126 | Test serializing an arrayspec to zarr and back again 127 | """ 128 | zarr = pytest.importorskip("zarr") 129 | from zarr.core.array_spec import ArrayConfig 130 | 131 | spec = ArraySpec( 132 | shape=(10,), 133 | dtype="uint8", 134 | chunks=(1,), 135 | attributes={"a": 10}, 136 | ) 137 | 138 | # test serialization 139 | store = zarr.storage.MemoryStore() 140 | stored = spec.to_zarr(store, path=path, config=config) # type: ignore[arg-type] 141 | 142 | if config not in (None, {}): 143 | assert stored._async_array._config == ArrayConfig( 144 | order=config["order"], write_empty_chunks=config["write_empty_chunks"] 145 | ) 146 | 147 | assert json_eq(ArraySpec.from_zarr(stored).model_dump(), spec.model_dump()) 148 | 149 | # test that to_zarr is idempotent when the arrays match 150 | assert json_eq(spec.to_zarr(store, path=path).metadata.to_dict(), stored.metadata.to_dict()) 151 | 152 | # test that to_zarr raises if the extant array is different 153 | # unless overwrite is True 154 | spec_2 = spec.model_copy(update={"attributes": {"baz": 10}}) 155 | if not overwrite: 156 | with pytest.raises(ContainsArrayError): 157 | spec_2.to_zarr(store, path=path, overwrite=overwrite) 158 | else: 159 | arr_2 = spec_2.to_zarr(store, path=path, overwrite=overwrite) 160 | assert json_eq(arr_2.attrs.asdict(), spec_2.attributes) 161 | 162 | 163 | @pytest.mark.parametrize( 164 | "array", 165 | [ 166 | np.zeros((100), dtype="uint8"), 167 | xr.DataArray(np.arange(10), attrs={"foo": 10}), 168 | xr.DataArray(da.arange(10), attrs={"foo": 10}), 169 | da.arange(10), 170 | ], 171 | ) 172 | @pytest.mark.parametrize("chunks", ["omit", "auto", (10,)]) 173 | @pytest.mark.parametrize("attributes", ["omit", "auto", {"foo": 10}]) 174 | @pytest.mark.parametrize("fill_value", ["omit", "auto", 15]) 175 | @pytest.mark.parametrize("order", ["omit", "auto", "F"]) 176 | @pytest.mark.parametrize("filters", ["omit", "auto", []]) 177 | @pytest.mark.parametrize("dimension_separator", ["omit", "auto", "."]) 178 | @pytest.mark.parametrize("compressor", ["omit", "auto", {"id": "gzip", "level": 1}]) 179 | def test_array_spec_from_array( 180 | *, 181 | array: npt.NDArray[Any], 182 | chunks: str | tuple[int, ...], 183 | attributes: str | dict[str, object], 184 | fill_value: object, 185 | order: str, 186 | filters: str | list[Codec], 187 | dimension_separator: str, 188 | compressor: str | dict[str, object], 189 | ) -> None: 190 | 
auto_options = ("omit", "auto") 191 | kwargs_out: dict[str, object] = {} 192 | 193 | kwargs_out["chunks"] = chunks 194 | kwargs_out["attributes"] = attributes 195 | kwargs_out["fill_value"] = fill_value 196 | kwargs_out["order"] = order 197 | kwargs_out["filters"] = filters 198 | kwargs_out["dimension_separator"] = dimension_separator 199 | kwargs_out["compressor"] = compressor 200 | 201 | # remove all the keyword arguments that should be defaulted 202 | kwargs_out = dict(filter(lambda kvp: kvp[1] != "omit", kwargs_out.items())) 203 | 204 | spec = ArraySpec.from_array(array, **kwargs_out) 205 | # arrayspec should round-trip from_array with no arguments 206 | assert spec.from_array(spec) == spec 207 | 208 | assert spec.dtype == array.dtype.str 209 | assert np.dtype(spec.dtype) == array.dtype 210 | 211 | assert spec.shape == array.shape 212 | 213 | if chunks in auto_options: 214 | assert spec.chunks == auto_chunks(array) 215 | else: 216 | assert spec.chunks == chunks 217 | 218 | if attributes in auto_options: 219 | assert spec.attributes == auto_attributes(array) 220 | else: 221 | assert spec.attributes == attributes 222 | 223 | if fill_value in auto_options: 224 | assert spec.fill_value == auto_fill_value(array) 225 | else: 226 | assert spec.fill_value == fill_value 227 | 228 | if order in auto_options: 229 | assert spec.order == auto_order(array) 230 | else: 231 | assert spec.order == order 232 | 233 | if filters in auto_options: 234 | assert spec.filters == auto_filters(array) 235 | else: 236 | assert spec.filters is None 237 | 238 | if dimension_separator in auto_options: 239 | assert spec.dimension_separator == auto_dimension_separator(array) 240 | else: 241 | assert spec.dimension_separator == dimension_separator 242 | 243 | if compressor in auto_options: 244 | assert spec.compressor == auto_compresser(array) 245 | else: 246 | assert spec.compressor == compressor 247 | 248 | 249 | def test_serialize_deserialize_groupspec() -> None: 250 | zarr = pytest.importorskip("zarr") 251 | 252 | class RootAttrs(TypedDict): 253 | foo: int 254 | bar: list[int] 255 | 256 | class SubGroupAttrs(TypedDict): 257 | a: str 258 | b: float 259 | 260 | class SubGroup(GroupSpec): 261 | attributes: SubGroupAttrs 262 | 263 | class ArrayAttrs(TypedDict): 264 | scale: list[float] 265 | 266 | class MemberArray(ArraySpec): 267 | attributes: ArrayAttrs 268 | 269 | class RootGroup(GroupSpec): 270 | attributes: RootAttrs 271 | members: Mapping[str, MemberArray | SubGroup] 272 | 273 | store = zarr.storage.MemoryStore() 274 | 275 | spec = RootGroup( 276 | attributes=RootAttrs(foo=10, bar=[0, 1, 2]), 277 | members={ 278 | "s0": MemberArray( 279 | shape=(10,), 280 | chunks=(1,), 281 | dtype="uint8", 282 | filters=None, 283 | compressor=None, 284 | order="C", 285 | dimension_separator="/", 286 | attributes=ArrayAttrs(scale=[1.0]), 287 | ), 288 | "s1": MemberArray( 289 | shape=(10,), 290 | chunks=(1,), 291 | dtype="uint8", 292 | filters=None, 293 | compressor=None, 294 | order="C", 295 | dimension_separator="/", 296 | attributes=ArrayAttrs(scale=[2.0]), 297 | ), 298 | "subgroup": SubGroup(attributes=SubGroupAttrs(a="foo", b=1.0), members={}), 299 | }, 300 | ) 301 | # check that the model round-trips dict representation 302 | assert spec.model_dump() == GroupSpec(**spec.model_dump()).model_dump() 303 | 304 | # materialize a zarr group, based on the spec 305 | group = to_zarr(spec, store, "/group_a") 306 | 307 | # parse the spec from that group 308 | observed = from_zarr(group) 309 | assert json_eq(observed.model_dump(), 
spec.model_dump()) 310 | 311 | # assert that we get the same group twice 312 | assert to_zarr(spec, store, "/group_a", overwrite=True) == group 313 | 314 | # check that we can't call to_zarr targeting the original group with a different spec 315 | spec_2 = spec.model_copy(update={"attributes": RootAttrs(foo=99, bar=[0, 1, 2])}) 316 | with pytest.raises(ContainsGroupError): 317 | _ = to_zarr(spec_2, store, "/group_a") 318 | 319 | # check that we can't call to_zarr with the original spec if the group has changed 320 | group.attrs["foo"] = 100 321 | with pytest.raises(ContainsGroupError): 322 | _ = to_zarr(spec, store, "/group_a") 323 | group.attrs["foo"] = 10 324 | 325 | # materialize again with overwrite 326 | group2 = to_zarr(spec, store, "/group_a", overwrite=True) 327 | assert group2 == group 328 | 329 | # again with class methods 330 | group3 = spec.to_zarr(store, "/group_b") 331 | observed = spec.from_zarr(group3) 332 | assert observed == spec 333 | 334 | 335 | @pytest.mark.parametrize("base", range(1, 5)) 336 | def test_shape_chunks(base: int) -> None: 337 | """ 338 | Test that the length of the chunks and the shape match 339 | """ 340 | with pytest.raises(ValidationError): 341 | ArraySpec(shape=(1,) * base, chunks=(1,) * (base + 1), dtype="uint8", attributes={}) 342 | with pytest.raises(ValidationError): 343 | ArraySpec(shape=(1,) * (base + 1), chunks=(1,) * base, dtype="uint8", attributes={}) 344 | 345 | 346 | def test_validation() -> None: 347 | """ 348 | Test that specialized GroupSpec and ArraySpec instances cannot be serialized from 349 | the wrong inputs without a ValidationError. 350 | """ 351 | zarr = pytest.importorskip("zarr") 352 | 353 | class GroupAttrsA(TypedDict): 354 | group_a: bool 355 | 356 | class GroupAttrsB(TypedDict): 357 | group_b: bool 358 | 359 | class ArrayAttrsA(TypedDict): 360 | array_a: bool 361 | 362 | class ArrayAttrsB(TypedDict): 363 | array_b: bool 364 | 365 | class ArrayA(ArraySpec): 366 | attributes: ArrayAttrsA 367 | 368 | class ArrayB(ArraySpec): 369 | attributes: ArrayAttrsB 370 | 371 | class GroupA(GroupSpec): 372 | attributes: GroupAttrsA 373 | members: Mapping[str, ArrayA] 374 | 375 | class GroupB(GroupSpec): 376 | attributes: GroupAttrsB 377 | members: Mapping[str, ArrayB] 378 | 379 | store = zarr.storage.MemoryStore 380 | 381 | specA = GroupA( 382 | attributes=GroupAttrsA(group_a=True), 383 | members={ 384 | "a": ArrayA( 385 | attributes=ArrayAttrsA(array_a=True), 386 | shape=(100,), 387 | dtype="uint8", 388 | chunks=(10,), 389 | ) 390 | }, 391 | ) 392 | 393 | specB = GroupB( 394 | attributes=GroupAttrsB(group_b=True), 395 | members={ 396 | "a": ArrayB( 397 | attributes=ArrayAttrsB(array_b=True), 398 | shape=(100,), 399 | dtype="uint8", 400 | chunks=(10,), 401 | ) 402 | }, 403 | ) 404 | 405 | # check that we cannot create a specialized GroupSpec with the wrong attributes 406 | with pytest.raises(ValidationError): 407 | GroupB( 408 | attributes=GroupAttrsA(group_a=True), 409 | members={}, 410 | ) 411 | 412 | store = zarr.storage.MemoryStore() 413 | groupAMat = specA.to_zarr(store, path="group_a") 414 | groupBMat = specB.to_zarr(store, path="group_b") 415 | 416 | GroupA.from_zarr(groupAMat) 417 | GroupB.from_zarr(groupBMat) 418 | 419 | ArrayA.from_zarr(groupAMat["a"]) 420 | ArrayB.from_zarr(groupBMat["a"]) 421 | 422 | with pytest.raises(ValidationError): 423 | ArrayA.from_zarr(groupBMat["a"]) 424 | 425 | with pytest.raises(ValidationError): 426 | ArrayB.from_zarr(groupAMat["a"]) 427 | 428 | with pytest.raises(ValidationError): 429 | 
GroupB.from_zarr(groupAMat) 430 | 431 | with pytest.raises(ValidationError): 432 | GroupA.from_zarr(groupBMat) 433 | 434 | 435 | @pytest.mark.parametrize("data", ["/", "a/b/c"]) 436 | def test_member_name(data: str) -> None: 437 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid.'): 438 | GroupSpec(attributes={}, members={data: GroupSpec(attributes={}, members={})}) 439 | 440 | 441 | @pytest.mark.parametrize( 442 | ("data", "expected"), 443 | [ 444 | ( 445 | ArraySpec.from_array(np.arange(10)), 446 | {"": ArraySpec.from_array(np.arange(10))}, 447 | ), 448 | ( 449 | GroupSpec( 450 | attributes={"foo": 10}, 451 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 452 | ), 453 | { 454 | "": BaseGroupSpec(attributes={"foo": 10}), 455 | "/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 456 | }, 457 | ), 458 | ( 459 | GroupSpec( 460 | attributes={}, 461 | members={ 462 | "a": GroupSpec( 463 | attributes={"foo": 10}, 464 | members={"a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100})}, 465 | ), 466 | "b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 467 | }, 468 | ), 469 | { 470 | "": BaseGroupSpec(attributes={}), 471 | "/a": BaseGroupSpec(attributes={"foo": 10}), 472 | "/a/a": ArraySpec.from_array(np.arange(5), attributes={"foo": 100}), 473 | "/b": ArraySpec.from_array(np.arange(2), attributes={"foo": 3}), 474 | }, 475 | ), 476 | ], 477 | ) 478 | def test_flatten_unflatten( 479 | data: ArraySpec | GroupSpec, expected: dict[str, ArraySpec | GroupSpec] 480 | ) -> None: 481 | flattened = to_flat(data) 482 | assert flattened == expected 483 | assert from_flat(flattened) == data 484 | 485 | 486 | # todo: parametrize 487 | def test_array_like() -> None: 488 | a = ArraySpec.from_array(np.arange(10)) # type: ignore[var-annotated] 489 | assert a.like(a) 490 | 491 | b = a.model_copy(update={"dtype": "uint8"}) 492 | assert not a.like(b) 493 | assert a.like(b, exclude={"dtype"}) 494 | assert a.like(b, include={"shape"}) 495 | 496 | c = a.model_copy(update={"shape": (100, 100)}) 497 | assert not a.like(c) 498 | assert a.like(c, exclude={"shape"}) 499 | assert a.like(c, include={"dtype"}) 500 | 501 | 502 | def test_array_like_with_zarr() -> None: 503 | zarr = pytest.importorskip("zarr") 504 | arr = ArraySpec(shape=(1,), dtype="uint8", chunks=(1,), attributes={}) 505 | store = zarr.storage.MemoryStore() 506 | arr_stored = arr.to_zarr(store, path="arr") 507 | assert arr.like(arr_stored) 508 | 509 | dissimilar_arr = arr.model_copy(update={"attributes": {"a": 10}}).to_zarr(store, path="arr_2") 510 | assert not arr.like(dissimilar_arr) 511 | assert arr.like(dissimilar_arr, exclude={"attributes"}) 512 | 513 | 514 | # todo: parametrize 515 | def test_group_like() -> None: 516 | tree: dict[str, BaseGroupSpec | ArraySpec] = { 517 | "": BaseGroupSpec(attributes={"path": ""}), 518 | "/a": BaseGroupSpec(attributes={"path": "/a"}), 519 | "/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/b"}), 520 | "/a/b": ArraySpec.from_array(np.arange(10), attributes={"path": "/a/b"}), 521 | } 522 | group = GroupSpec.from_flat(tree) # type: ignore[var-annotated] 523 | assert group.like(group) 524 | assert not group.like(group.model_copy(update={"attributes": {}})) 525 | assert group.like(group.model_copy(update={"attributes": {}}), exclude={"attributes"}) 526 | assert group.like(group.model_copy(update={"attributes": {}}), include={"members"}) 527 | 528 | 529 | # todo: parametrize 530 | def test_from_zarr_depth() -> None: 531 | 
zarr = pytest.importorskip("zarr") 532 | tree: dict[str, BaseGroupSpec | ArraySpec] = { 533 | "": BaseGroupSpec(attributes={"level": 0, "type": "group"}), 534 | "/1": BaseGroupSpec(attributes={"level": 1, "type": "group"}), 535 | "/1/2": BaseGroupSpec(attributes={"level": 2, "type": "group"}), 536 | "/1/2/1": BaseGroupSpec(attributes={"level": 3, "type": "group"}), 537 | "/1/2/2": ArraySpec.from_array(np.arange(20), attributes={"level": 3, "type": "array"}), 538 | } 539 | 540 | store = zarr.storage.MemoryStore() 541 | group_out = GroupSpec.from_flat(tree).to_zarr(store, path="test") 542 | group_in_0 = GroupSpec.from_zarr(group_out, depth=0) # type: ignore[var-annotated] 543 | assert group_in_0.attributes == tree[""].attributes 544 | 545 | group_in_1 = GroupSpec.from_zarr(group_out, depth=1) # type: ignore[var-annotated] 546 | assert group_in_1.attributes == tree[""].attributes # type: ignore[attr-defined] 547 | assert group_in_1.members["1"].attributes == tree["/1"].attributes 548 | 549 | group_in_2 = GroupSpec.from_zarr(group_out, depth=2) # type: ignore[var-annotated] 550 | assert group_in_2.members["1"].members["2"].attributes == tree["/1/2"].attributes 551 | assert group_in_2.attributes == tree[""].attributes # type: ignore[attr-defined] 552 | assert group_in_2.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 553 | 554 | group_in_3 = GroupSpec.from_zarr(group_out, depth=3) # type: ignore[var-annotated] 555 | assert group_in_3.members["1"].members["2"].members["1"].attributes == tree["/1/2/1"].attributes 556 | assert group_in_3.attributes == tree[""].attributes # type: ignore[attr-defined] 557 | assert group_in_3.members["1"].attributes == tree["/1"].attributes # type: ignore[attr-defined] 558 | assert group_in_3.members["1"].members["2"].attributes == tree["/1/2"].attributes # type: ignore[attr-defined] 559 | 560 | 561 | @pytest.mark.parametrize(("dtype_example"), DTYPE_EXAMPLES_V2, ids=str) 562 | def test_arrayspec_from_zarr(dtype_example: DTypeExample) -> None: 563 | """ 564 | Test that deserializing an ArraySpec from a zarr python store works as expected. 565 | """ 566 | zarr = pytest.importorskip("zarr") 567 | store = {} 568 | data_type = dtype_example.name 569 | if ZARR_PYTHON_VERSION >= Version("3.1.0") and data_type == "|O": 570 | pytest.skip(reason="Data type inference with an object dtype will fail in zarr>=3.1.0") 571 | arr = zarr.create_array(store=store, shape=(10,), dtype=data_type, zarr_format=2) 572 | 573 | arr_spec = ArraySpec.from_zarr(arr) 574 | 575 | observed = {"attributes": arr.attrs.asdict()} | json.loads( 576 | store[".zarray"].to_bytes(), object_hook=tuplify_json 577 | ) 578 | if observed["filters"] is not None: 579 | observed["filters"] = list(observed["filters"]) 580 | # this covers the case of the structured data type, which would otherwise be deserialized as a 581 | # tuple of tuples, but is stored on the arrayspec as a list of tuples. 
582 | if isinstance(observed["dtype"], tuple): 583 | observed["dtype"] = list(observed["dtype"]) 584 | 585 | assert json_eq(arr_spec.model_dump(), observed) 586 | 587 | 588 | def test_mix_v3_v2_fails() -> None: 589 | from pydantic_zarr.v3 import ArraySpec as ArraySpecv3 590 | 591 | members_flat = {"/a": ArraySpecv3.from_array(np.ones(1))} 592 | with pytest.raises( 593 | ValueError, 594 | match=re.escape( 595 | "Value at '/a' is not a v2 ArraySpec or GroupSpec (got type(value)=)" 596 | ), 597 | ): 598 | GroupSpec.from_flat(members_flat) # type: ignore[arg-type] 599 | 600 | 601 | @pytest.mark.skipif(not ZARR_AVAILABLE, reason="zarr-python is not installed") 602 | def test_typed_members() -> None: 603 | """ 604 | Test GroupSpec creation with typed members 605 | """ 606 | array1d = ArraySpec( 607 | shape=(1,), 608 | dtype="uint8", 609 | chunks=(1,), 610 | fill_value=0, 611 | compressor=None, 612 | attributes={}, 613 | ) 614 | 615 | class DatasetMembers(TypedDict): 616 | x: ArraySpec 617 | y: ArraySpec 618 | 619 | class DatasetGroup(GroupSpec): 620 | members: DatasetMembers 621 | 622 | class ExpectedMembers(TypedDict): 623 | r10m: DatasetGroup 624 | r20m: DatasetGroup 625 | 626 | class ExpectedGroup(GroupSpec): 627 | members: ExpectedMembers 628 | 629 | flat = { 630 | "": BaseGroupSpec(attributes={}), 631 | "/r10m": BaseGroupSpec(attributes={}), 632 | "/r20m": BaseGroupSpec(attributes={}), 633 | "/r10m/x": array1d, 634 | "/r10m/y": array1d, 635 | "/r20m/x": array1d, 636 | "/r20m/y": array1d, 637 | } 638 | 639 | zg = GroupSpec.from_flat(flat).to_zarr({}, path="") 640 | ExpectedGroup.from_zarr(zg) 641 | 642 | 643 | def test_arrayspec_with_methods() -> None: 644 | """ 645 | Test that ArraySpec with_* methods create new validated copies 646 | """ 647 | original = ArraySpec.from_array(np.arange(10), attributes={"foo": "bar"}) 648 | 649 | # Test with_attributes 650 | new_attrs = original.with_attributes({"baz": "qux"}) 651 | assert new_attrs.attributes == {"baz": "qux"} 652 | assert original.attributes == {"foo": "bar"} # Original unchanged 653 | assert new_attrs is not original 654 | 655 | # Test with_shape 656 | new_shape = original.with_shape((20,)) 657 | assert new_shape.shape == (20,) 658 | assert original.shape == (10,) 659 | 660 | # Test with_chunks 661 | new_chunks = original.with_chunks((5,)) 662 | assert new_chunks.chunks == (5,) 663 | assert original.chunks == (10,) 664 | 665 | # Test with_dtype 666 | new_dtype = original.with_dtype("float32") 667 | assert new_dtype.dtype == " None: 697 | """ 698 | Test that ArraySpec with_* methods trigger validation 699 | """ 700 | spec = ArraySpec(shape=(10,), chunks=(5,), dtype="uint8", attributes={}) 701 | 702 | # Test that validation fails when shape and chunks have mismatched lengths 703 | with pytest.raises(ValidationError): 704 | spec.with_shape((10, 10)) # Shape has 2 dims but chunks still has 1 705 | 706 | 707 | def test_groupspec_with_methods() -> None: 708 | """ 709 | Test that GroupSpec with_* methods create new validated copies 710 | """ 711 | array_spec = ArraySpec.from_array(np.arange(10), attributes={}) 712 | original = GroupSpec(attributes={"group": "attr"}, members={"arr": array_spec}) 713 | 714 | # Test with_attributes 715 | new_attrs = original.with_attributes({"new": "attr"}) 716 | assert new_attrs.attributes == {"new": "attr"} 717 | assert original.attributes == {"group": "attr"} # Original unchanged 718 | assert new_attrs is not original 719 | 720 | # Test with_members 721 | new_array = ArraySpec.from_array(np.arange(5), 
attributes={}) 722 | new_members = original.with_members({"new_arr": new_array}) 723 | assert "new_arr" in new_members.members 724 | assert "arr" not in new_members.members # Replacement, not merge 725 | assert "arr" in original.members # Original unchanged 726 | 727 | 728 | def test_groupspec_with_members_validation() -> None: 729 | """ 730 | Test that GroupSpec with_members triggers validation 731 | """ 732 | spec = GroupSpec(attributes={}, members={}) 733 | 734 | # Test that validation fails with invalid member names 735 | with pytest.raises(ValidationError, match='Strings containing "/" are invalid'): 736 | spec.with_members({"a/b": ArraySpec.from_array(np.arange(10), attributes={})}) 737 | --------------------------------------------------------------------------------