├── anyschema ├── py.typed ├── exceptions.py ├── parsers │ ├── __init__.py │ ├── _annotated.py │ ├── attrs.py │ ├── _union.py │ └── pydantic.py ├── __init__.py ├── _utils.py ├── _dependencies.py ├── typing.py └── _metadata.py ├── tests ├── __init__.py ├── field │ ├── __init__.py │ ├── metadata_mutation_test.py │ └── field_test.py ├── adapters │ ├── __init__.py │ ├── into_ordered_dict_adapter_test.py │ ├── typed_dict_adapter_test.py │ ├── attrs_adapter_test.py │ ├── pydantic_adapter_test.py │ ├── dataclass_adapter_test.py │ └── custom_adapters_test.py ├── anyschema │ ├── __init__.py │ ├── names_test.py │ ├── dtypes_test.py │ ├── field_test.py │ ├── uniques_test.py │ ├── nullables_test.py │ ├── equality_test.py │ ├── hash_test.py │ ├── fields_test.py │ ├── descriptions_test.py │ ├── to_polars_test.py │ ├── initialization_test.py │ ├── to_arrow_test.py │ └── to_pandas_test.py ├── parsers │ ├── __init__.py │ ├── pydantic_derived_types_test.py │ ├── parsers_dependency_mock_test.py │ ├── pydantic_extra_types_test.py │ ├── _union_test.py │ ├── _builtin_derived_types_test.py │ ├── _annotated_test.py │ ├── attrs_test.py │ ├── forward_ref_dependency_mock_test.py │ ├── parsers_test.py │ └── sqlalchemy_test.py ├── pydantic │ ├── __init__.py │ ├── utils.py │ ├── boolean_test.py │ ├── struct_test.py │ ├── datetime_test.py │ ├── parsing_exception_test.py │ ├── string_test.py │ ├── date_test.py │ ├── float_test.py │ ├── literal_test.py │ └── list_test.py ├── spec_to_schema │ ├── __init__.py │ ├── dataclass_test.py │ ├── x_anyschema_test.py │ ├── pydantic_test.py │ ├── attrs_test.py │ ├── typed_dict_test.py │ └── sqlalchemy_test.py ├── version_test.py ├── module_getattr_test.py └── show_versions_test.py ├── docs ├── css │ └── extra.css ├── api-reference │ ├── anyschema.md │ ├── exceptions.md │ ├── index.md │ ├── serde.md │ ├── typing.md │ ├── adapters.md │ └── parsers.md ├── javascript │ └── extra.js └── user-guide │ ├── custom-end-to-end-example.md │ └── openapi-compatibility.md ├── .github ├── dependabot.yaml ├── ISSUE_TEMPLATE │ ├── config.yaml │ ├── release-drafter.yaml │ ├── bug-report.yaml │ ├── feature-request.yaml │ └── documentation.yaml ├── workflows │ ├── release.yaml │ ├── pre-commit-update.yaml │ ├── deploy-docs.yaml │ └── pull-request.yaml ├── PULL_REQUEST_TEMPLATE.md └── release-drafter.yaml ├── .pre-commit-config.yaml ├── Makefile ├── bump-version.py ├── README.md ├── mkdocs.yaml ├── .gitignore ├── pyproject.toml └── CODE_OF_CONDUCT.md /anyschema/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/field/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/anyschema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/pydantic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/spec_to_schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/css/extra.css: -------------------------------------------------------------------------------- 1 | .md-typeset ol li, 2 | .md-typeset ul li { 3 | margin-bottom: 0.1em !important; 4 | } 5 | .md-main__inner.md-grid { 6 | max-width: initial; 7 | margin-left: 5vw; 8 | } 9 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "monthly" 8 | -------------------------------------------------------------------------------- /docs/api-reference/anyschema.md: -------------------------------------------------------------------------------- 1 | # `anyschema` top level API 2 | 3 | ::: anyschema 4 | handler: python 5 | members: 6 | - AnySchema 7 | - AnyField 8 | - show_versions 9 | options: 10 | show_root_heading: true 11 | show_source: false 12 | -------------------------------------------------------------------------------- /docs/api-reference/exceptions.md: -------------------------------------------------------------------------------- 1 | # Exceptions 2 | 3 | ::: anyschema.exceptions 4 | handler: python 5 | options: 6 | show_root_heading: true 7 | show_source: true 8 | members: 9 | - UnavailablePipelineError 10 | - UnsupportedDTypeError 11 | -------------------------------------------------------------------------------- /docs/api-reference/index.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | This page provides detailed documentation for all public APIs in anyschema. 4 | 5 | For conceptual explanations, see the [Architecture](../architecture.md) page. For practical examples, see 6 | [Getting Started](../user-guide/getting-started.md) and [Advanced Usage](../user-guide/advanced.md). 7 | -------------------------------------------------------------------------------- /docs/api-reference/serde.md: -------------------------------------------------------------------------------- 1 | # Serialization & Deserialization 2 | 3 | The `serde` module provides utilities for serializing and deserializing Narwhals dtypes to and from string 4 | representations. 
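For orientation, a round-trip might look like the sketch below. Note that `dtype_to_string` and `dtype_from_string` are placeholder names used purely for illustration; the actual public functions are the ones listed in the API reference that follows.

```python
import narwhals as nw

# NOTE: placeholder names for illustration only -- substitute the
# functions documented in the API reference below.
from anyschema.serde import dtype_to_string, dtype_from_string

dtype = nw.Datetime(time_unit="us", time_zone="UTC")

encoded = dtype_to_string(dtype)  # dtype -> its string representation
decoded = dtype_from_string(encoded)  # string -> Narwhals dtype
assert decoded == dtype  # the round-trip recovers the original dtype
```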
5 | 6 | ## API Reference 7 | 8 | ::: anyschema.serde 9 | handler: python 10 | options: 11 | show_root_heading: true 12 | show_source: true 13 | -------------------------------------------------------------------------------- /anyschema/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | __all__ = ("UnavailablePipelineError", "UnsupportedDTypeError") 4 | 5 | 6 | class UnavailablePipelineError(ValueError): 7 | """Exception raised when a parser does not have a ParserPipeline set.""" 8 | 9 | 10 | class UnsupportedDTypeError(ValueError): 11 | """Exception raised when a DType is not supported.""" 12 | -------------------------------------------------------------------------------- /docs/api-reference/typing.md: -------------------------------------------------------------------------------- 1 | # Type Aliases 2 | 3 | The following type aliases are used throughout the anyschema codebase: 4 | 5 | ::: anyschema.typing 6 | handler: python 7 | options: 8 | show_root_heading: true 9 | show_source: false 10 | members: 11 | - Adapter 12 | - FieldSpec 13 | - FieldSpecIterable 14 | - IntoOrderedDict 15 | - IntoParserPipeline 16 | - Spec 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yaml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Blank Issue 4 | about: Create a new issue 5 | url: https://github.com/fbruzzesi/anyschema/issues/new 6 | - name: 📖 Documentation 7 | url: https://fbruzzesi.github.io/anyschema/ 8 | about: Read the official documentation 9 | - name: 🔍 Search Existing Issues 10 | url: https://github.com/fbruzzesi/anyschema/issues 11 | about: Search existing issues before creating a new one 12 | -------------------------------------------------------------------------------- /tests/version_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from pathlib import Path 5 | 6 | import anyschema 7 | 8 | 9 | def test_version_matches_pyproject() -> None: 10 | """Tests version is same of pyproject.""" 11 | with Path("pyproject.toml").open(encoding="utf-8") as file: 12 | content = file.read() 13 | pyproject_version = re.search(r'version = "(.*)"', content).group(1) # type: ignore[union-attr] 14 | 15 | assert anyschema.__version__ == pyproject_version 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/release-drafter.yaml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | types: [opened, reopened, synchronize] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | update_release_draft: 15 | permissions: 16 | contents: write 17 | pull-requests: write 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: release-drafter/release-drafter@v6 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /tests/module_getattr_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | from typing_extensions import Any, assert_type 7 | 8 | import anyschema 9 | 10 
| 11 | def test_package_getattr() -> None: 12 | assert_type(anyschema.__version__, str) 13 | assert_type(anyschema.__title__, str) 14 | 15 | if TYPE_CHECKING: 16 | bad = anyschema.not_real # type: ignore[attr-defined] 17 | assert_type(bad, Any) 18 | 19 | with pytest.raises(AttributeError): 20 | very_bad = anyschema.not_real # type: ignore[attr-defined] # noqa: F841 21 | -------------------------------------------------------------------------------- /anyschema/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from anyschema.parsers._annotated import AnnotatedStep 4 | from anyschema.parsers._base import ParserStep 5 | from anyschema.parsers._builtin import PyTypeStep 6 | from anyschema.parsers._forward_ref import ForwardRefStep 7 | from anyschema.parsers._pipeline import ParserPipeline, make_pipeline 8 | from anyschema.parsers._union import UnionTypeStep 9 | 10 | __all__ = ( 11 | "AnnotatedStep", 12 | "ForwardRefStep", 13 | "ParserPipeline", 14 | "ParserStep", 15 | "PyTypeStep", 16 | "UnionTypeStep", 17 | "make_pipeline", 18 | ) 19 | -------------------------------------------------------------------------------- /tests/pydantic/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from narwhals import Schema 6 | 7 | from anyschema.adapters import pydantic_adapter 8 | 9 | if TYPE_CHECKING: 10 | from pydantic import BaseModel 11 | 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def model_to_nw_schema(spec: type[BaseModel], pipeline: ParserPipeline) -> Schema: 16 | return Schema( 17 | { 18 | name: pipeline.parse(input_type, constraints, metadata) 19 | for name, input_type, constraints, metadata in pydantic_adapter(spec) 20 | } 21 | ) 22 | -------------------------------------------------------------------------------- /tests/anyschema/names_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import Spec 11 | 12 | 13 | @pytest.mark.parametrize( 14 | ("spec", "expected"), 15 | [ 16 | ({}, ()), 17 | ({"only_field": str}, ("only_field",)), 18 | ({"id": int, "name": str, "age": int}, ("id", "name", "age")), 19 | ], 20 | ) 21 | def test_names(spec: Spec, expected: tuple[str, ...]) -> None: 22 | schema = AnySchema(spec=spec) 23 | result = schema.names() 24 | 25 | assert result == expected 26 | -------------------------------------------------------------------------------- /tests/adapters/into_ordered_dict_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema.adapters import into_ordered_dict_adapter 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import FieldSpec, IntoOrderedDict 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "spec", 15 | [ 16 | {"name": str, "age": int}, 17 | [("name", str), ("age", int)], 18 | ], 19 | ) 20 | def test_into_ordered_dict_adapter(spec: IntoOrderedDict) -> None: 21 | expected: tuple[FieldSpec, ...] 
= (("name", str, (), {}), ("age", int, (), {})) 22 | result = tuple(into_ordered_dict_adapter(spec)) 23 | assert result == expected 24 | -------------------------------------------------------------------------------- /anyschema/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import typing as _t 4 | 5 | from anyschema._anyschema import AnyField, AnySchema 6 | from anyschema._utils import show_versions 7 | 8 | __all__ = ("AnyField", "AnySchema", "show_versions") 9 | __title__ = __name__ 10 | __version__: str 11 | 12 | 13 | if not _t.TYPE_CHECKING: 14 | 15 | def __getattr__(name: str) -> _t.Any: 16 | if name == "__version__": 17 | global __version__ # noqa: PLW0603 18 | 19 | from importlib import metadata 20 | 21 | __version__ = metadata.version(__name__) 22 | return __version__ 23 | msg = f"module {__name__!r} has no attribute {name!r}" 24 | raise AttributeError(msg) 25 | else: # pragma: no cover 26 | ... 27 | -------------------------------------------------------------------------------- /tests/adapters/typed_dict_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import date 4 | from typing import TYPE_CHECKING, TypedDict 5 | 6 | import pytest 7 | 8 | from anyschema.adapters import typed_dict_adapter 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import FieldSpec, TypedDictType 12 | 13 | 14 | class PersonTypedDict(TypedDict): 15 | name: str 16 | age: int 17 | date_of_birth: date 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "spec", 22 | [ 23 | PersonTypedDict, 24 | ], 25 | ) 26 | def test_typed_dict_adapter(spec: TypedDictType) -> None: 27 | expected: tuple[FieldSpec, ...] 
= (("name", str, (), {}), ("age", int, (), {}), ("date_of_birth", date, (), {})) 28 | result = tuple(typed_dict_adapter(spec)) 29 | assert result == expected 30 | -------------------------------------------------------------------------------- /tests/show_versions_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | from typing import TYPE_CHECKING 5 | 6 | from anyschema import show_versions 7 | 8 | if TYPE_CHECKING: 9 | import pytest 10 | 11 | 12 | def test_show_versions(capsys: pytest.CaptureFixture[str]) -> None: 13 | with warnings.catch_warnings(): 14 | warnings.filterwarnings("ignore") 15 | show_versions() 16 | out, _ = capsys.readouterr() 17 | 18 | assert "python" in out 19 | assert "machine" in out 20 | 21 | assert "anyschema" in out 22 | assert "narwhals" in out 23 | assert "typing_extensions" in out 24 | assert "attrs" in out 25 | assert "pydantic" in out 26 | assert "sqlalchemy" in out 27 | assert "pandas" in out 28 | assert "polars" in out 29 | assert "pyarrow" in out 30 | 31 | assert "numpy" not in out 32 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Publish Python 🐍 distribution 📦 to PyPI" 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v[0-9]+.[0-9]+.[0-9]+*" 7 | 8 | jobs: 9 | pypi-release: 10 | runs-on: ubuntu-latest 11 | environment: 12 | name: pypi 13 | permissions: 14 | id-token: write 15 | contents: read 16 | steps: 17 | - name: Checkout source code 18 | uses: actions/checkout@v6 19 | - name: Install uv 20 | uses: astral-sh/setup-uv@v7 21 | - name: Build 22 | run: uv build 23 | - name: Smoke test (wheel) 24 | run: uv run --isolated --no-project --with dist/*.whl tests/version_test.py 25 | - name: Smoke test (source distribution) 26 | run: uv run --isolated --no-project --with dist/*.tar.gz tests/version_test.py 27 | - name: Publish 28 | run: uv publish --trusted-publishing always 29 | -------------------------------------------------------------------------------- /tests/pydantic/boolean_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | from pydantic import BaseModel, StrictBool 7 | 8 | from tests.pydantic.utils import model_to_nw_schema 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.parsers import ParserPipeline 12 | 13 | 14 | def test_parse_boolean(auto_pipeline: ParserPipeline) -> None: 15 | class BooleanModel(BaseModel): 16 | # python bool type 17 | py_bool: bool 18 | py_bool_optional: bool | None 19 | py_bool_or_none: bool | None 20 | none_or_py_bool: None | bool 21 | 22 | # pydantic StrictBool type 23 | strict_bool: StrictBool 24 | strict_bool_optional: StrictBool | None 25 | strict_bool_or_none: StrictBool | None 26 | none_or_strict_bool: None | StrictBool 27 | 28 | schema = model_to_nw_schema(BooleanModel, pipeline=auto_pipeline) 29 | 30 | assert all(value == nw.Boolean() for value in schema.values()) 31 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit-update.yaml: -------------------------------------------------------------------------------- 1 | name: "Pre-commit auto-update" 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 0 10 * *" # Every 10th of the month at 00:00 UTC 7 | 8 | 
permissions: write-all 9 | 10 | jobs: 11 | auto-update: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout source code 15 | uses: actions/checkout@v6 16 | - name: Install uv 17 | uses: astral-sh/setup-uv@v7 18 | with: 19 | python-version: "3.12" 20 | enable-cache: "true" 21 | cache-dependency-glob: "pyproject.toml" 22 | - name: pre-commit install autoupdate 23 | run: uvx pre-commit autoupdate 24 | - name: Commit and push changes 25 | uses: peter-evans/create-pull-request@v7 26 | with: 27 | branch: update-pre-commit-hooks 28 | title: 'Update pre-commit hooks' 29 | commit-message: 'Update pre-commit hooks' 30 | body: | 31 | Update versions of pre-commit hooks to latest versions. 32 | -------------------------------------------------------------------------------- /tests/anyschema/dtypes_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, TypeAlias 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | 10 | if TYPE_CHECKING: 11 | from narwhals.dtypes import DType 12 | 13 | AnyDict: TypeAlias = dict[str, Any] 14 | 15 | 16 | @pytest.mark.parametrize( 17 | ("spec", "expected"), 18 | [ 19 | ({"x": int}, (nw.Int64(),)), 20 | ({"x": str}, (nw.String(),)), 21 | ({"x": float}, (nw.Float64(),)), 22 | ({"x": bool}, (nw.Boolean(),)), 23 | ({"x": list[int]}, (nw.List(nw.Int64()),)), 24 | ({"id": int, "name": str, "score": float}, (nw.Int64(), nw.String(), nw.Float64())), 25 | ], 26 | ) 27 | def test_dtypes(spec: AnyDict, expected: tuple[DType, ...]) -> None: 28 | schema = AnySchema(spec=spec) 29 | 30 | result_tuple = schema.dtypes() 31 | assert result_tuple == expected 32 | 33 | result_dict = schema.dtypes(named=True) 34 | assert result_dict == dict(zip(spec.keys(), expected, strict=True)) 35 | -------------------------------------------------------------------------------- /tests/spec_to_schema/dataclass_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import DataclassEventWithTimeMetadata 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.typing import DataclassType 13 | 14 | 15 | @pytest.mark.parametrize( 16 | ("spec", "expected_schema"), 17 | [ 18 | ( 19 | DataclassEventWithTimeMetadata, 20 | { 21 | "name": nw.String(), 22 | "created_at": nw.Datetime("us"), 23 | "scheduled_at": nw.Datetime("us", time_zone="UTC"), 24 | "started_at": nw.Datetime("ms"), 25 | "completed_at": nw.Datetime("ns", time_zone="Europe/Berlin"), 26 | }, 27 | ), 28 | ], 29 | ) 30 | def test_dataclass(spec: DataclassType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 31 | schema = AnySchema(spec=spec) 32 | nw_schema = schema._nw_schema 33 | assert nw_schema == nw.Schema(expected_schema) 34 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: monthly 3 | repos: 4 | - repo: https://github.com/astral-sh/ruff-pre-commit 5 | # Ruff version. 6 | rev: 'v0.14.6' 7 | hooks: 8 | # Run the formatter. 9 | - id: ruff-format 10 | # Run the linter. 
11 | - id: ruff 12 | args: [--fix] 13 | - repo: https://github.com/codespell-project/codespell 14 | rev: 'v2.3.0' 15 | hooks: 16 | - id: codespell 17 | files: \.(py|rst|md)$ 18 | args: [--ignore-words-list=ser] 19 | - repo: https://github.com/adamchainz/blacken-docs 20 | rev: "1.19.1" # replace with latest tag on GitHub 21 | hooks: 22 | - id: blacken-docs 23 | args: [--skip-errors] 24 | additional_dependencies: 25 | - black==22.12.0 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v5.0.0 28 | hooks: 29 | - id: trailing-whitespace 30 | - id: name-tests-test 31 | - id: end-of-file-fixer 32 | - id: requirements-txt-fixer 33 | - id: no-commit-to-branch 34 | - id: check-json 35 | - id: check-yaml 36 | - id: check-ast 37 | - id: check-added-large-files 38 | -------------------------------------------------------------------------------- /tests/pydantic/struct_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated 4 | 5 | import narwhals as nw 6 | from annotated_types import Interval 7 | from pydantic import BaseModel 8 | 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def test_parse_struct(auto_pipeline: ParserPipeline) -> None: 16 | class BaseStruct(BaseModel): 17 | x1: Annotated[int, Interval(gt=0, lt=123)] 18 | x2: str 19 | x3: float | None 20 | x4: None | bool 21 | 22 | class StructModel(BaseModel): 23 | struct: BaseStruct | None 24 | 25 | schema = model_to_nw_schema(StructModel, pipeline=auto_pipeline) 26 | expected = { 27 | "struct": nw.Struct( 28 | [ 29 | nw.Field("x1", nw.UInt8()), 30 | nw.Field("x2", nw.String()), 31 | nw.Field("x3", nw.Float64()), 32 | nw.Field("x4", nw.Boolean()), 33 | ] 34 | ) 35 | } 36 | assert schema == expected 37 | -------------------------------------------------------------------------------- /tests/anyschema/field_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from anyschema import AnyField, AnySchema 6 | 7 | 8 | def test_field_correct_attributes() -> None: 9 | schema = AnySchema(spec={"id": int, "name": str, "age": int | None}) 10 | 11 | id_field = schema.field("id") 12 | assert isinstance(id_field, AnyField) 13 | assert id_field.name == "id" 14 | assert id_field.nullable is False 15 | 16 | age_field = schema.field("age") 17 | assert isinstance(age_field, AnyField) 18 | assert age_field.name == "age" 19 | assert age_field.nullable is True 20 | 21 | 22 | def test_field_raises_keyerror_for_missing_field() -> None: 23 | schema = AnySchema(spec={"id": int, "name": str}) 24 | 25 | with pytest.raises(KeyError): 26 | schema.field("non_existent") 27 | 28 | 29 | def test_field_with_all_field_names() -> None: 30 | spec = {"id": int, "name": str, "age": int, "active": bool} 31 | schema = AnySchema(spec=spec) 32 | 33 | for field_name in spec: 34 | field = schema.field(field_name) 35 | assert field.name == field_name 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ARG := $(word 2, $(MAKECMDGOALS)) 2 | $(eval $(ARG):;@:) 3 | 4 | sources = anyschema tests 5 | 6 | lint: 7 | uvx ruff version 8 | uvx ruff format $(sources) 9 | uvx ruff check $(sources) --fix 10 | uvx ruff clean 11 | uv tool 
run rumdl check . 12 | 13 | test: 14 | uv run --active --no-sync --group tests pytest tests --cov=anyschema --cov=tests --cov-fail-under=90 15 | uv run --active --no-sync --group tests pytest anyschema --doctest-modules 16 | 17 | typing: 18 | uv run --active --no-sync --group typing ty check $(sources) --output-format concise 19 | uv run --active --no-sync --group typing pyright $(sources) 20 | uv run --active --no-sync --group typing mypy $(sources) 21 | 22 | docs-serve: 23 | uv run --active --no-sync --group docs mkdocs serve --watch anyschema --watch docs --dirty 24 | 25 | docs-build: 26 | uv run --active --no-sync --group docs mkdocs build --strict 27 | 28 | setup-release: 29 | git checkout main 30 | git fetch upstream 31 | git reset --hard upstream/main 32 | git checkout -b bump-version 33 | python bump-version.py $(ARG) 34 | gh pr create --title "release: Bump version to " --body "Bump version" --base main --label release 35 | -------------------------------------------------------------------------------- /tests/spec_to_schema/x_anyschema_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from anyschema.parsers import ParserPipeline 10 | from tests.conftest import AttrsEventWithXAnyschema, PydanticEventWithXAnyschema 11 | 12 | if TYPE_CHECKING: 13 | from anyschema.typing import Spec 14 | 15 | 16 | @pytest.mark.parametrize("spec", [AttrsEventWithXAnyschema, PydanticEventWithXAnyschema]) 17 | def test_spec_with_x_anyschema(spec: Spec) -> None: 18 | schema = AnySchema(spec=spec) 19 | 20 | assert schema.fields["created_at"].dtype == nw.Datetime(time_zone="UTC", time_unit="us") 21 | assert schema.fields["started_at"].dtype == nw.Datetime(time_unit="ms") 22 | 23 | 24 | @pytest.mark.parametrize("metadata_key", ["anyschema", "x-anyschema"]) 25 | def test_dict_spec_with_both_prefixes(metadata_key: str) -> None: 26 | metadata = {metadata_key: {"nullable": True, "unique": True}} 27 | 28 | pipeline = ParserPipeline() 29 | field = pipeline.parse_into_field("test_field", int, (), metadata) 30 | 31 | assert field.nullable is True 32 | assert field.unique is True 33 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yaml: -------------------------------------------------------------------------------- 1 | name: "Deploy Documentation" 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - "v[0-9]+.[0-9]+.[0-9]+*" 8 | branches: 9 | - main 10 | 11 | permissions: 12 | contents: write 13 | 14 | jobs: 15 | deploy: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout source code 19 | uses: actions/checkout@v6 20 | - name: Configure Git Credentials 21 | run: | 22 | git config user.name github-actions[bot] 23 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 24 | - name: Install uv 25 | uses: astral-sh/setup-uv@v7 26 | with: 27 | python-version: "3.12" 28 | enable-cache: "true" 29 | cache-dependency-glob: "pyproject.toml" 30 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 31 | - uses: actions/cache@v4 32 | with: 33 | key: mkdocs-material-${{ env.cache_id }} 34 | path: .cache 35 | restore-keys: | 36 | mkdocs-material- 37 | - name: Install dependencies and deploy 38 | run: | 39 | uv run --group docs mkdocs gh-deploy --force 40 | 
-------------------------------------------------------------------------------- /tests/anyschema/uniques_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | from tests.conftest import ProductORM, array_fixed_table 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import Spec 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("spec", "expected"), 16 | [ 17 | (array_fixed_table, (True, False, False)), 18 | (ProductORM, (True, False, False, False)), 19 | ], 20 | ) 21 | def test_uniques_named_false(spec: Spec, expected: tuple[bool, ...]) -> None: 22 | schema = AnySchema(spec=spec) 23 | result = schema.uniques(named=False) 24 | 25 | assert result == expected 26 | 27 | 28 | @pytest.mark.parametrize( 29 | ("spec", "expected"), 30 | [ 31 | (array_fixed_table, {"id": True, "coordinates": False, "matrix": False}), 32 | ( 33 | ProductORM, 34 | {"id": True, "name": False, "price": False, "in_stock": False}, 35 | ), 36 | ], 37 | ) 38 | def test_uniques_named_true(spec: Spec, expected: dict[str, bool]) -> None: 39 | schema = AnySchema(spec=spec) 40 | result = schema.uniques(named=True) 41 | 42 | assert result == expected 43 | -------------------------------------------------------------------------------- /tests/anyschema/nullables_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, TypeAlias 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | AnyDict: TypeAlias = dict[str, Any] 10 | 11 | 12 | @pytest.mark.parametrize( 13 | ("spec", "expected"), 14 | [ 15 | ({}, ()), 16 | ({"id": int, "name": str}, (False, False)), 17 | ({"id": int, "name": str, "age": int}, (False, False, False)), 18 | ({"id": int, "name": str, "age": int | None}, (False, False, True)), 19 | ], 20 | ) 21 | def test_nullables_named_false(spec: AnyDict, expected: tuple[bool, ...]) -> None: 22 | schema = AnySchema(spec=spec) 23 | result = schema.nullables() 24 | 25 | assert result == expected 26 | 27 | 28 | @pytest.mark.parametrize( 29 | ("spec", "expected"), 30 | [ 31 | ({}, {}), 32 | ({"id": int, "name": str}, (False, False)), 33 | ({"id": int, "name": str, "age": int}, (False, False, False)), 34 | ({"id": int, "name": str, "age": int | None}, (False, False, True)), 35 | ], 36 | ) 37 | def test_nullables_named_true(spec: AnyDict, expected: tuple[bool, ...]) -> None: 38 | schema = AnySchema(spec=spec) 39 | result = schema.nullables(named=True) 40 | 41 | assert result == dict(zip(spec, expected, strict=True)) 42 | -------------------------------------------------------------------------------- /tests/anyschema/equality_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import Spec 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "spec", 15 | [ 16 | {"name": str, "age": int}, 17 | {"users": list[str], "counts": dict[str, int]}, 18 | ], 19 | ) 20 | def test_equal(spec: Spec) -> None: 21 | assert AnySchema(spec=spec) == AnySchema(spec=spec) 22 | 23 | 24 | @pytest.mark.parametrize( 25 | ("spec1", "spec2"), 26 | [ 27 | ({"name": str, "age": int}, {"age": int, "name": str}), 28 | ({"name": str, "age": int}, 
{"name": str}), 29 | ({"value": int}, {"value": float}), 30 | ({"name": str}, {"name": str | None}), 31 | ], 32 | ) 33 | def test_not_equal(spec1: Spec, spec2: Spec) -> None: 34 | schema1 = AnySchema(spec=spec1) 35 | schema2 = AnySchema(spec=spec2) 36 | 37 | assert schema1 != schema2 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "other", 42 | [ 43 | {"name": str}, 44 | "not a schema", 45 | 42, 46 | None, 47 | ], 48 | ) 49 | def test_equality_with_non_anyschema_object(other: Any) -> None: 50 | schema = AnySchema(spec={"name": str}) 51 | 52 | assert schema != other 53 | -------------------------------------------------------------------------------- /tests/anyschema/hash_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import Spec 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "spec", 15 | [ 16 | {"name": str, "age": int}, 17 | {"users": list[str], "counts": dict[str, int]}, 18 | ], 19 | ) 20 | def test_same_hash(spec: Spec) -> None: 21 | assert hash(AnySchema(spec=spec)) == hash(AnySchema(spec=spec)) 22 | 23 | 24 | @pytest.mark.parametrize( 25 | ("spec1", "spec2"), 26 | [ 27 | ({"name": str, "age": int}, {"age": int, "name": str}), 28 | ({"name": str, "age": int}, {"name": str}), 29 | ({"value": int}, {"value": float}), 30 | ({"name": str}, {"name": str | None}), 31 | ], 32 | ) 33 | def test_different_hash(spec1: Spec, spec2: Spec) -> None: 34 | schema1 = AnySchema(spec=spec1) 35 | schema2 = AnySchema(spec=spec2) 36 | 37 | assert hash(schema1) != hash(schema2) 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "other", 42 | [ 43 | "not a schema", 44 | 42, 45 | None, 46 | ], 47 | ) 48 | def test_different_hash_object(other: Any) -> None: 49 | schema = AnySchema(spec={"name": str}) 50 | 51 | assert hash(schema) != hash(other) 52 | -------------------------------------------------------------------------------- /docs/api-reference/adapters.md: -------------------------------------------------------------------------------- 1 | # Spec Adapters 2 | 3 | Adapters convert various input specifications into a normalized format for parsing. 4 | 5 | Learn how to create custom adapters in the [Advanced Usage](../user-guide/advanced.md#custom-spec-adapters) guide. 6 | 7 | The following built-in adapters are not meant to be used directly. They serve more as an example than anything else. 8 | 9 | ::: anyschema.adapters 10 | handler: python 11 | options: 12 | show_root_heading: true 13 | show_source: true 14 | 15 | ## Adapters specification 16 | 17 | Adapters must follow this signature: 18 | 19 | ```python 20 | from typing import Iterator, TypeAlias, Callable, Any, Generator 21 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldName, FieldType 22 | 23 | FieldSpec: TypeAlias = tuple[FieldName, FieldType, FieldConstraints, FieldMetadata] 24 | 25 | 26 | def my_custom_adapter(spec: Any) -> Iterator[FieldSpec]: 27 | """ 28 | Yields tuples of (field_name, field_type, constraints, metadata). 29 | 30 | - name (str): The name of the field 31 | - type (type): The type annotation of the field 32 | - constraints (tuple): Type constraints (e.g., Gt(0), Le(100) from annotated-types) 33 | - metadata (dict): Custom metadata dictionary for additional information 34 | """ 35 | ... 36 | ``` 37 | 38 | They don't need to be functions; any callable is acceptable. 
39 | -------------------------------------------------------------------------------- /anyschema/parsers/_annotated.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated 4 | 5 | from typing_extensions import get_args, get_origin # noqa: UP035 6 | 7 | from anyschema.parsers._base import ParserStep 8 | 9 | if TYPE_CHECKING: 10 | from narwhals.dtypes import DType 11 | 12 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType 13 | 14 | 15 | class AnnotatedStep(ParserStep): 16 | """Parser for `typing.Annotated` types. 17 | 18 | Handles: 19 | 20 | - `Annotated[T, metadata...]` - extracts the type and metadata for further parsing 21 | """ 22 | 23 | def parse(self, input_type: FieldType, constraints: FieldConstraints, metadata: FieldMetadata) -> DType | None: 24 | """Parse Annotated types by extracting the base type and constraints. 25 | 26 | Arguments: 27 | input_type: The type to parse. 28 | constraints: Constraints associated with the type. 29 | metadata: Custom metadata dictionary. 30 | 31 | Returns: 32 | A Narwhals DType by extracting the base type and delegating to the chain. 33 | """ 34 | if get_origin(input_type) is Annotated: 35 | base_type, *extra_constraints = get_args(input_type) 36 | return self.pipeline.parse(base_type, (*constraints, *extra_constraints), metadata, strict=True) 37 | 38 | return None 39 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | ## Description 7 | 8 | 9 | 10 | ## Type of Change 11 | 12 | 13 | 14 | - [ ] 🐛 Bug fix (non-breaking change which fixes an issue) 15 | - [ ] ✨ New feature (non-breaking change which adds functionality) 16 | - [ ] ⚠️ Breaking change (fix or feature that would cause existing functionality to not work as expected) 17 | - [ ] 📚 Documentation update 18 | - [ ] 🧪 Test improvement 19 | - [ ] 🔧 Maintenance/Refactoring 20 | - [ ] ⚡ Performance improvement 21 | - [ ] 🏗️ Build/CI improvement 22 | 23 | ## Related Issues 24 | 25 | 26 | 27 | - Closes # 28 | - Related to # 29 | 30 | ## Changes Made 31 | 32 | 33 | 34 | ## Checklist 35 | 36 | 37 | 38 | - [ ] My code follows the project's style guidelines (ruff) 39 | - [ ] I have performed a self-review of my code 40 | - [ ] I have commented my code, particularly in hard-to-understand areas 41 | - [ ] I have made corresponding changes to the documentation 42 | - [ ] I have added tests that prove my fix is effective or that my feature works 43 | - [ ] New and existing unit tests pass locally with my changes 44 | -------------------------------------------------------------------------------- /tests/pydantic/datetime_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime # noqa: TC003 4 | from typing import TYPE_CHECKING 5 | 6 | import narwhals as nw 7 | from pydantic import BaseModel, FutureDatetime, NaiveDatetime, PastDatetime 8 | 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def test_parse_datetime(auto_pipeline: ParserPipeline) -> None: 16 | class DatetimeModel(BaseModel): 17 | # python datetime type 18 | py_dt: datetime 19 | py_dt_optional: datetime | None 20 | py_dt_or_none: datetime | None 21 
| none_or_py_dt: None | datetime 22 | 23 | # pydantic NaiveDatetime type 24 | naive_dt: NaiveDatetime 25 | naive_dt_optional: NaiveDatetime | None 26 | naive_dt_or_none: NaiveDatetime | None 27 | none_or_naive_dt: None | NaiveDatetime 28 | 29 | # pydantic PastDatetime type 30 | past_dt: PastDatetime 31 | past_dt_optional: PastDatetime | None 32 | past_dt_or_none: PastDatetime | None 33 | none_or_past_dt: None | PastDatetime 34 | 35 | # pydantic FutureDatetime type 36 | future_dt: FutureDatetime 37 | future_dt_optional: FutureDatetime | None 38 | future_dt_or_none: FutureDatetime | None 39 | none_or_future_dt: None | FutureDatetime 40 | 41 | schema = model_to_nw_schema(DatetimeModel, pipeline=auto_pipeline) 42 | 43 | assert all(value == nw.Datetime() for value in schema.values()) 44 | -------------------------------------------------------------------------------- /tests/pydantic/parsing_exception_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Optional 4 | 5 | import pytest 6 | from pydantic import AwareDatetime, create_model 7 | 8 | from anyschema.exceptions import UnsupportedDTypeError 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | @pytest.mark.parametrize( 16 | ("input_type", "msg"), 17 | [ 18 | (str | float | int, "Union with more than two types is not supported."), 19 | (str | float, "Union with mixed types is not supported."), 20 | ], 21 | ) 22 | def test_raise_parse_union(auto_pipeline: ParserPipeline, input_type: type, msg: str) -> None: 23 | ExceptionModel = create_model("ExceptionModel", foo=(input_type, ...)) # noqa: N806 24 | 25 | with pytest.raises(UnsupportedDTypeError, match=msg): 26 | model_to_nw_schema(ExceptionModel, pipeline=auto_pipeline) 27 | 28 | 29 | @pytest.mark.parametrize( 30 | "input_type", 31 | [ 32 | AwareDatetime, 33 | Optional[AwareDatetime], 34 | AwareDatetime | None, 35 | None | AwareDatetime, 36 | ], 37 | ) 38 | def test_raise_aware_datetime(auto_pipeline: ParserPipeline, input_type: type) -> None: 39 | AwareDatetimeModel = create_model("AwareDatetimeModel", foo=(input_type, ...)) # noqa: N806 40 | 41 | msg = "pydantic AwareDatetime does not specify a fixed timezone." 
42 | with pytest.raises(UnsupportedDTypeError, match=msg): 43 | model_to_nw_schema(AwareDatetimeModel, pipeline=auto_pipeline) 44 | -------------------------------------------------------------------------------- /tests/anyschema/fields_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from anyschema import AnyField, AnySchema 4 | 5 | 6 | def test_fields_correct_keys_and_values() -> None: 7 | spec = {"id": int, "name": str, "age": int} 8 | schema = AnySchema(spec=spec) 9 | result = schema.fields 10 | 11 | assert isinstance(result, dict) 12 | assert result.keys() == spec.keys() 13 | 14 | for field_name, field_obj in result.items(): 15 | assert isinstance(field_obj, AnyField) 16 | assert field_obj.name == field_name 17 | 18 | 19 | def test_fields_empty_schema() -> None: 20 | schema = AnySchema(spec={}) 21 | result = schema.fields 22 | 23 | assert result == {} 24 | 25 | 26 | def test_fields_returns_copy() -> None: 27 | schema = AnySchema(spec={"id": int, "name": str}) 28 | result1 = schema.fields 29 | result2 = schema.fields 30 | 31 | assert result1 is not result2 # Should return a new dict each time 32 | assert result1 == result2 # But with equal contents 33 | 34 | 35 | def test_fields_modification_does_not_affect_schema() -> None: 36 | schema = AnySchema(spec={"id": int, "name": str}) 37 | fields = schema.fields 38 | 39 | # Modify the returned dict 40 | fields["new_field"] = AnyField(name="new_field", dtype=schema.field("id").dtype) 41 | 42 | # Original schema should be unchanged 43 | assert "new_field" not in schema.names() 44 | 45 | 46 | def test_fields_with_nullable_and_metadata() -> None: 47 | schema = AnySchema(spec={"id": int, "name": str, "age": int | None}) 48 | fields = schema.fields 49 | 50 | assert fields["id"].nullable is False 51 | assert fields["name"].nullable is False 52 | assert fields["age"].nullable is True 53 | -------------------------------------------------------------------------------- /tests/pydantic/string_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated, Optional 4 | 5 | import narwhals as nw 6 | import pydantic 7 | import pytest 8 | from narwhals.utils import parse_version 9 | from pydantic import BaseModel, StrictStr 10 | 11 | from tests.pydantic.utils import model_to_nw_schema 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.parsers import ParserPipeline 15 | 16 | 17 | def test_parse_string(auto_pipeline: ParserPipeline) -> None: 18 | class StringModel(BaseModel): 19 | # python str type 20 | py_str: str 21 | py_str_optional: str | None 22 | py_str_or_none: str | None 23 | none_or_py_str: None | str 24 | 25 | # pydantic StrictStr type 26 | strict_str: StrictStr 27 | strict_str_optional: StrictStr | None 28 | strict_str_or_none: StrictStr | None 29 | none_or_strict_str: None | StrictStr 30 | 31 | schema = model_to_nw_schema(StringModel, pipeline=auto_pipeline) 32 | 33 | assert all(value == nw.String() for value in schema.values()) 34 | 35 | 36 | @pytest.mark.skipif(parse_version(pydantic.__version__) < (2, 1), reason="too old for StringConstraints") 37 | def test_parse_string_with_constraints(auto_pipeline: ParserPipeline) -> None: 38 | from pydantic import StringConstraints 39 | 40 | str_constraint = StringConstraints(strip_whitespace=True, to_upper=True, pattern=r"^[A-Z]+$") 41 | 42 | class StringConstraintsModel(BaseModel): 43 | str_con: 
Annotated[str, str_constraint] 44 | str_con_optional: Optional[Annotated[str, str_constraint]] 45 | str_con_or_none: Annotated[str, str_constraint] | None 46 | none_or_str_con: None | Annotated[str, str_constraint] 47 | 48 | schema = model_to_nw_schema(StringConstraintsModel, pipeline=auto_pipeline) 49 | 50 | assert all(value == nw.String() for value in schema.values()) 51 | -------------------------------------------------------------------------------- /tests/spec_to_schema/pydantic_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import ( 10 | PydanticEventWithTimeMetadata, 11 | PydanticSpecialDatetimeWithMetadata, 12 | PydanticStudent, 13 | ) 14 | 15 | if TYPE_CHECKING: 16 | from pydantic import BaseModel 17 | 18 | 19 | @pytest.mark.parametrize( 20 | ("spec", "expected_schema"), 21 | [ 22 | ( 23 | PydanticStudent, 24 | { 25 | "name": nw.String(), 26 | "date_of_birth": nw.Date(), 27 | "age": nw.UInt64(), 28 | "classes": nw.List(nw.String()), 29 | "has_graduated": nw.Boolean(), 30 | }, 31 | ), 32 | ( 33 | PydanticEventWithTimeMetadata, 34 | { 35 | "name": nw.String(), 36 | "created_at": nw.Datetime("us"), 37 | "scheduled_at": nw.Datetime("us", time_zone="UTC"), 38 | "started_at": nw.Datetime("ms"), 39 | "completed_at": nw.Datetime("ns", time_zone="Europe/Berlin"), 40 | }, 41 | ), 42 | ( 43 | PydanticSpecialDatetimeWithMetadata, 44 | { 45 | "aware": nw.Datetime("us", time_zone="UTC"), 46 | "aware_ms": nw.Datetime("ms", time_zone="Asia/Tokyo"), 47 | "naive_ms": nw.Datetime("ms"), 48 | "past_utc": nw.Datetime("us", time_zone="UTC"), 49 | "future_ns": nw.Datetime("ns"), 50 | }, 51 | ), 52 | ], 53 | ) 54 | def test_pydantic_model(spec: type[BaseModel], expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 55 | schema = AnySchema(spec=spec) 56 | nw_schema = schema._nw_schema 57 | assert nw_schema == nw.Schema(expected_schema) 58 | -------------------------------------------------------------------------------- /tests/parsers/pydantic_derived_types_test.py: -------------------------------------------------------------------------------- 1 | """Tests for pydantic parser with derived types. 2 | 3 | This module tests that PydanticTypeStep correctly handles types that inherit 4 | from Pydantic's date/datetime types. 5 | """ 6 | 7 | from __future__ import annotations 8 | 9 | from typing import Any 10 | 11 | import narwhals as nw 12 | import pytest 13 | from pydantic import FutureDate, PastDate, PastDatetime 14 | 15 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 16 | from anyschema.parsers.pydantic import PydanticTypeStep 17 | 18 | 19 | # Custom types that inherit from Pydantic types 20 | class CustomPastDate(PastDate): ... 21 | 22 | 23 | class CustomFutureDate(FutureDate): ... 24 | 25 | 26 | class CustomPastDatetime(PastDatetime): ... 
27 | 28 | 29 | @pytest.fixture(scope="module") 30 | def parser_pipeline() -> ParserPipeline: 31 | """Create a parser pipeline with pydantic support.""" 32 | union_parser = UnionTypeStep() 33 | pydantic_parser = PydanticTypeStep() 34 | py_parser = PyTypeStep() 35 | return ParserPipeline([union_parser, pydantic_parser, py_parser]) 36 | 37 | 38 | @pytest.mark.parametrize( 39 | ("input_type", "expected"), 40 | [ 41 | # Base Pydantic types 42 | (PastDate, nw.Date()), 43 | (FutureDate, nw.Date()), 44 | (PastDatetime, nw.Datetime()), 45 | # Derived types 46 | (CustomPastDate, nw.Date()), 47 | (CustomFutureDate, nw.Date()), 48 | (CustomPastDatetime, nw.Datetime()), 49 | # In lists 50 | (list[CustomPastDate], nw.List(nw.Date())), 51 | (list[CustomPastDatetime], nw.List(nw.Datetime())), 52 | ], 53 | ) 54 | def test_pydantic_derived_types(parser_pipeline: ParserPipeline, input_type: Any, expected: nw.dtypes.DType) -> None: 55 | """Test that pydantic parser handles derived types correctly.""" 56 | result = parser_pipeline.parse(input_type, (), {}) 57 | assert result == expected 58 | -------------------------------------------------------------------------------- /tests/anyschema/descriptions_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | from tests.conftest import DataclassEventWithTimeMetadata, PydanticStudent, user_table 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import Spec 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("spec", "expected"), 16 | [ 17 | (PydanticStudent, ("Student full name", None, "Student age in years", None, None)), 18 | (DataclassEventWithTimeMetadata, ("Event name", None, "Scheduled time", None, None)), 19 | (user_table, ("Primary key", None, "User age", None)), 20 | ], 21 | ) 22 | def test_descriptions_named_false(spec: Spec, expected: tuple[str | None, ...]) -> None: 23 | schema = AnySchema(spec=spec) 24 | result = schema.descriptions(named=False) 25 | 26 | assert result == expected 27 | 28 | 29 | @pytest.mark.parametrize( 30 | ("spec", "expected"), 31 | [ 32 | ( 33 | PydanticStudent, 34 | { 35 | "name": "Student full name", 36 | "date_of_birth": None, 37 | "age": "Student age in years", 38 | "classes": None, 39 | "has_graduated": None, 40 | }, 41 | ), 42 | ( 43 | DataclassEventWithTimeMetadata, 44 | { 45 | "name": "Event name", 46 | "created_at": None, 47 | "scheduled_at": "Scheduled time", 48 | "started_at": None, 49 | "completed_at": None, 50 | }, 51 | ), 52 | (user_table, {"id": "Primary key", "name": None, "age": "User age", "email": None}), 53 | ], 54 | ) 55 | def test_descriptions_named_true(spec: Spec, expected: dict[str, str | None]) -> None: 56 | schema = AnySchema(spec=spec) 57 | result = schema.descriptions(named=True) 58 | 59 | assert result == expected 60 | -------------------------------------------------------------------------------- /bump-version.py: -------------------------------------------------------------------------------- 1 | """Adjusted from narwhals. 
2 | 3 | https://github.com/narwhals-dev/narwhals/blob/25701453aaa0556adc491e428f6d5724a1eac177/utils/bump_version.py 4 | 5 | License: MIT 6 | Copyright (c) 2024 Marco Gorelli 7 | """ 8 | 9 | # python bump-version.py 10 | 11 | # ruff: noqa: PLW1510, S603, S607, T201 12 | # mypy: ignore 13 | from __future__ import annotations 14 | 15 | import subprocess 16 | import sys 17 | 18 | out = subprocess.run(["git", "fetch", "upstream", "--tags"]) 19 | if out.returncode != 0: 20 | print(out) 21 | sys.exit(1) 22 | subprocess.run(["git", "reset", "--hard", "upstream/main"]) 23 | 24 | if subprocess.run(["git", "branch", "--show-current"], text=True, capture_output=True).stdout.strip() != "bump-version": 25 | msg = "`bump-version.py` should be run from `bump-version` branch" 26 | raise RuntimeError(msg) 27 | 28 | # Delete local tags, if present 29 | try: 30 | # Get the list of all tags 31 | result = subprocess.run(["git", "tag", "-l"], capture_output=True, text=True, check=True) 32 | tags = result.stdout.splitlines() # Split the tags into a list by lines 33 | 34 | # Delete each tag using git tag -d 35 | subprocess.run(["git", "tag", "-d", *tags], check=True) 36 | print("All local tags have been deleted.") 37 | except subprocess.CalledProcessError as e: 38 | print(f"An error occurred: {e}") 39 | 40 | subprocess.run(["git", "fetch", "upstream", "--tags"]) 41 | subprocess.run(["git", "fetch", "upstream", "--prune", "--tags"]) 42 | 43 | how = sys.argv[1] 44 | version = subprocess.run(["uv", "version", "--bump", how, "--short"], text=True, capture_output=True).stdout.strip() 45 | 46 | subprocess.run(["git", "commit", "-a", "-m", f"release: Bump version to {version}"]) 47 | subprocess.run(["git", "tag", "-a", f"v{version}", "-m", f"v{version}"]) 48 | subprocess.run(["git", "push", "upstream", "HEAD", "--follow-tags"]) 49 | subprocess.run(["git", "push", "upstream", "HEAD:stable", "-f", "--follow-tags"]) 50 | -------------------------------------------------------------------------------- /tests/pydantic/date_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import date # noqa: TC003 4 | from typing import TYPE_CHECKING, Annotated, Optional 5 | 6 | import hypothesis.strategies as st 7 | import narwhals as nw 8 | from annotated_types import Interval 9 | from hypothesis import assume, given 10 | from pydantic import BaseModel, FutureDate, PastDate 11 | 12 | from tests.pydantic.utils import model_to_nw_schema 13 | 14 | if TYPE_CHECKING: 15 | from anyschema.parsers import ParserPipeline 16 | 17 | 18 | def test_parse_date(auto_pipeline: ParserPipeline) -> None: 19 | class DateModel(BaseModel): 20 | # python datetime type 21 | py_dt: date 22 | py_dt_optional: date | None 23 | py_dt_or_none: date | None 24 | none_or_py_dt: None | date 25 | 26 | # pydantic PastDate type 27 | past_dt: PastDate 28 | past_dt_optional: PastDate | None 29 | past_dt_or_none: PastDate | None 30 | none_or_past_dt: None | PastDate 31 | 32 | # pydantic FutureDate type 33 | future_dt: FutureDate 34 | future_dt_optional: FutureDate | None 35 | future_dt_or_none: FutureDate | None 36 | none_or_future_dt: None | FutureDate 37 | 38 | schema = model_to_nw_schema(DateModel, pipeline=auto_pipeline) 39 | 40 | assert all(value == nw.Date() for value in schema.values()) 41 | 42 | 43 | @given(min_date=st.dates(), max_date=st.dates()) 44 | def test_parse_date_with_constraints(auto_pipeline: ParserPipeline, min_date: date, max_date: date) -> None: 45 | 
assume(min_date < max_date) 46 | 47 | class DateConstraintModel(BaseModel): 48 | x: Annotated[date, Interval(gt=min_date, lt=max_date)] 49 | y: Optional[Annotated[date, Interval(ge=min_date, lt=max_date)]] 50 | z: Annotated[date, Interval(gt=min_date, le=max_date)] | None 51 | w: None | Annotated[date, Interval(ge=min_date, le=max_date)] 52 | 53 | schema = model_to_nw_schema(DateConstraintModel, pipeline=auto_pipeline) 54 | 55 | assert all(value == nw.Date() for value in schema.values()) 56 | -------------------------------------------------------------------------------- /tests/anyschema/to_polars_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import polars as pl 6 | from narwhals import Schema 7 | 8 | from anyschema import AnySchema 9 | 10 | if TYPE_CHECKING: 11 | from pydantic import BaseModel 12 | 13 | 14 | def test_pydantic_to_polars(pydantic_student_cls: type[BaseModel]) -> None: 15 | anyschema = AnySchema(spec=pydantic_student_cls) 16 | pl_schema = anyschema.to_polars() 17 | 18 | assert isinstance(pl_schema, pl.Schema) 19 | assert pl_schema == pl.Schema( 20 | { 21 | "name": pl.String(), 22 | "date_of_birth": pl.Date(), 23 | "age": pl.UInt64(), 24 | "classes": pl.List(pl.String()), 25 | "has_graduated": pl.Boolean(), 26 | } 27 | ) 28 | 29 | 30 | def test_nw_schema_to_polars(nw_schema: Schema) -> None: 31 | unsupported_dtypes = {"array", "enum", "uint128", "int128", "decimal"} 32 | model = Schema({k: v for k, v in nw_schema.items() if k not in unsupported_dtypes}) 33 | anyschema = AnySchema(spec=model) 34 | pl_schema = anyschema.to_polars() 35 | 36 | assert isinstance(pl_schema, pl.Schema) 37 | assert pl_schema == pl.Schema( 38 | { 39 | "boolean": pl.Boolean(), 40 | "categorical": pl.Categorical(), 41 | "date": pl.Date(), 42 | "datetime": pl.Datetime(), 43 | "duration": pl.Duration(), 44 | "float32": pl.Float32(), 45 | "float64": pl.Float64(), 46 | "int8": pl.Int8(), 47 | "int16": pl.Int16(), 48 | "int32": pl.Int32(), 49 | "int64": pl.Int64(), 50 | "list": pl.List(pl.Float32()), 51 | "object": pl.Object(), 52 | "string": pl.String(), 53 | "struct": pl.Struct(fields=[pl.Field("field_1", pl.String()), pl.Field("field_2", pl.Boolean())]), 54 | "uint8": pl.UInt8(), 55 | "uint16": pl.UInt16(), 56 | "uint32": pl.UInt32(), 57 | "uint64": pl.UInt64(), 58 | "unknown": pl.Unknown(), 59 | } 60 | ) 61 | -------------------------------------------------------------------------------- /anyschema/parsers/attrs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | 7 | from anyschema._dependencies import is_attrs_class 8 | from anyschema.parsers._base import ParserStep 9 | 10 | if TYPE_CHECKING: 11 | from narwhals.dtypes import DType 12 | 13 | from anyschema.typing import AttrsClassType, FieldConstraints, FieldMetadata, FieldType 14 | 15 | 16 | __all__ = ("AttrsTypeStep",) 17 | 18 | 19 | class AttrsTypeStep(ParserStep): 20 | """Parser for attrs-specific types. 21 | 22 | Handles: 23 | 24 | - attrs classes (Struct types) 25 | 26 | Warning: 27 | It requires [attrs](https://www.attrs.org/) to be installed. 
28 | """ 29 | 30 | def parse( 31 | self, 32 | input_type: FieldType, 33 | constraints: FieldConstraints, # noqa: ARG002 34 | metadata: FieldMetadata, # noqa: ARG002 35 | ) -> DType | None: 36 | """Parse attrs-specific types into Narwhals dtypes. 37 | 38 | Arguments: 39 | input_type: The type to parse. 40 | constraints: Constraints associated with the type. 41 | metadata: Custom metadata dictionary. 42 | 43 | Returns: 44 | A Narwhals DType if this parser can handle the type, None otherwise. 45 | """ 46 | if is_attrs_class(input_type): 47 | return self._parse_attrs_class(input_type) 48 | 49 | # This parser doesn't handle this type 50 | return None 51 | 52 | def _parse_attrs_class(self, attrs_class: AttrsClassType) -> DType: 53 | """Parse an attrs class into a Struct type. 54 | 55 | Arguments: 56 | attrs_class: The attrs class. 57 | 58 | Returns: 59 | A Narwhals Struct dtype. 60 | """ 61 | from anyschema.adapters import attrs_adapter 62 | 63 | return nw.Struct( 64 | [ 65 | nw.Field( 66 | name=field_name, 67 | dtype=self.pipeline.parse(field_type, field_constraints, field_metadata, strict=True), 68 | ) 69 | for field_name, field_type, field_constraints, field_metadata in attrs_adapter(attrs_class) 70 | ] 71 | ) 72 | -------------------------------------------------------------------------------- /tests/adapters/attrs_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import date, datetime 4 | from typing import TYPE_CHECKING 5 | 6 | import pytest 7 | 8 | from anyschema.adapters import attrs_adapter 9 | from tests.conftest import ( 10 | AttrsBookWithMetadata, 11 | AttrsDerived, 12 | AttrsEventWithTimeMetadata, 13 | AttrsPerson, 14 | AttrsPersonFrozen, 15 | create_missing_decorator_test_case, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from anyschema.typing import AttrsClassType, FieldSpec 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "spec", 24 | [ 25 | AttrsPerson, 26 | AttrsPersonFrozen, 27 | ], 28 | ) 29 | def test_attrs_adapter(spec: AttrsClassType) -> None: 30 | result = list(attrs_adapter(spec)) 31 | assert ("name", str, (), {}) in result 32 | assert ("age", int, (), {}) in result 33 | assert ("date_of_birth", date, (), {}) in result 34 | 35 | 36 | def test_attrs_adapter_with_metadata() -> None: 37 | result = list(attrs_adapter(AttrsBookWithMetadata)) # ty: ignore[invalid-argument-type] 38 | assert result == [("title", str, (), {"description": "Book title"}), ("author", str, (), {"max_length": 100})] 39 | 40 | 41 | def test_attrs_adapter_with_inheritance() -> None: 42 | result = list(attrs_adapter(AttrsDerived)) # ty: ignore[invalid-argument-type] 43 | assert result == [("foo", str, (), {}), ("bar", int, (), {}), ("baz", float, (), {})] 44 | 45 | 46 | def test_attrs_adapter_missing_decorator_raises() -> None: 47 | child_cls, expected_msg = create_missing_decorator_test_case() 48 | with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")): 49 | list(attrs_adapter(child_cls)) # ty: ignore[invalid-argument-type] 50 | 51 | 52 | def test_attrs_adapter_with_time_metadata() -> None: 53 | result = tuple(attrs_adapter(AttrsEventWithTimeMetadata)) # ty: ignore[invalid-argument-type] 54 | expected: tuple[FieldSpec, ...] 
= ( 55 | ("name", str, (), {}), 56 | ("created_at", datetime, (), {}), 57 | ("scheduled_at", datetime, (), {"anyschema": {"time_zone": "UTC"}}), 58 | ("started_at", datetime, (), {"anyschema": {"time_unit": "ms"}}), 59 | ("completed_at", datetime, (), {"anyschema": {"time_zone": "Europe/Berlin", "time_unit": "ns"}}), 60 | ) 61 | 62 | assert result == expected 63 | -------------------------------------------------------------------------------- /docs/javascript/extra.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copied from Ruff https://github.com/astral-sh/ruff/blob/924741cb11a68ed037899f9db1bea6969c48385e/docs/js/extra.js 3 | * 4 | * @author Astral Software Inc. 5 | * @license MIT 6 | */ 7 | 8 | 9 | function cleanupClipboardText(targetSelector) { 10 | const targetElement = document.querySelector(targetSelector); 11 | 12 | // exclude "Generic Prompt" and "Generic Output" spans from copy 13 | const excludedClasses = ["gp", "go"]; 14 | 15 | const clipboardText = Array.from(targetElement.childNodes) 16 | .filter( 17 | (node) => 18 | !excludedClasses.some((className) => 19 | node?.classList?.contains(className), 20 | ), 21 | ) 22 | .map((node) => node.textContent) 23 | .filter((s) => s !== ""); 24 | return clipboardText.join("").trim(); 25 | } 26 | 27 | // Sets copy text to attributes lazily using an Intersection Observer. 28 | function setCopyText() { 29 | // The `data-clipboard-text` attribute allows for customized content in the copy 30 | // See: https://www.npmjs.com/package/clipboard#copy-text-from-attribute 31 | const attr = "clipboardText"; 32 | // all "copy" buttons whose target selector is a element 33 | const elements = document.querySelectorAll( 34 | 'button[data-clipboard-target$="code"]', 35 | ); 36 | 37 | if (elements.length === 0) { 38 | return; 39 | } 40 | 41 | const observer = new IntersectionObserver((entries) => { 42 | entries.forEach((entry) => { 43 | // target in the viewport that have not been patched 44 | if ( 45 | entry.intersectionRatio > 0 && 46 | entry.target.dataset[attr] === undefined 47 | ) { 48 | entry.target.dataset[attr] = cleanupClipboardText( 49 | entry.target.dataset.clipboardTarget, 50 | ); 51 | } 52 | }); 53 | }); 54 | 55 | elements.forEach((elt) => { 56 | observer.observe(elt); 57 | }); 58 | } 59 | 60 | // Using the document$ observable is particularly important if you are using instant loading since 61 | // it will not result in a page refresh in the browser 62 | // See `How to integrate with third-party JavaScript libraries` guideline: 63 | // https://squidfunk.github.io/mkdocs-material/customization/?h=javascript#additional-javascript 64 | document$.subscribe(function () { 65 | setCopyText(); 66 | }); 67 | -------------------------------------------------------------------------------- /tests/adapters/pydantic_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime 4 | from typing import TYPE_CHECKING, Annotated 5 | 6 | import pytest 7 | from annotated_types import Ge 8 | from pydantic import BaseModel, Field 9 | 10 | from anyschema.adapters import pydantic_adapter 11 | from tests.conftest import PydanticEventWithTimeMetadata 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.typing import FieldMetadata, FieldSpec 15 | 16 | EMPTY_METADATA: FieldMetadata = {} # Type hinted empty metadata dict 17 | 18 | 19 | class SimpleModel(BaseModel): 20 | name: str 21 | age: int 22 | 23 | 24 | class 
ModelWithConstraints(BaseModel): 25 | name: str 26 | age: Annotated[int, Field(ge=0)] 27 | 28 | 29 | class ModelWithDescriptions(BaseModel): 30 | id: int = Field(description="ID") 31 | name: str = Field(description="Product name", json_schema_extra={"format": "name"}) 32 | tags: list[str] = Field(description="tags", json_schema_extra={"anyschema": {"description": "Override"}}) 33 | 34 | 35 | @pytest.mark.parametrize( 36 | ("spec", "expected"), 37 | [ 38 | (SimpleModel, (("name", str, (), {}), ("age", int, (), {}))), 39 | (ModelWithConstraints, (("name", str, (), {}), ("age", int, (Ge(ge=0),), {}))), 40 | ( 41 | ModelWithDescriptions, 42 | ( 43 | ("id", int, (), {"anyschema": {"description": "ID"}}), 44 | ("name", str, (), {"format": "name", "anyschema": {"description": "Product name"}}), 45 | ("tags", list[str], (), {"anyschema": {"description": "Override"}}), 46 | ), 47 | ), 48 | ], 49 | ) 50 | def test_pydantic_adapter(spec: type[BaseModel], expected: tuple[FieldSpec, ...]) -> None: 51 | result = tuple(pydantic_adapter(spec)) 52 | assert result == expected 53 | 54 | 55 | def test_pydantic_adapter_with_json_schema_extra() -> None: 56 | result = tuple(pydantic_adapter(PydanticEventWithTimeMetadata)) 57 | 58 | expected: tuple[FieldSpec, ...] = ( 59 | ("name", str, (), EMPTY_METADATA), 60 | ("created_at", datetime, (), EMPTY_METADATA), 61 | ("scheduled_at", datetime, (), {"anyschema": {"time_zone": "UTC"}}), 62 | ("started_at", datetime, (), {"anyschema": {"time_unit": "ms"}}), 63 | ("completed_at", datetime, (), {"anyschema": {"time_zone": "Europe/Berlin", "time_unit": "ns"}}), 64 | ) 65 | 66 | assert result == expected 67 | -------------------------------------------------------------------------------- /tests/parsers/parsers_dependency_mock_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from anyschema.parsers import ParserPipeline 8 | 9 | 10 | @pytest.mark.parametrize( 11 | ("dependency_flag", "excluded_step"), 12 | [ 13 | ("ANNOTATED_TYPES_AVAILABLE", "AnnotatedTypesStep"), 14 | ("ATTRS_AVAILABLE", "AttrsTypeStep"), 15 | ("PYDANTIC_AVAILABLE", "PydanticTypeStep"), 16 | ("SQLALCHEMY_AVAILABLE", "SQLAlchemyTypeStep"), 17 | ], 18 | ) 19 | def test_auto_pipeline_without_optional_dependency(dependency_flag: str, excluded_step: str) -> None: 20 | """Test that optional parser steps are excluded when their dependency is unavailable.""" 21 | with patch(target=f"anyschema.parsers._pipeline.{dependency_flag}", new=False): 22 | pipeline = ParserPipeline("auto") 23 | step_names = [str(step) for step in pipeline.steps] 24 | 25 | # The corresponding step should NOT be in the pipeline 26 | assert excluded_step not in step_names 27 | 28 | # Core steps should still be there 29 | assert "ForwardRefStep" in step_names 30 | assert "PyTypeStep" in step_names 31 | 32 | 33 | def test_auto_pipeline_without_all_optional_deps() -> None: 34 | """Test pipeline with only core dependencies.""" 35 | patches = ( 36 | patch(target="anyschema.parsers._pipeline.ANNOTATED_TYPES_AVAILABLE", new=False), 37 | patch(target="anyschema.parsers._pipeline.ATTRS_AVAILABLE", new=False), 38 | patch(target="anyschema.parsers._pipeline.PYDANTIC_AVAILABLE", new=False), 39 | patch(target="anyschema.parsers._pipeline.SQLALCHEMY_AVAILABLE", new=False), 40 | ) 41 | 42 | for p in patches: 43 | p.start() 44 | 45 | try: 46 | pipeline = ParserPipeline("auto") 47 | step_names = [str(step) for step 
in pipeline.steps] 48 | 49 | # Only core steps should be present 50 | assert "ForwardRefStep" in step_names 51 | assert "UnionTypeStep" in step_names 52 | assert "AnnotatedStep" in step_names 53 | assert "PyTypeStep" in step_names 54 | 55 | # Optional steps should NOT be present 56 | assert "AnnotatedTypesStep" not in step_names 57 | assert "AttrsTypeStep" not in step_names 58 | assert "PydanticTypeStep" not in step_names 59 | assert "SQLAlchemyTypeStep" not in step_names 60 | finally: 61 | for p in patches: 62 | p.stop() 63 | -------------------------------------------------------------------------------- /docs/api-reference/parsers.md: -------------------------------------------------------------------------------- 1 | # Parsers 2 | 3 | ## Pipeline 4 | 5 | A parser pipeline is a sequence of [parser steps](#parser-steps) that process type annotations to produce Narwhals 6 | dtypes. 7 | 8 | ::: anyschema.parsers.ParserPipeline 9 | handler: python 10 | options: 11 | show_root_heading: true 12 | show_source: false 13 | heading_level: 3 14 | 15 | ::: anyschema.parsers.make_pipeline 16 | handler: python 17 | options: 18 | show_root_heading: true 19 | show_source: false 20 | heading_level: 3 21 | 22 | ## Parser Steps 23 | 24 | Parser steps are the building blocks of the type parsing pipeline. Each step handles specific type patterns. 25 | 26 | For more details on how these work together, see the [parser steps](../architecture.md#parser-steps) 27 | section in the Architecture guide. 28 | 29 | ::: anyschema.parsers.ParserStep 30 | handler: python 31 | options: 32 | show_root_heading: true 33 | show_source: false 34 | heading_level: 3 35 | 36 | --- 37 | 38 | The following steps are built-in and come dependency-free. 39 | 40 | ::: anyschema.parsers.ForwardRefStep 41 | handler: python 42 | options: 43 | show_root_heading: true 44 | show_source: false 45 | heading_level: 3 46 | 47 | ::: anyschema.parsers.UnionTypeStep 48 | handler: python 49 | options: 50 | show_root_heading: true 51 | show_source: false 52 | heading_level: 3 53 | 54 | ::: anyschema.parsers.AnnotatedStep 55 | handler: python 56 | options: 57 | show_root_heading: true 58 | show_source: false 59 | heading_level: 3 60 | 61 | ::: anyschema.parsers.PyTypeStep 62 | handler: python 63 | options: 64 | show_root_heading: true 65 | show_source: false 66 | heading_level: 3 67 | 68 | --- 69 | 70 | ::: anyschema.parsers.annotated_types.AnnotatedTypesStep 71 | handler: python 72 | options: 73 | show_root_heading: true 74 | show_source: false 75 | heading_level: 3 76 | 77 | ::: anyschema.parsers.attrs.AttrsTypeStep 78 | handler: python 79 | options: 80 | show_root_heading: true 81 | show_source: false 82 | heading_level: 3 83 | 84 | ::: anyschema.parsers.pydantic.PydanticTypeStep 85 | handler: python 86 | options: 87 | show_root_heading: true 88 | show_source: false 89 | heading_level: 3 90 | 91 | ::: anyschema.parsers.sqlalchemy.SQLAlchemyTypeStep 92 | handler: python 93 | options: 94 | show_root_heading: true 95 | show_source: false 96 | heading_level: 3 97 | -------------------------------------------------------------------------------- /tests/parsers/pydantic_extra_types_test.py: -------------------------------------------------------------------------------- 1 | """Tests using actual pydantic-extra-types to verify derived type handling. 2 | 3 | This module tests that PyTypeStep works with real types from the pydantic-extra-types library. 4 | Note that some pydantic-extra-types require additional dependencies (like pycountry, phonenumbers). 
5 | 6 | References: 7 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_country/ 8 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_phone_numbers/ 9 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_coordinate/ 10 | """ 11 | 12 | from __future__ import annotations 13 | 14 | from typing import Any 15 | 16 | import narwhals as nw 17 | import pytest 18 | from pydantic_extra_types.coordinate import Latitude, Longitude 19 | from pydantic_extra_types.country import ( 20 | CountryAlpha2, 21 | CountryAlpha3, 22 | CountryNumericCode, 23 | CountryShortName, 24 | ) 25 | from pydantic_extra_types.phone_numbers import PhoneNumber 26 | 27 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 28 | 29 | 30 | @pytest.fixture(scope="module") 31 | def py_type_parser() -> PyTypeStep: 32 | """Create a PyTypeStep instance with pipeline set.""" 33 | union_parser = UnionTypeStep() 34 | py_parser = PyTypeStep() 35 | _ = ParserPipeline([union_parser, py_parser]) 36 | return py_parser 37 | 38 | 39 | @pytest.mark.parametrize( 40 | ("input_type", "expected"), 41 | [ 42 | # coordinate 43 | (Latitude, nw.Float64()), 44 | (Longitude, nw.Float64()), 45 | (list[Latitude], nw.List(nw.Float64())), 46 | (list[list[Latitude]], nw.List(nw.List(nw.Float64()))), 47 | (tuple[Longitude, Longitude], nw.Array(nw.Float64(), shape=2)), 48 | (tuple[Latitude, Latitude, Latitude], nw.Array(nw.Float64(), shape=3)), 49 | # country 50 | (CountryAlpha2, nw.String()), 51 | (CountryAlpha3, nw.String()), 52 | (CountryNumericCode, nw.String()), 53 | (CountryShortName, nw.String()), 54 | (list[CountryAlpha2], nw.List(nw.String())), 55 | (list[list[CountryAlpha2]], nw.List(nw.List(nw.String()))), 56 | # phone number 57 | (PhoneNumber, nw.String()), 58 | (list[PhoneNumber], nw.List(nw.String())), 59 | (tuple[PhoneNumber, PhoneNumber, PhoneNumber], nw.Array(nw.String(), shape=3)), 60 | ], 61 | ) 62 | def test_pydantic_extra_types(py_type_parser: PyTypeStep, input_type: Any, expected: nw.dtypes.DType) -> None: 63 | result = py_type_parser.parse(input_type, (), {}) 64 | assert result == expected 65 | -------------------------------------------------------------------------------- /tests/parsers/_union_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from types import NoneType 4 | from typing import Any, Optional, Union 5 | 6 | import narwhals as nw 7 | import pytest 8 | 9 | from anyschema.exceptions import UnsupportedDTypeError 10 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def union_parser() -> UnionTypeStep: 15 | """Create a UnionTypeStep instance with pipeline set.""" 16 | union_parser = UnionTypeStep() 17 | py_parser = PyTypeStep() 18 | _ = ParserPipeline([union_parser, py_parser]) 19 | return union_parser 20 | 21 | 22 | @pytest.mark.parametrize( 23 | ("input_type", "expected"), 24 | [ 25 | (Optional[int], nw.Int64()), 26 | (Optional[str], nw.String()), 27 | (Optional[float], nw.Float64()), 28 | (Optional[bool], nw.Boolean()), 29 | (int | None, nw.Int64()), 30 | (str | None, nw.String()), 31 | (None | int, nw.Int64()), 32 | (None | str, nw.String()), 33 | (Union[int, None], nw.Int64()), 34 | (Union[None, str], nw.String()), 35 | (Optional[list[int]], nw.List(nw.Int64())), 36 | (list[str] | None, nw.List(nw.String())), 37 | (list[str | None] | None, nw.List(nw.String())), 38 | ], 39 | ) 40 | def 
test_parse_union_types(union_parser: UnionTypeStep, input_type: Any, expected: nw.dtypes.DType) -> None: 41 | result = union_parser.parse(input_type, (), {}) 42 | assert result == expected 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "input_type", 47 | [ 48 | int, 49 | str, 50 | list[int], 51 | NoneType, 52 | ], 53 | ) 54 | def test_parse_non_union_types(union_parser: UnionTypeStep, input_type: Any) -> None: 55 | result = union_parser.parse(input_type, (), {}) 56 | assert result is None 57 | 58 | 59 | @pytest.mark.parametrize( 60 | ("input_type", "error_msg"), 61 | [ 62 | (Union[int, str, float], "Union with more than two types is not supported."), 63 | (int | str | float, "Union with more than two types is not supported."), 64 | (Union[int, str], "Union with mixed types is not supported."), 65 | (int | str, "Union with mixed types is not supported."), 66 | (float | bool, "Union with mixed types is not supported."), 67 | ], 68 | ) 69 | def test_parse_unsupported_unions_parametrized(union_parser: UnionTypeStep, input_type: Any, error_msg: str) -> None: 70 | with pytest.raises(UnsupportedDTypeError, match=error_msg): 71 | union_parser.parse(input_type, (), {}) 72 |
-------------------------------------------------------------------------------- /tests/pydantic/float_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated, Optional 4 | 5 | import hypothesis.strategies as st 6 | import narwhals as nw 7 | from annotated_types import Interval 8 | from hypothesis import assume, given 9 | from pydantic import BaseModel, FiniteFloat, NegativeFloat, NonNegativeFloat, NonPositiveFloat, PositiveFloat 10 | 11 | from tests.pydantic.utils import model_to_nw_schema 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.parsers import ParserPipeline 15 | 16 | 17 | @given(lb=st.floats(), ub=st.floats()) 18 | def test_parse_float(auto_pipeline: ParserPipeline, lb: float, ub: float) -> None: 19 | assume(lb < ub) 20 | 21 | class FloatModel(BaseModel): 22 | # python float type 23 | py_float: float 24 | py_float_optional: float | None 25 | py_float_or_none: float | None 26 | none_or_py_float: None | float 27 | 28 | # pydantic NonNegativeFloat type 29 | non_negative: NonNegativeFloat 30 | non_negative_optional: NonNegativeFloat | None 31 | non_negative_or_none: NonNegativeFloat | None 32 | none_or_non_negative: None | NonNegativeFloat 33 | 34 | # pydantic NonPositiveFloat type 35 | non_positive: NonPositiveFloat 36 | non_positive_optional: NonPositiveFloat | None 37 | non_positive_or_none: NonPositiveFloat | None 38 | none_or_non_positive: None | NonPositiveFloat 39 | 40 | # pydantic PositiveFloat type 41 | positive: PositiveFloat 42 | positive_optional: PositiveFloat | None 43 | positive_or_none: PositiveFloat | None 44 | none_or_positive: None | PositiveFloat 45 | 46 | # pydantic NegativeFloat type 47 | negative: NegativeFloat 48 | negative_optional: NegativeFloat | None 49 | negative_or_none: NegativeFloat | None 50 | none_or_negative: None | NegativeFloat 51 | 52 | # pydantic FiniteFloat type 53 | finite: FiniteFloat 54 | finite_optional: FiniteFloat | None 55 | finite_or_none: FiniteFloat | None 56 | none_or_finite: None | FiniteFloat 57 | 58 | # pydantic annotated float with constraints 59 | con_float: Annotated[float, Interval(gt=lb, lt=ub)] 60 | con_float_optional: Optional[Annotated[float, Interval(ge=lb, lt=ub)]] 61 | con_float_or_none: Annotated[float, Interval(gt=lb, le=ub)] | None
62 | non_or_con_float: None | Annotated[float, Interval(ge=lb, le=ub)] 63 | 64 | schema = model_to_nw_schema(FloatModel, pipeline=auto_pipeline) 65 | 66 | assert all(value == nw.Float64() for value in schema.values()) 67 | -------------------------------------------------------------------------------- /tests/spec_to_schema/attrs_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import ( 10 | AttrsAddressWithPydantic, 11 | AttrsDerived, 12 | AttrsEventWithTimeMetadata, 13 | AttrsPerson, 14 | AttrsPersonWithLiterals, 15 | create_missing_decorator_test_case, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from anyschema.typing import AttrsClassType 20 | 21 | 22 | @pytest.mark.parametrize( 23 | ("spec", "expected_schema"), 24 | [ 25 | ( 26 | AttrsPerson, 27 | { 28 | "name": nw.String(), 29 | "age": nw.Int64(), 30 | "date_of_birth": nw.Date(), 31 | "is_active": nw.Boolean(), 32 | "classes": nw.List(nw.String()), 33 | "grades": nw.List(nw.Float64()), 34 | }, 35 | ), 36 | ( 37 | AttrsPersonWithLiterals, 38 | { 39 | "username": nw.String(), 40 | "role": nw.Enum(["admin", "user", "guest"]), 41 | "status": nw.Enum(["active", "inactive", "pending"]), 42 | }, 43 | ), 44 | ( 45 | AttrsAddressWithPydantic, 46 | { 47 | "street": nw.String(), 48 | "city": nw.String(), 49 | "zipcode": nw.Struct([nw.Field("zipcode", nw.UInt64())]), 50 | }, 51 | ), 52 | ( 53 | AttrsDerived, 54 | { 55 | "foo": nw.String(), 56 | "bar": nw.Int64(), 57 | "baz": nw.Float64(), 58 | }, 59 | ), 60 | ( 61 | AttrsEventWithTimeMetadata, 62 | { 63 | "name": nw.String(), 64 | "created_at": nw.Datetime("us"), 65 | "scheduled_at": nw.Datetime("us", time_zone="UTC"), 66 | "started_at": nw.Datetime("ms"), 67 | "completed_at": nw.Datetime("ns", time_zone="Europe/Berlin"), 68 | }, 69 | ), 70 | ], 71 | ) 72 | def test_attrs_class(spec: AttrsClassType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 73 | schema = AnySchema(spec=spec) 74 | nw_schema = schema._nw_schema 75 | assert nw_schema == nw.Schema(expected_schema) 76 | 77 | 78 | def test_attrs_class_missing_decorator_raises() -> None: 79 | child_cls, expected_msg = create_missing_decorator_test_case() 80 | with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")): 81 | AnySchema(spec=child_cls) 82 | -------------------------------------------------------------------------------- /tests/parsers/_builtin_derived_types_test.py: -------------------------------------------------------------------------------- 1 | """Tests for derived types similar to pydantic-extra-types. 2 | 3 | This module tests that PyTypeStep correctly handles types that inherit 4 | from basic Python types, similar to those in pydantic-extra-types library. 
5 | 6 | The derived types mimic pydantic-extra-types 7 | 8 | References: 9 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_country/ 10 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_phone_numbers/ 11 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_routing_numbers/ 12 | """ 13 | 14 | from __future__ import annotations 15 | 16 | from datetime import date, datetime 17 | from decimal import Decimal 18 | from enum import Enum 19 | from typing import Any 20 | 21 | import narwhals as nw 22 | import pytest 23 | 24 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 25 | 26 | 27 | class EmailStr(str): 28 | __slots__ = () 29 | 30 | 31 | class PositiveInt(int): ... 32 | 33 | 34 | class PositiveFloat(float): ... 35 | 36 | 37 | class SecretBytes(bytes): ... 38 | 39 | 40 | class PastDatetime(datetime): ... 41 | 42 | 43 | class FutureDate(date): ... 44 | 45 | 46 | class NonNegativeDecimal(Decimal): ... 47 | 48 | 49 | class HttpMethod(str, Enum): 50 | """HTTP method enum.""" 51 | 52 | GET = "GET" 53 | POST = "POST" 54 | PUT = "PUT" 55 | DELETE = "DELETE" 56 | 57 | 58 | class Priority(int, Enum): 59 | """Priority levels.""" 60 | 61 | LOW = 1 62 | MEDIUM = 2 63 | HIGH = 3 64 | 65 | 66 | @pytest.fixture(scope="module") 67 | def parser_pipeline() -> ParserPipeline: 68 | """Create a parser pipeline with UnionTypeStep and PyTypeStep.""" 69 | union_parser = UnionTypeStep() 70 | py_parser = PyTypeStep() 71 | return ParserPipeline([union_parser, py_parser]) 72 | 73 | 74 | @pytest.mark.parametrize( 75 | ("input_type", "expected"), 76 | [ 77 | (EmailStr, nw.String()), 78 | (EmailStr | None, nw.String()), 79 | (list[EmailStr], nw.List(nw.String())), 80 | (PositiveInt, nw.Int64()), 81 | (list[PositiveInt], nw.List(nw.Int64())), 82 | (PositiveFloat, nw.Float64()), 83 | (tuple[PositiveFloat, PositiveFloat], nw.Array(nw.Float64(), shape=2)), 84 | (SecretBytes, nw.Binary()), 85 | (PastDatetime, nw.Datetime("us")), 86 | (FutureDate, nw.Date), 87 | (NonNegativeDecimal, nw.Decimal()), 88 | (HttpMethod, nw.Enum(HttpMethod)), 89 | (Priority, nw.Enum(Priority)), 90 | ], 91 | ) 92 | def test_derived_types(parser_pipeline: ParserPipeline, input_type: Any, expected: nw.dtypes.DType) -> None: 93 | result = parser_pipeline.parse(input_type, constraints=(), metadata={}) 94 | assert result == expected 95 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yaml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: Report a bug or unexpected behavior in anyschema 3 | title: "[Bug]: " 4 | labels: ["bug", "needs-triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this bug report! 10 | Please provide as much detail as possible to help us investigate and fix the issue. 11 | 12 | - type: textarea 13 | id: description 14 | attributes: 15 | label: Bug Description 16 | description: A clear and concise description of what the bug is. 17 | placeholder: What went wrong? 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: reproduction 23 | attributes: 24 | label: Steps to Reproduce 25 | description: Provide a minimal code example that demonstrates the bug. 26 | placeholder: | 27 | ```python 28 | from anyschema import AnySchema 29 | 30 | # What code triggers the bug? 
31 | ``` 32 | render: python 33 | validations: 34 | required: true 35 | 36 | - type: textarea 37 | id: expected 38 | attributes: 39 | label: Expected Behavior 40 | description: What did you expect to happen? 41 | placeholder: Describe the expected behavior 42 | validations: 43 | required: true 44 | 45 | - type: textarea 46 | id: actual 47 | attributes: 48 | label: Actual Behavior 49 | description: What actually happened? Include any error messages or stack traces. 50 | placeholder: Paste error messages or describe what happened instead 51 | validations: 52 | required: true 53 | 54 | - type: textarea 55 | id: version 56 | attributes: 57 | label: Please run `anyschema.show_versions()` and enter the output below 58 | description: | 59 | What library versions are you using? 60 | ```python 61 | import anyschema 62 | anyschema.show_versions() 63 | ``` 64 | validations: 65 | required: true 66 | 67 | - type: textarea 68 | id: extra 69 | attributes: 70 | label: Any Additional Information 71 | description: Add any other context, screenshots, or information about the problem here. 72 | placeholder: Optional additional information 73 | 74 | - type: checkboxes 75 | id: terms 76 | attributes: 77 | label: Checklist 78 | description: Please confirm the following 79 | options: 80 | - label: I have searched the existing issues to make sure this bug hasn't been reported yet 81 | required: true 82 | - label: I have provided a minimal reproducible example 83 | required: true 84 | - label: I am willing to submit a PR to fix this issue (optional) 85 | -------------------------------------------------------------------------------- /tests/parsers/_annotated_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Annotated, Any 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema.parsers import AnnotatedStep, ParserPipeline, PyTypeStep 9 | 10 | 11 | @pytest.fixture(scope="module") 12 | def annotated_parser() -> AnnotatedStep: 13 | """Create an AnnotatedStep instance with pipeline set.""" 14 | annotated_parser = AnnotatedStep() 15 | py_parser = PyTypeStep() 16 | _ = ParserPipeline([annotated_parser, py_parser]) 17 | return annotated_parser 18 | 19 | 20 | @pytest.mark.parametrize( 21 | ("input_type", "expected"), 22 | [ 23 | (Annotated[int, "meta"], nw.Int64()), 24 | (Annotated[str, "meta"], nw.String()), 25 | (Annotated[float, "meta"], nw.Float64()), 26 | (Annotated[bool, "meta"], nw.Boolean()), 27 | (Annotated[list[int], "meta"], nw.List(nw.Int64())), 28 | (Annotated[list[str], "meta"], nw.List(nw.String())), 29 | (Annotated[tuple[int, ...], "meta"], nw.List(nw.Int64())), 30 | (Annotated[tuple[str, str, str], "meta"], nw.Array(nw.String(), shape=3)), 31 | ], 32 | ) 33 | def test_parse_annotated(annotated_parser: AnnotatedStep, input_type: type, expected: nw.dtypes.DType) -> None: 34 | result = annotated_parser.parse(input_type, (), {}) 35 | assert result == expected 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "metadata_items", 40 | [ 41 | ("meta1",), 42 | ("meta1", "meta2"), 43 | ("meta1", "meta2", "meta3"), 44 | ({"key": "value"},), 45 | (["item1", "item2"],), 46 | (1, 2, 3), 47 | ], 48 | ) 49 | def test_parse_annotated_various_metadata(annotated_parser: AnnotatedStep, metadata_items: tuple[Any, ...]) -> None: 50 | """Parametrized test for Annotated with various metadata.""" 51 | input_type = Annotated[int, metadata_items] 52 | result = annotated_parser.parse(input_type, (), {}) 53 | assert 
result == nw.Int64() 54 | 55 | 56 | @pytest.mark.parametrize("input_type", [int, str, list[int], tuple[str, ...]]) 57 | def test_parse_non_annotated(annotated_parser: AnnotatedStep, input_type: type) -> None: 58 | result = annotated_parser.parse(input_type, (), {}) 59 | assert result is None 60 | 61 | 62 | def test_parse_annotated_with_class_metadata(annotated_parser: AnnotatedStep) -> None: 63 | class CustomMetadata: 64 | def __init__(self, value: str) -> None: 65 | self.value = value 66 | 67 | result = annotated_parser.parse(Annotated[int, CustomMetadata("test")], (), {}) 68 | assert result == nw.Int64() 69 | 70 | 71 | def test_parse_annotated_with_callable_metadata(annotated_parser: AnnotatedStep) -> None: 72 | result = annotated_parser.parse(Annotated[int, lambda x: x > 0], (), {}) 73 | assert result == nw.Int64() 74 | -------------------------------------------------------------------------------- /tests/pydantic/literal_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Literal 4 | 5 | import narwhals as nw 6 | from pydantic import BaseModel 7 | 8 | from tests.pydantic.utils import model_to_nw_schema 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.parsers import ParserPipeline 12 | 13 | 14 | def test_parse_string_literal(auto_pipeline: ParserPipeline) -> None: 15 | class UserModel(BaseModel): 16 | username: str 17 | role: Literal["admin", "user", "guest"] 18 | status: Literal["active", "inactive", "pending"] 19 | 20 | schema = model_to_nw_schema(UserModel, pipeline=auto_pipeline) 21 | 22 | assert schema["username"] == nw.String() 23 | assert schema["role"] == nw.Enum(["admin", "user", "guest"]) 24 | assert schema["status"] == nw.Enum(["active", "inactive", "pending"]) 25 | 26 | 27 | def test_parse_mixed_literal_types(auto_pipeline: ParserPipeline) -> None: 28 | class ConfigModel(BaseModel): 29 | name: str 30 | log_level: Literal["debug", "info", "warning", "error"] 31 | max_retries: Literal[1, 2, 3, 5, 10] 32 | enabled: Literal[True, False] 33 | 34 | schema = model_to_nw_schema(ConfigModel, pipeline=auto_pipeline) 35 | 36 | assert schema["name"] == nw.String() 37 | assert schema["log_level"] == nw.Enum(["debug", "info", "warning", "error"]) 38 | assert schema["max_retries"] == nw.Enum([1, 2, 3, 5, 10]) # type: ignore[list-item] 39 | assert schema["enabled"] == nw.Enum([True, False]) # type: ignore[list-item] 40 | 41 | 42 | def test_parse_literal_with_optional(auto_pipeline: ParserPipeline) -> None: 43 | class ProductModel(BaseModel): 44 | name: str 45 | category: Literal["electronics", "clothing", "food"] | None 46 | priority: Literal["high", "medium", "low"] 47 | 48 | schema = model_to_nw_schema(ProductModel, pipeline=auto_pipeline) 49 | 50 | assert schema["name"] == nw.String() 51 | assert schema["category"] == nw.Enum(["electronics", "clothing", "food"]) 52 | assert schema["priority"] == nw.Enum(["high", "medium", "low"]) 53 | 54 | 55 | def test_parse_nested_model_with_literal(auto_pipeline: ParserPipeline) -> None: 56 | class AddressModel(BaseModel): 57 | street: str 58 | country: Literal["US", "UK", "CA", "AU"] 59 | 60 | class PersonModel(BaseModel): 61 | name: str 62 | role: Literal["employee", "contractor", "intern"] 63 | address: AddressModel 64 | 65 | schema = model_to_nw_schema(PersonModel, pipeline=auto_pipeline) 66 | 67 | assert schema["name"] == nw.String() 68 | assert schema["role"] == nw.Enum(["employee", "contractor", "intern"]) 69 | assert 
schema["address"] == nw.Struct( 70 | [ 71 | nw.Field("street", nw.String()), 72 | nw.Field("country", nw.Enum(["US", "UK", "CA", "AU"])), 73 | ] 74 | ) 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # anyschema: From Type Specifications to Dataframe Schemas 2 | 3 | > [!CAUTION] 4 | > `anyschema` is still in early development and possibly unstable. 5 | 6 | --- 7 | 8 | [Documentation](https://fbruzzesi.github.io/anyschema/) | [Source Code](https://github.com/fbruzzesi/anyschema/) | [Issue Tracker](https://github.com/fbruzzesi/anyschema/issues) 9 | 10 | --- 11 | 12 | `anyschema` allows you to convert from type specifications (such as attrs classes, Pydantic models, SQLAlchemy tables, 13 | TypedDict, dataclasses, or plain Python dicts) to _any_ dataframe schema 14 | (by _"any"_ we intend those supported by Narwhals). 15 | 16 | Let's see how it works in practice with an example: 17 | 18 | ```python 19 | from anyschema import AnySchema 20 | from pydantic import BaseModel 21 | from pydantic import PositiveInt 22 | 23 | 24 | class Student(BaseModel): 25 | name: str 26 | age: PositiveInt 27 | classes: list[str] 28 | 29 | 30 | schema = AnySchema(spec=Student) 31 | 32 | # Convert to pyarrow schema 33 | pa_schema = schema.to_arrow() 34 | 35 | type(pa_schema) 36 | # pyarrow.lib.Schema 37 | 38 | pa_schema 39 | # name: string 40 | # age: uint64 41 | # classes: list 42 | # child 0, item: string 43 | 44 | pl_schema = schema.to_polars() 45 | 46 | type(pl_schema) 47 | # polars.schema.Schema 48 | 49 | pl_schema 50 | # Schema([('name', String), ('age', UInt64), ('classes', List(String))]) 51 | ``` 52 | 53 | To read more about `anyschema` functionalities and features consider checking out the 54 | [documentation](https://fbruzzesi.github.io/anyschema/) website. 55 | 56 | ## Installation 57 | 58 | `anyschema` is available on [pypi](https://pypi.org/project/anyschema/), and it can be installed directly via 59 | any package manager. For instance: 60 | 61 | ```bash 62 | uv pip install "anyschema[attrs]" 63 | uv pip install "anyschema[pydantic]" 64 | uv pip install "anyschema[sqlalchemy]" 65 | ``` 66 | 67 | To allow interoperability with attrs classes, Pydantic models or SQLAlchemy tables. 68 | 69 | ## When to use `anyschema` 70 | 71 | `anyschema` is designed for scenarios where type specifications (e.g., Pydantic models, SQLAlchemy tables) serve as a 72 | single source of truth for both validation and dataframe schema generation. 73 | 74 | The typical use cases are: Data pipelines, database-to-dataframe workflows, API to database workflows, schema 75 | generation, type-safe data processing. 76 | 77 | ## Why `anyschema`? 78 | 79 | The project was inspired by a [Talk Python podcast episode](https://www.youtube.com/live/wuGirNCyTxA?t=2880s) featuring 80 | the creator of [LanceDB](https://github.com/lancedb/lancedb), who mentioned the need to convert from Pydantic models to 81 | PyArrow schemas. 82 | 83 | This challenge led to a realization: such conversion could be generalized to many dataframe libraries by using Narwhals 84 | as an intermediate representation. `anyschema` makes this conversion seamless and extensible. 
85 | -------------------------------------------------------------------------------- /tests/anyschema/initialization_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | import pytest 7 | from narwhals.schema import Schema 8 | 9 | from anyschema import AnySchema 10 | from anyschema.parsers import ParserPipeline, ParserStep, make_pipeline 11 | 12 | if TYPE_CHECKING: 13 | from narwhals.dtypes import DType 14 | 15 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldSpecIterable, FieldType 16 | 17 | 18 | class CustomType: 19 | pass 20 | 21 | 22 | class CustomTypeStep(ParserStep): 23 | def parse( 24 | self, 25 | input_type: FieldType, 26 | constraints: FieldConstraints, # noqa: ARG002 27 | metadata: FieldMetadata, # noqa: ARG002 28 | ) -> DType | None: 29 | return nw.String() if input_type is CustomType else None 30 | 31 | 32 | def test_anyschema_with_unknown_spec_and_no_adapter() -> None: 33 | class UnknownClass: 34 | """A class that doesn't match any known adapter pattern.""" 35 | 36 | some_field: int 37 | 38 | expected_msg = "`spec` type is unknown and `adapter` is not specified." 39 | with pytest.raises(ValueError, match=expected_msg): 40 | AnySchema(spec=UnknownClass) 41 | 42 | 43 | def test_anyschema_with_unknown_spec_and_custom_adapter() -> None: 44 | class CustomSpec: 45 | """A custom spec class.""" 46 | 47 | field1: str 48 | field2: int 49 | 50 | def custom_adapter(spec: CustomSpec) -> FieldSpecIterable: # noqa: ARG001 51 | yield "field1", str, (), {} 52 | yield "field2", int, (), {} 53 | 54 | schema = AnySchema(spec=CustomSpec, adapter=custom_adapter) 55 | result = schema.to_polars() 56 | 57 | assert "field1" in result 58 | assert "field2" in result 59 | 60 | 61 | def test_anyschema_with_narwhals_schema() -> None: 62 | nw_schema = Schema({"name": nw.String(), "age": nw.Int64()}) 63 | anyschema = AnySchema(spec=nw_schema) 64 | assert anyschema._nw_schema is nw_schema 65 | 66 | 67 | def test_anyschema_with_dict_spec() -> None: 68 | spec = {"name": str, "age": int} 69 | 70 | schema = AnySchema(spec=spec) 71 | result = schema.to_polars() 72 | 73 | assert "name" in result 74 | assert "age" in result 75 | 76 | 77 | @pytest.mark.parametrize( 78 | "pipeline", 79 | [ 80 | make_pipeline("auto").with_steps(CustomTypeStep()), 81 | ParserPipeline.from_auto(CustomTypeStep()), 82 | [step.clone() for step in ParserPipeline.from_auto(CustomTypeStep()).steps], 83 | ], 84 | ) 85 | def test_anyschema_with_pipeline(pipeline: ParserPipeline) -> None: 86 | spec = {"custom_field": CustomType, "normal_field": int} 87 | schema = AnySchema(spec=spec, pipeline=pipeline) 88 | 89 | result = schema._nw_schema 90 | assert result == Schema( 91 | { 92 | "custom_field": nw.String(), 93 | "normal_field": nw.Int64(), 94 | } 95 | ) 96 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yaml: -------------------------------------------------------------------------------- 1 | name: ✨ Feature Request 2 | description: Suggest a new feature or enhancement for anyschema 3 | title: "[Feature]: " 4 | labels: ["enhancement", "needs-triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for your interest in improving anyschema! 10 | Please describe the feature you'd like to see added. 
11 | 12 | - type: textarea 13 | id: problem 14 | attributes: 15 | label: Problem Statement 16 | description: Is your feature request related to a problem? Please describe the problem or use case. 17 | placeholder: I would like to be able to... 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: solution 23 | attributes: 24 | label: Proposed Solution 25 | description: Describe the solution you'd like to see implemented. 26 | validations: 27 | required: true 28 | 29 | - type: textarea 30 | id: example 31 | attributes: 32 | label: Example Usage 33 | description: Provide a code example showing how you'd like to use this feature. 34 | placeholder: | 35 | ```python 36 | from anyschema import AnySchema 37 | 38 | # Example of how the feature would be used 39 | schema = AnySchema(spec=MyClass) 40 | result = schema.new_feature() 41 | ``` 42 | render: python 43 | 44 | - type: textarea 45 | id: alternatives 46 | attributes: 47 | label: Alternatives Considered 48 | description: Describe any alternative solutions or features you've considered. 49 | placeholder: I've considered using... but it doesn't work because... 50 | 51 | - type: textarea 52 | id: additional-context 53 | attributes: 54 | label: Additional Context 55 | description: Add any other context, screenshots, or examples about the feature request here. 56 | 57 | - type: dropdown 58 | id: priority 59 | attributes: 60 | label: Priority 61 | description: How important is this feature to you? 62 | options: 63 | - Nice to have 64 | - Would improve my workflow 65 | - Critical for my use case 66 | validations: 67 | required: true 68 | 69 | - type: checkboxes 70 | id: contribution 71 | attributes: 72 | label: Contribution 73 | description: Would you be willing to contribute? 74 | options: 75 | - label: I am willing to submit a PR to implement this feature 76 | - label: I can help with design/testing 77 | - label: I can provide more examples or use cases 78 | 79 | - type: checkboxes 80 | id: terms 81 | attributes: 82 | label: Checklist 83 | description: Please confirm the following 84 | options: 85 | - label: I have searched the existing issues to make sure this feature hasn't been requested yet 86 | required: true 87 | - label: I have checked the documentation to make sure this feature doesn't already exist 88 | required: true 89 | -------------------------------------------------------------------------------- /anyschema/_utils.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa: T201 2 | from __future__ import annotations 3 | 4 | from collections.abc import Sequence 5 | from typing import TYPE_CHECKING, Any, TypeVar 6 | 7 | if TYPE_CHECKING: 8 | from typing_extensions import TypeIs 9 | 10 | _T = TypeVar("_T") 11 | 12 | 13 | def qualified_type_name(obj: object | type[Any], /) -> str: 14 | # Copied from Narwhals: https://github.com/narwhals-dev/narwhals/blob/282a3cb08f406e2f319d86b81a7300a2a6c5f390/narwhals/_utils.py#L1922 15 | # Author: Marco Gorelli 16 | # License: MIT: https://github.com/narwhals-dev/narwhals/blob/282a3cb08f406e2f319d86b81a7300a2a6c5f390/LICENSE.md 17 | tp = obj if isinstance(obj, type) else type(obj) 18 | module = tp.__module__ if tp.__module__ != "builtins" else "" 19 | return f"{module}.{tp.__name__}".lstrip(".") 20 | 21 | 22 | def _get_sys_info() -> dict[str, str]: 23 | """System information. 24 | 25 | Returns system and Python version information 26 | 27 | Adapted from sklearn. 28 | 29 | Returns: 30 | Dictionary with system info. 
31 | """ 32 | import platform 33 | import sys 34 | 35 | python = sys.version.replace("\n", " ") 36 | 37 | blob = ( 38 | ("python", python), 39 | ("machine", platform.platform()), 40 | ) 41 | 42 | return dict(blob) 43 | 44 | 45 | def _get_deps_info() -> dict[str, str]: 46 | """Overview of the installed version of main dependencies. 47 | 48 | This function does not import the modules to collect the version numbers 49 | but instead relies on standard Python package metadata. 50 | 51 | Returns version information on relevant Python libraries 52 | 53 | This function and show_versions were copied from sklearn and adapted 54 | 55 | Returns: 56 | Mapping from dependency to version. 57 | """ 58 | from importlib.metadata import distributions 59 | 60 | libs = ( 61 | "anyschema", 62 | "narwhals", 63 | "typing_extensions", 64 | "attrs", 65 | "pydantic", 66 | "sqlalchemy", 67 | "pandas", 68 | "polars", 69 | "pyarrow", 70 | ) 71 | dist_map = {dist.name.lower(): dist.version for dist in distributions()} 72 | return {lib: dist_map.get(lib, "") for lib in libs} 73 | 74 | 75 | def show_versions() -> None: 76 | """Print useful debugging information. 77 | 78 | Examples: 79 | >>> from anyschema import show_versions 80 | >>> show_versions() # doctest: +SKIP 81 | """ 82 | sys_info = _get_sys_info() 83 | deps_info = _get_deps_info() 84 | 85 | print("\nSystem:") 86 | for k, stat in sys_info.items(): 87 | print(f"{k:>10}: {stat}") 88 | 89 | print("\nPython dependencies:") 90 | for k, stat in deps_info.items(): 91 | print(f"{k:>20}: {stat}") 92 | 93 | 94 | def is_sequence_but_not_str(sequence: Sequence[_T] | Any) -> TypeIs[Sequence[_T]]: 95 | return isinstance(sequence, Sequence) and not isinstance(sequence, str) 96 | 97 | 98 | def is_sequence_of(obj: Any, tp: type[_T]) -> TypeIs[Sequence[_T]]: 99 | # Check if an object is a sequence of `tp`, only sniffing the first element. 100 | return bool(is_sequence_but_not_str(obj) and (first := next(iter(obj), None)) and isinstance(first, tp)) 101 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation Issue 2 | description: Report an issue with documentation or suggest improvements 3 | title: "[Docs]: " 4 | labels: ["documentation", "needs-triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for helping improve anyschema's documentation! 10 | Good documentation is crucial for a great developer experience. 11 | 12 | - type: dropdown 13 | id: doc-type 14 | attributes: 15 | label: Documentation Type 16 | description: What type of documentation issue is this? 17 | options: 18 | - Missing documentation 19 | - Incorrect/outdated documentation 20 | - Unclear/confusing documentation 21 | - Typo or grammar issue 22 | - Example code issue 23 | - API reference issue 24 | - Tutorial/guide improvement 25 | - Other 26 | validations: 27 | required: true 28 | 29 | - type: input 30 | id: location 31 | attributes: 32 | label: Documentation Location 33 | description: Where is the documentation issue located? 34 | placeholder: e.g., https://fbruzzesi.github.io/anyschema/user-guide/getting-started/ or "API Reference > AnySchema" 35 | validations: 36 | required: true 37 | 38 | - type: textarea 39 | id: issue-description 40 | attributes: 41 | label: Issue Description 42 | description: Describe the documentation issue in detail. 43 | placeholder: | 44 | What is wrong, missing, or unclear? 
45 | What did you expect to find? 46 | What did you actually find? 47 | validations: 48 | required: true 49 | 50 | - type: textarea 51 | id: suggested-improvement 52 | attributes: 53 | label: Suggested Improvement 54 | description: How would you improve this documentation? 55 | placeholder: | 56 | Provide suggestions for: 57 | - What content should be added or changed 58 | - How it could be explained better 59 | - What examples would be helpful 60 | 61 | - type: textarea 62 | id: example-code 63 | attributes: 64 | label: Code Example (if applicable) 65 | description: If suggesting a code example, provide it here. 66 | placeholder: | 67 | ```python 68 | # Your suggested example code 69 | from anyschema import AnySchema 70 | # ... 71 | ``` 72 | render: python 73 | 74 | - type: textarea 75 | id: additional-context 76 | attributes: 77 | label: Additional Context 78 | description: Add any other context, screenshots, or information about the documentation issue. 79 | 80 | - type: checkboxes 81 | id: contribution 82 | attributes: 83 | label: Contribution 84 | description: Would you be willing to help? 85 | options: 86 | - label: I am willing to submit a PR to improve this documentation 87 | - label: I can provide additional examples or use cases 88 | 89 | - type: checkboxes 90 | id: terms 91 | attributes: 92 | label: Checklist 93 | description: Please confirm the following 94 | options: 95 | - label: I have checked that this documentation issue hasn't been reported yet 96 | required: true 97 | - label: I have reviewed the current documentation at the specified location 98 | required: true 99 | -------------------------------------------------------------------------------- /anyschema/parsers/_union.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from types import NoneType, UnionType 4 | from typing import TYPE_CHECKING, Union 5 | 6 | from typing_extensions import get_args, get_origin # noqa: UP035 7 | 8 | from anyschema._metadata import get_anyschema_value_by_key, set_anyschema_meta 9 | from anyschema.exceptions import UnsupportedDTypeError 10 | from anyschema.parsers._base import ParserStep 11 | 12 | if TYPE_CHECKING: 13 | from narwhals.dtypes import DType 14 | 15 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType 16 | 17 | 18 | class UnionTypeStep(ParserStep): 19 | """Parser for Union types including `Optional`. 20 | 21 | Handles: 22 | 23 | - `Union[T, None]`, `T | None`, `Optional[T]` 24 | - Extracts the non-None type and its metadata for further parsing 25 | """ 26 | 27 | def parse(self, input_type: FieldType, constraints: FieldConstraints, metadata: FieldMetadata) -> DType | None: 28 | """Parse Union types, particularly Optional types. 29 | 30 | Arguments: 31 | input_type: The type to parse. 32 | constraints: Constraints associated with the type (will be preserved and passed through). 33 | metadata: Custom metadata dictionary (will be preserved and passed through). 34 | 35 | Returns: 36 | A Narwhals DType by extracting the non-None type and delegating to the chain. 
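Examples:
    A minimal sketch mirroring the unit tests (a `PyTypeStep` later in the pipeline
    resolves the extracted inner type):

    >>> from typing import Optional
    >>> from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep
    >>> step = UnionTypeStep()
    >>> _ = ParserPipeline([step, PyTypeStep()])
    >>> step.parse(Optional[int], (), {})  # doctest: +SKIP
    Int64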
37 | """ 38 | # Handle Union types from typing module (including Optional) 39 | # Handle UnionType (PEP 604: T | U syntax) 40 | if get_origin(input_type) is Union or isinstance(input_type, UnionType): 41 | args = get_args(input_type) 42 | extracted_type = self._parse_union(args) 43 | 44 | # Set nullable metadata if not already explicitly set 45 | # This way Union[T, None] / Optional[T] automatically marks the field as nullable 46 | # We mutate the metadata dict in-place so parse_into_field can read it 47 | if get_anyschema_value_by_key(metadata, key="nullable") is None: 48 | set_anyschema_meta(metadata, key="nullable", value=True) 49 | 50 | return self.pipeline.parse(extracted_type, constraints, metadata, strict=True) 51 | 52 | return None 53 | 54 | def _parse_union(self, union: tuple[FieldType, ...]) -> FieldType: 55 | """Extract the non-None type from a Union. 56 | 57 | Arguments: 58 | union: Tuple of types in the Union. 59 | outer_constraints: Constraints from the outer type (e.g., from Annotated[Optional[T], ...]). 60 | 61 | Returns: 62 | A tuple of (non-None type, preserved constraints tuple). 63 | The outer constraints are preserved to ensure constraints aren't lost. 64 | 65 | Raises: 66 | UnsupportedDTypeError: If the Union has more than 2 types or both types are not None. 67 | """ 68 | if len(union) != 2: # noqa: PLR2004 69 | msg = "Union with more than two types is not supported." 70 | raise UnsupportedDTypeError(msg) 71 | 72 | field0, field1 = union 73 | 74 | if field0 is not NoneType and field1 is not NoneType: 75 | msg = "Union with mixed types is not supported." 76 | raise UnsupportedDTypeError(msg) 77 | 78 | return field1 if field0 is NoneType else field0 79 | -------------------------------------------------------------------------------- /tests/parsers/attrs_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import attrs 4 | import narwhals as nw 5 | import pytest 6 | 7 | from anyschema.parsers import ParserPipeline, PyTypeStep 8 | from anyschema.parsers.attrs import AttrsTypeStep 9 | from tests.conftest import AttrsDerived, AttrsPerson, AttrsPersonFrozen, create_missing_decorator_test_case 10 | 11 | 12 | @pytest.fixture(scope="module") 13 | def attrs_parser() -> AttrsTypeStep: 14 | attrs_parser = AttrsTypeStep() 15 | py_parser = PyTypeStep() 16 | _ = ParserPipeline([attrs_parser, py_parser]) 17 | return attrs_parser 18 | 19 | 20 | def test_parse_attrs_class_into_struct(attrs_parser: AttrsTypeStep) -> None: 21 | result = attrs_parser.parse(AttrsPerson, (), {}) 22 | 23 | expected_fields = [ 24 | nw.Field(name="name", dtype=nw.String()), 25 | nw.Field(name="age", dtype=nw.Int64()), 26 | nw.Field(name="date_of_birth", dtype=nw.Date()), 27 | nw.Field(name="is_active", dtype=nw.Boolean()), 28 | nw.Field(name="classes", dtype=nw.List(nw.String())), 29 | nw.Field(name="grades", dtype=nw.List(nw.Float64())), 30 | ] 31 | expected = nw.Struct(expected_fields) 32 | assert result == expected 33 | 34 | 35 | def test_parse_frozen_attrs_class(attrs_parser: AttrsTypeStep) -> None: 36 | result = attrs_parser.parse(AttrsPersonFrozen, (), {}) 37 | 38 | expected_fields = [ 39 | nw.Field(name="name", dtype=nw.String()), 40 | nw.Field(name="age", dtype=nw.Int64()), 41 | nw.Field(name="date_of_birth", dtype=nw.Date()), 42 | ] 43 | expected = nw.Struct(expected_fields) 44 | assert result == expected 45 | 46 | 47 | def test_parse_empty_attrs_class(attrs_parser: AttrsTypeStep) -> None: 48 | @attrs.define 49 | class 
EmptyClass: 50 | pass 51 | 52 | result = attrs_parser.parse(EmptyClass, (), {}) 53 | expected = nw.Struct([]) 54 | assert result == expected 55 | 56 | 57 | def test_parse_non_attrs_class_returns_none(attrs_parser: AttrsTypeStep) -> None: 58 | class RegularClass: 59 | pass 60 | 61 | result = attrs_parser.parse(RegularClass, (), {}) 62 | assert result is None 63 | 64 | 65 | def test_parse_classic_attr_s_decorator(attrs_parser: AttrsTypeStep) -> None: 66 | import attr 67 | 68 | @attr.s(auto_attribs=True) 69 | class ClassicAttrs: 70 | name: str 71 | value: int 72 | 73 | result = attrs_parser.parse(ClassicAttrs, (), {}) 74 | 75 | expected_fields = [ 76 | nw.Field(name="name", dtype=nw.String()), 77 | nw.Field(name="value", dtype=nw.Int64()), 78 | ] 79 | expected = nw.Struct(expected_fields) 80 | assert result == expected 81 | 82 | 83 | def test_parse_attrs_with_inheritance(attrs_parser: AttrsTypeStep) -> None: 84 | result = attrs_parser.parse(AttrsDerived, (), {}) 85 | 86 | expected_fields = [ 87 | nw.Field(name="foo", dtype=nw.String()), 88 | nw.Field(name="bar", dtype=nw.Int64()), 89 | nw.Field(name="baz", dtype=nw.Float64()), 90 | ] 91 | expected = nw.Struct(expected_fields) 92 | assert result == expected 93 | 94 | 95 | def test_parse_attrs_missing_decorator_raises(attrs_parser: AttrsTypeStep) -> None: 96 | child_cls, expected_msg = create_missing_decorator_test_case() 97 | with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")): 98 | attrs_parser.parse(child_cls, (), {}) 99 | -------------------------------------------------------------------------------- /tests/parsers/forward_ref_dependency_mock_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from unittest.mock import patch 4 | 5 | from anyschema.parsers import ForwardRefStep 6 | 7 | 8 | def test_build_namespace_without_pydantic() -> None: 9 | """Test that pydantic types are excluded when PYDANTIC_AVAILABLE is False.""" 10 | with patch(target="anyschema.parsers._forward_ref.PYDANTIC_AVAILABLE", new=False): 11 | step = ForwardRefStep() 12 | 13 | # Pydantic types should NOT be in the namespace 14 | assert "BaseModel" not in step.globalns 15 | assert "Field" not in step.globalns 16 | assert "PositiveInt" not in step.globalns 17 | assert "conint" not in step.globalns 18 | 19 | # But builtin types should still be there 20 | assert "int" in step.globalns 21 | assert "str" in step.globalns 22 | assert "List" in step.globalns 23 | 24 | 25 | def test_build_namespace_without_annotated_types() -> None: 26 | """Test that annotated_types are excluded when ANNOTATED_TYPES_AVAILABLE is False.""" 27 | with patch(target="anyschema.parsers._forward_ref.ANNOTATED_TYPES_AVAILABLE", new=False): 28 | step = ForwardRefStep() 29 | 30 | # annotated_types should NOT be in the namespace 31 | assert "Gt" not in step.globalns 32 | assert "Ge" not in step.globalns 33 | assert "Lt" not in step.globalns 34 | assert "Le" not in step.globalns 35 | assert "Interval" not in step.globalns 36 | 37 | # But builtin types should still be there 38 | assert "int" in step.globalns 39 | assert "str" in step.globalns 40 | 41 | 42 | def test_build_namespace_without_both_optional_deps() -> None: 43 | """Test namespace with neither pydantic nor annotated_types.""" 44 | with ( 45 | patch(target="anyschema.parsers._forward_ref.PYDANTIC_AVAILABLE", new=False), 46 | patch(target="anyschema.parsers._forward_ref.ANNOTATED_TYPES_AVAILABLE", new=False), 47 | ): 48 | 
step = ForwardRefStep()
49 | 
50 |         # No pydantic types
51 |         assert "BaseModel" not in step.globalns
52 |         assert "PositiveInt" not in step.globalns
53 | 
54 |         # No annotated_types
55 |         assert "Gt" not in step.globalns
56 |         assert "Interval" not in step.globalns
57 | 
58 |         # But builtin types should still be there
59 |         assert "int" in step.globalns
60 |         assert "str" in step.globalns
61 |         assert "list" in step.globalns
62 |         assert "dict" in step.globalns
63 |         assert "Union" in step.globalns
64 | 
65 | 
66 | def test_build_namespace_with_user_globals_override() -> None:
67 |     """Test that user-provided globals can override defaults."""
68 |     with (
69 |         patch(target="anyschema.parsers._forward_ref.PYDANTIC_AVAILABLE", new=False),
70 |         patch(target="anyschema.parsers._forward_ref.ANNOTATED_TYPES_AVAILABLE", new=False),
71 |     ):
72 |         # User provides their own types
73 |         custom_globals = {"CustomType": int, "int": str}  # Intentionally override int
74 |         step = ForwardRefStep(globalns=custom_globals)
75 | 
76 |         # User's custom type should be present
77 |         assert "CustomType" in step.globalns
78 |         assert step.globalns["CustomType"] is int
79 | 
80 |         # User's override should work (though not recommended!)
81 |         assert step.globalns["int"] is str
82 | 
83 |         # Built-in types that weren't overridden should still be there
84 |         assert "str" in step.globalns
85 |         assert "list" in step.globalns
86 | 
--------------------------------------------------------------------------------
/anyschema/_dependencies.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | import sys
4 | from collections.abc import Mapping, Sequence
5 | from dataclasses import is_dataclass as dc_is_dataclass
6 | from importlib.util import find_spec
7 | from typing import TYPE_CHECKING
8 | 
9 | from typing_extensions import TypeIs, is_typeddict
10 | 
11 | if TYPE_CHECKING:
12 |     from types import ModuleType
13 | 
14 |     from pydantic import BaseModel
15 | 
16 |     from anyschema.typing import AttrsClassType, DataclassType, IntoOrderedDict, SQLAlchemyTableType, TypedDictType
17 | 
18 | ANNOTATED_TYPES_AVAILABLE = find_spec("annotated_types") is not None
19 | PYDANTIC_AVAILABLE = find_spec("pydantic") is not None
20 | ATTRS_AVAILABLE = find_spec("attrs") is not None
21 | SQLALCHEMY_AVAILABLE = find_spec("sqlalchemy") is not None
22 | 
23 | 
24 | def get_pydantic() -> ModuleType | None:
25 |     """Get pydantic module (if already imported - else return None)."""
26 |     return sys.modules.get("pydantic", None)
27 | 
28 | 
29 | def get_attrs() -> ModuleType | None:
30 |     """Get attrs module (if already imported - else return None)."""
31 |     return sys.modules.get("attrs", None)
32 | 
33 | 
34 | def is_into_ordered_dict(obj: object) -> TypeIs[IntoOrderedDict]:
35 |     """Check if the object can be converted into a python OrderedDict."""
36 |     tpl_size = 2
37 |     return isinstance(obj, Mapping) or (
38 |         isinstance(obj, Sequence) and all(isinstance(s, tuple) and len(s) == tpl_size for s in obj)
39 |     )
40 | 
41 | 
42 | def is_typed_dict(obj: object) -> TypeIs[TypedDictType]:
43 |     """Check if the object is a TypedDict, narrowing its type for type checkers."""
44 |     return is_typeddict(obj)
45 | 
46 | 
47 | def is_dataclass(obj: object) -> TypeIs[DataclassType]:
48 |     """Check if the object is a dataclass, narrowing its type for type checkers."""
49 |     return dc_is_dataclass(obj)
50 | 
51 | 
52 | def is_pydantic_base_model(obj: object) -> TypeIs[type[BaseModel]]:
53 |     """Check if the object is a pydantic BaseModel."""
54 |     return (
55 |         (pydantic := 
get_pydantic()) is not None 56 | and isinstance(obj, type) 57 | and isinstance(obj, type(pydantic.BaseModel)) 58 | and issubclass(obj, pydantic.BaseModel) 59 | ) 60 | 61 | 62 | def is_attrs_class(obj: object) -> TypeIs[AttrsClassType]: 63 | """Check if the object is an attrs class. 64 | 65 | Uses attrs.has() to check if a class is an attrs class. 66 | Supports @attrs.define/@attrs.frozen decorators. 67 | """ 68 | return (attrs := get_attrs()) is not None and attrs.has(obj) 69 | 70 | 71 | def get_sqlalchemy() -> ModuleType | None: 72 | """Get sqlalchemy module (if already imported - else return None).""" 73 | return sys.modules.get("sqlalchemy", None) 74 | 75 | 76 | def get_sqlalchemy_orm() -> ModuleType | None: 77 | """Get sqlalchemy.orm module (if already imported - else return None).""" 78 | return sys.modules.get("sqlalchemy.orm", None) 79 | 80 | 81 | def is_sqlalchemy_table(obj: object) -> TypeIs[SQLAlchemyTableType]: 82 | """Check if the object is a SQLAlchemy Table or DeclarativeBase class. 83 | 84 | Supports both: 85 | 86 | - SQLAlchemy Table instances (Core) 87 | - SQLAlchemy ORM mapped classes (DeclarativeBase subclasses) 88 | """ 89 | is_table = (sql := get_sqlalchemy()) is not None and isinstance(obj, sql.Table) 90 | is_declarative_base = ( 91 | (sql_orm := get_sqlalchemy_orm()) is not None 92 | and isinstance(obj, type) 93 | and issubclass(obj, sql_orm.DeclarativeBase) 94 | ) 95 | return is_table or is_declarative_base 96 | -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- 1 | site_name: AnySchema 2 | site_url: https://fbruzzesi.github.io/anyschema/ 3 | site_author: Francesco Bruzzesi 4 | 5 | repo_url: https://github.com/FBruzzesi/anyschema 6 | repo_name: FBruzzesi/anyschema 7 | edit_uri: edit/main/docs/ 8 | 9 | nav: 10 | - Home: index.md 11 | - User Guide: 12 | - Getting Started: user-guide/getting-started.md 13 | - Metadata: user-guide/metadata.md 14 | - Advanced Usage: user-guide/advanced.md 15 | - Serialization & Deserialization: user-guide/serde.md 16 | - Best Practices: user-guide/best-practices.md 17 | - End to End Example with Custom Components: user-guide/custom-end-to-end-example.md 18 | - Troubleshooting: user-guide/troubleshooting.md 19 | - OpenAPI Compatibility: user-guide/openapi-compatibility.md 20 | - Architecture: architecture.md 21 | - API Reference: 22 | - Home: api-reference/index.md 23 | - AnySchema: api-reference/anyschema.md 24 | - Parsers: api-reference/parsers.md 25 | - Spec Adapters: api-reference/adapters.md 26 | - Serialization & Deserialization: api-reference/serde.md 27 | - Exceptions: api-reference/exceptions.md 28 | - Typing: api-reference/typing.md 29 | 30 | theme: 31 | name: material 32 | font: false 33 | # favicon: assets/logo.svg # TODO(FBruzzesi) 34 | # logo: assets/logo.svg # TODO(FBruzzesi) 35 | features: 36 | - content.code.copy 37 | - content.code.annotate 38 | - navigation.footer 39 | - navigation.indexes 40 | - navigation.top 41 | palette: 42 | # Palette toggle for automatic mode 43 | - media: "(prefers-color-scheme)" 44 | toggle: 45 | icon: material/brightness-auto 46 | name: Switch to light mode 47 | 48 | # Palette toggle for light mode 49 | - media: "(prefers-color-scheme: light)" 50 | scheme: default 51 | toggle: 52 | icon: material/brightness-7 53 | name: Switch to dark mode 54 | 55 | # Palette toggle for dark mode 56 | - media: "(prefers-color-scheme: dark)" 57 | scheme: slate 58 | toggle: 59 | icon: 
material/brightness-4 60 | name: Switch to system preference 61 | 62 | plugins: 63 | - autorefs 64 | - search: 65 | enabled: true 66 | separator: '[\s\-,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' 67 | - mkdocstrings: 68 | default_handler: python 69 | enable_inventory: true 70 | handlers: 71 | python: 72 | options: 73 | members_order: alphabetical 74 | line_length: 100 75 | show_overloads: true 76 | show_signature_annotations: true 77 | signature_crossrefs: true 78 | 79 | markdown_extensions: 80 | - footnotes 81 | - admonition 82 | - md_in_html 83 | - attr_list 84 | - def_list 85 | - toc: 86 | permalink: true 87 | toc_depth: 5 88 | - pymdownx.details 89 | - pymdownx.tabbed: 90 | alternate_style: true 91 | - pymdownx.superfences: 92 | custom_fences: 93 | - name: python 94 | class: python 95 | validator: !!python/name:markdown_exec.validator 96 | format: !!python/name:markdown_exec.formatter 97 | - name: mermaid 98 | class: mermaid 99 | format: !!python/name:pymdownx.superfences.fence_code_format 100 | - pymdownx.highlight: 101 | anchor_linenums: true 102 | line_spans: __span 103 | pygments_lang_class: true 104 | - pymdownx.inlinehilite 105 | - pymdownx.snippets 106 | - pymdownx.arithmatex: 107 | generic: true 108 | 109 | extra_javascript: 110 | - javascript/extra.js 111 | 112 | extra_css: 113 | - css/extra.css 114 | -------------------------------------------------------------------------------- /tests/adapters/dataclass_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import sys 4 | from dataclasses import dataclass, field, make_dataclass 5 | from datetime import date, datetime 6 | from typing import TYPE_CHECKING 7 | 8 | import pytest 9 | from pydantic.dataclasses import dataclass as pydantic_dataclass 10 | 11 | from anyschema.adapters import dataclass_adapter 12 | from tests.conftest import DataclassEventWithTimeMetadata 13 | 14 | if TYPE_CHECKING: 15 | from anyschema.typing import DataclassType, FieldSpec 16 | 17 | 18 | class PersonIntoDataclass: 19 | name: str 20 | age: int 21 | date_of_birth: date 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "spec", 26 | [ 27 | pydantic_dataclass(PersonIntoDataclass), 28 | dataclass(PersonIntoDataclass), 29 | make_dataclass("Test", [("name", str), ("age", int), ("date_of_birth", date)]), 30 | ], 31 | ) 32 | def test_dataclass_adapter(spec: DataclassType) -> None: 33 | expected: tuple[FieldSpec, ...] = (("name", str, (), {}), ("age", int, (), {}), ("date_of_birth", date, (), {})) 34 | result = tuple(dataclass_adapter(spec)) 35 | assert result == expected 36 | 37 | 38 | def test_dataclass_adapter_missing_decorator_raises() -> None: 39 | """Test that adapter raises helpful error when child class isn't decorated.""" 40 | 41 | @dataclass 42 | class Base: 43 | foo: str 44 | 45 | class ChildWithoutDecorator(Base): 46 | bar: int 47 | 48 | expected_msg = ( 49 | "Class 'ChildWithoutDecorator' has annotations ('bar') that are not dataclass fields. " 50 | "If this class inherits from a dataclass, you must also decorate it with @dataclass " 51 | "to properly define these fields." 
52 |     )
53 | 
54 |     with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")):
55 |         list(dataclass_adapter(ChildWithoutDecorator))  # ty: ignore[invalid-argument-type]
56 | 
57 | 
58 | def test_dataclass_adapter_with_time_metadata() -> None:
59 |     result = tuple(dataclass_adapter(DataclassEventWithTimeMetadata))  # ty: ignore[invalid-argument-type]
60 | 
61 |     expected: tuple[FieldSpec, ...] = (
62 |         ("name", str, (), {"anyschema": {"description": "Event name"}}),
63 |         ("created_at", datetime, (), {}),
64 |         ("scheduled_at", datetime, (), {"anyschema": {"time_zone": "UTC", "description": "Scheduled time"}}),
65 |         ("started_at", datetime, (), {"anyschema": {"time_unit": "ms"}}),
66 |         ("completed_at", datetime, (), {"anyschema": {"time_zone": "Europe/Berlin", "time_unit": "ns"}}),
67 |     )
68 | 
69 |     assert result == expected
70 | 
71 | 
72 | @pytest.mark.skipif(sys.version_info < (3, 14), reason="doc parameter requires Python 3.14+")
73 | def test_dataclass_adapter_with_doc_argument() -> None:
74 |     @dataclass
75 |     class Product:
76 |         name: str = field(doc="Product name")  # pyright: ignore[reportCallIssue] # ty: ignore[no-matching-overload]
77 |         price: float = field(  # pyright: ignore[reportCallIssue] # ty: ignore[no-matching-overload]
78 |             doc="Product price",
79 |             metadata={"anyschema": {"description": "From metadata"}},  # anyschema metadata have precedence
80 |         )
81 |         in_stock: bool
82 | 
83 |     result = list(dataclass_adapter(Product))  # ty: ignore[invalid-argument-type]
84 |     expected = [
85 |         ("name", str, (), {"anyschema": {"description": "Product name"}}),
86 |         ("price", float, (), {"anyschema": {"description": "From metadata"}}),
87 |         ("in_stock", bool, (), {}),
88 |     ]
89 |     assert result == expected
90 | 
--------------------------------------------------------------------------------
/docs/user-guide/custom-end-to-end-example.md:
--------------------------------------------------------------------------------
1 | 
2 | # End to End Example with Custom Components
3 | 
4 | Let's now bring together the learnings from the [previous section](advanced.md) in an example that combines a custom
5 | parser and a custom adapter.
6 | 
7 | ## 1. Define custom types
8 | 
9 | ```python exec="true" source="above" session="end-to-end"
10 | from typing import Any
11 | 
12 | import narwhals as nw
13 | from narwhals.dtypes import DType
14 | 
15 | from anyschema import AnySchema
16 | from anyschema.parsers import (
17 |     ParserStep,
18 |     ForwardRefStep,
19 |     UnionTypeStep,
20 |     AnnotatedStep,
21 |     PyTypeStep,
22 | )
23 | from anyschema.typing import FieldSpecIterable
24 | 
25 | 
26 | class Email:
27 |     """Email address type."""
28 | 
29 | 
30 | class PhoneNumber:
31 |     """Phone number type."""
32 | 
33 | 
34 | class Currency:
35 |     """Monetary value type."""
36 | ```
37 | 
38 | ## 2. Create custom parser for these types
39 | 
40 | ```python exec="true" source="above" session="end-to-end"
41 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType
42 | 
43 | 
44 | class CustomerTypesStep(ParserStep):
45 |     """Parser for custom types."""
46 | 
47 |     def parse(
48 |         self,
49 |         input_type: FieldType,
50 |         constraints: FieldConstraints,
51 |         metadata: FieldMetadata,
52 |     ) -> DType | None:
53 |         if input_type is Email:
54 |             return nw.String()
55 |         elif input_type is PhoneNumber:
56 |             return nw.String()
57 |         elif input_type is Currency:
58 |             return nw.Float32()
59 |         return None
60 | ```
61 | 
62 | ## 3. 
Define custom schema format 63 | 64 | ```python exec="true" source="above" session="end-to-end" 65 | class CustomerSchema: 66 | """Custom schema format.""" 67 | 68 | def __init__(self, entity_name: str, fields: list[dict]): 69 | self.entity_name = entity_name 70 | self.fields = fields 71 | ``` 72 | 73 | ## 4. Create adapter for the custom format 74 | 75 | ```python exec="true" source="above" session="end-to-end" 76 | def customer_schema_adapter(spec: CustomerSchema) -> FieldSpecIterable: 77 | """Adapter for CustomerSchema format.""" 78 | for field in spec.fields: 79 | field_name = field["name"] 80 | field_type = field["type"] 81 | required = field.get("required", True) 82 | 83 | # Convert required=False to Optional 84 | if not required: 85 | field_type = field_type | None 86 | 87 | yield field_name, field_type, (), {} 88 | ``` 89 | 90 | ## 5. Create pipeline steps with custom parser 91 | 92 | ```python exec="true" source="above" session="end-to-end" 93 | pipeline_steps = [ 94 | ForwardRefStep(), 95 | UnionTypeStep(), 96 | AnnotatedStep(), 97 | CustomerTypesStep(), 98 | PyTypeStep(), 99 | ] 100 | ``` 101 | 102 | ## 6. Use everything together 103 | 104 | ```python exec="true" source="above" result="python" session="end-to-end" 105 | customer_schema = CustomerSchema( 106 | entity_name="Customer", 107 | fields=[ 108 | {"name": "id", "type": int, "required": True}, 109 | {"name": "name", "type": str, "required": True}, 110 | {"name": "email", "type": Email, "required": True}, 111 | {"name": "phone", "type": PhoneNumber, "required": False}, 112 | {"name": "balance", "type": Currency, "required": True}, 113 | ], 114 | ) 115 | 116 | schema = AnySchema( 117 | spec=customer_schema, 118 | pipeline=pipeline_steps, 119 | adapter=customer_schema_adapter, 120 | ) 121 | 122 | print(schema.to_polars()) 123 | ``` 124 | -------------------------------------------------------------------------------- /.github/release-drafter.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for Release Drafter 2 | # Automatically generates release notes from pull requests 3 | 4 | name-template: 'v$RESOLVED_VERSION' 5 | tag-template: 'v$RESOLVED_VERSION' 6 | 7 | # Categories for organizing changes in release notes 8 | categories: 9 | - title: '🚀 Features' 10 | labels: 11 | - 'enhancement' 12 | - 'feature' 13 | - title: '🐛 Bug Fixes' 14 | labels: 15 | - 'bug' 16 | - 'bugfix' 17 | - 'fix' 18 | - title: '📚 Documentation' 19 | labels: 20 | - 'documentation' 21 | - 'docs' 22 | - title: '🧪 Tests' 23 | labels: 24 | - 'tests' 25 | - 'testing' 26 | - title: '⚡ Performance' 27 | labels: 28 | - 'performance' 29 | - 'optimization' 30 | - title: '🔧 Maintenance' 31 | labels: 32 | - 'maintenance' 33 | - 'chore' 34 | - 'refactor' 35 | - title: '🏗️ Build & CI' 36 | labels: 37 | - 'ci-cd' 38 | - 'build' 39 | - 'github-actions' 40 | - title: '📦 Dependencies' 41 | labels: 42 | - 'dependencies' 43 | - 'deps' 44 | - title: '⚠️ Breaking Changes' 45 | labels: 46 | - 'breaking-change' 47 | - 'breaking' 48 | 49 | # Exclude certain labels from release notes 50 | exclude-labels: 51 | - 'skip-changelog' 52 | - 'duplicate' 53 | - 'invalid' 54 | - 'wontfix' 55 | 56 | # Template for the release body 57 | template: | 58 | ## What's Changed 59 | 60 | $CHANGES 61 | 62 | ## Statistics 63 | 64 | **Full Changelog**: https://github.com/fbruzzesi/anyschema/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION 65 | 66 | ## Installation 67 | 68 | ```bash 69 | pip install anyschema==$RESOLVED_VERSION 70 | ``` 71 | 72 | ## 
Contributors 73 | 74 | Thanks to all contributors who made this release possible! 🎉 75 | 76 | $CONTRIBUTORS 77 | 78 | # Change template for each PR 79 | change-template: '- $TITLE (#$NUMBER) @$AUTHOR' 80 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions 81 | 82 | # Template for contributor list 83 | contributors-template: '- @$AUTHOR' 84 | 85 | # Exclude contributors (bots, etc.) 86 | exclude-contributors: 87 | - 'dependabot' 88 | - 'dependabot[bot]' 89 | - 'github-actions' 90 | - 'github-actions[bot]' 91 | 92 | # Automatically label PRs that don't have labels 93 | autolabeler: 94 | - label: 'bug' 95 | title: 96 | - '/fix(es|ed)?/i' 97 | - label: 'enhancement' 98 | title: 99 | - '/feat(ure)?/i' 100 | - label: 'documentation' 101 | title: 102 | - '/docs?/i' 103 | - '/documentation/i' 104 | - label: 'tests' 105 | title: 106 | - '/tests?/i' 107 | - label: 'ci-cd' 108 | title: 109 | - '/ci/i' 110 | - label: 'dependencies' 111 | title: 112 | - '/deps/i' 113 | - label: 'performance' 114 | title: 115 | - '/perf(ormance)?/i' 116 | - label: 'breaking-change' 117 | title: 118 | - '/breaking/i' 119 | - label: 'maintenance' 120 | title: 121 | - '/refactor/i' 122 | - '/chore/i' 123 | 124 | # Replacers for cleaning up titles 125 | replacers: 126 | - search: '/^(feat|feature)(\(.*\))?:\s*/i' 127 | replace: '' 128 | - search: '/^fix(\(.*\))?:\s*/i' 129 | replace: '' 130 | - search: '/^docs?(\(.*\))?:\s*/i' 131 | replace: '' 132 | - search: '/^test(\(.*\))?:\s*/i' 133 | replace: '' 134 | - search: '/^chore(\(.*\))?:\s*/i' 135 | replace: '' 136 | - search: '/^refactor(\(.*\))?:\s*/i' 137 | replace: '' 138 | - search: '/^perf(\(.*\))?:\s*/i' 139 | replace: '' 140 | - search: '/^ci(\(.*\))?:\s*/i' 141 | replace: '' 142 | - search: '/^build(\(.*\))?:\s*/i' 143 | replace: '' 144 | 145 | sort-by: merged_at 146 | sort-direction: ascending 147 | -------------------------------------------------------------------------------- /docs/user-guide/openapi-compatibility.md: -------------------------------------------------------------------------------- 1 | # OpenAPI Compatibility 2 | 3 | `anyschema` supports OpenAPI-compatible metadata through the `x-anyschema` prefix, which is an extension point defined 4 | in the [OpenAPI specification](https://swagger.io/specification/#specification-extensions). 5 | 6 | ## What is OpenAPI? 7 | 8 | OpenAPI (formerly known as Swagger) is a widely-adopted specification for describing REST APIs. 9 | It allows you to define your API's structure, endpoints, request/response formats, and more in a standardized way. 10 | 11 | ## Extension fields in OpenAPI 12 | 13 | The OpenAPI specification allows custom extensions through fields prefixed with `x-`. 14 | These extension fields can contain any valid JSON and are used to add vendor-specific or custom information that's not 15 | part of the core OpenAPI specification. 16 | 17 | ## Using `x-anyschema` prefix 18 | 19 | In `anyschema`, you can use either `"anyschema"` or `"x-anyschema"` as the metadata namespace key. 
20 | Both work identically:
21 | 
22 | ```python exec="true" source="above" result="python" session="openapi-intro"
23 | from pydantic import BaseModel, Field
24 | from anyschema import AnySchema
25 | 
26 | 
27 | class Product(BaseModel):
28 |     # Standard anyschema format
29 |     name: str = Field(json_schema_extra={"anyschema": {"nullable": False}})
30 | 
31 |     # OpenAPI-compatible format (with x- prefix)
32 |     price: float = Field(json_schema_extra={"x-anyschema": {"nullable": True}})
33 | 
34 | 
35 | schema = AnySchema(spec=Product)
36 | 
37 | print(f"name nullable: {schema.fields['name'].nullable}")
38 | print(f"price nullable: {schema.fields['price'].nullable}")
39 | ```
40 | 
41 | ## Why support `x-anyschema`?
42 | 
43 | There are several reasons to support the `x-anyschema` prefix:
44 | 
45 | 1. **OpenAPI Integration**: If you're generating OpenAPI specifications from Pydantic models and want to include
46 |    anyschema metadata, using the `x-` prefix makes it clear that this is an extension field.
47 | 
48 | 2. **Tool Compatibility**: Some OpenAPI tools and validators may flag unknown fields without the `x-` prefix as errors.
49 |    Using `x-anyschema` ensures better compatibility.
50 | 
51 | 3. **Standards Compliance**: Following the OpenAPI convention makes your API documentation more standardized and easier
52 |    for other developers to understand.
53 | 
54 | ## Choosing between `anyschema` and `x-anyschema`
55 | 
56 | Both formats work identically in `anyschema`. Choose based on your needs:
57 | 
58 | * Use `"anyschema"` if:
59 |     * You're only using anyschema internally
60 |     * You want cleaner, shorter metadata keys
61 |     * You're not generating OpenAPI specifications
62 | 
63 | * Use `"x-anyschema"` if:
64 |     * You're generating OpenAPI specifications
65 |     * You want to be explicit that this is an extension field
66 |     * You're integrating with OpenAPI tooling
67 |     * You want maximum standards compliance
68 | 
69 | ## Mixing both formats
70 | 
71 | !!! warning
72 |     You should **not** mix both formats in the same metadata dictionary.
73 | 
74 | If both keys are present, the plain `"anyschema"` key takes precedence and the `"x-anyschema"` entry is ignored.
75 | 
76 | ```python
77 | # ❌ Don't do this - mixing both formats
78 | metadata = {
79 |     "anyschema": {"nullable": True},
80 |     "x-anyschema": {"unique": True},  # This will be ignored!
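81 |     # the "anyschema" namespace is read first, so the "unique" flag above is ignored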
82 | } 83 | 84 | # ✅ Do this - use one format consistently 85 | metadata = { 86 | "x-anyschema": { 87 | "nullable": True, 88 | "unique": True, 89 | } 90 | } 91 | ``` 92 | 93 | ## Further Reading 94 | 95 | * [OpenAPI Specification](https://swagger.io/specification/) 96 | * [OpenAPI Extension Fields](https://swagger.io/specification/#specification-extensions) 97 | * [Pydantic and OpenAPI](https://docs.pydantic.dev/latest/concepts/json_schema/) 98 | -------------------------------------------------------------------------------- /tests/spec_to_schema/typed_dict_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Literal, Mapping, TypedDict 4 | 5 | import narwhals as nw 6 | import pytest 7 | from pydantic import BaseModel, PositiveInt 8 | 9 | from anyschema import AnySchema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.typing import TypedDictType 13 | 14 | 15 | class PersonTypedDict(TypedDict): 16 | """Simple TypedDict for testing.""" 17 | 18 | name: str 19 | age: int 20 | is_active: bool 21 | 22 | 23 | class AddressTypedDict(TypedDict): 24 | """Nested TypedDict for testing.""" 25 | 26 | street: str 27 | city: str 28 | zipcode: int 29 | 30 | 31 | class PersonWithAddressTypedDict(TypedDict): 32 | """TypedDict with nested TypedDict for testing.""" 33 | 34 | name: str 35 | age: int 36 | address: AddressTypedDict 37 | 38 | 39 | class StudentTypedDict(TypedDict): 40 | """TypedDict with list field for testing.""" 41 | 42 | name: str 43 | age: int 44 | classes: list[str] 45 | grades: list[float] 46 | 47 | 48 | class UserTypedDict(TypedDict): 49 | """TypedDict with Literal fields for testing.""" 50 | 51 | username: str 52 | role: Literal["admin", "user", "guest"] 53 | status: Literal["active", "inactive", "pending"] 54 | age: int 55 | 56 | 57 | class ConfigTypedDict(TypedDict): 58 | """TypedDict with mixed Literal types for testing.""" 59 | 60 | name: str 61 | log_level: Literal["debug", "info", "warning", "error"] 62 | max_retries: Literal[1, 2, 3, 5, 10] 63 | enabled: Literal[True, False] 64 | 65 | 66 | class ZipcodeModel(BaseModel): 67 | zipcode: PositiveInt 68 | 69 | 70 | class AddressTypedDictWithZipcodeModel(TypedDict): 71 | """TypedDict with Nested pydantic model for testing.""" 72 | 73 | street: str 74 | city: str 75 | zipcode: ZipcodeModel 76 | 77 | 78 | @pytest.mark.parametrize( 79 | ("spec", "expected_schema"), 80 | [ 81 | (PersonTypedDict, {"name": nw.String(), "age": nw.Int64(), "is_active": nw.Boolean()}), 82 | ( 83 | PersonWithAddressTypedDict, 84 | { 85 | "name": nw.String(), 86 | "age": nw.Int64(), 87 | "address": nw.Struct( 88 | [ 89 | nw.Field("street", nw.String()), 90 | nw.Field("city", nw.String()), 91 | nw.Field("zipcode", nw.Int64()), 92 | ] 93 | ), 94 | }, 95 | ), 96 | ( 97 | StudentTypedDict, 98 | {"name": nw.String(), "age": nw.Int64(), "classes": nw.List(nw.String()), "grades": nw.List(nw.Float64())}, 99 | ), 100 | ( 101 | UserTypedDict, 102 | { 103 | "username": nw.String(), 104 | "role": nw.Enum(["admin", "user", "guest"]), 105 | "status": nw.Enum(["active", "inactive", "pending"]), 106 | "age": nw.Int64(), 107 | }, 108 | ), 109 | ( 110 | ConfigTypedDict, 111 | { 112 | "name": nw.String(), 113 | "log_level": nw.Enum(["debug", "info", "warning", "error"]), 114 | "max_retries": nw.Enum([1, 2, 3, 5, 10]), # type: ignore[list-item] 115 | "enabled": nw.Enum([True, False]), # type: ignore[list-item] 116 | }, 117 | ), 118 | ( 119 | 
AddressTypedDictWithZipcodeModel, 120 | { 121 | "street": nw.String(), 122 | "city": nw.String(), 123 | "zipcode": nw.Struct([nw.Field("zipcode", nw.UInt64())]), 124 | }, 125 | ), 126 | ], 127 | ) 128 | def test_typed_dict(spec: TypedDictType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 129 | schema = AnySchema(spec=spec) 130 | nw_schema = schema._nw_schema 131 | assert nw_schema == nw.Schema(expected_schema) 132 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.lock 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # UV 99 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | #uv.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 
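115 | # in version control.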
116 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 117 | .pdm.toml 118 | .pdm-python 119 | .pdm-build/ 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | 171 | # PyPI configuration file 172 | .pypirc 173 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yaml: -------------------------------------------------------------------------------- 1 | name: "PR Checks" 2 | 3 | on: 4 | pull_request: 5 | 6 | env: 7 | DEFAULT_PYTHON: "3.14" 8 | 9 | jobs: 10 | pytest: 11 | strategy: 12 | matrix: 13 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout source code 17 | uses: actions/checkout@v6 18 | - name: Install uv 19 | uses: astral-sh/setup-uv@v7 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | enable-cache: "true" 23 | cache-dependency-glob: "pyproject.toml" 24 | - name: Run pytest 25 | run: uv run --all-extras --group tests pytest tests --cov=anyschema --cov=tests --cov-fail-under=95 26 | - name: Run doctest 27 | if: matrix.python-version == '3.14' 28 | run: uv run --all-extras --group tests pytest anyschema --doctest-modules 29 | 30 | minimum_versions: 31 | strategy: 32 | matrix: 33 | python-version: ["3.10", "3.11"] 34 | runs-on: ubuntu-latest 35 | steps: 36 | - name: Checkout source code 37 | uses: actions/checkout@v6 38 | - name: Install uv 39 | uses: astral-sh/setup-uv@v7 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | enable-cache: "true" 43 | cache-dependency-glob: "pyproject.toml" 44 | - name: Run pytest 45 | run: | 46 | uv run \ 47 | --with narwhals==2.0.0,attrs==22.1.0,pydantic==2.0.0,sqlalchemy==2.0.0 \ 48 | --all-extras --group tests \ 49 | pytest tests --cov=anyschema --cov=tests --cov-fail-under=95 50 | 51 | typing: 52 | runs-on: ubuntu-latest 53 | steps: 54 | - name: Checkout source code 55 | uses: actions/checkout@v6 56 | - name: Install uv 57 | uses: astral-sh/setup-uv@v7 58 | with: 59 | python-version: ${{ env.DEFAULT_PYTHON }} 60 | enable-cache: "true" 61 | cache-dependency-glob: "pyproject.toml" 62 | - name: Run type checking 63 | run: | 64 | uv run --all-extras --group typing ty check anyschema tests --output-format github 65 | uv run --all-extras --group typing pyright anyschema tests 66 | uv run --all-extras --group typing mypy anyschema tests 67 | 68 | 
mkdocs-build: 69 | runs-on: ubuntu-latest 70 | steps: 71 | - name: Checkout source code 72 | uses: actions/checkout@v6 73 | - name: Install uv 74 | uses: astral-sh/setup-uv@v7 75 | with: 76 | python-version: ${{ env.DEFAULT_PYTHON }} 77 | enable-cache: "true" 78 | cache-dependency-glob: "pyproject.toml" 79 | - name: Build docs strict mode 80 | run: uv run --all-extras --group docs mkdocs build --strict 81 | 82 | ruff: 83 | runs-on: ubuntu-latest 84 | steps: 85 | - name: Checkout source code 86 | uses: actions/checkout@v6 87 | - name: Install uv 88 | uses: astral-sh/setup-uv@v7 89 | with: 90 | python-version: ${{ env.DEFAULT_PYTHON }} 91 | enable-cache: "true" 92 | cache-dependency-glob: "pyproject.toml" 93 | - name: Run ruff linter and formatter 94 | run: | 95 | uvx ruff version 96 | uvx ruff format anyschema tests --diff 97 | uvx ruff check anyschema tests --output-format=github 98 | uvx ruff clean 99 | 100 | rumdl: 101 | runs-on: ubuntu-latest 102 | steps: 103 | - name: Checkout source code 104 | uses: actions/checkout@v6 105 | - name: Install uv 106 | uses: astral-sh/setup-uv@v7 107 | with: 108 | python-version: ${{ env.DEFAULT_PYTHON }} 109 | enable-cache: "true" 110 | cache-dependency-glob: "pyproject.toml" 111 | - name: Run rumdl linter and formatter 112 | run: | 113 | uv tool run rumdl version 114 | uv tool run rumdl check --output-format=github . 115 | 116 | check-typos: 117 | runs-on: ubuntu-latest 118 | steps: 119 | - name: Checkout source code 120 | uses: actions/checkout@v6 121 | - name: Check for typos 122 | uses: crate-ci/typos@master 123 | with: 124 | files: . 125 | -------------------------------------------------------------------------------- /tests/anyschema/to_arrow_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import pyarrow as pa 6 | import pytest 7 | from narwhals import Schema 8 | from pydantic import BaseModel 9 | from pydantic import Field as PydanticField 10 | 11 | from anyschema import AnySchema 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.typing import Spec 15 | 16 | 17 | class User(BaseModel): 18 | id: int = PydanticField( 19 | json_schema_extra={ 20 | "anyschema": {"nullable": False}, 21 | "description": "User ID", # Description outside anyschema namespace will end up in Field metadata 22 | } 23 | ) 24 | username: str = PydanticField(json_schema_extra={"anyschema": {"nullable": True}}) 25 | email: str | None 26 | 27 | 28 | class Product(BaseModel): 29 | name: str | None = PydanticField( 30 | json_schema_extra={ 31 | "anyschema": {"nullable": False, "description": "Product name"}, 32 | "max_length": 100, 33 | } 34 | ) 35 | price: float = PydanticField( 36 | json_schema_extra={ 37 | "anyschema": {"nullable": True}, 38 | "currency": "USD", 39 | "min": 0, 40 | } 41 | ) 42 | 43 | 44 | def test_pydantic_to_arrow(pydantic_student_cls: type[BaseModel]) -> None: 45 | anyschema = AnySchema(spec=pydantic_student_cls) 46 | pa_schema = anyschema.to_arrow() 47 | 48 | assert isinstance(pa_schema, pa.Schema) 49 | names_and_types = ( 50 | ("name", pa.string()), 51 | ("date_of_birth", pa.date32()), 52 | ("age", pa.uint64()), 53 | ("classes", pa.list_(pa.string())), 54 | ("has_graduated", pa.bool_()), 55 | ) 56 | fields: tuple[pa.Field[Any], ...] 
= tuple(pa.field(name, dtype, nullable=False) for name, dtype in names_and_types) 57 | assert pa_schema == pa.schema(fields) 58 | 59 | 60 | def test_nw_schema_to_arrow(nw_schema: Schema) -> None: 61 | unsupported_dtypes = {"array", "int128", "uint128", "decimal", "enum", "object", "unknown"} 62 | model = Schema({k: v for k, v in nw_schema.items() if k not in unsupported_dtypes}) 63 | anyschema = AnySchema(spec=model) 64 | pa_schema = anyschema.to_arrow() 65 | 66 | assert isinstance(pa_schema, pa.Schema) 67 | 68 | struct_dtype = pa.struct([("field_1", pa.string()), ("field_2", pa.bool_())]) 69 | names_and_dtypes = ( 70 | ("boolean", pa.bool_()), 71 | ("categorical", pa.dictionary(pa.uint32(), pa.string())), 72 | ("date", pa.date32()), 73 | ("datetime", pa.timestamp(unit="us", tz=None)), 74 | ("duration", pa.duration(unit="us")), 75 | ("float32", pa.float32()), 76 | ("float64", pa.float64()), 77 | ("int8", pa.int8()), 78 | ("int16", pa.int16()), 79 | ("int32", pa.int32()), 80 | ("int64", pa.int64()), 81 | ("list", pa.list_(pa.float32())), 82 | ("string", pa.string()), 83 | ("struct", struct_dtype), 84 | ("uint8", pa.uint8()), 85 | ("uint16", pa.uint16()), 86 | ("uint32", pa.uint32()), 87 | ("uint64", pa.uint64()), 88 | ) 89 | assert pa_schema == pa.schema((pa.field(name, dtype, nullable=False) for name, dtype in names_and_dtypes)) 90 | 91 | 92 | @pytest.mark.parametrize( 93 | ("spec", "expected_nullable"), 94 | [ 95 | ({"id": int, "name": str, "email": None | str}, (False, False, True)), 96 | (User, (False, True, True)), 97 | (Product, (False, True)), 98 | ], 99 | ) 100 | def test_to_arrow_nullable_flags(spec: Spec, expected_nullable: tuple[bool, ...]) -> None: 101 | schema = AnySchema(spec=spec) 102 | pa_schema = schema.to_arrow() 103 | 104 | for field, nullable in zip(pa_schema, expected_nullable, strict=True): 105 | assert field.nullable is nullable 106 | 107 | 108 | @pytest.mark.parametrize( 109 | ("spec", "expected_metadata"), 110 | [ 111 | ({"id": int, "name": str, "email": None | str}, (None, None, None)), 112 | (User, ({b"description": b"User ID"}, None, None)), 113 | (Product, ({b"max_length": b"100"}, {b"currency": b"USD", b"min": b"0"})), 114 | ], 115 | ) 116 | def test_to_arrow_with_metadata(spec: Spec, expected_metadata: tuple[dict[bytes, bytes], ...]) -> None: 117 | schema = AnySchema(spec=spec) 118 | pa_schema = schema.to_arrow() 119 | 120 | for field, _metadata in zip(pa_schema, expected_metadata, strict=True): 121 | assert field.metadata == _metadata 122 | -------------------------------------------------------------------------------- /anyschema/typing.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Callable, Generator, Mapping, Sequence 4 | from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypeAlias, TypedDict 5 | 6 | if TYPE_CHECKING: 7 | from dataclasses import Field as DataclassField 8 | from typing import ClassVar 9 | 10 | from attrs import AttrsInstance 11 | from narwhals.dtypes import DType 12 | from narwhals.schema import Schema 13 | from narwhals.typing import TimeUnit 14 | from pydantic import BaseModel 15 | from sqlalchemy import Table 16 | from sqlalchemy.orm import DeclarativeBase 17 | from sqlalchemy.sql.type_api import TypeEngine 18 | 19 | from anyschema.parsers import ParserStep 20 | 21 | AttrsClassType: TypeAlias = type[AttrsInstance] 22 | SQLAlchemyTableType: TypeAlias = Table | type[DeclarativeBase] 23 | 24 | 25 | 
IntoOrderedDict: TypeAlias = Mapping[str, type] | Sequence[tuple[str, type]]
26 | """An object that can be converted into a python [`OrderedDict`][ordered-dict].
27 | 
28 | We check whether the object is either a mapping or a sequence of 2-tuples.
29 | 
30 | [ordered-dict]: https://docs.python.org/3/library/collections.html#collections.OrderedDict
31 | """
32 | 
33 | IntoParserPipeline: TypeAlias = "Literal['auto'] | Sequence['ParserStep']"
34 | """An object that can be converted into a [`ParserPipeline`][anyschema.parsers.ParserPipeline].
35 | 
36 | Either "auto" or a sequence of [`ParserStep`][anyschema.parsers.ParserStep].
37 | """
38 | 
39 | UnknownSpec: TypeAlias = Any
40 | """An unknown specification."""
41 | 
42 | Spec: TypeAlias = "Schema | IntoOrderedDict | type[BaseModel] | DataclassType | TypedDictType | AttrsClassType | SQLAlchemyTableType | UnknownSpec"  # noqa: E501
43 | """Input specification supported directly by [`AnySchema`][anyschema.AnySchema]."""
44 | 
45 | FieldName: TypeAlias = str
46 | FieldType: TypeAlias = "type[Any] | Annotated[Any, ...] | TypeEngine[Any]"
47 | FieldConstraints: TypeAlias = tuple[Any, ...]
48 | FieldMetadata: TypeAlias = dict[str, Any]
49 | 
50 | FieldSpec: TypeAlias = tuple[FieldName, FieldType, FieldConstraints, FieldMetadata]
51 | """Field specification: alias for a tuple of `(str, type, tuple(constraints, ...), dict(metadata))`."""
52 | 
53 | FieldSpecIterable: TypeAlias = Generator[FieldSpec, None, None]
54 | """Return type of an adapter."""
55 | 
56 | Adapter: TypeAlias = Callable[[Any], FieldSpecIterable]
57 | """Adapter expected signature.
58 | 
59 | An adapter is a callable that adapts a spec into field specifications.
60 | """
61 | 
62 | 
63 | class DataclassInstance(Protocol):
64 |     """Protocol that represents a dataclass in Python."""
65 | 
66 |     # dataclasses are runtime composed entities making them tricky to type; this may not work perfectly
67 |     # with all type checkers
68 |     # code adapted from typeshed:
69 |     # https://github.com/python/typeshed/blob/9ab7fde0a0cd24ed7a72837fcb21093b811b80d8/stdlib/_typeshed/__init__.pyi#L351
70 |     __dataclass_fields__: ClassVar[dict[str, DataclassField[Any]]]
71 | 
72 | 
73 | DataclassType = type[DataclassInstance]
74 | 
75 | 
76 | class TypedDictType(Protocol):
77 |     """Protocol that represents a TypedDict in Python."""
78 | 
79 |     __annotations__: dict[str, type]
80 |     __required_keys__: frozenset[str]
81 |     __optional_keys__: frozenset[str]
82 | 
83 | 
84 | class AnySchemaMetadata(TypedDict, total=False):
85 |     """TypedDict for anyschema-specific metadata keys.
86 | 
87 |     This structure defines the nested metadata format that anyschema recognizes
88 |     for controlling field parsing behavior. All keys are optional.
89 | 
90 |     Attributes:
91 |         description: Human-readable description of the field.
92 |         dtype: Narwhals DType (or its serialized/string representation).
93 |         nullable: Whether the field can contain null values.
94 |         time_zone: Timezone for datetime fields (e.g., "UTC", "Europe/Berlin").
95 |         time_unit: Time precision for datetime fields ("s", "ms", "us", "ns").
96 |         unique: Whether all values in the field must be unique.
97 | 98 | Examples: 99 | >>> metadata: AnySchemaMetadata = {"nullable": True, "time_zone": "UTC"} 100 | >>> metadata["unique"] = False 101 | """ 102 | 103 | description: str | None 104 | dtype: str | DType 105 | nullable: bool 106 | time_zone: str 107 | time_unit: TimeUnit 108 | unique: bool 109 | 110 | 111 | AnySchemaMetadataKey: TypeAlias = Literal["description", "dtype", "nullable", "time_zone", "time_unit", "unique"] 112 | AnySchemaNamespaceKey: TypeAlias = Literal["anyschema", "x-anyschema"] 113 | -------------------------------------------------------------------------------- /tests/parsers/parsers_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Annotated, Optional 4 | 5 | import narwhals as nw 6 | import pytest 7 | from annotated_types import Gt 8 | from pydantic import BaseModel, PositiveInt 9 | 10 | from anyschema.parsers import ( 11 | AnnotatedStep, 12 | ForwardRefStep, 13 | ParserPipeline, 14 | ParserStep, 15 | PyTypeStep, 16 | UnionTypeStep, 17 | make_pipeline, 18 | ) 19 | from anyschema.parsers.annotated_types import AnnotatedTypesStep 20 | from anyschema.parsers.attrs import AttrsTypeStep 21 | from anyschema.parsers.pydantic import PydanticTypeStep 22 | from anyschema.parsers.sqlalchemy import SQLAlchemyTypeStep 23 | 24 | AUTO_PIPELINE_CLS_ORDER = ( 25 | ForwardRefStep, 26 | UnionTypeStep, 27 | AnnotatedStep, 28 | AnnotatedTypesStep, 29 | AttrsTypeStep, 30 | PydanticTypeStep, 31 | SQLAlchemyTypeStep, 32 | PyTypeStep, 33 | ) 34 | 35 | PY_TYPE_STEP = PyTypeStep() 36 | 37 | 38 | class Address(BaseModel): 39 | street: str 40 | city: str 41 | 42 | 43 | class Person(BaseModel): 44 | name: str 45 | address: Address 46 | 47 | 48 | def test_make_pipeline_auto(auto_pipeline: ParserPipeline) -> None: 49 | assert isinstance(auto_pipeline, ParserPipeline) 50 | assert len(auto_pipeline.steps) == len(AUTO_PIPELINE_CLS_ORDER) 51 | 52 | for _parser, _cls in zip(auto_pipeline.steps, AUTO_PIPELINE_CLS_ORDER, strict=True): 53 | assert isinstance(_parser, _cls) 54 | assert _parser.pipeline is auto_pipeline 55 | 56 | 57 | @pytest.mark.parametrize( 58 | "steps", 59 | [ 60 | (PyTypeStep(),), 61 | (UnionTypeStep(), PyTypeStep()), 62 | (UnionTypeStep(), AnnotatedStep(), PyTypeStep()), 63 | ], 64 | ) 65 | def test_make_pipeline_custom(steps: tuple[ParserStep, ...]) -> None: 66 | pipeline = make_pipeline(steps) 67 | assert isinstance(pipeline, ParserPipeline) 68 | assert len(pipeline.steps) == len(steps) 69 | 70 | for _pipeline_parser, _parser in zip(pipeline.steps, steps, strict=True): 71 | assert _parser is _pipeline_parser 72 | assert _parser.pipeline is pipeline 73 | 74 | 75 | def test_make_pipeline_invalid_steps() -> None: 76 | class NotAParserStep: 77 | """A class that is not a ParserStep.""" 78 | 79 | invalid_steps = [PyTypeStep(), NotAParserStep(), "also not a step"] 80 | expected_msg = "Expected a sequence of `ParserStep` instances, found.*NotAParserStep.*str" 81 | 82 | with pytest.raises(TypeError, match=expected_msg): 83 | make_pipeline(invalid_steps) # type: ignore[arg-type] 84 | 85 | 86 | @pytest.mark.parametrize( 87 | ("input_type", "expected"), 88 | [ 89 | (int, nw.Int64()), 90 | (str, nw.String()), 91 | (list[int], nw.List(nw.Int64())), 92 | (Optional[int], nw.Int64()), 93 | (list[str], nw.List(nw.String())), 94 | (Optional[float], nw.Float64()), 95 | (Annotated[int, Gt(0)], nw.UInt64()), 96 | (PositiveInt, nw.UInt64()), 97 | (Optional[str], nw.String()), 98 | (list[list[int]], 
nw.List(nw.List(nw.Int64()))), 99 | (Optional[Annotated[int, Gt(0)]], nw.UInt64()), 100 | (Annotated[Optional[int], "meta"], nw.Int64()), 101 | (Optional[list[int]], nw.List(nw.Int64())), 102 | (list[Optional[int]], nw.List(nw.Int64())), 103 | ], 104 | ) 105 | def test_non_nested_parsing(auto_pipeline: ParserPipeline, input_type: type, expected: nw.dtypes.DType) -> None: 106 | result = auto_pipeline.parse(input_type, (), {}) 107 | assert result == expected 108 | 109 | 110 | @pytest.mark.parametrize( 111 | ("input_type", "expected"), 112 | [ 113 | (Address, nw.Struct([nw.Field(name="street", dtype=nw.String()), nw.Field(name="city", dtype=nw.String())])), 114 | ( 115 | Person, 116 | nw.Struct( 117 | [ 118 | nw.Field(name="name", dtype=nw.String()), 119 | nw.Field( 120 | name="address", 121 | dtype=nw.Struct( 122 | [ 123 | nw.Field(name="street", dtype=nw.String()), 124 | nw.Field(name="city", dtype=nw.String()), 125 | ] 126 | ), 127 | ), 128 | ] 129 | ), 130 | ), 131 | ], 132 | ) 133 | def test_nested_parsing(auto_pipeline: ParserPipeline, input_type: type, expected: nw.dtypes.DType) -> None: 134 | result = auto_pipeline.parse(input_type, (), {}) 135 | assert result == expected 136 | -------------------------------------------------------------------------------- /tests/pydantic/list_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated, Optional 4 | 5 | import narwhals as nw 6 | from annotated_types import Interval, Len 7 | from pydantic import BaseModel 8 | 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def test_parse_list_optional_outer(auto_pipeline: ParserPipeline) -> None: 16 | class ListModel(BaseModel): 17 | # python list[...] type 18 | py_list: list[int] 19 | py_list_optional: list[str] | None 20 | py_list_or_none: list[float] | None 21 | none_or_py_list: None | list[bool] 22 | 23 | # pydantic conlist type 24 | con_list: Annotated[list[int], Len(min_length=2)] 25 | con_list_optional: Optional[Annotated[list[str], Len(max_length=6)]] 26 | con_list_or_none: Annotated[list[float], Len(0)] | None 27 | none_or_con_list: None | Annotated[list[bool], Len(0)] 28 | 29 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 30 | expected = { 31 | "py_list": nw.List(nw.Int64()), 32 | "py_list_optional": nw.List(nw.String()), 33 | "py_list_or_none": nw.List(nw.Float64()), 34 | "none_or_py_list": nw.List(nw.Boolean()), 35 | "con_list": nw.List(nw.Int64()), 36 | "con_list_optional": nw.List(nw.String()), 37 | "con_list_or_none": nw.List(nw.Float64()), 38 | "none_or_con_list": nw.List(nw.Boolean()), 39 | } 40 | assert schema == expected 41 | 42 | 43 | def test_parse_list_optional_inner(auto_pipeline: ParserPipeline) -> None: 44 | class ListModel(BaseModel): 45 | # python list[...] 
type 46 | py_list_optional: list[str | None] 47 | py_list_or_none: list[float | None] | None 48 | none_or_py_list: list[None | bool] 49 | 50 | # pydantic conlist type 51 | con_list_optional: Annotated[list[Optional[int]], Len(min_length=2)] 52 | con_list_or_none: Annotated[list[str | None], Len(max_length=6)] 53 | none_or_con_list: Annotated[list[None | float], Len(0)] 54 | 55 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 56 | expected = { 57 | "py_list_optional": nw.List(nw.String()), 58 | "py_list_or_none": nw.List(nw.Float64()), 59 | "none_or_py_list": nw.List(nw.Boolean()), 60 | "con_list_optional": nw.List(nw.Int64()), 61 | "con_list_or_none": nw.List(nw.String()), 62 | "none_or_con_list": nw.List(nw.Float64()), 63 | } 64 | assert schema == expected 65 | 66 | 67 | def test_parse_list_optional_outer_and_inner(auto_pipeline: ParserPipeline) -> None: 68 | class ListModel(BaseModel): 69 | # python list[...] type 70 | py_list_optional_optional: list[int | None] | None 71 | py_list_optional_none: list[str | None] | None 72 | py_list_none_optional: list[float | None] | None 73 | py_list_none_none: list[None | bool] | None 74 | 75 | # pydantic conlist type 76 | con_list_optional_optional: Optional[Annotated[list[Optional[int]], Len(min_length=2)]] 77 | con_list_optional_none: Annotated[list[Optional[str]], Len(max_length=6)] | None 78 | con_list_none_optional: Optional[Annotated[list[float | None], Len(0)]] 79 | con_list_none_none: Annotated[list[None | bool], Len(0)] | None 80 | 81 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 82 | expected = { 83 | "py_list_optional_optional": nw.List(nw.Int64()), 84 | "py_list_optional_none": nw.List(nw.String()), 85 | "py_list_none_optional": nw.List(nw.Float64()), 86 | "py_list_none_none": nw.List(nw.Boolean()), 87 | "con_list_optional_optional": nw.List(nw.Int64()), 88 | "con_list_optional_none": nw.List(nw.String()), 89 | "con_list_none_optional": nw.List(nw.Float64()), 90 | "con_list_none_none": nw.List(nw.Boolean()), 91 | } 92 | assert schema == expected 93 | 94 | 95 | def test_parse_conlist_conint(auto_pipeline: ParserPipeline) -> None: 96 | class ListModel(BaseModel): 97 | # python list[...] 
type 98 | py_list_int8: list[Annotated[int, Interval(gt=-64, lt=64)]] | None 99 | py_list_uint8: list[Annotated[int, Interval(gt=0, lt=64)] | None] 100 | 101 | # pydantic conlist type 102 | con_list_int8: Annotated[list[None | Annotated[int, Interval(gt=-64, lt=64)]], Len(0)] 103 | con_list_uint8: Annotated[list[Optional[Annotated[int, Interval(gt=0, lt=64)]]], Len(0)] 104 | 105 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 106 | expected = { 107 | "py_list_int8": nw.List(nw.Int8()), 108 | "py_list_uint8": nw.List(nw.UInt8()), 109 | "con_list_int8": nw.List(nw.Int8()), 110 | "con_list_uint8": nw.List(nw.UInt8()), 111 | } 112 | assert schema == expected 113 | -------------------------------------------------------------------------------- /tests/spec_to_schema/sqlalchemy_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import ( 10 | ComplexORM, 11 | EventORMWithTimeMetadata, 12 | ProductORM, 13 | SimpleUserORM, 14 | array_fixed_table, 15 | array_list_table, 16 | bigint_table, 17 | complex_table, 18 | event_table_with_time_metadata, 19 | event_table_with_tz_aware, 20 | user_table, 21 | ) 22 | 23 | if TYPE_CHECKING: 24 | from anyschema.typing import SQLAlchemyTableType 25 | 26 | 27 | @pytest.mark.parametrize( 28 | ("spec", "expected_schema"), 29 | [ 30 | # ORM models 31 | ( 32 | SimpleUserORM, 33 | { 34 | "id": nw.Int32(), 35 | "name": nw.String(), 36 | }, 37 | ), 38 | ( 39 | ProductORM, 40 | { 41 | "id": nw.Int32(), 42 | "name": nw.String(), 43 | "price": nw.Float32(), 44 | "in_stock": nw.Boolean(), 45 | }, 46 | ), 47 | ( 48 | ComplexORM, 49 | { 50 | "id": nw.Int32(), 51 | "name": nw.String(), 52 | "description": nw.String(), 53 | "age": nw.Int32(), 54 | "score": nw.Float32(), 55 | "is_active": nw.Boolean(), 56 | "created_at": nw.Datetime(), 57 | "birth_date": nw.Date(), 58 | }, 59 | ), 60 | # Core tables 61 | ( 62 | user_table, 63 | { 64 | "id": nw.Int32(), 65 | "name": nw.String(), 66 | "age": nw.Int32(), 67 | "email": nw.String(), 68 | }, 69 | ), 70 | ( 71 | complex_table, 72 | { 73 | "id": nw.Int32(), 74 | "name": nw.String(), 75 | "description": nw.String(), 76 | "age": nw.Int32(), 77 | "score": nw.Float32(), 78 | "is_active": nw.Boolean(), 79 | "created_at": nw.Datetime(), 80 | "birth_date": nw.Date(), 81 | }, 82 | ), 83 | ( 84 | bigint_table, 85 | { 86 | "id": nw.Int64(), 87 | "count": nw.Int64(), 88 | }, 89 | ), 90 | # Array types - List (no dimensions) 91 | ( 92 | array_list_table, 93 | { 94 | "id": nw.Int32(), 95 | "tags": nw.List(nw.String()), 96 | "scores": nw.List(nw.Float32()), 97 | }, 98 | ), 99 | # Array types - Fixed dimensions (Array) 100 | ( 101 | array_fixed_table, 102 | { 103 | "id": nw.Int32(), 104 | "coordinates": nw.Array(nw.Float32(), shape=(3,)), 105 | "matrix": nw.Array(nw.Int32(), shape=(2,)), 106 | }, 107 | ), 108 | ], 109 | ) 110 | def test_sqlalchemy_spec(spec: SQLAlchemyTableType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 111 | schema = AnySchema(spec=spec) 112 | nw_schema = schema._nw_schema 113 | assert nw_schema == nw.Schema(expected_schema) 114 | 115 | 116 | @pytest.mark.parametrize( 117 | ("spec", "expected_schema"), 118 | [ 119 | # Table with time metadata 120 | ( 121 | event_table_with_time_metadata, 122 | { 123 | "id": nw.Int32(), 124 | "name": nw.String(), 125 | "created_at": 
nw.Datetime(), 126 | "scheduled_at": nw.Datetime(time_zone="UTC"), 127 | "started_at": nw.Datetime(time_unit="ms"), 128 | "completed_at": nw.Datetime(time_unit="ns", time_zone="Europe/Berlin"), 129 | }, 130 | ), 131 | # ORM with time metadata 132 | ( 133 | EventORMWithTimeMetadata, 134 | { 135 | "id": nw.Int32(), 136 | "name": nw.String(), 137 | "created_at": nw.Datetime(), 138 | "scheduled_at": nw.Datetime(time_zone="UTC"), 139 | "started_at": nw.Datetime(time_unit="ms"), 140 | "completed_at": nw.Datetime(time_unit="ns", time_zone="Europe/Berlin"), 141 | }, 142 | ), 143 | # Table with timezone-aware datetime 144 | ( 145 | event_table_with_tz_aware, 146 | { 147 | "id": nw.Int32(), 148 | "timestamp_utc": nw.Datetime(time_zone="UTC"), 149 | "timestamp_berlin": nw.Datetime(time_unit="ms", time_zone="Europe/Berlin"), 150 | }, 151 | ), 152 | ], 153 | ) 154 | def test_sqlalchemy_spec_with_time_metadata( 155 | spec: SQLAlchemyTableType, expected_schema: Mapping[str, nw.dtypes.DType] 156 | ) -> None: 157 | """Test that SQLAlchemy specs with time metadata are correctly converted to narwhals schema.""" 158 | schema = AnySchema(spec=spec) 159 | nw_schema = schema._nw_schema 160 | assert nw_schema == nw.Schema(expected_schema) 161 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "anyschema" 7 | version = "0.3.0" 8 | description = "From type specifications to dataframe schemas" 9 | readme = "README.md" 10 | license = {file = "LICENSE"} 11 | authors = [{ name = "Francesco Bruzzesi"}] 12 | requires-python = ">=3.10" 13 | dependencies = [ 14 | "narwhals>=2.0.0", 15 | "typing_extensions>=4.14.0", 16 | ] 17 | 18 | classifiers = [ 19 | "Development Status :: 3 - Alpha", 20 | "License :: OSI Approved :: Apache Software License", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Programming Language :: Python :: 3.13", 28 | "Programming Language :: Python :: 3.14", 29 | "Typing :: Typed" 30 | ] 31 | 32 | [project.urls] 33 | Homepage = "https://github.com/fbruzzesi/anyschema" 34 | Documentation = "https://fbruzzesi.github.io/anyschema/" 35 | Repository = "https://github.com/fbruzzesi/anyschema" 36 | "Issue Tracker" = "https://github.com/fbruzzesi/anyschema/issues" 37 | 38 | [project.optional-dependencies] 39 | attrs = ["attrs>=22.1.0"] 40 | pydantic = ["pydantic>=2.0.0"] 41 | sqlalchemy = ["sqlalchemy>=2.0.0"] 42 | 43 | [dependency-groups] 44 | tests = [ 45 | "anyschema[attrs,pydantic,sqlalchemy]", 46 | "covdefaults>=2.3.0", 47 | "hypothesis>=6.0.0", 48 | "narwhals[pandas,polars,pyarrow]", 49 | "pydantic-extra-types[pycountry,phonenumbers]", 50 | "pytest>=8.0.0", 51 | "pytest-cov>=6.0.0", 52 | "pytest-xdist>=3.8.0", 53 | ] 54 | 55 | typing = [ 56 | "mypy", 57 | "pandas-stubs", 58 | "pyarrow-stubs", 59 | "pyright", 60 | "ty", 61 | {include-group = "tests"}, 62 | ] 63 | 64 | docs = [ 65 | "anyschema[attrs,pydantic,sqlalchemy]", 66 | "black", # required by mkdocstrings_handlers 67 | "markdown-exec[ansi]", 68 | "mkdocs", 69 | "mkdocs-material", 70 | "mkdocstrings[python]", 71 | "mkdocstrings-python", 72 | "mkdocs-autorefs", 73 | 
"narwhals[pandas,polars,pyarrow]", 74 | "pytest", 75 | ] 76 | 77 | local-dev = [ 78 | "pre-commit", 79 | "ruff", 80 | {include-group = "tests"}, 81 | {include-group = "typing"}, 82 | {include-group = "docs"} 83 | ] 84 | 85 | [tool.hatch.build.targets.sdist] 86 | include = [ 87 | "anyschema/*", 88 | "tests/*", 89 | ] 90 | 91 | [tool.hatch.build.targets.wheel] 92 | include = [ 93 | "anyschema/*", 94 | ] 95 | 96 | [tool.pytest.ini_options] 97 | testpaths = ["tests"] 98 | filterwarnings = [ 99 | "error", 100 | ] 101 | xfail_strict = true 102 | 103 | [tool.ruff] 104 | line-length = 120 105 | target-version = "py310" 106 | fix = true 107 | 108 | [tool.ruff.lint] 109 | select = ["ALL"] 110 | ignore = [ 111 | "ANN401", 112 | "PLC0415", 113 | "UP045", 114 | "COM812", 115 | "ISC001", 116 | "D100", # Checks for undocumented public module definitions. 117 | "D104", # Checks for undocumented public package definitions. 118 | "D107", # Checks for public __init__ method definitions that are missing docstrings. 119 | "D205", 120 | "RET505", 121 | "FIX002", 122 | "TD003", 123 | "PLR0911", # Too many return statements 124 | ] 125 | 126 | [tool.ruff.lint.per-file-ignores] 127 | "tests/*" = [ 128 | "D", 129 | "S101", 130 | "UP", 131 | "SLF", 132 | ] 133 | 134 | [tool.ruff.lint.isort] 135 | required-imports = ["from __future__ import annotations"] 136 | 137 | [tool.ruff.lint.pydocstyle] 138 | convention = "google" 139 | 140 | [tool.ruff.lint.pylint] 141 | max-args = 6 142 | 143 | [tool.ruff.lint.pyupgrade] 144 | keep-runtime-typing = true 145 | 146 | [tool.ruff.format] 147 | docstring-code-format = true 148 | 149 | [tool.mypy] 150 | disallow_untyped_defs = true 151 | disallow_any_unimported = true 152 | no_implicit_optional = true 153 | check_untyped_defs = true 154 | warn_return_any = true 155 | show_error_codes = true 156 | warn_unused_ignores = true 157 | pretty = true 158 | strict = true 159 | files = ["anyschema", "tests"] 160 | plugins = [ 161 | "sqlalchemy.ext.mypy.plugin", 162 | "pydantic.mypy", 163 | ] 164 | 165 | 166 | [tool.pyright] 167 | pythonPlatform = "All" 168 | pythonVersion = "3.10" 169 | reportMissingTypeStubs = "none" 170 | reportPrivateUsage = "none" 171 | reportUnknownVariableType = "none" 172 | reportUnknownArgumentType = "none" 173 | reportUnknownMemberType = "none" 174 | reportUnknownLambdaType = "none" 175 | reportUnusedFunction = "none" 176 | enableExperimentalFeatures = true 177 | typeCheckingMode = "strict" 178 | 179 | include = ["anyschema", "tests"] 180 | 181 | [tool.coverage.run] 182 | plugins = ["covdefaults"] 183 | 184 | [tool.coverage.report] 185 | fail_under = 95 186 | omit = [ 187 | "anyschema/typing.py", 188 | ] 189 | 190 | [tool.rumdl] 191 | line-length = 120 192 | flavor = "mkdocs" 193 | include = ["docs/**", "README.md", "AGENTS.md"] 194 | disable = [ 195 | # NOTE: These look like either false positives or unsupported case 196 | # TODO(FBruzzesi): Report upstream 197 | # No blank line after admonition block 198 | "MD031", 199 | # Use fenced code blocks 200 | "MD046", 201 | ] 202 | 203 | [tool.rumdl.MD007] 204 | # Keep list indentation consistent 205 | indent = 4 206 | start-indented = false 207 | -------------------------------------------------------------------------------- /tests/field/metadata_mutation_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass, field 4 | from typing import Optional 5 | 6 | import attrs 7 | from pydantic import BaseModel 8 
| from pydantic import Field as PydanticField 9 | 10 | from anyschema import AnySchema 11 | 12 | 13 | def test_pydantic_field_metadata_not_mutated_by_optional() -> None: 14 | """Test that parsing Optional fields doesn't mutate Pydantic Field metadata.""" 15 | 16 | class User(BaseModel): 17 | name: str = PydanticField(json_schema_extra={"anyschema": {"description": "User name"}}) 18 | email: Optional[str] = PydanticField(json_schema_extra={"format": "email"}) 19 | 20 | name_metadata_before = {"anyschema": {"description": "User name"}} # literal snapshot, not a reference 21 | email_metadata_before = {"format": "email"} # comparing a live reference to itself could never detect in-place mutation 22 | 23 | schema = AnySchema(spec=User) 24 | 25 | assert schema.fields["name"].nullable is False 26 | assert schema.fields["email"].nullable is True 27 | 28 | name_metadata_after = User.model_fields["name"].json_schema_extra 29 | email_metadata_after = User.model_fields["email"].json_schema_extra 30 | 31 | # !NOTE: Ensure original metadata was not mutated 32 | assert name_metadata_before == name_metadata_after 33 | assert email_metadata_before == email_metadata_after 34 | 35 | 36 | def test_pydantic_field_metadata_with_explicit_anyschema_keys() -> None: 37 | """Test that existing anyschema/* keys in Pydantic metadata are not modified.""" 38 | 39 | class Product(BaseModel): 40 | id: int = PydanticField( 41 | json_schema_extra={ 42 | "anyschema": {"nullable": False, "unique": True, "description": "Product ID"}, 43 | } 44 | ) 45 | name: Optional[str] = PydanticField( 46 | json_schema_extra={ 47 | "anyschema": {"nullable": True}, 48 | "max_length": 100, 49 | } 50 | ) 51 | 52 | schema = AnySchema(spec=Product) 53 | 54 | assert schema.fields["id"].nullable is False 55 | assert schema.fields["id"].unique is True 56 | 57 | id_metadata_after = Product.model_fields["id"].json_schema_extra 58 | name_metadata_after = Product.model_fields["name"].json_schema_extra 59 | 60 | assert id_metadata_after == {"anyschema": {"nullable": False, "unique": True, "description": "Product ID"}} 61 | assert name_metadata_after == {"anyschema": {"nullable": True}, "max_length": 100} 62 | 63 | 64 | def test_dataclass_field_metadata_not_mutated() -> None: 65 | """Test that parsing doesn't mutate dataclass field metadata.""" 66 | 67 | @dataclass 68 | class Person: 69 | name: str = field(metadata={"description": "Person name"}) 70 | email: Optional[str] = field(metadata={"format": "email"}) 71 | 72 | # Get original metadata (dataclass fields are in __dataclass_fields__) 73 | name_field_before = Person.__dataclass_fields__["name"] 74 | email_field_before = Person.__dataclass_fields__["email"] 75 | name_metadata_before = dict(name_field_before.metadata) 76 | email_metadata_before = dict(email_field_before.metadata) 77 | 78 | schema = AnySchema(spec=Person) 79 | 80 | assert schema.fields["name"].nullable is False 81 | assert schema.fields["email"].nullable is True 82 | 83 | name_field_after = Person.__dataclass_fields__["name"] 84 | email_field_after = Person.__dataclass_fields__["email"] 85 | 86 | # !NOTE: Original metadata should not be mutated 87 | assert dict(name_field_after.metadata) == name_metadata_before 88 | assert dict(email_field_after.metadata) == email_metadata_before 89 | 90 | 91 | def test_attrs_field_metadata_not_mutated() -> None: 92 | """Test that parsing doesn't mutate attrs field metadata.""" 93 | 94 | @attrs.define 95 | class Book: 96 | title: str = attrs.field(metadata={"anyschema": {"description": "Book title"}}) 97 | isbn: Optional[str] = attrs.field(metadata={"format": "isbn"}) 98 | 99 | # Get
original metadata 100 | attrs_fields = attrs.fields(Book) 101 | title_field_before = next(f for f in attrs_fields if f.name == "title") 102 | isbn_field_before = next(f for f in attrs_fields if f.name == "isbn") 103 | title_metadata_before = dict(title_field_before.metadata) 104 | isbn_metadata_before = dict(isbn_field_before.metadata) 105 | 106 | schema = AnySchema(spec=Book) 107 | 108 | assert schema.fields["title"].nullable is False 109 | assert schema.fields["isbn"].nullable is True 110 | 111 | # Get metadata after 112 | attrs_fields_after = attrs.fields(Book) 113 | title_field_after = next(f for f in attrs_fields_after if f.name == "title") 114 | isbn_field_after = next(f for f in attrs_fields_after if f.name == "isbn") 115 | 116 | # !NOTE: Original metadata should not be mutated 117 | assert dict(title_field_after.metadata) == title_metadata_before 118 | assert dict(isbn_field_after.metadata) == isbn_metadata_before 119 | 120 | 121 | def test_dict_spec_is_safe() -> None: 122 | """Test that dict specs work correctly (they don't share metadata).""" 123 | spec = {"id": int, "name": Optional[str]} 124 | 125 | schema1 = AnySchema(spec=spec) 126 | schema2 = AnySchema(spec=spec) 127 | 128 | assert schema1.fields["name"].nullable is True 129 | assert schema2.fields["name"].nullable is True 130 | 131 | assert spec == {"id": int, "name": Optional[str]} 132 | -------------------------------------------------------------------------------- /anyschema/parsers/pydantic.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | from pydantic import AwareDatetime, BaseModel, FutureDate, FutureDatetime, NaiveDatetime, PastDate, PastDatetime 7 | 8 | from anyschema._dependencies import is_pydantic_base_model 9 | from anyschema._metadata import get_anyschema_value_by_key 10 | from anyschema.exceptions import UnsupportedDTypeError 11 | from anyschema.parsers._base import ParserStep 12 | 13 | if TYPE_CHECKING: 14 | from narwhals.dtypes import DType 15 | 16 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType 17 | 18 | __all__ = ("PydanticTypeStep",) 19 | 20 | 21 | class PydanticTypeStep(ParserStep): 22 | """Parser for Pydantic-specific types. 23 | 24 | Handles: 25 | 26 | - Pydantic datetime types (`AwareDatetime`, `NaiveDatetime`, etc.) 27 | - Pydantic date types (`PastDate`, `FutureDate`) 28 | - Pydantic `BaseModel` (Struct types) 29 | 30 | Warning: 31 | It requires [pydantic](https://docs.pydantic.dev/latest/) to be installed. 32 | """ 33 | 34 | def parse( 35 | self, 36 | input_type: FieldType, 37 | constraints: FieldConstraints, # noqa: ARG002 38 | metadata: FieldMetadata, 39 | ) -> DType | None: 40 | """Parse Pydantic-specific types into Narwhals dtypes. 41 | 42 | Arguments: 43 | input_type: The type to parse. 44 | constraints: Constraints associated with the type. 45 | metadata: Custom metadata dictionary. 46 | 47 | Returns: 48 | A Narwhals DType if this parser can handle the type, None otherwise. 49 | """ 50 | # Check if it's a type/class first (not a generic alias or other special form) 51 | if not isinstance(input_type, type): 52 | return None 53 | 54 | # Handle AwareDatetime 55 | if issubclass(input_type, AwareDatetime): # pyright: ignore[reportArgumentType] # ty: ignore[invalid-argument-type] 56 | # Pydantic AwareDatetime does not fix a single timezone, but any timezone would work. 
57 | # See https://github.com/pydantic/pydantic/issues/5829 58 | # Unless a timezone is specified via {"anyschema": {"time_zone": ...}}, we raise an error. 59 | if (time_zone := get_anyschema_value_by_key(metadata, key="time_zone")) is None: 60 | msg = ( 61 | "pydantic AwareDatetime does not specify a fixed timezone.\n\n" 62 | "Hint: You can specify a timezone via " 63 | "`Field(..., json_schema_extra={'anyschema': {'time_zone': 'UTC'}})`" 64 | ) 65 | raise UnsupportedDTypeError(msg) 66 | 67 | return nw.Datetime( 68 | time_unit=get_anyschema_value_by_key(metadata, key="time_unit", default="us"), time_zone=time_zone 69 | ) 70 | 71 | if issubclass(input_type, NaiveDatetime): # pyright: ignore[reportArgumentType] # ty: ignore[invalid-argument-type] 72 | # Pydantic NaiveDatetime should not receive a timezone. 73 | # If a timezone is specified via {"anyschema": {"time_zone": ...}}, we raise an error. 74 | if (time_zone := get_anyschema_value_by_key(metadata, key="time_zone")) is not None: 75 | msg = f"pydantic NaiveDatetime should not specify a timezone, found {time_zone}." 76 | raise UnsupportedDTypeError(msg) 77 | 78 | return nw.Datetime( 79 | time_unit=get_anyschema_value_by_key(metadata, key="time_unit", default="us"), time_zone=None 80 | ) 81 | 82 | # Handle datetime types 83 | if issubclass(input_type, (PastDatetime, FutureDatetime)): # pyright: ignore[reportArgumentType] 84 | # PastDatetime and FutureDatetime accept both aware and naive datetimes. 85 | return nw.Datetime( 86 | time_unit=get_anyschema_value_by_key(metadata, key="time_unit", default="us"), 87 | time_zone=get_anyschema_value_by_key(metadata, key="time_zone"), 88 | ) 89 | 90 | # Handle date types 91 | if issubclass(input_type, (PastDate, FutureDate)): # pyright: ignore[reportArgumentType] 92 | return nw.Date() 93 | 94 | # Handle Pydantic models (Struct types) 95 | if is_pydantic_base_model(input_type): 96 | return self._parse_pydantic_model(input_type) 97 | 98 | # TODO(FBruzzesi): Add support for more pydantic types. See https://github.com/FBruzzesi/anyschema/issues/45 99 | 100 | # This parser doesn't handle this type 101 | return None 102 | 103 | def _parse_pydantic_model(self, model: type[BaseModel]) -> DType: 104 | """Parse a Pydantic model into a Struct type. 105 | 106 | Arguments: 107 | model: The Pydantic model class or instance. 108 | 109 | Returns: 110 | A Narwhals Struct dtype. 
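Examples: For illustration only (hypothetical model, and assuming the default pipeline maps plain `int` to `nw.Int64()`): parsing `class Point(BaseModel): x: int` would yield `nw.Struct([nw.Field(name="x", dtype=nw.Int64())])`.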
111 | """ 112 | from anyschema.adapters import pydantic_adapter 113 | 114 | return nw.Struct( 115 | [ 116 | nw.Field( 117 | name=field_name, 118 | dtype=self.pipeline.parse(field_info, field_constraints, field_metadata, strict=True), 119 | ) 120 | for field_name, field_info, field_constraints, field_metadata in pydantic_adapter(model) 121 | ] 122 | ) 123 | -------------------------------------------------------------------------------- /tests/parsers/sqlalchemy_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from enum import Enum 5 | from typing import TYPE_CHECKING, Any 6 | 7 | import narwhals as nw 8 | import pytest 9 | from sqlalchemy import types as sqltypes 10 | 11 | from anyschema.exceptions import UnsupportedDTypeError 12 | from anyschema.parsers import make_pipeline 13 | from anyschema.parsers.sqlalchemy import SQLAlchemyTypeStep 14 | 15 | if TYPE_CHECKING: 16 | from narwhals.typing import TimeUnit 17 | 18 | 19 | @pytest.fixture 20 | def sqlalchemy_step() -> SQLAlchemyTypeStep: 21 | """Create a SQLAlchemyTypeStep with pipeline.""" 22 | step = SQLAlchemyTypeStep() 23 | _ = make_pipeline(steps=[step]) 24 | return step 25 | 26 | 27 | class Color(Enum): 28 | RED = 1 29 | BLUE = 2 30 | 31 | 32 | @pytest.mark.parametrize( 33 | ("input_type", "expected"), 34 | # SQLAlchemy types have incomplete generic parameters 35 | [ 36 | (sqltypes.Boolean(), nw.Boolean()), 37 | (sqltypes.SmallInteger(), nw.Int16()), 38 | (sqltypes.Integer(), nw.Int32()), 39 | (sqltypes.BigInteger(), nw.Int64()), 40 | (sqltypes.String(50), nw.String()), 41 | (sqltypes.Text(), nw.String()), 42 | (sqltypes.Unicode(50), nw.String()), 43 | (sqltypes.UnicodeText(), nw.String()), 44 | (sqltypes.Float(), nw.Float32()), 45 | (sqltypes.Double(), nw.Float64()), 46 | (sqltypes.Numeric(10, 2), nw.Float64()), 47 | (sqltypes.DECIMAL(10, 2), nw.Decimal()), 48 | (sqltypes.Date(), nw.Date()), 49 | (sqltypes.DateTime(), nw.Datetime()), 50 | (sqltypes.TIMESTAMP(), nw.Datetime()), 51 | (sqltypes.Time(), nw.Time()), 52 | (sqltypes.Interval(), nw.Duration()), 53 | (sqltypes.LargeBinary(), nw.Binary()), 54 | (sqltypes.BINARY(), nw.Binary()), 55 | (sqltypes.VARBINARY(), nw.Binary()), 56 | (sqltypes.JSON(), nw.String()), 57 | (sqltypes.Uuid(), nw.String()), 58 | (sqltypes.Enum("red", "green", "blue"), nw.Enum(["red", "green", "blue"])), 59 | (sqltypes.Enum(Color), nw.Enum(Color)), 60 | (sqltypes.ARRAY(sqltypes.Float()), nw.List(nw.Float32())), 61 | (sqltypes.ARRAY(sqltypes.Float(), dimensions=3), nw.Array(nw.Float32(), shape=(3,))), 62 | (int, None), 63 | ("not a sqlalchemy type", None), 64 | ], 65 | ) 66 | def test_sqlalchemy_parse_step( 67 | sqlalchemy_step: SQLAlchemyTypeStep, input_type: Any, expected: nw.dtypes.DType | None 68 | ) -> None: 69 | result = sqlalchemy_step.parse(input_type=input_type, constraints=(), metadata={}) 70 | assert result == expected 71 | 72 | 73 | @pytest.mark.parametrize("time_unit", ["s", "ms", "ns", "us"]) 74 | def test_sqlalchemy_datetime_naive_with_time_unit_metadata( 75 | sqlalchemy_step: SQLAlchemyTypeStep, time_unit: TimeUnit 76 | ) -> None: 77 | result = sqlalchemy_step.parse( 78 | input_type=sqltypes.DateTime(), constraints=(), metadata={"anyschema": {"time_unit": time_unit}} 79 | ) 80 | assert result == nw.Datetime(time_unit) 81 | 82 | 83 | def test_sqlalchemy_datetime_tz_aware_without_metadata_raises(sqlalchemy_step: SQLAlchemyTypeStep) -> None: 84 | msg = re.escape("SQLAlchemy 
`DateTime(timezone=True)` does not specify a fixed timezone.") 85 | with pytest.raises(UnsupportedDTypeError, match=msg): 86 | sqlalchemy_step.parse(input_type=sqltypes.DateTime(timezone=True), constraints=(), metadata={}) 87 | 88 | 89 | @pytest.mark.parametrize( 90 | ("metadata", "expected"), 91 | [ 92 | ({"anyschema": {"time_zone": "UTC"}}, nw.Datetime("us", time_zone="UTC")), 93 | ({"anyschema": {"time_zone": "Europe/Rome"}}, nw.Datetime("us", time_zone="Europe/Rome")), 94 | ({"anyschema": {"time_unit": "ms", "time_zone": "UTC"}}, nw.Datetime("ms", time_zone="UTC")), 95 | ( 96 | {"anyschema": {"time_unit": "ns", "time_zone": "America/New_York"}}, 97 | nw.Datetime("ns", time_zone="America/New_York"), 98 | ), 99 | ], 100 | ) 101 | def test_sqlalchemy_datetime_tz_aware_with_metadata( 102 | sqlalchemy_step: SQLAlchemyTypeStep, metadata: dict[str, Any], expected: nw.dtypes.DType 103 | ) -> None: 104 | result = sqlalchemy_step.parse( 105 | input_type=sqltypes.DateTime(timezone=True), 106 | constraints=(), 107 | metadata=metadata, 108 | ) 109 | assert result == expected 110 | 111 | 112 | def test_sqlalchemy_datetime_naive_with_timezone_raises(sqlalchemy_step: SQLAlchemyTypeStep) -> None: 113 | msg = re.escape("SQLAlchemy `DateTime(timezone=False)` should not specify a fixed timezone, found UTC") 114 | with pytest.raises(UnsupportedDTypeError, match=msg): 115 | sqlalchemy_step.parse( 116 | input_type=sqltypes.DateTime(timezone=False), 117 | constraints=(), 118 | metadata={"anyschema": {"time_zone": "UTC"}}, 119 | ) 120 | 121 | 122 | @pytest.mark.parametrize("input_type", [int, str, list[int], dict]) 123 | def test_sqlalchemy_non_sqlalchemy_types_return_none(sqlalchemy_step: SQLAlchemyTypeStep, input_type: Any) -> None: 124 | result = sqlalchemy_step.parse(input_type=input_type, constraints=(), metadata={}) 125 | assert result is None 126 | 127 | 128 | @pytest.mark.parametrize( 129 | "input_type", 130 | [ 131 | sqltypes.PickleType(), 132 | sqltypes.NullType(), 133 | ], 134 | ) 135 | def test_sqlalchemy_unhandled_types_return_none(sqlalchemy_step: SQLAlchemyTypeStep, input_type: Any) -> None: 136 | result = sqlalchemy_step.parse(input_type=input_type, constraints=(), metadata={}) 137 | assert result is None 138 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community.
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | https://www.linkedin.com/in/francesco-bruzzesi/. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. 
This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /tests/field/field_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import asdict 4 | from typing import TYPE_CHECKING, Any, TypedDict, cast 5 | 6 | import narwhals as nw 7 | import pytest 8 | 9 | from anyschema import AnyField 10 | 11 | if TYPE_CHECKING: 12 | from collections.abc import Mapping 13 | 14 | class IntoAnyField(TypedDict, total=False): 15 | """Arguments required to create a `AnyField` object.""" 16 | 17 | name: str 18 | dtype: nw.dtypes.DType 19 | nullable: bool 20 | unique: bool 21 | description: str | None 22 | metadata: Mapping[str, Any] 23 | 24 | 25 | @pytest.mark.parametrize("dtype", [nw.String(), nw.Int32(), nw.Array(nw.Int32(), shape=(3, 2))]) 26 | @pytest.mark.parametrize("nullable", [True, False, None]) 27 | @pytest.mark.parametrize("unique", [True, False, None]) 28 | @pytest.mark.parametrize("description", ["some description", None]) 29 | @pytest.mark.parametrize("metadata", [{"min": 0, "max": 150}, None]) 30 | def test_anyfield( 31 | dtype: nw.dtypes.DType, 32 | *, 33 | nullable: bool | None, 34 | unique: bool | None, 35 | description: str | None, 36 | metadata: Mapping[str, Any] | None, 37 | ) -> None: 38 | kwargs = { 39 | "name": "id", 40 | "dtype": dtype, 41 | "nullable": nullable, 42 | "unique": unique, 43 | "description": description, 44 | "metadata": metadata, 45 | } 46 | expected: IntoAnyField = { 47 | "name": "id", 48 | "dtype": dtype, 49 | "nullable": nullable if nullable is not None else False, 50 | "unique": unique if unique is not None else False, 51 | "description": description, 52 | "metadata": metadata if metadata is not None 
else {}, 53 | } 54 | into_field = cast("IntoAnyField", {k: v for k, v in kwargs.items() if v is not None}) 55 | field = AnyField(**into_field) 56 | assert asdict(field) == expected 57 | 58 | field2 = AnyField(**into_field) 59 | 60 | assert field == field2 61 | assert hash(field) == hash(field2) 62 | 63 | 64 | @pytest.mark.parametrize( 65 | ("field1_kwargs", "field2_kwargs"), 66 | [ 67 | ( 68 | {"name": "id", "dtype": nw.Int64()}, 69 | {"name": "user_id", "dtype": nw.Int64()}, 70 | ), 71 | ( 72 | {"name": "age", "dtype": nw.Int64()}, 73 | {"name": "age", "dtype": nw.Int32()}, 74 | ), 75 | ( 76 | {"name": "email", "dtype": nw.String(), "nullable": True}, 77 | {"name": "email", "dtype": nw.String(), "nullable": False}, 78 | ), 79 | ( 80 | {"name": "username", "dtype": nw.String(), "unique": False}, 81 | {"name": "username", "dtype": nw.String(), "unique": True}, 82 | ), 83 | ( 84 | {"name": "score", "dtype": nw.Float64(), "metadata": {"min": 0}}, 85 | {"name": "score", "dtype": nw.Float64(), "metadata": {"max": 100}}, 86 | ), 87 | ], 88 | ) 89 | def test_field_unequal_fields(field1_kwargs: IntoAnyField, field2_kwargs: IntoAnyField) -> None: 90 | field1, field2 = AnyField(**field1_kwargs), AnyField(**field2_kwargs) 91 | assert field1 != field2 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "other_value", 96 | [ 97 | "not a field", 98 | 42, 99 | None, 100 | {"name": "test"}, 101 | [], 102 | nw.String(), 103 | ], 104 | ) 105 | def test_field_equality_with_non_field(other_value: object) -> None: 106 | """Test that Field is not equal to non-Field objects.""" 107 | field = AnyField(name="test", dtype=nw.String()) 108 | assert field != other_value 109 | 110 | 111 | @pytest.mark.parametrize( 112 | ("field_configs", "expected_unique_count"), 113 | [ 114 | ( 115 | [ 116 | {"name": "id", "dtype": nw.Int64()}, 117 | {"name": "id", "dtype": nw.Int64()}, # Duplicate 118 | {"name": "name", "dtype": nw.String()}, 119 | ], 120 | 2, 121 | ), 122 | ( 123 | [ 124 | {"name": "a", "dtype": nw.String()}, 125 | {"name": "b", "dtype": nw.String()}, 126 | {"name": "c", "dtype": nw.String()}, 127 | ], 128 | 3, 129 | ), 130 | ( 131 | [ 132 | {"name": "id", "dtype": nw.Int64(), "nullable": True}, 133 | {"name": "id", "dtype": nw.Int64(), "nullable": True}, # Duplicate 134 | {"name": "id", "dtype": nw.Int64(), "nullable": False}, # Different 135 | ], 136 | 2, 137 | ), 138 | ], 139 | ) 140 | def test_field_use_in_set(field_configs: list[IntoAnyField], expected_unique_count: int) -> None: 141 | """Test that Field instances work correctly in sets.""" 142 | fields = [AnyField(**config) for config in field_configs] 143 | field_set = set(fields) 144 | assert len(field_set) == expected_unique_count 145 | 146 | 147 | @pytest.mark.parametrize( 148 | ("field_kwargs", "expected_description"), 149 | [ 150 | ({"name": "user_id", "dtype": nw.Int64(), "description": "Unique user identifier"}, "Unique user identifier"), 151 | ({"name": "user_id", "dtype": nw.Int64(), "description": None}, None), 152 | ({"name": "test", "dtype": nw.String()}, None), # Default 153 | ({"name": "email", "dtype": nw.String(), "description": ""}, ""), # Empty string 154 | ], 155 | ) 156 | def test_field_description_values(field_kwargs: IntoAnyField, expected_description: str | None) -> None: 157 | """Test Field creation with various description values.""" 158 | field = AnyField(**field_kwargs) 159 | assert field.description == expected_description 160 | -------------------------------------------------------------------------------- 
/tests/adapters/custom_adapters_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, Generator, TypedDict, cast 4 | 5 | import pyarrow as pa 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import FieldSpec 12 | 13 | 14 | class SimpleSchema: 15 | """A simple schema format for testing.""" 16 | 17 | def __init__(self, fields: dict[str, type[Any]]) -> None: 18 | self.fields = fields 19 | 20 | 21 | def simple_dict_adapter(spec: SimpleSchema) -> Generator[FieldSpec, None, None]: 22 | """Adapter for SimpleSchema format. 23 | 24 | Arguments: 25 | spec: A SimpleSchema instance. 26 | 27 | Yields: 28 | Tuples of (field_name, field_type, constraints, metadata). 29 | """ 30 | for field_name, field_type in spec.fields.items(): 31 | yield field_name, field_type, (), {} 32 | 33 | 34 | class NestedSchema: 35 | """A schema that can contain nested schemas.""" 36 | 37 | def __init__(self, fields: dict[str, Any]) -> None: 38 | self.fields = fields 39 | 40 | 41 | def nested_adapter(spec: NestedSchema) -> Generator[FieldSpec, None, None]: 42 | """Adapter for nested schema structures. 43 | 44 | For nested schemas, we dynamically create a TypedDict so the parser 45 | can properly extract the field structure. 46 | 47 | Arguments: 48 | spec: A NestedSchema instance. 49 | 50 | Yields: 51 | Tuples of (field_name, field_type, constraints, metadata). 52 | """ 53 | for field_name, field_value in spec.fields.items(): 54 | if isinstance(field_value, NestedSchema): 55 | # For nested schemas, create a TypedDict with the proper structure 56 | nested_dict = {name: type_ for name, type_, *_ in nested_adapter(field_value)} 57 | # Create a dynamic TypedDict with the nested fields 58 | nested_typed_dict = TypedDict( # type: ignore[misc] 59 | f"{field_name.title()}TypedDict", # Generate a unique name 60 | nested_dict, # Field name -> type mapping 61 | ) 62 | yield field_name, nested_typed_dict, (), {} 63 | else: 64 | yield field_name, field_value, (), {} 65 | 66 | 67 | def test_simple_dict_spec() -> None: 68 | """Test that dict types are converted to Struct.""" 69 | fields = {"id": int, "metadata": dict} 70 | schema_spec = SimpleSchema(fields=fields) 71 | 72 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 73 | arrow_schema = schema.to_arrow() 74 | 75 | assert len(arrow_schema) == len(fields) 76 | assert arrow_schema.names == ["id", "metadata"] 77 | assert "struct" in str(arrow_schema.types[1]).lower() 78 | 79 | 80 | def test_typed_dict_spec() -> None: 81 | """Test that TypedDict is converted to Struct with fields.""" 82 | 83 | class PersonTypedDict(TypedDict): 84 | name: str 85 | age: int 86 | 87 | fields = {"person": PersonTypedDict} 88 | schema_spec = SimpleSchema(fields=fields) 89 | 90 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 91 | arrow_schema = schema.to_arrow() 92 | 93 | assert len(arrow_schema) == len(fields) 94 | assert arrow_schema.names == ["person"] 95 | # Should be a struct with name and age fields 96 | assert "struct" in str(arrow_schema.types[0]).lower() 97 | 98 | 99 | def test_nested_schema_adapter() -> None: 100 | """Test the nested schema adapter from the advanced documentation.""" 101 | fields = { 102 | "id": int, 103 | "profile": NestedSchema( 104 | fields={ 105 | "name": str, 106 | "age": int, 107 | } 108 | ), 109 | } 110 | schema_spec = NestedSchema(fields=fields) 111 | schema = 
AnySchema(spec=schema_spec, adapter=nested_adapter) 112 | arrow_schema = schema.to_arrow() 113 | 114 | assert len(arrow_schema) == len(fields) 115 | assert arrow_schema.names == ["id", "profile"] 116 | assert "struct" in str(arrow_schema.types[1]).lower() 117 | # Check that the nested struct has the correct fields 118 | profile_type = arrow_schema.types[1] 119 | assert profile_type.num_fields == len(cast("NestedSchema", fields["profile"]).fields) # Should have 2 fields 120 | assert pa.types.is_struct(profile_type) 121 | assert profile_type.names == ["name", "age"] 122 | 123 | 124 | def test_polars_schema_with_dict() -> None: 125 | """Test that dict types work with Polars schema conversion.""" 126 | fields = {"id": int, "metadata": dict, "name": str} 127 | schema_spec = SimpleSchema(fields=fields) 128 | 129 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 130 | polars_schema = schema.to_polars() 131 | 132 | assert len(polars_schema) == len(fields) 133 | # Polars schema items are DType classes, not instances 134 | assert str(polars_schema["id"]) == "Int64" 135 | # Polars represents empty struct as {} instead of [] 136 | assert "Struct" in str(polars_schema["metadata"]) 137 | assert str(polars_schema["name"]) == "String" 138 | 139 | 140 | @pytest.mark.parametrize( 141 | "dict_type", 142 | [dict, dict[str, int], dict[str, str]], 143 | ) 144 | def test_various_dict_types(dict_type: type) -> None: 145 | """Test that various dict type annotations are handled.""" 146 | fields = {"data": dict_type} 147 | schema_spec = SimpleSchema(fields=fields) 148 | 149 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 150 | arrow_schema = schema.to_arrow() 151 | 152 | assert len(arrow_schema) == len(fields) 153 | assert arrow_schema.names == ["data"] 154 | # All dict types should become structs 155 | assert "struct" in str(arrow_schema.types[0]).lower() 156 | -------------------------------------------------------------------------------- /anyschema/_metadata.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Literal, overload 4 | 5 | if TYPE_CHECKING: 6 | from narwhals.dtypes import DType 7 | from narwhals.typing import TimeUnit 8 | 9 | from anyschema.typing import AnySchemaMetadata, AnySchemaMetadataKey, AnySchemaNamespaceKey, FieldMetadata 10 | 11 | 12 | def _get_anyschema_metadata(metadata: FieldMetadata) -> AnySchemaMetadata: 13 | """Get the nested anyschema metadata dictionary from field metadata. 14 | 15 | Supports both "anyschema" and "x-anyschema" keys (OpenAPI convention). 16 | Returns an empty dict if neither key exists or if the stored value is empty. 17 | 18 | Arguments: 19 | metadata: The field metadata dictionary. 20 | 21 | Returns: 22 | The anyschema metadata dictionary, or empty dict if not found. 23 | 24 | Notes: 25 | This function tries "x-anyschema" (OpenAPI convention) first, then "anyschema". 26 | """ 27 | for key in ("x-anyschema", "anyschema"): 28 | if anyschema_meta := metadata.get(key): 29 | return anyschema_meta # type: ignore[no-any-return] 30 | return {} 31 | 32 | 33 | @overload 34 | def get_anyschema_value_by_key( 35 | metadata: FieldMetadata, *, key: Literal["nullable", "unique"], default: bool 36 | ) -> bool: ... 37 | 38 | 39 | @overload 40 | def get_anyschema_value_by_key( 41 | metadata: FieldMetadata, *, key: Literal["time_unit"], default: Literal["us"] 42 | ) -> TimeUnit: ...
43 | 44 | 45 | @overload 46 | def get_anyschema_value_by_key( 47 | metadata: FieldMetadata, *, key: Literal["nullable", "unique"], default: None = None 48 | ) -> bool | None: ... 49 | 50 | 51 | @overload 52 | def get_anyschema_value_by_key( 53 | metadata: FieldMetadata, *, key: Literal["time_unit"], default: Literal["us"] | None 54 | ) -> TimeUnit | None: ... 55 | 56 | 57 | @overload 58 | def get_anyschema_value_by_key( 59 | metadata: FieldMetadata, *, key: Literal["description", "time_zone"], default: str | None = None 60 | ) -> str | None: ... 61 | 62 | 63 | @overload 64 | def get_anyschema_value_by_key( 65 | metadata: FieldMetadata, *, key: Literal["dtype"], default: None = None 66 | ) -> DType | str | None: ... 67 | 68 | 69 | def get_anyschema_value_by_key( 70 | metadata: FieldMetadata, 71 | *, 72 | key: AnySchemaMetadataKey, 73 | default: bool | str | None = None, 74 | ) -> bool | str | DType | TimeUnit | None: 75 | """Get a specific anyschema metadata value with fallback to default. 76 | 77 | Supports both "anyschema" and "x-anyschema" keys (OpenAPI convention). 78 | 79 | Arguments: 80 | metadata: The field metadata dictionary. 81 | key: The anyschema metadata key to retrieve. 82 | default: Default value to return if key is not found. 83 | 84 | Returns: 85 | The metadata value, or default if not found. 86 | 87 | Examples: 88 | >>> metadata = {"anyschema": {"nullable": True, "unique": False}} 89 | >>> get_anyschema_value_by_key(metadata, key="nullable") 90 | True 91 | >>> get_anyschema_value_by_key(metadata, key="time_zone", default="UTC") 92 | 'UTC' 93 | >>> metadata_openapi = {"x-anyschema": {"nullable": True}} 94 | >>> get_anyschema_value_by_key(metadata_openapi, key="nullable") 95 | True 96 | """ 97 | return _get_anyschema_metadata(metadata).get(key, default) 98 | 99 | 100 | @overload 101 | def set_anyschema_meta(metadata: FieldMetadata, *, key: Literal["nullable", "unique"], value: bool) -> None: ... 102 | 103 | 104 | @overload 105 | def set_anyschema_meta( 106 | metadata: FieldMetadata, *, key: Literal["description", "time_zone"], value: str | None 107 | ) -> None: ... 108 | 109 | 110 | @overload 111 | def set_anyschema_meta(metadata: FieldMetadata, *, key: Literal["time_unit"], value: TimeUnit) -> None: ... 112 | 113 | 114 | def set_anyschema_meta( 115 | metadata: FieldMetadata, *, key: AnySchemaMetadataKey, value: bool | str | TimeUnit | None 116 | ) -> None: 117 | """Set a specific anyschema metadata value in the nested structure. 118 | 119 | Creates the nested dictionary if it doesn't exist. Modifies the metadata dict in-place. 120 | 121 | Arguments: 122 | metadata: The field metadata dictionary to modify. 123 | key: The anyschema metadata key to set. 124 | value: The value to set. 125 | 126 | Examples: 127 | >>> metadata: dict = {} 128 | >>> set_anyschema_meta(metadata, key="nullable", value=True) 129 | >>> metadata 130 | {'anyschema': {'nullable': True}} 131 | >>> set_anyschema_meta(metadata, key="unique", value=False) 132 | >>> metadata 133 | {'anyschema': {'nullable': True, 'unique': False}} 134 | 135 | Notes: 136 | If "x-anyschema" already exists in the metadata, it will be used; 137 | otherwise "anyschema" is used (the default). This preserves the user's 138 | choice of namespace key. 
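For instance (hypothetical input): `set_anyschema_meta({"x-anyschema": {}}, key="nullable", value=True)` updates the existing namespace in place, yielding `{"x-anyschema": {"nullable": True}}` rather than adding a separate "anyschema" key.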
139 | """ 140 | # Preserve existing key if present, otherwise default to "anyschema" 141 | anyschema_key: AnySchemaNamespaceKey = "x-anyschema" if "x-anyschema" in metadata else "anyschema" 142 | if anyschema_key not in metadata: 143 | namespace: AnySchemaMetadata = {} 144 | metadata[anyschema_key] = namespace 145 | 146 | metadata[anyschema_key][key] = value 147 | 148 | 149 | def filter_anyschema_metadata(metadata: FieldMetadata) -> FieldMetadata: 150 | """Filter out anyschema-specific metadata keys, returning only custom metadata. 151 | 152 | Removes both "anyschema" and "x-anyschema" keys to support both conventions. 153 | 154 | Arguments: 155 | metadata: The field metadata dictionary. 156 | 157 | Returns: 158 | A new dictionary with anyschema keys removed. 159 | 160 | Examples: 161 | >>> metadata = {"anyschema": {"nullable": True}, "custom": "value", "x-anyschema": {"unique": False}} 162 | >>> filter_anyschema_metadata(metadata) 163 | {'custom': 'value'} 164 | """ 165 | return {key: value for key, value in metadata.items() if key not in ("anyschema", "x-anyschema")} 166 | -------------------------------------------------------------------------------- /tests/anyschema/to_pandas_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | import pandas as pd 7 | import pyarrow as pa 8 | import pytest 9 | 10 | from anyschema import AnySchema 11 | 12 | if TYPE_CHECKING: 13 | from narwhals.typing import DTypeBackend 14 | from pydantic import BaseModel 15 | 16 | 17 | @pytest.mark.parametrize( 18 | ("dtype_backend", "expected"), 19 | [ 20 | ( 21 | None, 22 | { 23 | "name": str, 24 | "date_of_birth": "date32[pyarrow]", 25 | "age": "uint64", 26 | "classes": pd.ArrowDtype(pa.list_(pa.string())), 27 | "has_graduated": "bool", 28 | }, 29 | ), 30 | ( 31 | "numpy_nullable", 32 | { 33 | "name": "string", 34 | "date_of_birth": "date32[pyarrow]", 35 | "age": "UInt64", 36 | "classes": pd.ArrowDtype(pa.list_(pa.string())), 37 | "has_graduated": "boolean", 38 | }, 39 | ), 40 | ( 41 | "pyarrow", 42 | { 43 | "name": "string[pyarrow]", 44 | "date_of_birth": "date32[pyarrow]", 45 | "age": "UInt64[pyarrow]", 46 | "classes": pd.ArrowDtype(pa.list_(pa.string())), 47 | "has_graduated": "boolean[pyarrow]", 48 | }, 49 | ), 50 | ], 51 | ) 52 | def test_pydantic_to_pandas( 53 | pydantic_student_cls: type[BaseModel], 54 | dtype_backend: DTypeBackend, 55 | expected: dict[str, str | pd.ArrowDtype | type], 56 | ) -> None: 57 | anyschema = AnySchema(spec=pydantic_student_cls) 58 | pd_schema = anyschema.to_pandas(dtype_backend=dtype_backend) 59 | assert isinstance(pd_schema, dict) 60 | assert pd_schema == expected 61 | 62 | 63 | @pytest.mark.parametrize( 64 | ("dtype_backend", "expected"), 65 | [ 66 | ( 67 | None, 68 | { 69 | "boolean": "bool", 70 | "categorical": "category", 71 | "date": "date32[pyarrow]", 72 | "datetime": "datetime64[us]", 73 | "duration": "timedelta64[us]", 74 | "float32": "float32", 75 | "float64": "float64", 76 | "int8": "int8", 77 | "int16": "int16", 78 | "int32": "int32", 79 | "int64": "int64", 80 | "list": pd.ArrowDtype(pa.list_(pa.float32())), 81 | "string": str, 82 | "struct": pd.ArrowDtype( 83 | pa.struct( 84 | [ 85 | ("field_1", pa.string()), 86 | ("field_2", pa.bool_()), 87 | ] 88 | ) 89 | ), 90 | "uint8": "uint8", 91 | "uint16": "uint16", 92 | "uint32": "uint32", 93 | "uint64": "uint64", 94 | }, 95 | ), 96 | ( 97 | "numpy_nullable", 98 | { 99 | 
"boolean": "boolean", 100 | "categorical": "category", 101 | "date": "date32[pyarrow]", 102 | "datetime": "datetime64[us]", 103 | "duration": "timedelta64[us]", 104 | "float32": "Float32", 105 | "float64": "Float64", 106 | "int8": "Int8", 107 | "int16": "Int16", 108 | "int32": "Int32", 109 | "int64": "Int64", 110 | "list": pd.ArrowDtype(pa.list_(pa.float32())), 111 | "string": "string", 112 | "struct": pd.ArrowDtype( 113 | pa.struct( 114 | [ 115 | ("field_1", pa.string()), 116 | ("field_2", pa.bool_()), 117 | ] 118 | ) 119 | ), 120 | "uint8": "UInt8", 121 | "uint16": "UInt16", 122 | "uint32": "UInt32", 123 | "uint64": "UInt64", 124 | }, 125 | ), 126 | ( 127 | "pyarrow", 128 | { 129 | "boolean": "boolean[pyarrow]", 130 | "categorical": "category", 131 | "date": "date32[pyarrow]", 132 | "datetime": "timestamp[us][pyarrow]", 133 | "duration": "duration[us][pyarrow]", 134 | "float32": "Float32[pyarrow]", 135 | "float64": "Float64[pyarrow]", 136 | "int8": "Int8[pyarrow]", 137 | "int16": "Int16[pyarrow]", 138 | "int32": "Int32[pyarrow]", 139 | "int64": "Int64[pyarrow]", 140 | "list": pd.ArrowDtype(pa.list_(pa.float32())), 141 | "string": "string[pyarrow]", 142 | "struct": pd.ArrowDtype( 143 | pa.struct( 144 | [ 145 | ("field_1", pa.string()), 146 | ("field_2", pa.bool_()), 147 | ] 148 | ) 149 | ), 150 | "uint8": "UInt8[pyarrow]", 151 | "uint16": "UInt16[pyarrow]", 152 | "uint32": "UInt32[pyarrow]", 153 | "uint64": "UInt64[pyarrow]", 154 | }, 155 | ), 156 | ], 157 | ) 158 | def test_nw_schema_to_arrow( 159 | nw_schema: nw.Schema, 160 | dtype_backend: DTypeBackend, 161 | expected: dict[str, str | pd.ArrowDtype | type], 162 | ) -> None: 163 | unsupported_dtypes = {"array", "enum", "uint128", "int128", "decimal", "object", "unknown"} 164 | model = nw.Schema({k: v for k, v in nw_schema.items() if k not in unsupported_dtypes}) 165 | anyschema = AnySchema(spec=model) 166 | pd_schema = anyschema.to_pandas(dtype_backend=dtype_backend) 167 | 168 | assert isinstance(pd_schema, dict) 169 | assert pd_schema == expected 170 | --------------------------------------------------------------------------------