├── anyschema ├── py.typed ├── exceptions.py ├── parsers │ ├── __init__.py │ ├── _annotated.py │ ├── attrs.py │ ├── _union.py │ └── pydantic.py ├── __init__.py ├── _utils.py ├── _dependencies.py ├── typing.py └── _metadata.py ├── tests ├── __init__.py ├── field │ ├── __init__.py │ ├── metadata_mutation_test.py │ └── field_test.py ├── adapters │ ├── __init__.py │ ├── into_ordered_dict_adapter_test.py │ ├── typed_dict_adapter_test.py │ ├── attrs_adapter_test.py │ ├── pydantic_adapter_test.py │ ├── dataclass_adapter_test.py │ └── custom_adapters_test.py ├── anyschema │ ├── __init__.py │ ├── names_test.py │ ├── dtypes_test.py │ ├── field_test.py │ ├── uniques_test.py │ ├── nullables_test.py │ ├── equality_test.py │ ├── hash_test.py │ ├── fields_test.py │ ├── descriptions_test.py │ ├── to_polars_test.py │ ├── initialization_test.py │ ├── to_arrow_test.py │ └── to_pandas_test.py ├── parsers │ ├── __init__.py │ ├── pydantic_derived_types_test.py │ ├── parsers_dependency_mock_test.py │ ├── pydantic_extra_types_test.py │ ├── _union_test.py │ ├── _builtin_derived_types_test.py │ ├── _annotated_test.py │ ├── attrs_test.py │ ├── forward_ref_dependency_mock_test.py │ ├── parsers_test.py │ └── sqlalchemy_test.py ├── pydantic │ ├── __init__.py │ ├── utils.py │ ├── boolean_test.py │ ├── struct_test.py │ ├── datetime_test.py │ ├── parsing_exception_test.py │ ├── string_test.py │ ├── date_test.py │ ├── float_test.py │ ├── literal_test.py │ └── list_test.py ├── spec_to_schema │ ├── __init__.py │ ├── dataclass_test.py │ ├── x_anyschema_test.py │ ├── pydantic_test.py │ ├── attrs_test.py │ ├── typed_dict_test.py │ └── sqlalchemy_test.py ├── version_test.py ├── module_getattr_test.py └── show_versions_test.py ├── docs ├── css │ └── extra.css ├── api-reference │ ├── anyschema.md │ ├── exceptions.md │ ├── index.md │ ├── serde.md │ ├── typing.md │ ├── adapters.md │ └── parsers.md ├── javascript │ └── extra.js └── user-guide │ ├── custom-end-to-end-example.md │ └── openapi-compatibility.md ├── .github ├── dependabot.yaml ├── ISSUE_TEMPLATE │ ├── config.yaml │ ├── release-drafter.yaml │ ├── bug-report.yaml │ ├── feature-request.yaml │ └── documentation.yaml ├── workflows │ ├── release.yaml │ ├── pre-commit-update.yaml │ ├── deploy-docs.yaml │ └── pull-request.yaml ├── PULL_REQUEST_TEMPLATE.md └── release-drafter.yaml ├── .pre-commit-config.yaml ├── Makefile ├── bump-version.py ├── README.md ├── mkdocs.yaml ├── .gitignore ├── pyproject.toml └── CODE_OF_CONDUCT.md /anyschema/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/field/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/anyschema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/pydantic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/spec_to_schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/css/extra.css: -------------------------------------------------------------------------------- 1 | .md-typeset ol li, 2 | .md-typeset ul li { 3 | margin-bottom: 0.1em !important; 4 | } 5 | .md-main__inner.md-grid { 6 | max-width: initial; 7 | margin-left: 5vw; 8 | } 9 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "monthly" 8 | -------------------------------------------------------------------------------- /docs/api-reference/anyschema.md: -------------------------------------------------------------------------------- 1 | # `anyschema` top level API 2 | 3 | ::: anyschema 4 | handler: python 5 | members: 6 | - AnySchema 7 | - AnyField 8 | - show_versions 9 | options: 10 | show_root_heading: true 11 | show_source: false 12 | -------------------------------------------------------------------------------- /docs/api-reference/exceptions.md: -------------------------------------------------------------------------------- 1 | # Exceptions 2 | 3 | ::: anyschema.exceptions 4 | handler: python 5 | options: 6 | show_root_heading: true 7 | show_source: true 8 | members: 9 | - UnavailablePipelineError 10 | - UnsupportedDTypeError 11 | -------------------------------------------------------------------------------- /docs/api-reference/index.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | This page provides detailed documentation for all public APIs in anyschema. 4 | 5 | For conceptual explanations, see the [Architecture](../architecture.md) page. For practical examples, see 6 | [Getting Started](../user-guide/getting-started.md) and [Advanced Usage](../user-guide/advanced.md). 7 | -------------------------------------------------------------------------------- /docs/api-reference/serde.md: -------------------------------------------------------------------------------- 1 | # Serialization & Deserialization 2 | 3 | The `serde` module provides utilities for serializing and deserializing Narwhals dtypes to and from string 4 | representations. 
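For orientation, a round-trip might look like the sketch below. Note that `dtype_to_string` and `dtype_from_string` are placeholder names used purely for illustration; the actual public functions are the ones listed in the API reference that follows.

```python
import narwhals as nw

# NOTE: placeholder names for illustration only -- substitute the
# functions documented in the API reference below.
from anyschema.serde import dtype_to_string, dtype_from_string

dtype = nw.Datetime(time_unit="us", time_zone="UTC")

encoded = dtype_to_string(dtype)  # dtype -> its string representation
decoded = dtype_from_string(encoded)  # string -> Narwhals dtype
assert decoded == dtype  # the round-trip recovers the original dtype
```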
5 | 6 | ## API Reference 7 | 8 | ::: anyschema.serde 9 | handler: python 10 | options: 11 | show_root_heading: true 12 | show_source: true 13 | -------------------------------------------------------------------------------- /anyschema/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | __all__ = ("UnavailablePipelineError", "UnsupportedDTypeError") 4 | 5 | 6 | class UnavailablePipelineError(ValueError): 7 | """Exception raised when a parser does not have a ParserPipeline set.""" 8 | 9 | 10 | class UnsupportedDTypeError(ValueError): 11 | """Exception raised when a DType is not supported.""" 12 | -------------------------------------------------------------------------------- /docs/api-reference/typing.md: -------------------------------------------------------------------------------- 1 | # Type Aliases 2 | 3 | The following type aliases are used throughout the anyschema codebase: 4 | 5 | ::: anyschema.typing 6 | handler: python 7 | options: 8 | show_root_heading: true 9 | show_source: false 10 | members: 11 | - Adapter 12 | - FieldSpec 13 | - FieldSpecIterable 14 | - IntoOrderedDict 15 | - IntoParserPipeline 16 | - Spec 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yaml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Blank Issue 4 | about: Create a new issue 5 | url: https://github.com/fbruzzesi/anyschema/issues/new 6 | - name: 📖 Documentation 7 | url: https://fbruzzesi.github.io/anyschema/ 8 | about: Read the official documentation 9 | - name: 🔍 Search Existing Issues 10 | url: https://github.com/fbruzzesi/anyschema/issues 11 | about: Search existing issues before creating a new one 12 | -------------------------------------------------------------------------------- /tests/version_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from pathlib import Path 5 | 6 | import anyschema 7 | 8 | 9 | def test_version_matches_pyproject() -> None: 10 | """Tests version is same of pyproject.""" 11 | with Path("pyproject.toml").open(encoding="utf-8") as file: 12 | content = file.read() 13 | pyproject_version = re.search(r'version = "(.*)"', content).group(1) # type: ignore[union-attr] 14 | 15 | assert anyschema.__version__ == pyproject_version 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/release-drafter.yaml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | types: [opened, reopened, synchronize] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | update_release_draft: 15 | permissions: 16 | contents: write 17 | pull-requests: write 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: release-drafter/release-drafter@v6 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | -------------------------------------------------------------------------------- /tests/module_getattr_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | from typing_extensions import Any, assert_type 7 | 8 | import anyschema 9 | 10 
| 11 | def test_package_getattr() -> None: 12 | assert_type(anyschema.__version__, str) 13 | assert_type(anyschema.__title__, str) 14 | 15 | if TYPE_CHECKING: 16 | bad = anyschema.not_real # type: ignore[attr-defined] 17 | assert_type(bad, Any) 18 | 19 | with pytest.raises(AttributeError): 20 | very_bad = anyschema.not_real # type: ignore[attr-defined] # noqa: F841 21 | -------------------------------------------------------------------------------- /anyschema/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from anyschema.parsers._annotated import AnnotatedStep 4 | from anyschema.parsers._base import ParserStep 5 | from anyschema.parsers._builtin import PyTypeStep 6 | from anyschema.parsers._forward_ref import ForwardRefStep 7 | from anyschema.parsers._pipeline import ParserPipeline, make_pipeline 8 | from anyschema.parsers._union import UnionTypeStep 9 | 10 | __all__ = ( 11 | "AnnotatedStep", 12 | "ForwardRefStep", 13 | "ParserPipeline", 14 | "ParserStep", 15 | "PyTypeStep", 16 | "UnionTypeStep", 17 | "make_pipeline", 18 | ) 19 | -------------------------------------------------------------------------------- /tests/pydantic/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from narwhals import Schema 6 | 7 | from anyschema.adapters import pydantic_adapter 8 | 9 | if TYPE_CHECKING: 10 | from pydantic import BaseModel 11 | 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def model_to_nw_schema(spec: type[BaseModel], pipeline: ParserPipeline) -> Schema: 16 | return Schema( 17 | { 18 | name: pipeline.parse(input_type, constraints, metadata) 19 | for name, input_type, constraints, metadata in pydantic_adapter(spec) 20 | } 21 | ) 22 | -------------------------------------------------------------------------------- /tests/anyschema/names_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import Spec 11 | 12 | 13 | @pytest.mark.parametrize( 14 | ("spec", "expected"), 15 | [ 16 | ({}, ()), 17 | ({"only_field": str}, ("only_field",)), 18 | ({"id": int, "name": str, "age": int}, ("id", "name", "age")), 19 | ], 20 | ) 21 | def test_names(spec: Spec, expected: tuple[str, ...]) -> None: 22 | schema = AnySchema(spec=spec) 23 | result = schema.names() 24 | 25 | assert result == expected 26 | -------------------------------------------------------------------------------- /tests/adapters/into_ordered_dict_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema.adapters import into_ordered_dict_adapter 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import FieldSpec, IntoOrderedDict 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "spec", 15 | [ 16 | {"name": str, "age": int}, 17 | [("name", str), ("age", int)], 18 | ], 19 | ) 20 | def test_into_ordered_dict_adapter(spec: IntoOrderedDict) -> None: 21 | expected: tuple[FieldSpec, ...] 
= (("name", str, (), {}), ("age", int, (), {})) 22 | result = tuple(into_ordered_dict_adapter(spec)) 23 | assert result == expected 24 | -------------------------------------------------------------------------------- /anyschema/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import typing as _t 4 | 5 | from anyschema._anyschema import AnyField, AnySchema 6 | from anyschema._utils import show_versions 7 | 8 | __all__ = ("AnyField", "AnySchema", "show_versions") 9 | __title__ = __name__ 10 | __version__: str 11 | 12 | 13 | if not _t.TYPE_CHECKING: 14 | 15 | def __getattr__(name: str) -> _t.Any: 16 | if name == "__version__": 17 | global __version__ # noqa: PLW0603 18 | 19 | from importlib import metadata 20 | 21 | __version__ = metadata.version(__name__) 22 | return __version__ 23 | msg = f"module {__name__!r} has no attribute {name!r}" 24 | raise AttributeError(msg) 25 | else: # pragma: no cover 26 | ... 27 | -------------------------------------------------------------------------------- /tests/adapters/typed_dict_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import date 4 | from typing import TYPE_CHECKING, TypedDict 5 | 6 | import pytest 7 | 8 | from anyschema.adapters import typed_dict_adapter 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import FieldSpec, TypedDictType 12 | 13 | 14 | class PersonTypedDict(TypedDict): 15 | name: str 16 | age: int 17 | date_of_birth: date 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "spec", 22 | [ 23 | PersonTypedDict, 24 | ], 25 | ) 26 | def test_typed_dict_adapter(spec: TypedDictType) -> None: 27 | expected: tuple[FieldSpec, ...] 
= (("name", str, (), {}), ("age", int, (), {}), ("date_of_birth", date, (), {})) 28 | result = tuple(typed_dict_adapter(spec)) 29 | assert result == expected 30 | -------------------------------------------------------------------------------- /tests/show_versions_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import warnings 4 | from typing import TYPE_CHECKING 5 | 6 | from anyschema import show_versions 7 | 8 | if TYPE_CHECKING: 9 | import pytest 10 | 11 | 12 | def test_show_versions(capsys: pytest.CaptureFixture[str]) -> None: 13 | with warnings.catch_warnings(): 14 | warnings.filterwarnings("ignore") 15 | show_versions() 16 | out, _ = capsys.readouterr() 17 | 18 | assert "python" in out 19 | assert "machine" in out 20 | 21 | assert "anyschema" in out 22 | assert "narwhals" in out 23 | assert "typing_extensions" in out 24 | assert "attrs" in out 25 | assert "pydantic" in out 26 | assert "sqlalchemy" in out 27 | assert "pandas" in out 28 | assert "polars" in out 29 | assert "pyarrow" in out 30 | 31 | assert "numpy" not in out 32 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Publish Python 🐍 distribution 📦 to PyPI" 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v[0-9]+.[0-9]+.[0-9]+*" 7 | 8 | jobs: 9 | pypi-release: 10 | runs-on: ubuntu-latest 11 | environment: 12 | name: pypi 13 | permissions: 14 | id-token: write 15 | contents: read 16 | steps: 17 | - name: Checkout source code 18 | uses: actions/checkout@v6 19 | - name: Install uv 20 | uses: astral-sh/setup-uv@v7 21 | - name: Build 22 | run: uv build 23 | - name: Smoke test (wheel) 24 | run: uv run --isolated --no-project --with dist/*.whl tests/version_test.py 25 | - name: Smoke test (source distribution) 26 | run: uv run --isolated --no-project --with dist/*.tar.gz tests/version_test.py 27 | - name: Publish 28 | run: uv publish --trusted-publishing always 29 | -------------------------------------------------------------------------------- /tests/pydantic/boolean_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | from pydantic import BaseModel, StrictBool 7 | 8 | from tests.pydantic.utils import model_to_nw_schema 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.parsers import ParserPipeline 12 | 13 | 14 | def test_parse_boolean(auto_pipeline: ParserPipeline) -> None: 15 | class BooleanModel(BaseModel): 16 | # python bool type 17 | py_bool: bool 18 | py_bool_optional: bool | None 19 | py_bool_or_none: bool | None 20 | none_or_py_bool: None | bool 21 | 22 | # pydantic StrictBool type 23 | strict_bool: StrictBool 24 | strict_bool_optional: StrictBool | None 25 | strict_bool_or_none: StrictBool | None 26 | none_or_strict_bool: None | StrictBool 27 | 28 | schema = model_to_nw_schema(BooleanModel, pipeline=auto_pipeline) 29 | 30 | assert all(value == nw.Boolean() for value in schema.values()) 31 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit-update.yaml: -------------------------------------------------------------------------------- 1 | name: "Pre-commit auto-update" 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 0 10 * *" # Every 10th of the month at 00:00 UTC 7 | 8 | 
permissions: write-all 9 | 10 | jobs: 11 | auto-update: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout source code 15 | uses: actions/checkout@v6 16 | - name: Install uv 17 | uses: astral-sh/setup-uv@v7 18 | with: 19 | python-version: "3.12" 20 | enable-cache: "true" 21 | cache-dependency-glob: "pyproject.toml" 22 | - name: pre-commit install autoupdate 23 | run: uvx pre-commit autoupdate 24 | - name: Commit and push changes 25 | uses: peter-evans/create-pull-request@v7 26 | with: 27 | branch: update-pre-commit-hooks 28 | title: 'Update pre-commit hooks' 29 | commit-message: 'Update pre-commit hooks' 30 | body: | 31 | Update versions of pre-commit hooks to latest versions. 32 | -------------------------------------------------------------------------------- /tests/anyschema/dtypes_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, TypeAlias 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | 10 | if TYPE_CHECKING: 11 | from narwhals.dtypes import DType 12 | 13 | AnyDict: TypeAlias = dict[str, Any] 14 | 15 | 16 | @pytest.mark.parametrize( 17 | ("spec", "expected"), 18 | [ 19 | ({"x": int}, (nw.Int64(),)), 20 | ({"x": str}, (nw.String(),)), 21 | ({"x": float}, (nw.Float64(),)), 22 | ({"x": bool}, (nw.Boolean(),)), 23 | ({"x": list[int]}, (nw.List(nw.Int64()),)), 24 | ({"id": int, "name": str, "score": float}, (nw.Int64(), nw.String(), nw.Float64())), 25 | ], 26 | ) 27 | def test_dtypes(spec: AnyDict, expected: tuple[DType, ...]) -> None: 28 | schema = AnySchema(spec=spec) 29 | 30 | result_tuple = schema.dtypes() 31 | assert result_tuple == expected 32 | 33 | result_dict = schema.dtypes(named=True) 34 | assert result_dict == dict(zip(spec.keys(), expected, strict=True)) 35 | -------------------------------------------------------------------------------- /tests/spec_to_schema/dataclass_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import DataclassEventWithTimeMetadata 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.typing import DataclassType 13 | 14 | 15 | @pytest.mark.parametrize( 16 | ("spec", "expected_schema"), 17 | [ 18 | ( 19 | DataclassEventWithTimeMetadata, 20 | { 21 | "name": nw.String(), 22 | "created_at": nw.Datetime("us"), 23 | "scheduled_at": nw.Datetime("us", time_zone="UTC"), 24 | "started_at": nw.Datetime("ms"), 25 | "completed_at": nw.Datetime("ns", time_zone="Europe/Berlin"), 26 | }, 27 | ), 28 | ], 29 | ) 30 | def test_dataclass(spec: DataclassType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 31 | schema = AnySchema(spec=spec) 32 | nw_schema = schema._nw_schema 33 | assert nw_schema == nw.Schema(expected_schema) 34 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: monthly 3 | repos: 4 | - repo: https://github.com/astral-sh/ruff-pre-commit 5 | # Ruff version. 6 | rev: 'v0.14.6' 7 | hooks: 8 | # Run the formatter. 9 | - id: ruff-format 10 | # Run the linter. 
11 | - id: ruff 12 | args: [--fix] 13 | - repo: https://github.com/codespell-project/codespell 14 | rev: 'v2.3.0' 15 | hooks: 16 | - id: codespell 17 | files: \.(py|rst|md)$ 18 | args: [--ignore-words-list=ser] 19 | - repo: https://github.com/adamchainz/blacken-docs 20 | rev: "1.19.1" # replace with latest tag on GitHub 21 | hooks: 22 | - id: blacken-docs 23 | args: [--skip-errors] 24 | additional_dependencies: 25 | - black==22.12.0 26 | - repo: https://github.com/pre-commit/pre-commit-hooks 27 | rev: v5.0.0 28 | hooks: 29 | - id: trailing-whitespace 30 | - id: name-tests-test 31 | - id: end-of-file-fixer 32 | - id: requirements-txt-fixer 33 | - id: no-commit-to-branch 34 | - id: check-json 35 | - id: check-yaml 36 | - id: check-ast 37 | - id: check-added-large-files 38 | -------------------------------------------------------------------------------- /tests/pydantic/struct_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated 4 | 5 | import narwhals as nw 6 | from annotated_types import Interval 7 | from pydantic import BaseModel 8 | 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def test_parse_struct(auto_pipeline: ParserPipeline) -> None: 16 | class BaseStruct(BaseModel): 17 | x1: Annotated[int, Interval(gt=0, lt=123)] 18 | x2: str 19 | x3: float | None 20 | x4: None | bool 21 | 22 | class StructModel(BaseModel): 23 | struct: BaseStruct | None 24 | 25 | schema = model_to_nw_schema(StructModel, pipeline=auto_pipeline) 26 | expected = { 27 | "struct": nw.Struct( 28 | [ 29 | nw.Field("x1", nw.UInt8()), 30 | nw.Field("x2", nw.String()), 31 | nw.Field("x3", nw.Float64()), 32 | nw.Field("x4", nw.Boolean()), 33 | ] 34 | ) 35 | } 36 | assert schema == expected 37 | -------------------------------------------------------------------------------- /tests/anyschema/field_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from anyschema import AnyField, AnySchema 6 | 7 | 8 | def test_field_correct_attributes() -> None: 9 | schema = AnySchema(spec={"id": int, "name": str, "age": int | None}) 10 | 11 | id_field = schema.field("id") 12 | assert isinstance(id_field, AnyField) 13 | assert id_field.name == "id" 14 | assert id_field.nullable is False 15 | 16 | age_field = schema.field("age") 17 | assert isinstance(age_field, AnyField) 18 | assert age_field.name == "age" 19 | assert age_field.nullable is True 20 | 21 | 22 | def test_field_raises_keyerror_for_missing_field() -> None: 23 | schema = AnySchema(spec={"id": int, "name": str}) 24 | 25 | with pytest.raises(KeyError): 26 | schema.field("non_existent") 27 | 28 | 29 | def test_field_with_all_field_names() -> None: 30 | spec = {"id": int, "name": str, "age": int, "active": bool} 31 | schema = AnySchema(spec=spec) 32 | 33 | for field_name in spec: 34 | field = schema.field(field_name) 35 | assert field.name == field_name 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ARG := $(word 2, $(MAKECMDGOALS)) 2 | $(eval $(ARG):;@:) 3 | 4 | sources = anyschema tests 5 | 6 | lint: 7 | uvx ruff version 8 | uvx ruff format $(sources) 9 | uvx ruff check $(sources) --fix 10 | uvx ruff clean 11 | uv tool 
run rumdl check . 12 | 13 | test: 14 | uv run --active --no-sync --group tests pytest tests --cov=anyschema --cov=tests --cov-fail-under=90 15 | uv run --active --no-sync --group tests pytest anyschema --doctest-modules 16 | 17 | typing: 18 | uv run --active --no-sync --group typing ty check $(sources) --output-format concise 19 | uv run --active --no-sync --group typing pyright $(sources) 20 | uv run --active --no-sync --group typing mypy $(sources) 21 | 22 | docs-serve: 23 | uv run --active --no-sync --group docs mkdocs serve --watch anyschema --watch docs --dirty 24 | 25 | docs-build: 26 | uv run --active --no-sync --group docs mkdocs build --strict 27 | 28 | setup-release: 29 | git checkout main 30 | git fetch upstream 31 | git reset --hard upstream/main 32 | git checkout -b bump-version 33 | python bump-version.py $(ARG) 34 | gh pr create --title "release: Bump version to " --body "Bump version" --base main --label release 35 | -------------------------------------------------------------------------------- /tests/spec_to_schema/x_anyschema_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from anyschema.parsers import ParserPipeline 10 | from tests.conftest import AttrsEventWithXAnyschema, PydanticEventWithXAnyschema 11 | 12 | if TYPE_CHECKING: 13 | from anyschema.typing import Spec 14 | 15 | 16 | @pytest.mark.parametrize("spec", [AttrsEventWithXAnyschema, PydanticEventWithXAnyschema]) 17 | def test_spec_with_x_anyschema(spec: Spec) -> None: 18 | schema = AnySchema(spec=spec) 19 | 20 | assert schema.fields["created_at"].dtype == nw.Datetime(time_zone="UTC", time_unit="us") 21 | assert schema.fields["started_at"].dtype == nw.Datetime(time_unit="ms") 22 | 23 | 24 | @pytest.mark.parametrize("metadata_key", ["anyschema", "x-anyschema"]) 25 | def test_dict_spec_with_both_prefixes(metadata_key: str) -> None: 26 | metadata = {metadata_key: {"nullable": True, "unique": True}} 27 | 28 | pipeline = ParserPipeline() 29 | field = pipeline.parse_into_field("test_field", int, (), metadata) 30 | 31 | assert field.nullable is True 32 | assert field.unique is True 33 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yaml: -------------------------------------------------------------------------------- 1 | name: "Deploy Documentation" 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - "v[0-9]+.[0-9]+.[0-9]+*" 8 | branches: 9 | - main 10 | 11 | permissions: 12 | contents: write 13 | 14 | jobs: 15 | deploy: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout source code 19 | uses: actions/checkout@v6 20 | - name: Configure Git Credentials 21 | run: | 22 | git config user.name github-actions[bot] 23 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 24 | - name: Install uv 25 | uses: astral-sh/setup-uv@v7 26 | with: 27 | python-version: "3.12" 28 | enable-cache: "true" 29 | cache-dependency-glob: "pyproject.toml" 30 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 31 | - uses: actions/cache@v4 32 | with: 33 | key: mkdocs-material-${{ env.cache_id }} 34 | path: .cache 35 | restore-keys: | 36 | mkdocs-material- 37 | - name: Install dependencies and deploy 38 | run: | 39 | uv run --group docs mkdocs gh-deploy --force 40 | 
-------------------------------------------------------------------------------- /tests/anyschema/uniques_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | from tests.conftest import ProductORM, array_fixed_table 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import Spec 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("spec", "expected"), 16 | [ 17 | (array_fixed_table, (True, False, False)), 18 | (ProductORM, (True, False, False, False)), 19 | ], 20 | ) 21 | def test_uniques_named_false(spec: Spec, expected: tuple[bool, ...]) -> None: 22 | schema = AnySchema(spec=spec) 23 | result = schema.uniques(named=False) 24 | 25 | assert result == expected 26 | 27 | 28 | @pytest.mark.parametrize( 29 | ("spec", "expected"), 30 | [ 31 | (array_fixed_table, {"id": True, "coordinates": False, "matrix": False}), 32 | ( 33 | ProductORM, 34 | {"id": True, "name": False, "price": False, "in_stock": False}, 35 | ), 36 | ], 37 | ) 38 | def test_uniques_named_true(spec: Spec, expected: dict[str, bool]) -> None: 39 | schema = AnySchema(spec=spec) 40 | result = schema.uniques(named=True) 41 | 42 | assert result == expected 43 | -------------------------------------------------------------------------------- /tests/anyschema/nullables_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, TypeAlias 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | AnyDict: TypeAlias = dict[str, Any] 10 | 11 | 12 | @pytest.mark.parametrize( 13 | ("spec", "expected"), 14 | [ 15 | ({}, ()), 16 | ({"id": int, "name": str}, (False, False)), 17 | ({"id": int, "name": str, "age": int}, (False, False, False)), 18 | ({"id": int, "name": str, "age": int | None}, (False, False, True)), 19 | ], 20 | ) 21 | def test_nullables_named_false(spec: AnyDict, expected: tuple[bool, ...]) -> None: 22 | schema = AnySchema(spec=spec) 23 | result = schema.nullables() 24 | 25 | assert result == expected 26 | 27 | 28 | @pytest.mark.parametrize( 29 | ("spec", "expected"), 30 | [ 31 | ({}, {}), 32 | ({"id": int, "name": str}, (False, False)), 33 | ({"id": int, "name": str, "age": int}, (False, False, False)), 34 | ({"id": int, "name": str, "age": int | None}, (False, False, True)), 35 | ], 36 | ) 37 | def test_nullables_named_true(spec: AnyDict, expected: tuple[bool, ...]) -> None: 38 | schema = AnySchema(spec=spec) 39 | result = schema.nullables(named=True) 40 | 41 | assert result == dict(zip(spec, expected, strict=True)) 42 | -------------------------------------------------------------------------------- /tests/anyschema/equality_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import Spec 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "spec", 15 | [ 16 | {"name": str, "age": int}, 17 | {"users": list[str], "counts": dict[str, int]}, 18 | ], 19 | ) 20 | def test_equal(spec: Spec) -> None: 21 | assert AnySchema(spec=spec) == AnySchema(spec=spec) 22 | 23 | 24 | @pytest.mark.parametrize( 25 | ("spec1", "spec2"), 26 | [ 27 | ({"name": str, "age": int}, {"age": int, "name": str}), 28 | ({"name": str, "age": int}, 
{"name": str}), 29 | ({"value": int}, {"value": float}), 30 | ({"name": str}, {"name": str | None}), 31 | ], 32 | ) 33 | def test_not_equal(spec1: Spec, spec2: Spec) -> None: 34 | schema1 = AnySchema(spec=spec1) 35 | schema2 = AnySchema(spec=spec2) 36 | 37 | assert schema1 != schema2 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "other", 42 | [ 43 | {"name": str}, 44 | "not a schema", 45 | 42, 46 | None, 47 | ], 48 | ) 49 | def test_equality_with_non_anyschema_object(other: Any) -> None: 50 | schema = AnySchema(spec={"name": str}) 51 | 52 | assert schema != other 53 | -------------------------------------------------------------------------------- /tests/anyschema/hash_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | 9 | if TYPE_CHECKING: 10 | from anyschema.typing import Spec 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "spec", 15 | [ 16 | {"name": str, "age": int}, 17 | {"users": list[str], "counts": dict[str, int]}, 18 | ], 19 | ) 20 | def test_same_hash(spec: Spec) -> None: 21 | assert hash(AnySchema(spec=spec)) == hash(AnySchema(spec=spec)) 22 | 23 | 24 | @pytest.mark.parametrize( 25 | ("spec1", "spec2"), 26 | [ 27 | ({"name": str, "age": int}, {"age": int, "name": str}), 28 | ({"name": str, "age": int}, {"name": str}), 29 | ({"value": int}, {"value": float}), 30 | ({"name": str}, {"name": str | None}), 31 | ], 32 | ) 33 | def test_different_hash(spec1: Spec, spec2: Spec) -> None: 34 | schema1 = AnySchema(spec=spec1) 35 | schema2 = AnySchema(spec=spec2) 36 | 37 | assert hash(schema1) != hash(schema2) 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "other", 42 | [ 43 | "not a schema", 44 | 42, 45 | None, 46 | ], 47 | ) 48 | def test_different_hash_object(other: Any) -> None: 49 | schema = AnySchema(spec={"name": str}) 50 | 51 | assert hash(schema) != hash(other) 52 | -------------------------------------------------------------------------------- /docs/api-reference/adapters.md: -------------------------------------------------------------------------------- 1 | # Spec Adapters 2 | 3 | Adapters convert various input specifications into a normalized format for parsing. 4 | 5 | Learn how to create custom adapters in the [Advanced Usage](../user-guide/advanced.md#custom-spec-adapters) guide. 6 | 7 | The following built-in adapters are not meant to be used directly. They serve more as an example than anything else. 8 | 9 | ::: anyschema.adapters 10 | handler: python 11 | options: 12 | show_root_heading: true 13 | show_source: true 14 | 15 | ## Adapters specification 16 | 17 | Adapters must follow this signature: 18 | 19 | ```python 20 | from typing import Iterator, TypeAlias, Callable, Any, Generator 21 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldName, FieldType 22 | 23 | FieldSpec: TypeAlias = tuple[FieldName, FieldType, FieldConstraints, FieldMetadata] 24 | 25 | 26 | def my_custom_adapter(spec: Any) -> Iterator[FieldSpec]: 27 | """ 28 | Yields tuples of (field_name, field_type, constraints, metadata). 29 | 30 | - name (str): The name of the field 31 | - type (type): The type annotation of the field 32 | - constraints (tuple): Type constraints (e.g., Gt(0), Le(100) from annotated-types) 33 | - metadata (dict): Custom metadata dictionary for additional information 34 | """ 35 | ... 36 | ``` 37 | 38 | They don't need to be functions; any callable is acceptable. 
39 | -------------------------------------------------------------------------------- /anyschema/parsers/_annotated.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated 4 | 5 | from typing_extensions import get_args, get_origin # noqa: UP035 6 | 7 | from anyschema.parsers._base import ParserStep 8 | 9 | if TYPE_CHECKING: 10 | from narwhals.dtypes import DType 11 | 12 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType 13 | 14 | 15 | class AnnotatedStep(ParserStep): 16 | """Parser for `typing.Annotated` types. 17 | 18 | Handles: 19 | 20 | - `Annotated[T, metadata...]` - extracts the type and metadata for further parsing 21 | """ 22 | 23 | def parse(self, input_type: FieldType, constraints: FieldConstraints, metadata: FieldMetadata) -> DType | None: 24 | """Parse Annotated types by extracting the base type and constraints. 25 | 26 | Arguments: 27 | input_type: The type to parse. 28 | constraints: Constraints associated with the type. 29 | metadata: Custom metadata dictionary. 30 | 31 | Returns: 32 | A Narwhals DType by extracting the base type and delegating to the chain. 33 | """ 34 | if get_origin(input_type) is Annotated: 35 | base_type, *extra_constraints = get_args(input_type) 36 | return self.pipeline.parse(base_type, (*constraints, *extra_constraints), metadata, strict=True) 37 | 38 | return None 39 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | ## Description 7 | 8 | 9 | 10 | ## Type of Change 11 | 12 | 13 | 14 | - [ ] 🐛 Bug fix (non-breaking change which fixes an issue) 15 | - [ ] ✨ New feature (non-breaking change which adds functionality) 16 | - [ ] ⚠️ Breaking change (fix or feature that would cause existing functionality to not work as expected) 17 | - [ ] 📚 Documentation update 18 | - [ ] 🧪 Test improvement 19 | - [ ] 🔧 Maintenance/Refactoring 20 | - [ ] ⚡ Performance improvement 21 | - [ ] 🏗️ Build/CI improvement 22 | 23 | ## Related Issues 24 | 25 | 26 | 27 | - Closes # 28 | - Related to # 29 | 30 | ## Changes Made 31 | 32 | 33 | 34 | ## Checklist 35 | 36 | 37 | 38 | - [ ] My code follows the project's style guidelines (ruff) 39 | - [ ] I have performed a self-review of my code 40 | - [ ] I have commented my code, particularly in hard-to-understand areas 41 | - [ ] I have made corresponding changes to the documentation 42 | - [ ] I have added tests that prove my fix is effective or that my feature works 43 | - [ ] New and existing unit tests pass locally with my changes 44 | -------------------------------------------------------------------------------- /tests/pydantic/datetime_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime # noqa: TC003 4 | from typing import TYPE_CHECKING 5 | 6 | import narwhals as nw 7 | from pydantic import BaseModel, FutureDatetime, NaiveDatetime, PastDatetime 8 | 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def test_parse_datetime(auto_pipeline: ParserPipeline) -> None: 16 | class DatetimeModel(BaseModel): 17 | # python datetime type 18 | py_dt: datetime 19 | py_dt_optional: datetime | None 20 | py_dt_or_none: datetime | None 21 
| none_or_py_dt: None | datetime 22 | 23 | # pydantic NaiveDatetime type 24 | naive_dt: NaiveDatetime 25 | naive_dt_optional: NaiveDatetime | None 26 | naive_dt_or_none: NaiveDatetime | None 27 | none_or_naive_dt: None | NaiveDatetime 28 | 29 | # pydantic PastDatetime type 30 | past_dt: PastDatetime 31 | past_dt_optional: PastDatetime | None 32 | past_dt_or_none: PastDatetime | None 33 | none_or_past_dt: None | PastDatetime 34 | 35 | # pydantic FutureDatetime type 36 | future_dt: FutureDatetime 37 | future_dt_optional: FutureDatetime | None 38 | future_dt_or_none: FutureDatetime | None 39 | none_or_future_dt: None | FutureDatetime 40 | 41 | schema = model_to_nw_schema(DatetimeModel, pipeline=auto_pipeline) 42 | 43 | assert all(value == nw.Datetime() for value in schema.values()) 44 | -------------------------------------------------------------------------------- /tests/pydantic/parsing_exception_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Optional 4 | 5 | import pytest 6 | from pydantic import AwareDatetime, create_model 7 | 8 | from anyschema.exceptions import UnsupportedDTypeError 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | @pytest.mark.parametrize( 16 | ("input_type", "msg"), 17 | [ 18 | (str | float | int, "Union with more than two types is not supported."), 19 | (str | float, "Union with mixed types is not supported."), 20 | ], 21 | ) 22 | def test_raise_parse_union(auto_pipeline: ParserPipeline, input_type: type, msg: str) -> None: 23 | ExceptionModel = create_model("ExceptionModel", foo=(input_type, ...)) # noqa: N806 24 | 25 | with pytest.raises(UnsupportedDTypeError, match=msg): 26 | model_to_nw_schema(ExceptionModel, pipeline=auto_pipeline) 27 | 28 | 29 | @pytest.mark.parametrize( 30 | "input_type", 31 | [ 32 | AwareDatetime, 33 | Optional[AwareDatetime], 34 | AwareDatetime | None, 35 | None | AwareDatetime, 36 | ], 37 | ) 38 | def test_raise_aware_datetime(auto_pipeline: ParserPipeline, input_type: type) -> None: 39 | AwareDatetimeModel = create_model("AwareDatetimeModel", foo=(input_type, ...)) # noqa: N806 40 | 41 | msg = "pydantic AwareDatetime does not specify a fixed timezone." 
42 | with pytest.raises(UnsupportedDTypeError, match=msg): 43 | model_to_nw_schema(AwareDatetimeModel, pipeline=auto_pipeline) 44 | -------------------------------------------------------------------------------- /tests/anyschema/fields_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from anyschema import AnyField, AnySchema 4 | 5 | 6 | def test_fields_correct_keys_and_values() -> None: 7 | spec = {"id": int, "name": str, "age": int} 8 | schema = AnySchema(spec=spec) 9 | result = schema.fields 10 | 11 | assert isinstance(result, dict) 12 | assert result.keys() == spec.keys() 13 | 14 | for field_name, field_obj in result.items(): 15 | assert isinstance(field_obj, AnyField) 16 | assert field_obj.name == field_name 17 | 18 | 19 | def test_fields_empty_schema() -> None: 20 | schema = AnySchema(spec={}) 21 | result = schema.fields 22 | 23 | assert result == {} 24 | 25 | 26 | def test_fields_returns_copy() -> None: 27 | schema = AnySchema(spec={"id": int, "name": str}) 28 | result1 = schema.fields 29 | result2 = schema.fields 30 | 31 | assert result1 is not result2 # Should return a new dict each time 32 | assert result1 == result2 # But with equal contents 33 | 34 | 35 | def test_fields_modification_does_not_affect_schema() -> None: 36 | schema = AnySchema(spec={"id": int, "name": str}) 37 | fields = schema.fields 38 | 39 | # Modify the returned dict 40 | fields["new_field"] = AnyField(name="new_field", dtype=schema.field("id").dtype) 41 | 42 | # Original schema should be unchanged 43 | assert "new_field" not in schema.names() 44 | 45 | 46 | def test_fields_with_nullable_and_metadata() -> None: 47 | schema = AnySchema(spec={"id": int, "name": str, "age": int | None}) 48 | fields = schema.fields 49 | 50 | assert fields["id"].nullable is False 51 | assert fields["name"].nullable is False 52 | assert fields["age"].nullable is True 53 | -------------------------------------------------------------------------------- /tests/pydantic/string_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated, Optional 4 | 5 | import narwhals as nw 6 | import pydantic 7 | import pytest 8 | from narwhals.utils import parse_version 9 | from pydantic import BaseModel, StrictStr 10 | 11 | from tests.pydantic.utils import model_to_nw_schema 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.parsers import ParserPipeline 15 | 16 | 17 | def test_parse_string(auto_pipeline: ParserPipeline) -> None: 18 | class StringModel(BaseModel): 19 | # python str type 20 | py_str: str 21 | py_str_optional: str | None 22 | py_str_or_none: str | None 23 | none_or_py_str: None | str 24 | 25 | # pydantic StrictStr type 26 | strict_str: StrictStr 27 | strict_str_optional: StrictStr | None 28 | strict_str_or_none: StrictStr | None 29 | none_or_strict_str: None | StrictStr 30 | 31 | schema = model_to_nw_schema(StringModel, pipeline=auto_pipeline) 32 | 33 | assert all(value == nw.String() for value in schema.values()) 34 | 35 | 36 | @pytest.mark.skipif(parse_version(pydantic.__version__) < (2, 1), reason="too old for StringConstraints") 37 | def test_parse_string_with_constraints(auto_pipeline: ParserPipeline) -> None: 38 | from pydantic import StringConstraints 39 | 40 | str_constraint = StringConstraints(strip_whitespace=True, to_upper=True, pattern=r"^[A-Z]+$") 41 | 42 | class StringConstraintsModel(BaseModel): 43 | str_con: 
Annotated[str, str_constraint] 44 | str_con_optional: Optional[Annotated[str, str_constraint]] 45 | str_con_or_none: Annotated[str, str_constraint] | None 46 | none_or_str_con: None | Annotated[str, str_constraint] 47 | 48 | schema = model_to_nw_schema(StringConstraintsModel, pipeline=auto_pipeline) 49 | 50 | assert all(value == nw.String() for value in schema.values()) 51 | -------------------------------------------------------------------------------- /tests/spec_to_schema/pydantic_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import ( 10 | PydanticEventWithTimeMetadata, 11 | PydanticSpecialDatetimeWithMetadata, 12 | PydanticStudent, 13 | ) 14 | 15 | if TYPE_CHECKING: 16 | from pydantic import BaseModel 17 | 18 | 19 | @pytest.mark.parametrize( 20 | ("spec", "expected_schema"), 21 | [ 22 | ( 23 | PydanticStudent, 24 | { 25 | "name": nw.String(), 26 | "date_of_birth": nw.Date(), 27 | "age": nw.UInt64(), 28 | "classes": nw.List(nw.String()), 29 | "has_graduated": nw.Boolean(), 30 | }, 31 | ), 32 | ( 33 | PydanticEventWithTimeMetadata, 34 | { 35 | "name": nw.String(), 36 | "created_at": nw.Datetime("us"), 37 | "scheduled_at": nw.Datetime("us", time_zone="UTC"), 38 | "started_at": nw.Datetime("ms"), 39 | "completed_at": nw.Datetime("ns", time_zone="Europe/Berlin"), 40 | }, 41 | ), 42 | ( 43 | PydanticSpecialDatetimeWithMetadata, 44 | { 45 | "aware": nw.Datetime("us", time_zone="UTC"), 46 | "aware_ms": nw.Datetime("ms", time_zone="Asia/Tokyo"), 47 | "naive_ms": nw.Datetime("ms"), 48 | "past_utc": nw.Datetime("us", time_zone="UTC"), 49 | "future_ns": nw.Datetime("ns"), 50 | }, 51 | ), 52 | ], 53 | ) 54 | def test_pydantic_model(spec: type[BaseModel], expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 55 | schema = AnySchema(spec=spec) 56 | nw_schema = schema._nw_schema 57 | assert nw_schema == nw.Schema(expected_schema) 58 | -------------------------------------------------------------------------------- /tests/parsers/pydantic_derived_types_test.py: -------------------------------------------------------------------------------- 1 | """Tests for pydantic parser with derived types. 2 | 3 | This module tests that PydanticTypeStep correctly handles types that inherit 4 | from Pydantic's date/datetime types. 5 | """ 6 | 7 | from __future__ import annotations 8 | 9 | from typing import Any 10 | 11 | import narwhals as nw 12 | import pytest 13 | from pydantic import FutureDate, PastDate, PastDatetime 14 | 15 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 16 | from anyschema.parsers.pydantic import PydanticTypeStep 17 | 18 | 19 | # Custom types that inherit from Pydantic types 20 | class CustomPastDate(PastDate): ... 21 | 22 | 23 | class CustomFutureDate(FutureDate): ... 24 | 25 | 26 | class CustomPastDatetime(PastDatetime): ... 
27 | 28 | 29 | @pytest.fixture(scope="module") 30 | def parser_pipeline() -> ParserPipeline: 31 | """Create a parser pipeline with pydantic support.""" 32 | union_parser = UnionTypeStep() 33 | pydantic_parser = PydanticTypeStep() 34 | py_parser = PyTypeStep() 35 | return ParserPipeline([union_parser, pydantic_parser, py_parser]) 36 | 37 | 38 | @pytest.mark.parametrize( 39 | ("input_type", "expected"), 40 | [ 41 | # Base Pydantic types 42 | (PastDate, nw.Date()), 43 | (FutureDate, nw.Date()), 44 | (PastDatetime, nw.Datetime()), 45 | # Derived types 46 | (CustomPastDate, nw.Date()), 47 | (CustomFutureDate, nw.Date()), 48 | (CustomPastDatetime, nw.Datetime()), 49 | # In lists 50 | (list[CustomPastDate], nw.List(nw.Date())), 51 | (list[CustomPastDatetime], nw.List(nw.Datetime())), 52 | ], 53 | ) 54 | def test_pydantic_derived_types(parser_pipeline: ParserPipeline, input_type: Any, expected: nw.dtypes.DType) -> None: 55 | """Test that pydantic parser handles derived types correctly.""" 56 | result = parser_pipeline.parse(input_type, (), {}) 57 | assert result == expected 58 | -------------------------------------------------------------------------------- /tests/anyschema/descriptions_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import pytest 6 | 7 | from anyschema import AnySchema 8 | from tests.conftest import DataclassEventWithTimeMetadata, PydanticStudent, user_table 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import Spec 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("spec", "expected"), 16 | [ 17 | (PydanticStudent, ("Student full name", None, "Student age in years", None, None)), 18 | (DataclassEventWithTimeMetadata, ("Event name", None, "Scheduled time", None, None)), 19 | (user_table, ("Primary key", None, "User age", None)), 20 | ], 21 | ) 22 | def test_descriptions_named_false(spec: Spec, expected: tuple[str | None, ...]) -> None: 23 | schema = AnySchema(spec=spec) 24 | result = schema.descriptions(named=False) 25 | 26 | assert result == expected 27 | 28 | 29 | @pytest.mark.parametrize( 30 | ("spec", "expected"), 31 | [ 32 | ( 33 | PydanticStudent, 34 | { 35 | "name": "Student full name", 36 | "date_of_birth": None, 37 | "age": "Student age in years", 38 | "classes": None, 39 | "has_graduated": None, 40 | }, 41 | ), 42 | ( 43 | DataclassEventWithTimeMetadata, 44 | { 45 | "name": "Event name", 46 | "created_at": None, 47 | "scheduled_at": "Scheduled time", 48 | "started_at": None, 49 | "completed_at": None, 50 | }, 51 | ), 52 | (user_table, {"id": "Primary key", "name": None, "age": "User age", "email": None}), 53 | ], 54 | ) 55 | def test_descriptions_named_true(spec: Spec, expected: dict[str, str | None]) -> None: 56 | schema = AnySchema(spec=spec) 57 | result = schema.descriptions(named=True) 58 | 59 | assert result == expected 60 | -------------------------------------------------------------------------------- /bump-version.py: -------------------------------------------------------------------------------- 1 | """Adjusted from narwhals. 
2 | 3 | https://github.com/narwhals-dev/narwhals/blob/25701453aaa0556adc491e428f6d5724a1eac177/utils/bump_version.py 4 | 5 | License: MIT 6 | Copyright (c) 2024 Marco Gorelli 7 | """ 8 | 9 | # python bump-version.py 10 | 11 | # ruff: noqa: PLW1510, S603, S607, T201 12 | # mypy: ignore 13 | from __future__ import annotations 14 | 15 | import subprocess 16 | import sys 17 | 18 | out = subprocess.run(["git", "fetch", "upstream", "--tags"]) 19 | if out.returncode != 0: 20 | print(out) 21 | sys.exit(1) 22 | subprocess.run(["git", "reset", "--hard", "upstream/main"]) 23 | 24 | if subprocess.run(["git", "branch", "--show-current"], text=True, capture_output=True).stdout.strip() != "bump-version": 25 | msg = "`bump-version.py` should be run from `bump-version` branch" 26 | raise RuntimeError(msg) 27 | 28 | # Delete local tags, if present 29 | try: 30 | # Get the list of all tags 31 | result = subprocess.run(["git", "tag", "-l"], capture_output=True, text=True, check=True) 32 | tags = result.stdout.splitlines() # Split the tags into a list by lines 33 | 34 | # Delete each tag using git tag -d 35 | subprocess.run(["git", "tag", "-d", *tags], check=True) 36 | print("All local tags have been deleted.") 37 | except subprocess.CalledProcessError as e: 38 | print(f"An error occurred: {e}") 39 | 40 | subprocess.run(["git", "fetch", "upstream", "--tags"]) 41 | subprocess.run(["git", "fetch", "upstream", "--prune", "--tags"]) 42 | 43 | how = sys.argv[1] 44 | version = subprocess.run(["uv", "version", "--bump", how, "--short"], text=True, capture_output=True).stdout.strip() 45 | 46 | subprocess.run(["git", "commit", "-a", "-m", f"release: Bump version to {version}"]) 47 | subprocess.run(["git", "tag", "-a", f"v{version}", "-m", f"v{version}"]) 48 | subprocess.run(["git", "push", "upstream", "HEAD", "--follow-tags"]) 49 | subprocess.run(["git", "push", "upstream", "HEAD:stable", "-f", "--follow-tags"]) 50 | -------------------------------------------------------------------------------- /tests/pydantic/date_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import date # noqa: TC003 4 | from typing import TYPE_CHECKING, Annotated, Optional 5 | 6 | import hypothesis.strategies as st 7 | import narwhals as nw 8 | from annotated_types import Interval 9 | from hypothesis import assume, given 10 | from pydantic import BaseModel, FutureDate, PastDate 11 | 12 | from tests.pydantic.utils import model_to_nw_schema 13 | 14 | if TYPE_CHECKING: 15 | from anyschema.parsers import ParserPipeline 16 | 17 | 18 | def test_parse_date(auto_pipeline: ParserPipeline) -> None: 19 | class DateModel(BaseModel): 20 | # python datetime type 21 | py_dt: date 22 | py_dt_optional: date | None 23 | py_dt_or_none: date | None 24 | none_or_py_dt: None | date 25 | 26 | # pydantic PastDate type 27 | past_dt: PastDate 28 | past_dt_optional: PastDate | None 29 | past_dt_or_none: PastDate | None 30 | none_or_past_dt: None | PastDate 31 | 32 | # pydantic FutureDate type 33 | future_dt: FutureDate 34 | future_dt_optional: FutureDate | None 35 | future_dt_or_none: FutureDate | None 36 | none_or_future_dt: None | FutureDate 37 | 38 | schema = model_to_nw_schema(DateModel, pipeline=auto_pipeline) 39 | 40 | assert all(value == nw.Date() for value in schema.values()) 41 | 42 | 43 | @given(min_date=st.dates(), max_date=st.dates()) 44 | def test_parse_date_with_constraints(auto_pipeline: ParserPipeline, min_date: date, max_date: date) -> None: 45 | 
assume(min_date < max_date) 46 | 47 | class DateConstraintModel(BaseModel): 48 | x: Annotated[date, Interval(gt=min_date, lt=max_date)] 49 | y: Optional[Annotated[date, Interval(ge=min_date, lt=max_date)]] 50 | z: Annotated[date, Interval(gt=min_date, le=max_date)] | None 51 | w: None | Annotated[date, Interval(ge=min_date, le=max_date)] 52 | 53 | schema = model_to_nw_schema(DateConstraintModel, pipeline=auto_pipeline) 54 | 55 | assert all(value == nw.Date() for value in schema.values()) 56 | -------------------------------------------------------------------------------- /tests/anyschema/to_polars_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import polars as pl 6 | from narwhals import Schema 7 | 8 | from anyschema import AnySchema 9 | 10 | if TYPE_CHECKING: 11 | from pydantic import BaseModel 12 | 13 | 14 | def test_pydantic_to_polars(pydantic_student_cls: type[BaseModel]) -> None: 15 | anyschema = AnySchema(spec=pydantic_student_cls) 16 | pl_schema = anyschema.to_polars() 17 | 18 | assert isinstance(pl_schema, pl.Schema) 19 | assert pl_schema == pl.Schema( 20 | { 21 | "name": pl.String(), 22 | "date_of_birth": pl.Date(), 23 | "age": pl.UInt64(), 24 | "classes": pl.List(pl.String()), 25 | "has_graduated": pl.Boolean(), 26 | } 27 | ) 28 | 29 | 30 | def test_nw_schema_to_polars(nw_schema: Schema) -> None: 31 | unsupported_dtypes = {"array", "enum", "uint128", "int128", "decimal"} 32 | model = Schema({k: v for k, v in nw_schema.items() if k not in unsupported_dtypes}) 33 | anyschema = AnySchema(spec=model) 34 | pl_schema = anyschema.to_polars() 35 | 36 | assert isinstance(pl_schema, pl.Schema) 37 | assert pl_schema == pl.Schema( 38 | { 39 | "boolean": pl.Boolean(), 40 | "categorical": pl.Categorical(), 41 | "date": pl.Date(), 42 | "datetime": pl.Datetime(), 43 | "duration": pl.Duration(), 44 | "float32": pl.Float32(), 45 | "float64": pl.Float64(), 46 | "int8": pl.Int8(), 47 | "int16": pl.Int16(), 48 | "int32": pl.Int32(), 49 | "int64": pl.Int64(), 50 | "list": pl.List(pl.Float32()), 51 | "object": pl.Object(), 52 | "string": pl.String(), 53 | "struct": pl.Struct(fields=[pl.Field("field_1", pl.String()), pl.Field("field_2", pl.Boolean())]), 54 | "uint8": pl.UInt8(), 55 | "uint16": pl.UInt16(), 56 | "uint32": pl.UInt32(), 57 | "uint64": pl.UInt64(), 58 | "unknown": pl.Unknown(), 59 | } 60 | ) 61 | -------------------------------------------------------------------------------- /anyschema/parsers/attrs.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | 7 | from anyschema._dependencies import is_attrs_class 8 | from anyschema.parsers._base import ParserStep 9 | 10 | if TYPE_CHECKING: 11 | from narwhals.dtypes import DType 12 | 13 | from anyschema.typing import AttrsClassType, FieldConstraints, FieldMetadata, FieldType 14 | 15 | 16 | __all__ = ("AttrsTypeStep",) 17 | 18 | 19 | class AttrsTypeStep(ParserStep): 20 | """Parser for attrs-specific types. 21 | 22 | Handles: 23 | 24 | - attrs classes (Struct types) 25 | 26 | Warning: 27 | It requires [attrs](https://www.attrs.org/) to be installed. 
28 | """ 29 | 30 | def parse( 31 | self, 32 | input_type: FieldType, 33 | constraints: FieldConstraints, # noqa: ARG002 34 | metadata: FieldMetadata, # noqa: ARG002 35 | ) -> DType | None: 36 | """Parse attrs-specific types into Narwhals dtypes. 37 | 38 | Arguments: 39 | input_type: The type to parse. 40 | constraints: Constraints associated with the type. 41 | metadata: Custom metadata dictionary. 42 | 43 | Returns: 44 | A Narwhals DType if this parser can handle the type, None otherwise. 45 | """ 46 | if is_attrs_class(input_type): 47 | return self._parse_attrs_class(input_type) 48 | 49 | # This parser doesn't handle this type 50 | return None 51 | 52 | def _parse_attrs_class(self, attrs_class: AttrsClassType) -> DType: 53 | """Parse an attrs class into a Struct type. 54 | 55 | Arguments: 56 | attrs_class: The attrs class. 57 | 58 | Returns: 59 | A Narwhals Struct dtype. 60 | """ 61 | from anyschema.adapters import attrs_adapter 62 | 63 | return nw.Struct( 64 | [ 65 | nw.Field( 66 | name=field_name, 67 | dtype=self.pipeline.parse(field_type, field_constraints, field_metadata, strict=True), 68 | ) 69 | for field_name, field_type, field_constraints, field_metadata in attrs_adapter(attrs_class) 70 | ] 71 | ) 72 | -------------------------------------------------------------------------------- /tests/adapters/attrs_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import date, datetime 4 | from typing import TYPE_CHECKING 5 | 6 | import pytest 7 | 8 | from anyschema.adapters import attrs_adapter 9 | from tests.conftest import ( 10 | AttrsBookWithMetadata, 11 | AttrsDerived, 12 | AttrsEventWithTimeMetadata, 13 | AttrsPerson, 14 | AttrsPersonFrozen, 15 | create_missing_decorator_test_case, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from anyschema.typing import AttrsClassType, FieldSpec 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "spec", 24 | [ 25 | AttrsPerson, 26 | AttrsPersonFrozen, 27 | ], 28 | ) 29 | def test_attrs_adapter(spec: AttrsClassType) -> None: 30 | result = list(attrs_adapter(spec)) 31 | assert ("name", str, (), {}) in result 32 | assert ("age", int, (), {}) in result 33 | assert ("date_of_birth", date, (), {}) in result 34 | 35 | 36 | def test_attrs_adapter_with_metadata() -> None: 37 | result = list(attrs_adapter(AttrsBookWithMetadata)) # ty: ignore[invalid-argument-type] 38 | assert result == [("title", str, (), {"description": "Book title"}), ("author", str, (), {"max_length": 100})] 39 | 40 | 41 | def test_attrs_adapter_with_inheritance() -> None: 42 | result = list(attrs_adapter(AttrsDerived)) # ty: ignore[invalid-argument-type] 43 | assert result == [("foo", str, (), {}), ("bar", int, (), {}), ("baz", float, (), {})] 44 | 45 | 46 | def test_attrs_adapter_missing_decorator_raises() -> None: 47 | child_cls, expected_msg = create_missing_decorator_test_case() 48 | with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")): 49 | list(attrs_adapter(child_cls)) # ty: ignore[invalid-argument-type] 50 | 51 | 52 | def test_attrs_adapter_with_time_metadata() -> None: 53 | result = tuple(attrs_adapter(AttrsEventWithTimeMetadata)) # ty: ignore[invalid-argument-type] 54 | expected: tuple[FieldSpec, ...] 
= ( 55 | ("name", str, (), {}), 56 | ("created_at", datetime, (), {}), 57 | ("scheduled_at", datetime, (), {"anyschema": {"time_zone": "UTC"}}), 58 | ("started_at", datetime, (), {"anyschema": {"time_unit": "ms"}}), 59 | ("completed_at", datetime, (), {"anyschema": {"time_zone": "Europe/Berlin", "time_unit": "ns"}}), 60 | ) 61 | 62 | assert result == expected 63 | -------------------------------------------------------------------------------- /docs/javascript/extra.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copied from Ruff https://github.com/astral-sh/ruff/blob/924741cb11a68ed037899f9db1bea6969c48385e/docs/js/extra.js 3 | * 4 | * @author Astral Software Inc. 5 | * @license MIT 6 | */ 7 | 8 | 9 | function cleanupClipboardText(targetSelector) { 10 | const targetElement = document.querySelector(targetSelector); 11 | 12 | // exclude "Generic Prompt" and "Generic Output" spans from copy 13 | const excludedClasses = ["gp", "go"]; 14 | 15 | const clipboardText = Array.from(targetElement.childNodes) 16 | .filter( 17 | (node) => 18 | !excludedClasses.some((className) => 19 | node?.classList?.contains(className), 20 | ), 21 | ) 22 | .map((node) => node.textContent) 23 | .filter((s) => s !== ""); 24 | return clipboardText.join("").trim(); 25 | } 26 | 27 | // Sets copy text to attributes lazily using an Intersection Observer. 28 | function setCopyText() { 29 | // The `data-clipboard-text` attribute allows for customized content in the copy 30 | // See: https://www.npmjs.com/package/clipboard#copy-text-from-attribute 31 | const attr = "clipboardText"; 32 | // all "copy" buttons whose target selector is a element 33 | const elements = document.querySelectorAll( 34 | 'button[data-clipboard-target$="code"]', 35 | ); 36 | 37 | if (elements.length === 0) { 38 | return; 39 | } 40 | 41 | const observer = new IntersectionObserver((entries) => { 42 | entries.forEach((entry) => { 43 | // target in the viewport that have not been patched 44 | if ( 45 | entry.intersectionRatio > 0 && 46 | entry.target.dataset[attr] === undefined 47 | ) { 48 | entry.target.dataset[attr] = cleanupClipboardText( 49 | entry.target.dataset.clipboardTarget, 50 | ); 51 | } 52 | }); 53 | }); 54 | 55 | elements.forEach((elt) => { 56 | observer.observe(elt); 57 | }); 58 | } 59 | 60 | // Using the document$ observable is particularly important if you are using instant loading since 61 | // it will not result in a page refresh in the browser 62 | // See `How to integrate with third-party JavaScript libraries` guideline: 63 | // https://squidfunk.github.io/mkdocs-material/customization/?h=javascript#additional-javascript 64 | document$.subscribe(function () { 65 | setCopyText(); 66 | }); 67 | -------------------------------------------------------------------------------- /tests/adapters/pydantic_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime 4 | from typing import TYPE_CHECKING, Annotated 5 | 6 | import pytest 7 | from annotated_types import Ge 8 | from pydantic import BaseModel, Field 9 | 10 | from anyschema.adapters import pydantic_adapter 11 | from tests.conftest import PydanticEventWithTimeMetadata 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.typing import FieldMetadata, FieldSpec 15 | 16 | EMPTY_METADATA: FieldMetadata = {} # Type hinted empty metadata dict 17 | 18 | 19 | class SimpleModel(BaseModel): 20 | name: str 21 | age: int 22 | 23 | 24 | class 
ModelWithConstraints(BaseModel): 25 | name: str 26 | age: Annotated[int, Field(ge=0)] 27 | 28 | 29 | class ModelWithDescriptions(BaseModel): 30 | id: int = Field(description="ID") 31 | name: str = Field(description="Product name", json_schema_extra={"format": "name"}) 32 | tags: list[str] = Field(description="tags", json_schema_extra={"anyschema": {"description": "Override"}}) 33 | 34 | 35 | @pytest.mark.parametrize( 36 | ("spec", "expected"), 37 | [ 38 | (SimpleModel, (("name", str, (), {}), ("age", int, (), {}))), 39 | (ModelWithConstraints, (("name", str, (), {}), ("age", int, (Ge(ge=0),), {}))), 40 | ( 41 | ModelWithDescriptions, 42 | ( 43 | ("id", int, (), {"anyschema": {"description": "ID"}}), 44 | ("name", str, (), {"format": "name", "anyschema": {"description": "Product name"}}), 45 | ("tags", list[str], (), {"anyschema": {"description": "Override"}}), 46 | ), 47 | ), 48 | ], 49 | ) 50 | def test_pydantic_adapter(spec: type[BaseModel], expected: tuple[FieldSpec, ...]) -> None: 51 | result = tuple(pydantic_adapter(spec)) 52 | assert result == expected 53 | 54 | 55 | def test_pydantic_adapter_with_json_schema_extra() -> None: 56 | result = tuple(pydantic_adapter(PydanticEventWithTimeMetadata)) 57 | 58 | expected: tuple[FieldSpec, ...] = ( 59 | ("name", str, (), EMPTY_METADATA), 60 | ("created_at", datetime, (), EMPTY_METADATA), 61 | ("scheduled_at", datetime, (), {"anyschema": {"time_zone": "UTC"}}), 62 | ("started_at", datetime, (), {"anyschema": {"time_unit": "ms"}}), 63 | ("completed_at", datetime, (), {"anyschema": {"time_zone": "Europe/Berlin", "time_unit": "ns"}}), 64 | ) 65 | 66 | assert result == expected 67 | -------------------------------------------------------------------------------- /tests/parsers/parsers_dependency_mock_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from unittest.mock import patch 4 | 5 | import pytest 6 | 7 | from anyschema.parsers import ParserPipeline 8 | 9 | 10 | @pytest.mark.parametrize( 11 | ("dependency_flag", "excluded_step"), 12 | [ 13 | ("ANNOTATED_TYPES_AVAILABLE", "AnnotatedTypesStep"), 14 | ("ATTRS_AVAILABLE", "AttrsTypeStep"), 15 | ("PYDANTIC_AVAILABLE", "PydanticTypeStep"), 16 | ("SQLALCHEMY_AVAILABLE", "SQLAlchemyTypeStep"), 17 | ], 18 | ) 19 | def test_auto_pipeline_without_optional_dependency(dependency_flag: str, excluded_step: str) -> None: 20 | """Test that optional parser steps are excluded when their dependency is unavailable.""" 21 | with patch(target=f"anyschema.parsers._pipeline.{dependency_flag}", new=False): 22 | pipeline = ParserPipeline("auto") 23 | step_names = [str(step) for step in pipeline.steps] 24 | 25 | # The corresponding step should NOT be in the pipeline 26 | assert excluded_step not in step_names 27 | 28 | # Core steps should still be there 29 | assert "ForwardRefStep" in step_names 30 | assert "PyTypeStep" in step_names 31 | 32 | 33 | def test_auto_pipeline_without_all_optional_deps() -> None: 34 | """Test pipeline with only core dependencies.""" 35 | patches = ( 36 | patch(target="anyschema.parsers._pipeline.ANNOTATED_TYPES_AVAILABLE", new=False), 37 | patch(target="anyschema.parsers._pipeline.ATTRS_AVAILABLE", new=False), 38 | patch(target="anyschema.parsers._pipeline.PYDANTIC_AVAILABLE", new=False), 39 | patch(target="anyschema.parsers._pipeline.SQLALCHEMY_AVAILABLE", new=False), 40 | ) 41 | 42 | for p in patches: 43 | p.start() 44 | 45 | try: 46 | pipeline = ParserPipeline("auto") 47 | step_names = [str(step) for step 
in pipeline.steps] 48 | 49 | # Only core steps should be present 50 | assert "ForwardRefStep" in step_names 51 | assert "UnionTypeStep" in step_names 52 | assert "AnnotatedStep" in step_names 53 | assert "PyTypeStep" in step_names 54 | 55 | # Optional steps should NOT be present 56 | assert "AnnotatedTypesStep" not in step_names 57 | assert "AttrsTypeStep" not in step_names 58 | assert "PydanticTypeStep" not in step_names 59 | assert "SQLAlchemyTypeStep" not in step_names 60 | finally: 61 | for p in patches: 62 | p.stop() 63 | -------------------------------------------------------------------------------- /docs/api-reference/parsers.md: -------------------------------------------------------------------------------- 1 | # Parsers 2 | 3 | ## Pipeline 4 | 5 | A parser pipeline is a sequence of [parser steps](#parser-steps) that process type annotations to produce Narwhals 6 | dtypes. 7 | 8 | ::: anyschema.parsers.ParserPipeline 9 | handler: python 10 | options: 11 | show_root_heading: true 12 | show_source: false 13 | heading_level: 3 14 | 15 | ::: anyschema.parsers.make_pipeline 16 | handler: python 17 | options: 18 | show_root_heading: true 19 | show_source: false 20 | heading_level: 3 21 | 22 | ## Parser Steps 23 | 24 | Parser steps are the building blocks of the type parsing pipeline. Each step handles specific type patterns. 25 | 26 | For more details on how these work together, see the [parser steps](../architecture.md#parser-steps) 27 | section in the Architecture guide. 28 | 29 | ::: anyschema.parsers.ParserStep 30 | handler: python 31 | options: 32 | show_root_heading: true 33 | show_source: false 34 | heading_level: 3 35 | 36 | --- 37 | 38 | The following steps are built-in and come dependency-free. 39 | 40 | ::: anyschema.parsers.ForwardRefStep 41 | handler: python 42 | options: 43 | show_root_heading: true 44 | show_source: false 45 | heading_level: 3 46 | 47 | ::: anyschema.parsers.UnionTypeStep 48 | handler: python 49 | options: 50 | show_root_heading: true 51 | show_source: false 52 | heading_level: 3 53 | 54 | ::: anyschema.parsers.AnnotatedStep 55 | handler: python 56 | options: 57 | show_root_heading: true 58 | show_source: false 59 | heading_level: 3 60 | 61 | ::: anyschema.parsers.PyTypeStep 62 | handler: python 63 | options: 64 | show_root_heading: true 65 | show_source: false 66 | heading_level: 3 67 | 68 | --- 69 | 70 | ::: anyschema.parsers.annotated_types.AnnotatedTypesStep 71 | handler: python 72 | options: 73 | show_root_heading: true 74 | show_source: false 75 | heading_level: 3 76 | 77 | ::: anyschema.parsers.attrs.AttrsTypeStep 78 | handler: python 79 | options: 80 | show_root_heading: true 81 | show_source: false 82 | heading_level: 3 83 | 84 | ::: anyschema.parsers.pydantic.PydanticTypeStep 85 | handler: python 86 | options: 87 | show_root_heading: true 88 | show_source: false 89 | heading_level: 3 90 | 91 | ::: anyschema.parsers.sqlalchemy.SQLAlchemyTypeStep 92 | handler: python 93 | options: 94 | show_root_heading: true 95 | show_source: false 96 | heading_level: 3 97 | -------------------------------------------------------------------------------- /tests/parsers/pydantic_extra_types_test.py: -------------------------------------------------------------------------------- 1 | """Tests using actual pydantic-extra-types to verify derived type handling. 2 | 3 | This module tests that PyTypeStep works with real types from the pydantic-extra-types library. 4 | Note that some pydantic-extra-types require additional dependencies (like pycountry, phonenumbers). 
5 | 6 | References: 7 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_country/ 8 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_phone_numbers/ 9 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_coordinate/ 10 | """ 11 | 12 | from __future__ import annotations 13 | 14 | from typing import Any 15 | 16 | import narwhals as nw 17 | import pytest 18 | from pydantic_extra_types.coordinate import Latitude, Longitude 19 | from pydantic_extra_types.country import ( 20 | CountryAlpha2, 21 | CountryAlpha3, 22 | CountryNumericCode, 23 | CountryShortName, 24 | ) 25 | from pydantic_extra_types.phone_numbers import PhoneNumber 26 | 27 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 28 | 29 | 30 | @pytest.fixture(scope="module") 31 | def py_type_parser() -> PyTypeStep: 32 | """Create a PyTypeStep instance with pipeline set.""" 33 | union_parser = UnionTypeStep() 34 | py_parser = PyTypeStep() 35 | _ = ParserPipeline([union_parser, py_parser]) 36 | return py_parser 37 | 38 | 39 | @pytest.mark.parametrize( 40 | ("input_type", "expected"), 41 | [ 42 | # coordinate 43 | (Latitude, nw.Float64()), 44 | (Longitude, nw.Float64()), 45 | (list[Latitude], nw.List(nw.Float64())), 46 | (list[list[Latitude]], nw.List(nw.List(nw.Float64()))), 47 | (tuple[Longitude, Longitude], nw.Array(nw.Float64(), shape=2)), 48 | (tuple[Latitude, Latitude, Latitude], nw.Array(nw.Float64(), shape=3)), 49 | # country 50 | (CountryAlpha2, nw.String()), 51 | (CountryAlpha3, nw.String()), 52 | (CountryNumericCode, nw.String()), 53 | (CountryShortName, nw.String()), 54 | (list[CountryAlpha2], nw.List(nw.String())), 55 | (list[list[CountryAlpha2]], nw.List(nw.List(nw.String()))), 56 | # phone number 57 | (PhoneNumber, nw.String()), 58 | (list[PhoneNumber], nw.List(nw.String())), 59 | (tuple[PhoneNumber, PhoneNumber, PhoneNumber], nw.Array(nw.String(), shape=3)), 60 | ], 61 | ) 62 | def test_pydantic_extra_types(py_type_parser: PyTypeStep, input_type: Any, expected: nw.dtypes.DType) -> None: 63 | result = py_type_parser.parse(input_type, (), {}) 64 | assert result == expected 65 | -------------------------------------------------------------------------------- /tests/parsers/_union_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from types import NoneType 4 | from typing import Any, Optional, Union 5 | 6 | import narwhals as nw 7 | import pytest 8 | 9 | from anyschema.exceptions import UnsupportedDTypeError 10 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def union_parser() -> UnionTypeStep: 15 | """Create a UnionTypeStep instance with pipeline set.""" 16 | union_parser = UnionTypeStep() 17 | py_parser = PyTypeStep() 18 | _ = ParserPipeline([union_parser, py_parser]) 19 | return union_parser 20 | 21 | 22 | @pytest.mark.parametrize( 23 | ("input_type", "expected"), 24 | [ 25 | (Optional[int], nw.Int64()), 26 | (Optional[str], nw.String()), 27 | (Optional[float], nw.Float64()), 28 | (Optional[bool], nw.Boolean()), 29 | (int | None, nw.Int64()), 30 | (str | None, nw.String()), 31 | (None | int, nw.Int64()), 32 | (None | str, nw.String()), 33 | (Union[int, None], nw.Int64()), 34 | (Union[None, str], nw.String()), 35 | (Optional[list[int]], nw.List(nw.Int64())), 36 | (list[str] | None, nw.List(nw.String())), 37 | (list[str | None] | None, nw.List(nw.String())), 38 | ], 39 | ) 40 | def 
test_parse_union_types(union_parser: UnionTypeStep, input_type: Any, expected: nw.dtypes.DType) -> None: 41 | result = union_parser.parse(input_type, (), {}) 42 | assert result == expected 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "input_type", 47 | [ 48 | int, 49 | str, 50 | list[int], 51 | NoneType, 52 | ], 53 | ) 54 | def test_parse_non_union_types(union_parser: UnionTypeStep, input_type: Any) -> None: 55 | result = union_parser.parse(input_type, (), {}) 56 | assert result is None 57 | 58 | 59 | @pytest.mark.parametrize( 60 | ("input_type", "error_msg"), 61 | [ 62 | (Union[int, str, float], "Union with more than two types is not supported."), 63 | (int | str | float, "Union with more than two types is not supported."), 64 | (Union[int, str], "Union with mixed types is not supported."), 65 | (int | str, "Union with mixed types is not supported."), 66 | (float | bool, "Union with mixed types is not supported."), 67 | ], 68 | ) 69 | def test_parse_unsupported_unions_parametrized(union_parser: UnionTypeStep, input_type: Any, error_msg: str) -> None: 70 | with pytest.raises(UnsupportedDTypeError, match=error_msg): 71 | union_parser.parse(input_type, (), {}) 72 |
-------------------------------------------------------------------------------- /tests/pydantic/float_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated, Optional 4 | 5 | import hypothesis.strategies as st 6 | import narwhals as nw 7 | from annotated_types import Interval 8 | from hypothesis import assume, given 9 | from pydantic import BaseModel, FiniteFloat, NegativeFloat, NonNegativeFloat, NonPositiveFloat, PositiveFloat 10 | 11 | from tests.pydantic.utils import model_to_nw_schema 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.parsers import ParserPipeline 15 | 16 | 17 | @given(lb=st.floats(), ub=st.floats()) 18 | def test_parse_float(auto_pipeline: ParserPipeline, lb: float, ub: float) -> None: 19 | assume(lb < ub) 20 | 21 | class FloatModel(BaseModel): 22 | # python float type 23 | py_float: float 24 | py_float_optional: float | None 25 | py_float_or_none: float | None 26 | none_or_py_float: None | float 27 | 28 | # pydantic NonNegativeFloat type 29 | non_negative: NonNegativeFloat 30 | non_negative_optional: NonNegativeFloat | None 31 | non_negative_or_none: NonNegativeFloat | None 32 | none_or_non_negative: None | NonNegativeFloat 33 | 34 | # pydantic NonPositiveFloat type 35 | non_positive: NonPositiveFloat 36 | non_positive_optional: NonPositiveFloat | None 37 | non_positive_or_none: NonPositiveFloat | None 38 | none_or_non_positive: None | NonPositiveFloat 39 | 40 | # pydantic PositiveFloat type 41 | positive: PositiveFloat 42 | positive_optional: PositiveFloat | None 43 | positive_or_none: PositiveFloat | None 44 | none_or_positive: None | PositiveFloat 45 | 46 | # pydantic NegativeFloat type 47 | negative: NegativeFloat 48 | negative_optional: NegativeFloat | None 49 | negative_or_none: NegativeFloat | None 50 | none_or_negative: None | NegativeFloat 51 | 52 | # pydantic FiniteFloat type 53 | finite: FiniteFloat 54 | finite_optional: FiniteFloat | None 55 | finite_or_none: FiniteFloat | None 56 | none_or_finite: None | FiniteFloat 57 | 58 | # pydantic annotated float with constraints 59 | con_float: Annotated[float, Interval(gt=lb, lt=ub)] 60 | con_float_optional: Optional[Annotated[float, Interval(ge=lb, lt=ub)]] 61 | con_float_or_none: Annotated[float, Interval(gt=lb, le=ub)] | None
62 | non_or_con_float: None | Annotated[float, Interval(ge=lb, le=ub)] 63 | 64 | schema = model_to_nw_schema(FloatModel, pipeline=auto_pipeline) 65 | 66 | assert all(value == nw.Float64() for value in schema.values()) 67 | -------------------------------------------------------------------------------- /tests/spec_to_schema/attrs_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import ( 10 | AttrsAddressWithPydantic, 11 | AttrsDerived, 12 | AttrsEventWithTimeMetadata, 13 | AttrsPerson, 14 | AttrsPersonWithLiterals, 15 | create_missing_decorator_test_case, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from anyschema.typing import AttrsClassType 20 | 21 | 22 | @pytest.mark.parametrize( 23 | ("spec", "expected_schema"), 24 | [ 25 | ( 26 | AttrsPerson, 27 | { 28 | "name": nw.String(), 29 | "age": nw.Int64(), 30 | "date_of_birth": nw.Date(), 31 | "is_active": nw.Boolean(), 32 | "classes": nw.List(nw.String()), 33 | "grades": nw.List(nw.Float64()), 34 | }, 35 | ), 36 | ( 37 | AttrsPersonWithLiterals, 38 | { 39 | "username": nw.String(), 40 | "role": nw.Enum(["admin", "user", "guest"]), 41 | "status": nw.Enum(["active", "inactive", "pending"]), 42 | }, 43 | ), 44 | ( 45 | AttrsAddressWithPydantic, 46 | { 47 | "street": nw.String(), 48 | "city": nw.String(), 49 | "zipcode": nw.Struct([nw.Field("zipcode", nw.UInt64())]), 50 | }, 51 | ), 52 | ( 53 | AttrsDerived, 54 | { 55 | "foo": nw.String(), 56 | "bar": nw.Int64(), 57 | "baz": nw.Float64(), 58 | }, 59 | ), 60 | ( 61 | AttrsEventWithTimeMetadata, 62 | { 63 | "name": nw.String(), 64 | "created_at": nw.Datetime("us"), 65 | "scheduled_at": nw.Datetime("us", time_zone="UTC"), 66 | "started_at": nw.Datetime("ms"), 67 | "completed_at": nw.Datetime("ns", time_zone="Europe/Berlin"), 68 | }, 69 | ), 70 | ], 71 | ) 72 | def test_attrs_class(spec: AttrsClassType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 73 | schema = AnySchema(spec=spec) 74 | nw_schema = schema._nw_schema 75 | assert nw_schema == nw.Schema(expected_schema) 76 | 77 | 78 | def test_attrs_class_missing_decorator_raises() -> None: 79 | child_cls, expected_msg = create_missing_decorator_test_case() 80 | with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")): 81 | AnySchema(spec=child_cls) 82 | -------------------------------------------------------------------------------- /tests/parsers/_builtin_derived_types_test.py: -------------------------------------------------------------------------------- 1 | """Tests for derived types similar to pydantic-extra-types. 2 | 3 | This module tests that PyTypeStep correctly handles types that inherit 4 | from basic Python types, similar to those in pydantic-extra-types library. 
5 | 6 | The derived types mimic pydantic-extra-types 7 | 8 | References: 9 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_country/ 10 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_phone_numbers/ 11 | - https://docs.pydantic.dev/latest/api/pydantic_extra_types_routing_numbers/ 12 | """ 13 | 14 | from __future__ import annotations 15 | 16 | from datetime import date, datetime 17 | from decimal import Decimal 18 | from enum import Enum 19 | from typing import Any 20 | 21 | import narwhals as nw 22 | import pytest 23 | 24 | from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep 25 | 26 | 27 | class EmailStr(str): 28 | __slots__ = () 29 | 30 | 31 | class PositiveInt(int): ... 32 | 33 | 34 | class PositiveFloat(float): ... 35 | 36 | 37 | class SecretBytes(bytes): ... 38 | 39 | 40 | class PastDatetime(datetime): ... 41 | 42 | 43 | class FutureDate(date): ... 44 | 45 | 46 | class NonNegativeDecimal(Decimal): ... 47 | 48 | 49 | class HttpMethod(str, Enum): 50 | """HTTP method enum.""" 51 | 52 | GET = "GET" 53 | POST = "POST" 54 | PUT = "PUT" 55 | DELETE = "DELETE" 56 | 57 | 58 | class Priority(int, Enum): 59 | """Priority levels.""" 60 | 61 | LOW = 1 62 | MEDIUM = 2 63 | HIGH = 3 64 | 65 | 66 | @pytest.fixture(scope="module") 67 | def parser_pipeline() -> ParserPipeline: 68 | """Create a parser pipeline with UnionTypeStep and PyTypeStep.""" 69 | union_parser = UnionTypeStep() 70 | py_parser = PyTypeStep() 71 | return ParserPipeline([union_parser, py_parser]) 72 | 73 | 74 | @pytest.mark.parametrize( 75 | ("input_type", "expected"), 76 | [ 77 | (EmailStr, nw.String()), 78 | (EmailStr | None, nw.String()), 79 | (list[EmailStr], nw.List(nw.String())), 80 | (PositiveInt, nw.Int64()), 81 | (list[PositiveInt], nw.List(nw.Int64())), 82 | (PositiveFloat, nw.Float64()), 83 | (tuple[PositiveFloat, PositiveFloat], nw.Array(nw.Float64(), shape=2)), 84 | (SecretBytes, nw.Binary()), 85 | (PastDatetime, nw.Datetime("us")), 86 | (FutureDate, nw.Date), 87 | (NonNegativeDecimal, nw.Decimal()), 88 | (HttpMethod, nw.Enum(HttpMethod)), 89 | (Priority, nw.Enum(Priority)), 90 | ], 91 | ) 92 | def test_derived_types(parser_pipeline: ParserPipeline, input_type: Any, expected: nw.dtypes.DType) -> None: 93 | result = parser_pipeline.parse(input_type, constraints=(), metadata={}) 94 | assert result == expected 95 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yaml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: Report a bug or unexpected behavior in anyschema 3 | title: "[Bug]: " 4 | labels: ["bug", "needs-triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this bug report! 10 | Please provide as much detail as possible to help us investigate and fix the issue. 11 | 12 | - type: textarea 13 | id: description 14 | attributes: 15 | label: Bug Description 16 | description: A clear and concise description of what the bug is. 17 | placeholder: What went wrong? 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: reproduction 23 | attributes: 24 | label: Steps to Reproduce 25 | description: Provide a minimal code example that demonstrates the bug. 26 | placeholder: | 27 | ```python 28 | from anyschema import AnySchema 29 | 30 | # What code triggers the bug? 
31 | ``` 32 | render: python 33 | validations: 34 | required: true 35 | 36 | - type: textarea 37 | id: expected 38 | attributes: 39 | label: Expected Behavior 40 | description: What did you expect to happen? 41 | placeholder: Describe the expected behavior 42 | validations: 43 | required: true 44 | 45 | - type: textarea 46 | id: actual 47 | attributes: 48 | label: Actual Behavior 49 | description: What actually happened? Include any error messages or stack traces. 50 | placeholder: Paste error messages or describe what happened instead 51 | validations: 52 | required: true 53 | 54 | - type: textarea 55 | id: version 56 | attributes: 57 | label: Please run `anyschema.show_versions()` and enter the output below 58 | description: | 59 | What library versions are you using? 60 | ```python 61 | import anyschema 62 | anyschema.show_versions() 63 | ``` 64 | validations: 65 | required: true 66 | 67 | - type: textarea 68 | id: extra 69 | attributes: 70 | label: Any Additional Information 71 | description: Add any other context, screenshots, or information about the problem here. 72 | placeholder: Optional additional information 73 | 74 | - type: checkboxes 75 | id: terms 76 | attributes: 77 | label: Checklist 78 | description: Please confirm the following 79 | options: 80 | - label: I have searched the existing issues to make sure this bug hasn't been reported yet 81 | required: true 82 | - label: I have provided a minimal reproducible example 83 | required: true 84 | - label: I am willing to submit a PR to fix this issue (optional) 85 | -------------------------------------------------------------------------------- /tests/parsers/_annotated_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Annotated, Any 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema.parsers import AnnotatedStep, ParserPipeline, PyTypeStep 9 | 10 | 11 | @pytest.fixture(scope="module") 12 | def annotated_parser() -> AnnotatedStep: 13 | """Create an AnnotatedStep instance with pipeline set.""" 14 | annotated_parser = AnnotatedStep() 15 | py_parser = PyTypeStep() 16 | _ = ParserPipeline([annotated_parser, py_parser]) 17 | return annotated_parser 18 | 19 | 20 | @pytest.mark.parametrize( 21 | ("input_type", "expected"), 22 | [ 23 | (Annotated[int, "meta"], nw.Int64()), 24 | (Annotated[str, "meta"], nw.String()), 25 | (Annotated[float, "meta"], nw.Float64()), 26 | (Annotated[bool, "meta"], nw.Boolean()), 27 | (Annotated[list[int], "meta"], nw.List(nw.Int64())), 28 | (Annotated[list[str], "meta"], nw.List(nw.String())), 29 | (Annotated[tuple[int, ...], "meta"], nw.List(nw.Int64())), 30 | (Annotated[tuple[str, str, str], "meta"], nw.Array(nw.String(), shape=3)), 31 | ], 32 | ) 33 | def test_parse_annotated(annotated_parser: AnnotatedStep, input_type: type, expected: nw.dtypes.DType) -> None: 34 | result = annotated_parser.parse(input_type, (), {}) 35 | assert result == expected 36 | 37 | 38 | @pytest.mark.parametrize( 39 | "metadata_items", 40 | [ 41 | ("meta1",), 42 | ("meta1", "meta2"), 43 | ("meta1", "meta2", "meta3"), 44 | ({"key": "value"},), 45 | (["item1", "item2"],), 46 | (1, 2, 3), 47 | ], 48 | ) 49 | def test_parse_annotated_various_metadata(annotated_parser: AnnotatedStep, metadata_items: tuple[Any, ...]) -> None: 50 | """Parametrized test for Annotated with various metadata.""" 51 | input_type = Annotated[int, metadata_items] 52 | result = annotated_parser.parse(input_type, (), {}) 53 | assert 
result == nw.Int64() 54 | 55 | 56 | @pytest.mark.parametrize("input_type", [int, str, list[int], tuple[str, ...]]) 57 | def test_parse_non_annotated(annotated_parser: AnnotatedStep, input_type: type) -> None: 58 | result = annotated_parser.parse(input_type, (), {}) 59 | assert result is None 60 | 61 | 62 | def test_parse_annotated_with_class_metadata(annotated_parser: AnnotatedStep) -> None: 63 | class CustomMetadata: 64 | def __init__(self, value: str) -> None: 65 | self.value = value 66 | 67 | result = annotated_parser.parse(Annotated[int, CustomMetadata("test")], (), {}) 68 | assert result == nw.Int64() 69 | 70 | 71 | def test_parse_annotated_with_callable_metadata(annotated_parser: AnnotatedStep) -> None: 72 | result = annotated_parser.parse(Annotated[int, lambda x: x > 0], (), {}) 73 | assert result == nw.Int64() 74 | -------------------------------------------------------------------------------- /tests/pydantic/literal_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Literal 4 | 5 | import narwhals as nw 6 | from pydantic import BaseModel 7 | 8 | from tests.pydantic.utils import model_to_nw_schema 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.parsers import ParserPipeline 12 | 13 | 14 | def test_parse_string_literal(auto_pipeline: ParserPipeline) -> None: 15 | class UserModel(BaseModel): 16 | username: str 17 | role: Literal["admin", "user", "guest"] 18 | status: Literal["active", "inactive", "pending"] 19 | 20 | schema = model_to_nw_schema(UserModel, pipeline=auto_pipeline) 21 | 22 | assert schema["username"] == nw.String() 23 | assert schema["role"] == nw.Enum(["admin", "user", "guest"]) 24 | assert schema["status"] == nw.Enum(["active", "inactive", "pending"]) 25 | 26 | 27 | def test_parse_mixed_literal_types(auto_pipeline: ParserPipeline) -> None: 28 | class ConfigModel(BaseModel): 29 | name: str 30 | log_level: Literal["debug", "info", "warning", "error"] 31 | max_retries: Literal[1, 2, 3, 5, 10] 32 | enabled: Literal[True, False] 33 | 34 | schema = model_to_nw_schema(ConfigModel, pipeline=auto_pipeline) 35 | 36 | assert schema["name"] == nw.String() 37 | assert schema["log_level"] == nw.Enum(["debug", "info", "warning", "error"]) 38 | assert schema["max_retries"] == nw.Enum([1, 2, 3, 5, 10]) # type: ignore[list-item] 39 | assert schema["enabled"] == nw.Enum([True, False]) # type: ignore[list-item] 40 | 41 | 42 | def test_parse_literal_with_optional(auto_pipeline: ParserPipeline) -> None: 43 | class ProductModel(BaseModel): 44 | name: str 45 | category: Literal["electronics", "clothing", "food"] | None 46 | priority: Literal["high", "medium", "low"] 47 | 48 | schema = model_to_nw_schema(ProductModel, pipeline=auto_pipeline) 49 | 50 | assert schema["name"] == nw.String() 51 | assert schema["category"] == nw.Enum(["electronics", "clothing", "food"]) 52 | assert schema["priority"] == nw.Enum(["high", "medium", "low"]) 53 | 54 | 55 | def test_parse_nested_model_with_literal(auto_pipeline: ParserPipeline) -> None: 56 | class AddressModel(BaseModel): 57 | street: str 58 | country: Literal["US", "UK", "CA", "AU"] 59 | 60 | class PersonModel(BaseModel): 61 | name: str 62 | role: Literal["employee", "contractor", "intern"] 63 | address: AddressModel 64 | 65 | schema = model_to_nw_schema(PersonModel, pipeline=auto_pipeline) 66 | 67 | assert schema["name"] == nw.String() 68 | assert schema["role"] == nw.Enum(["employee", "contractor", "intern"]) 69 | assert 
schema["address"] == nw.Struct( 70 | [ 71 | nw.Field("street", nw.String()), 72 | nw.Field("country", nw.Enum(["US", "UK", "CA", "AU"])), 73 | ] 74 | ) 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # anyschema: From Type Specifications to Dataframe Schemas 2 | 3 | > [!CAUTION] 4 | > `anyschema` is still in early development and possibly unstable. 5 | 6 | --- 7 | 8 | [Documentation](https://fbruzzesi.github.io/anyschema/) | [Source Code](https://github.com/fbruzzesi/anyschema/) | [Issue Tracker](https://github.com/fbruzzesi/anyschema/issues) 9 | 10 | --- 11 | 12 | `anyschema` allows you to convert from type specifications (such as attrs classes, Pydantic models, SQLAlchemy tables, 13 | TypedDict, dataclasses, or plain Python dicts) to _any_ dataframe schema 14 | (by _"any"_ we intend those supported by Narwhals). 15 | 16 | Let's see how it works in practice with an example: 17 | 18 | ```python 19 | from anyschema import AnySchema 20 | from pydantic import BaseModel 21 | from pydantic import PositiveInt 22 | 23 | 24 | class Student(BaseModel): 25 | name: str 26 | age: PositiveInt 27 | classes: list[str] 28 | 29 | 30 | schema = AnySchema(spec=Student) 31 | 32 | # Convert to pyarrow schema 33 | pa_schema = schema.to_arrow() 34 | 35 | type(pa_schema) 36 | # pyarrow.lib.Schema 37 | 38 | pa_schema 39 | # name: string 40 | # age: uint64 41 | # classes: list 42 | # child 0, item: string 43 | 44 | pl_schema = schema.to_polars() 45 | 46 | type(pl_schema) 47 | # polars.schema.Schema 48 | 49 | pl_schema 50 | # Schema([('name', String), ('age', UInt64), ('classes', List(String))]) 51 | ``` 52 | 53 | To read more about `anyschema` functionalities and features consider checking out the 54 | [documentation](https://fbruzzesi.github.io/anyschema/) website. 55 | 56 | ## Installation 57 | 58 | `anyschema` is available on [pypi](https://pypi.org/project/anyschema/), and it can be installed directly via 59 | any package manager. For instance: 60 | 61 | ```bash 62 | uv pip install "anyschema[attrs]" 63 | uv pip install "anyschema[pydantic]" 64 | uv pip install "anyschema[sqlalchemy]" 65 | ``` 66 | 67 | To allow interoperability with attrs classes, Pydantic models or SQLAlchemy tables. 68 | 69 | ## When to use `anyschema` 70 | 71 | `anyschema` is designed for scenarios where type specifications (e.g., Pydantic models, SQLAlchemy tables) serve as a 72 | single source of truth for both validation and dataframe schema generation. 73 | 74 | The typical use cases are: Data pipelines, database-to-dataframe workflows, API to database workflows, schema 75 | generation, type-safe data processing. 76 | 77 | ## Why `anyschema`? 78 | 79 | The project was inspired by a [Talk Python podcast episode](https://www.youtube.com/live/wuGirNCyTxA?t=2880s) featuring 80 | the creator of [LanceDB](https://github.com/lancedb/lancedb), who mentioned the need to convert from Pydantic models to 81 | PyArrow schemas. 82 | 83 | This challenge led to a realization: such conversion could be generalized to many dataframe libraries by using Narwhals 84 | as an intermediate representation. `anyschema` makes this conversion seamless and extensible. 
85 | -------------------------------------------------------------------------------- /tests/anyschema/initialization_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | import pytest 7 | from narwhals.schema import Schema 8 | 9 | from anyschema import AnySchema 10 | from anyschema.parsers import ParserPipeline, ParserStep, make_pipeline 11 | 12 | if TYPE_CHECKING: 13 | from narwhals.dtypes import DType 14 | 15 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldSpecIterable, FieldType 16 | 17 | 18 | class CustomType: 19 | pass 20 | 21 | 22 | class CustomTypeStep(ParserStep): 23 | def parse( 24 | self, 25 | input_type: FieldType, 26 | constraints: FieldConstraints, # noqa: ARG002 27 | metadata: FieldMetadata, # noqa: ARG002 28 | ) -> DType | None: 29 | return nw.String() if input_type is CustomType else None 30 | 31 | 32 | def test_anyschema_with_unknown_spec_and_no_adapter() -> None: 33 | class UnknownClass: 34 | """A class that doesn't match any known adapter pattern.""" 35 | 36 | some_field: int 37 | 38 | expected_msg = "`spec` type is unknown and `adapter` is not specified." 39 | with pytest.raises(ValueError, match=expected_msg): 40 | AnySchema(spec=UnknownClass) 41 | 42 | 43 | def test_anyschema_with_unknown_spec_and_custom_adapter() -> None: 44 | class CustomSpec: 45 | """A custom spec class.""" 46 | 47 | field1: str 48 | field2: int 49 | 50 | def custom_adapter(spec: CustomSpec) -> FieldSpecIterable: # noqa: ARG001 51 | yield "field1", str, (), {} 52 | yield "field2", int, (), {} 53 | 54 | schema = AnySchema(spec=CustomSpec, adapter=custom_adapter) 55 | result = schema.to_polars() 56 | 57 | assert "field1" in result 58 | assert "field2" in result 59 | 60 | 61 | def test_anyschema_with_narwhals_schema() -> None: 62 | nw_schema = Schema({"name": nw.String(), "age": nw.Int64()}) 63 | anyschema = AnySchema(spec=nw_schema) 64 | assert anyschema._nw_schema is nw_schema 65 | 66 | 67 | def test_anyschema_with_dict_spec() -> None: 68 | spec = {"name": str, "age": int} 69 | 70 | schema = AnySchema(spec=spec) 71 | result = schema.to_polars() 72 | 73 | assert "name" in result 74 | assert "age" in result 75 | 76 | 77 | @pytest.mark.parametrize( 78 | "pipeline", 79 | [ 80 | make_pipeline("auto").with_steps(CustomTypeStep()), 81 | ParserPipeline.from_auto(CustomTypeStep()), 82 | [step.clone() for step in ParserPipeline.from_auto(CustomTypeStep()).steps], 83 | ], 84 | ) 85 | def test_anyschema_with_pipeline(pipeline: ParserPipeline) -> None: 86 | spec = {"custom_field": CustomType, "normal_field": int} 87 | schema = AnySchema(spec=spec, pipeline=pipeline) 88 | 89 | result = schema._nw_schema 90 | assert result == Schema( 91 | { 92 | "custom_field": nw.String(), 93 | "normal_field": nw.Int64(), 94 | } 95 | ) 96 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yaml: -------------------------------------------------------------------------------- 1 | name: ✨ Feature Request 2 | description: Suggest a new feature or enhancement for anyschema 3 | title: "[Feature]: " 4 | labels: ["enhancement", "needs-triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for your interest in improving anyschema! 10 | Please describe the feature you'd like to see added. 
11 | 12 | - type: textarea 13 | id: problem 14 | attributes: 15 | label: Problem Statement 16 | description: Is your feature request related to a problem? Please describe the problem or use case. 17 | placeholder: I would like to be able to... 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: solution 23 | attributes: 24 | label: Proposed Solution 25 | description: Describe the solution you'd like to see implemented. 26 | validations: 27 | required: true 28 | 29 | - type: textarea 30 | id: example 31 | attributes: 32 | label: Example Usage 33 | description: Provide a code example showing how you'd like to use this feature. 34 | placeholder: | 35 | ```python 36 | from anyschema import AnySchema 37 | 38 | # Example of how the feature would be used 39 | schema = AnySchema(spec=MyClass) 40 | result = schema.new_feature() 41 | ``` 42 | render: python 43 | 44 | - type: textarea 45 | id: alternatives 46 | attributes: 47 | label: Alternatives Considered 48 | description: Describe any alternative solutions or features you've considered. 49 | placeholder: I've considered using... but it doesn't work because... 50 | 51 | - type: textarea 52 | id: additional-context 53 | attributes: 54 | label: Additional Context 55 | description: Add any other context, screenshots, or examples about the feature request here. 56 | 57 | - type: dropdown 58 | id: priority 59 | attributes: 60 | label: Priority 61 | description: How important is this feature to you? 62 | options: 63 | - Nice to have 64 | - Would improve my workflow 65 | - Critical for my use case 66 | validations: 67 | required: true 68 | 69 | - type: checkboxes 70 | id: contribution 71 | attributes: 72 | label: Contribution 73 | description: Would you be willing to contribute? 74 | options: 75 | - label: I am willing to submit a PR to implement this feature 76 | - label: I can help with design/testing 77 | - label: I can provide more examples or use cases 78 | 79 | - type: checkboxes 80 | id: terms 81 | attributes: 82 | label: Checklist 83 | description: Please confirm the following 84 | options: 85 | - label: I have searched the existing issues to make sure this feature hasn't been requested yet 86 | required: true 87 | - label: I have checked the documentation to make sure this feature doesn't already exist 88 | required: true 89 | -------------------------------------------------------------------------------- /anyschema/_utils.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa: T201 2 | from __future__ import annotations 3 | 4 | from collections.abc import Sequence 5 | from typing import TYPE_CHECKING, Any, TypeVar 6 | 7 | if TYPE_CHECKING: 8 | from typing_extensions import TypeIs 9 | 10 | _T = TypeVar("_T") 11 | 12 | 13 | def qualified_type_name(obj: object | type[Any], /) -> str: 14 | # Copied from Narwhals: https://github.com/narwhals-dev/narwhals/blob/282a3cb08f406e2f319d86b81a7300a2a6c5f390/narwhals/_utils.py#L1922 15 | # Author: Marco Gorelli 16 | # License: MIT: https://github.com/narwhals-dev/narwhals/blob/282a3cb08f406e2f319d86b81a7300a2a6c5f390/LICENSE.md 17 | tp = obj if isinstance(obj, type) else type(obj) 18 | module = tp.__module__ if tp.__module__ != "builtins" else "" 19 | return f"{module}.{tp.__name__}".lstrip(".") 20 | 21 | 22 | def _get_sys_info() -> dict[str, str]: 23 | """System information. 24 | 25 | Returns system and Python version information 26 | 27 | Adapted from sklearn. 28 | 29 | Returns: 30 | Dictionary with system info. 
31 | """ 32 | import platform 33 | import sys 34 | 35 | python = sys.version.replace("\n", " ") 36 | 37 | blob = ( 38 | ("python", python), 39 | ("machine", platform.platform()), 40 | ) 41 | 42 | return dict(blob) 43 | 44 | 45 | def _get_deps_info() -> dict[str, str]: 46 | """Overview of the installed version of main dependencies. 47 | 48 | This function does not import the modules to collect the version numbers 49 | but instead relies on standard Python package metadata. 50 | 51 | Returns version information on relevant Python libraries 52 | 53 | This function and show_versions were copied from sklearn and adapted 54 | 55 | Returns: 56 | Mapping from dependency to version. 57 | """ 58 | from importlib.metadata import distributions 59 | 60 | libs = ( 61 | "anyschema", 62 | "narwhals", 63 | "typing_extensions", 64 | "attrs", 65 | "pydantic", 66 | "sqlalchemy", 67 | "pandas", 68 | "polars", 69 | "pyarrow", 70 | ) 71 | dist_map = {dist.name.lower(): dist.version for dist in distributions()} 72 | return {lib: dist_map.get(lib, "") for lib in libs} 73 | 74 | 75 | def show_versions() -> None: 76 | """Print useful debugging information. 77 | 78 | Examples: 79 | >>> from anyschema import show_versions 80 | >>> show_versions() # doctest: +SKIP 81 | """ 82 | sys_info = _get_sys_info() 83 | deps_info = _get_deps_info() 84 | 85 | print("\nSystem:") 86 | for k, stat in sys_info.items(): 87 | print(f"{k:>10}: {stat}") 88 | 89 | print("\nPython dependencies:") 90 | for k, stat in deps_info.items(): 91 | print(f"{k:>20}: {stat}") 92 | 93 | 94 | def is_sequence_but_not_str(sequence: Sequence[_T] | Any) -> TypeIs[Sequence[_T]]: 95 | return isinstance(sequence, Sequence) and not isinstance(sequence, str) 96 | 97 | 98 | def is_sequence_of(obj: Any, tp: type[_T]) -> TypeIs[Sequence[_T]]: 99 | # Check if an object is a sequence of `tp`, only sniffing the first element. 100 | return bool(is_sequence_but_not_str(obj) and (first := next(iter(obj), None)) and isinstance(first, tp)) 101 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation Issue 2 | description: Report an issue with documentation or suggest improvements 3 | title: "[Docs]: " 4 | labels: ["documentation", "needs-triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for helping improve anyschema's documentation! 10 | Good documentation is crucial for a great developer experience. 11 | 12 | - type: dropdown 13 | id: doc-type 14 | attributes: 15 | label: Documentation Type 16 | description: What type of documentation issue is this? 17 | options: 18 | - Missing documentation 19 | - Incorrect/outdated documentation 20 | - Unclear/confusing documentation 21 | - Typo or grammar issue 22 | - Example code issue 23 | - API reference issue 24 | - Tutorial/guide improvement 25 | - Other 26 | validations: 27 | required: true 28 | 29 | - type: input 30 | id: location 31 | attributes: 32 | label: Documentation Location 33 | description: Where is the documentation issue located? 34 | placeholder: e.g., https://fbruzzesi.github.io/anyschema/user-guide/getting-started/ or "API Reference > AnySchema" 35 | validations: 36 | required: true 37 | 38 | - type: textarea 39 | id: issue-description 40 | attributes: 41 | label: Issue Description 42 | description: Describe the documentation issue in detail. 43 | placeholder: | 44 | What is wrong, missing, or unclear? 
45 | What did you expect to find? 46 | What did you actually find? 47 | validations: 48 | required: true 49 | 50 | - type: textarea 51 | id: suggested-improvement 52 | attributes: 53 | label: Suggested Improvement 54 | description: How would you improve this documentation? 55 | placeholder: | 56 | Provide suggestions for: 57 | - What content should be added or changed 58 | - How it could be explained better 59 | - What examples would be helpful 60 | 61 | - type: textarea 62 | id: example-code 63 | attributes: 64 | label: Code Example (if applicable) 65 | description: If suggesting a code example, provide it here. 66 | placeholder: | 67 | ```python 68 | # Your suggested example code 69 | from anyschema import AnySchema 70 | # ... 71 | ``` 72 | render: python 73 | 74 | - type: textarea 75 | id: additional-context 76 | attributes: 77 | label: Additional Context 78 | description: Add any other context, screenshots, or information about the documentation issue. 79 | 80 | - type: checkboxes 81 | id: contribution 82 | attributes: 83 | label: Contribution 84 | description: Would you be willing to help? 85 | options: 86 | - label: I am willing to submit a PR to improve this documentation 87 | - label: I can provide additional examples or use cases 88 | 89 | - type: checkboxes 90 | id: terms 91 | attributes: 92 | label: Checklist 93 | description: Please confirm the following 94 | options: 95 | - label: I have checked that this documentation issue hasn't been reported yet 96 | required: true 97 | - label: I have reviewed the current documentation at the specified location 98 | required: true 99 | -------------------------------------------------------------------------------- /anyschema/parsers/_union.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from types import NoneType, UnionType 4 | from typing import TYPE_CHECKING, Union 5 | 6 | from typing_extensions import get_args, get_origin # noqa: UP035 7 | 8 | from anyschema._metadata import get_anyschema_value_by_key, set_anyschema_meta 9 | from anyschema.exceptions import UnsupportedDTypeError 10 | from anyschema.parsers._base import ParserStep 11 | 12 | if TYPE_CHECKING: 13 | from narwhals.dtypes import DType 14 | 15 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType 16 | 17 | 18 | class UnionTypeStep(ParserStep): 19 | """Parser for Union types including `Optional`. 20 | 21 | Handles: 22 | 23 | - `Union[T, None]`, `T | None`, `Optional[T]` 24 | - Extracts the non-None type and its metadata for further parsing 25 | """ 26 | 27 | def parse(self, input_type: FieldType, constraints: FieldConstraints, metadata: FieldMetadata) -> DType | None: 28 | """Parse Union types, particularly Optional types. 29 | 30 | Arguments: 31 | input_type: The type to parse. 32 | constraints: Constraints associated with the type (will be preserved and passed through). 33 | metadata: Custom metadata dictionary (will be preserved and passed through). 34 | 35 | Returns: 36 | A Narwhals DType by extracting the non-None type and delegating to the chain. 
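Examples:
    A minimal sketch mirroring the unit tests (a `PyTypeStep` later in the pipeline
    resolves the extracted inner type):

    >>> from typing import Optional
    >>> from anyschema.parsers import ParserPipeline, PyTypeStep, UnionTypeStep
    >>> step = UnionTypeStep()
    >>> _ = ParserPipeline([step, PyTypeStep()])
    >>> step.parse(Optional[int], (), {})  # doctest: +SKIP
    Int64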
37 | """ 38 | # Handle Union types from typing module (including Optional) 39 | # Handle UnionType (PEP 604: T | U syntax) 40 | if get_origin(input_type) is Union or isinstance(input_type, UnionType): 41 | args = get_args(input_type) 42 | extracted_type = self._parse_union(args) 43 | 44 | # Set nullable metadata if not already explicitly set 45 | # This way Union[T, None] / Optional[T] automatically marks the field as nullable 46 | # We mutate the metadata dict in-place so parse_into_field can read it 47 | if get_anyschema_value_by_key(metadata, key="nullable") is None: 48 | set_anyschema_meta(metadata, key="nullable", value=True) 49 | 50 | return self.pipeline.parse(extracted_type, constraints, metadata, strict=True) 51 | 52 | return None 53 | 54 | def _parse_union(self, union: tuple[FieldType, ...]) -> FieldType: 55 | """Extract the non-None type from a Union. 56 | 57 | Arguments: 58 | union: Tuple of types in the Union. 59 | outer_constraints: Constraints from the outer type (e.g., from Annotated[Optional[T], ...]). 60 | 61 | Returns: 62 | A tuple of (non-None type, preserved constraints tuple). 63 | The outer constraints are preserved to ensure constraints aren't lost. 64 | 65 | Raises: 66 | UnsupportedDTypeError: If the Union has more than 2 types or both types are not None. 67 | """ 68 | if len(union) != 2: # noqa: PLR2004 69 | msg = "Union with more than two types is not supported." 70 | raise UnsupportedDTypeError(msg) 71 | 72 | field0, field1 = union 73 | 74 | if field0 is not NoneType and field1 is not NoneType: 75 | msg = "Union with mixed types is not supported." 76 | raise UnsupportedDTypeError(msg) 77 | 78 | return field1 if field0 is NoneType else field0 79 | -------------------------------------------------------------------------------- /tests/parsers/attrs_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import attrs 4 | import narwhals as nw 5 | import pytest 6 | 7 | from anyschema.parsers import ParserPipeline, PyTypeStep 8 | from anyschema.parsers.attrs import AttrsTypeStep 9 | from tests.conftest import AttrsDerived, AttrsPerson, AttrsPersonFrozen, create_missing_decorator_test_case 10 | 11 | 12 | @pytest.fixture(scope="module") 13 | def attrs_parser() -> AttrsTypeStep: 14 | attrs_parser = AttrsTypeStep() 15 | py_parser = PyTypeStep() 16 | _ = ParserPipeline([attrs_parser, py_parser]) 17 | return attrs_parser 18 | 19 | 20 | def test_parse_attrs_class_into_struct(attrs_parser: AttrsTypeStep) -> None: 21 | result = attrs_parser.parse(AttrsPerson, (), {}) 22 | 23 | expected_fields = [ 24 | nw.Field(name="name", dtype=nw.String()), 25 | nw.Field(name="age", dtype=nw.Int64()), 26 | nw.Field(name="date_of_birth", dtype=nw.Date()), 27 | nw.Field(name="is_active", dtype=nw.Boolean()), 28 | nw.Field(name="classes", dtype=nw.List(nw.String())), 29 | nw.Field(name="grades", dtype=nw.List(nw.Float64())), 30 | ] 31 | expected = nw.Struct(expected_fields) 32 | assert result == expected 33 | 34 | 35 | def test_parse_frozen_attrs_class(attrs_parser: AttrsTypeStep) -> None: 36 | result = attrs_parser.parse(AttrsPersonFrozen, (), {}) 37 | 38 | expected_fields = [ 39 | nw.Field(name="name", dtype=nw.String()), 40 | nw.Field(name="age", dtype=nw.Int64()), 41 | nw.Field(name="date_of_birth", dtype=nw.Date()), 42 | ] 43 | expected = nw.Struct(expected_fields) 44 | assert result == expected 45 | 46 | 47 | def test_parse_empty_attrs_class(attrs_parser: AttrsTypeStep) -> None: 48 | @attrs.define 49 | class 
EmptyClass: 50 | pass 51 | 52 | result = attrs_parser.parse(EmptyClass, (), {}) 53 | expected = nw.Struct([]) 54 | assert result == expected 55 | 56 | 57 | def test_parse_non_attrs_class_returns_none(attrs_parser: AttrsTypeStep) -> None: 58 | class RegularClass: 59 | pass 60 | 61 | result = attrs_parser.parse(RegularClass, (), {}) 62 | assert result is None 63 | 64 | 65 | def test_parse_classic_attr_s_decorator(attrs_parser: AttrsTypeStep) -> None: 66 | import attr 67 | 68 | @attr.s(auto_attribs=True) 69 | class ClassicAttrs: 70 | name: str 71 | value: int 72 | 73 | result = attrs_parser.parse(ClassicAttrs, (), {}) 74 | 75 | expected_fields = [ 76 | nw.Field(name="name", dtype=nw.String()), 77 | nw.Field(name="value", dtype=nw.Int64()), 78 | ] 79 | expected = nw.Struct(expected_fields) 80 | assert result == expected 81 | 82 | 83 | def test_parse_attrs_with_inheritance(attrs_parser: AttrsTypeStep) -> None: 84 | result = attrs_parser.parse(AttrsDerived, (), {}) 85 | 86 | expected_fields = [ 87 | nw.Field(name="foo", dtype=nw.String()), 88 | nw.Field(name="bar", dtype=nw.Int64()), 89 | nw.Field(name="baz", dtype=nw.Float64()), 90 | ] 91 | expected = nw.Struct(expected_fields) 92 | assert result == expected 93 | 94 | 95 | def test_parse_attrs_missing_decorator_raises(attrs_parser: AttrsTypeStep) -> None: 96 | child_cls, expected_msg = create_missing_decorator_test_case() 97 | with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")): 98 | attrs_parser.parse(child_cls, (), {}) 99 | -------------------------------------------------------------------------------- /tests/parsers/forward_ref_dependency_mock_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from unittest.mock import patch 4 | 5 | from anyschema.parsers import ForwardRefStep 6 | 7 | 8 | def test_build_namespace_without_pydantic() -> None: 9 | """Test that pydantic types are excluded when PYDANTIC_AVAILABLE is False.""" 10 | with patch(target="anyschema.parsers._forward_ref.PYDANTIC_AVAILABLE", new=False): 11 | step = ForwardRefStep() 12 | 13 | # Pydantic types should NOT be in the namespace 14 | assert "BaseModel" not in step.globalns 15 | assert "Field" not in step.globalns 16 | assert "PositiveInt" not in step.globalns 17 | assert "conint" not in step.globalns 18 | 19 | # But builtin types should still be there 20 | assert "int" in step.globalns 21 | assert "str" in step.globalns 22 | assert "List" in step.globalns 23 | 24 | 25 | def test_build_namespace_without_annotated_types() -> None: 26 | """Test that annotated_types are excluded when ANNOTATED_TYPES_AVAILABLE is False.""" 27 | with patch(target="anyschema.parsers._forward_ref.ANNOTATED_TYPES_AVAILABLE", new=False): 28 | step = ForwardRefStep() 29 | 30 | # annotated_types should NOT be in the namespace 31 | assert "Gt" not in step.globalns 32 | assert "Ge" not in step.globalns 33 | assert "Lt" not in step.globalns 34 | assert "Le" not in step.globalns 35 | assert "Interval" not in step.globalns 36 | 37 | # But builtin types should still be there 38 | assert "int" in step.globalns 39 | assert "str" in step.globalns 40 | 41 | 42 | def test_build_namespace_without_both_optional_deps() -> None: 43 | """Test namespace with neither pydantic nor annotated_types.""" 44 | with ( 45 | patch(target="anyschema.parsers._forward_ref.PYDANTIC_AVAILABLE", new=False), 46 | patch(target="anyschema.parsers._forward_ref.ANNOTATED_TYPES_AVAILABLE", new=False), 47 | ): 48 | 
step = ForwardRefStep()
49 | 
50 |         # No pydantic types
51 |         assert "BaseModel" not in step.globalns
52 |         assert "PositiveInt" not in step.globalns
53 | 
54 |         # No annotated_types
55 |         assert "Gt" not in step.globalns
56 |         assert "Interval" not in step.globalns
57 | 
58 |         # But builtin types should still be there
59 |         assert "int" in step.globalns
60 |         assert "str" in step.globalns
61 |         assert "list" in step.globalns
62 |         assert "dict" in step.globalns
63 |         assert "Union" in step.globalns
64 | 
65 | 
66 | def test_build_namespace_with_user_globals_override() -> None:
67 |     """Test that user-provided globals can override defaults."""
68 |     with (
69 |         patch(target="anyschema.parsers._forward_ref.PYDANTIC_AVAILABLE", new=False),
70 |         patch(target="anyschema.parsers._forward_ref.ANNOTATED_TYPES_AVAILABLE", new=False),
71 |     ):
72 |         # User provides their own types
73 |         custom_globals = {"CustomType": int, "int": str}  # Intentionally override int
74 |         step = ForwardRefStep(globalns=custom_globals)
75 | 
76 |         # User's custom type should be present
77 |         assert "CustomType" in step.globalns
78 |         assert step.globalns["CustomType"] is int
79 | 
80 |         # User's override should work (though not recommended!)
81 |         assert step.globalns["int"] is str
82 | 
83 |         # Built-in types that weren't overridden should still be there
84 |         assert "str" in step.globalns
85 |         assert "list" in step.globalns
86 | 
--------------------------------------------------------------------------------
/anyschema/_dependencies.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | import sys
4 | from collections.abc import Mapping, Sequence
5 | from dataclasses import is_dataclass as dc_is_dataclass
6 | from importlib.util import find_spec
7 | from typing import TYPE_CHECKING
8 | 
9 | from typing_extensions import TypeIs, is_typeddict
10 | 
11 | if TYPE_CHECKING:
12 |     from types import ModuleType
13 | 
14 |     from pydantic import BaseModel
15 | 
16 |     from anyschema.typing import AttrsClassType, DataclassType, IntoOrderedDict, SQLAlchemyTableType, TypedDictType
17 | 
18 | ANNOTATED_TYPES_AVAILABLE = find_spec("annotated_types") is not None
19 | PYDANTIC_AVAILABLE = find_spec("pydantic") is not None
20 | ATTRS_AVAILABLE = find_spec("attrs") is not None
21 | SQLALCHEMY_AVAILABLE = find_spec("sqlalchemy") is not None
22 | 
23 | 
24 | def get_pydantic() -> ModuleType | None:
25 |     """Get pydantic module (if already imported - else return None)."""
26 |     return sys.modules.get("pydantic", None)
27 | 
28 | 
29 | def get_attrs() -> ModuleType | None:
30 |     """Get attrs module (if already imported - else return None)."""
31 |     return sys.modules.get("attrs", None)
32 | 
33 | 
34 | def is_into_ordered_dict(obj: object) -> TypeIs[IntoOrderedDict]:
35 |     """Check if the object can be converted into a python OrderedDict."""
36 |     tpl_size = 2
37 |     return isinstance(obj, Mapping) or (
38 |         isinstance(obj, Sequence) and all(isinstance(s, tuple) and len(s) == tpl_size for s in obj)
39 |     )
40 | 
41 | 
42 | def is_typed_dict(obj: object) -> TypeIs[TypedDictType]:
43 |     """Check if the object is a TypedDict, narrowing its type for type checkers."""
44 |     return is_typeddict(obj)
45 | 
46 | 
47 | def is_dataclass(obj: object) -> TypeIs[DataclassType]:
48 |     """Check if the object is a dataclass, narrowing its type for type checkers."""
49 |     return dc_is_dataclass(obj)
50 | 
51 | 
52 | def is_pydantic_base_model(obj: object) -> TypeIs[type[BaseModel]]:
53 |     """Check if the object is a pydantic BaseModel."""
54 |     return (
55 |         (pydantic := 
get_pydantic()) is not None 56 | and isinstance(obj, type) 57 | and isinstance(obj, type(pydantic.BaseModel)) 58 | and issubclass(obj, pydantic.BaseModel) 59 | ) 60 | 61 | 62 | def is_attrs_class(obj: object) -> TypeIs[AttrsClassType]: 63 | """Check if the object is an attrs class. 64 | 65 | Uses attrs.has() to check if a class is an attrs class. 66 | Supports @attrs.define/@attrs.frozen decorators. 67 | """ 68 | return (attrs := get_attrs()) is not None and attrs.has(obj) 69 | 70 | 71 | def get_sqlalchemy() -> ModuleType | None: 72 | """Get sqlalchemy module (if already imported - else return None).""" 73 | return sys.modules.get("sqlalchemy", None) 74 | 75 | 76 | def get_sqlalchemy_orm() -> ModuleType | None: 77 | """Get sqlalchemy.orm module (if already imported - else return None).""" 78 | return sys.modules.get("sqlalchemy.orm", None) 79 | 80 | 81 | def is_sqlalchemy_table(obj: object) -> TypeIs[SQLAlchemyTableType]: 82 | """Check if the object is a SQLAlchemy Table or DeclarativeBase class. 83 | 84 | Supports both: 85 | 86 | - SQLAlchemy Table instances (Core) 87 | - SQLAlchemy ORM mapped classes (DeclarativeBase subclasses) 88 | """ 89 | is_table = (sql := get_sqlalchemy()) is not None and isinstance(obj, sql.Table) 90 | is_declarative_base = ( 91 | (sql_orm := get_sqlalchemy_orm()) is not None 92 | and isinstance(obj, type) 93 | and issubclass(obj, sql_orm.DeclarativeBase) 94 | ) 95 | return is_table or is_declarative_base 96 | -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- 1 | site_name: AnySchema 2 | site_url: https://fbruzzesi.github.io/anyschema/ 3 | site_author: Francesco Bruzzesi 4 | 5 | repo_url: https://github.com/FBruzzesi/anyschema 6 | repo_name: FBruzzesi/anyschema 7 | edit_uri: edit/main/docs/ 8 | 9 | nav: 10 | - Home: index.md 11 | - User Guide: 12 | - Getting Started: user-guide/getting-started.md 13 | - Metadata: user-guide/metadata.md 14 | - Advanced Usage: user-guide/advanced.md 15 | - Serialization & Deserialization: user-guide/serde.md 16 | - Best Practices: user-guide/best-practices.md 17 | - End to End Example with Custom Components: user-guide/custom-end-to-end-example.md 18 | - Troubleshooting: user-guide/troubleshooting.md 19 | - OpenAPI Compatibility: user-guide/openapi-compatibility.md 20 | - Architecture: architecture.md 21 | - API Reference: 22 | - Home: api-reference/index.md 23 | - AnySchema: api-reference/anyschema.md 24 | - Parsers: api-reference/parsers.md 25 | - Spec Adapters: api-reference/adapters.md 26 | - Serialization & Deserialization: api-reference/serde.md 27 | - Exceptions: api-reference/exceptions.md 28 | - Typing: api-reference/typing.md 29 | 30 | theme: 31 | name: material 32 | font: false 33 | # favicon: assets/logo.svg # TODO(FBruzzesi) 34 | # logo: assets/logo.svg # TODO(FBruzzesi) 35 | features: 36 | - content.code.copy 37 | - content.code.annotate 38 | - navigation.footer 39 | - navigation.indexes 40 | - navigation.top 41 | palette: 42 | # Palette toggle for automatic mode 43 | - media: "(prefers-color-scheme)" 44 | toggle: 45 | icon: material/brightness-auto 46 | name: Switch to light mode 47 | 48 | # Palette toggle for light mode 49 | - media: "(prefers-color-scheme: light)" 50 | scheme: default 51 | toggle: 52 | icon: material/brightness-7 53 | name: Switch to dark mode 54 | 55 | # Palette toggle for dark mode 56 | - media: "(prefers-color-scheme: dark)" 57 | scheme: slate 58 | toggle: 59 | icon: 
material/brightness-4 60 | name: Switch to system preference 61 | 62 | plugins: 63 | - autorefs 64 | - search: 65 | enabled: true 66 | separator: '[\s\-,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' 67 | - mkdocstrings: 68 | default_handler: python 69 | enable_inventory: true 70 | handlers: 71 | python: 72 | options: 73 | members_order: alphabetical 74 | line_length: 100 75 | show_overloads: true 76 | show_signature_annotations: true 77 | signature_crossrefs: true 78 | 79 | markdown_extensions: 80 | - footnotes 81 | - admonition 82 | - md_in_html 83 | - attr_list 84 | - def_list 85 | - toc: 86 | permalink: true 87 | toc_depth: 5 88 | - pymdownx.details 89 | - pymdownx.tabbed: 90 | alternate_style: true 91 | - pymdownx.superfences: 92 | custom_fences: 93 | - name: python 94 | class: python 95 | validator: !!python/name:markdown_exec.validator 96 | format: !!python/name:markdown_exec.formatter 97 | - name: mermaid 98 | class: mermaid 99 | format: !!python/name:pymdownx.superfences.fence_code_format 100 | - pymdownx.highlight: 101 | anchor_linenums: true 102 | line_spans: __span 103 | pygments_lang_class: true 104 | - pymdownx.inlinehilite 105 | - pymdownx.snippets 106 | - pymdownx.arithmatex: 107 | generic: true 108 | 109 | extra_javascript: 110 | - javascript/extra.js 111 | 112 | extra_css: 113 | - css/extra.css 114 | -------------------------------------------------------------------------------- /tests/adapters/dataclass_adapter_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import sys 4 | from dataclasses import dataclass, field, make_dataclass 5 | from datetime import date, datetime 6 | from typing import TYPE_CHECKING 7 | 8 | import pytest 9 | from pydantic.dataclasses import dataclass as pydantic_dataclass 10 | 11 | from anyschema.adapters import dataclass_adapter 12 | from tests.conftest import DataclassEventWithTimeMetadata 13 | 14 | if TYPE_CHECKING: 15 | from anyschema.typing import DataclassType, FieldSpec 16 | 17 | 18 | class PersonIntoDataclass: 19 | name: str 20 | age: int 21 | date_of_birth: date 22 | 23 | 24 | @pytest.mark.parametrize( 25 | "spec", 26 | [ 27 | pydantic_dataclass(PersonIntoDataclass), 28 | dataclass(PersonIntoDataclass), 29 | make_dataclass("Test", [("name", str), ("age", int), ("date_of_birth", date)]), 30 | ], 31 | ) 32 | def test_dataclass_adapter(spec: DataclassType) -> None: 33 | expected: tuple[FieldSpec, ...] = (("name", str, (), {}), ("age", int, (), {}), ("date_of_birth", date, (), {})) 34 | result = tuple(dataclass_adapter(spec)) 35 | assert result == expected 36 | 37 | 38 | def test_dataclass_adapter_missing_decorator_raises() -> None: 39 | """Test that adapter raises helpful error when child class isn't decorated.""" 40 | 41 | @dataclass 42 | class Base: 43 | foo: str 44 | 45 | class ChildWithoutDecorator(Base): 46 | bar: int 47 | 48 | expected_msg = ( 49 | "Class 'ChildWithoutDecorator' has annotations ('bar') that are not dataclass fields. " 50 | "If this class inherits from a dataclass, you must also decorate it with @dataclass " 51 | "to properly define these fields." 
52 |     )
53 | 
54 |     with pytest.raises(AssertionError, match=expected_msg.replace("(", r"\(").replace(")", r"\)")):
55 |         list(dataclass_adapter(ChildWithoutDecorator))  # ty: ignore[invalid-argument-type]
56 | 
57 | 
58 | def test_dataclass_adapter_with_time_metadata() -> None:
59 |     result = tuple(dataclass_adapter(DataclassEventWithTimeMetadata))  # ty: ignore[invalid-argument-type]
60 | 
61 |     expected: tuple[FieldSpec, ...] = (
62 |         ("name", str, (), {"anyschema": {"description": "Event name"}}),
63 |         ("created_at", datetime, (), {}),
64 |         ("scheduled_at", datetime, (), {"anyschema": {"time_zone": "UTC", "description": "Scheduled time"}}),
65 |         ("started_at", datetime, (), {"anyschema": {"time_unit": "ms"}}),
66 |         ("completed_at", datetime, (), {"anyschema": {"time_zone": "Europe/Berlin", "time_unit": "ns"}}),
67 |     )
68 | 
69 |     assert result == expected
70 | 
71 | 
72 | @pytest.mark.skipif(sys.version_info < (3, 14), reason="doc parameter requires Python 3.14+")
73 | def test_dataclass_adapter_with_doc_argument() -> None:
74 |     @dataclass
75 |     class Product:
76 |         name: str = field(doc="Product name")  # pyright: ignore[reportCallIssue] # ty: ignore[no-matching-overload]
77 |         price: float = field(  # pyright: ignore[reportCallIssue] # ty: ignore[no-matching-overload]
78 |             doc="Product price",
79 |             metadata={"anyschema": {"description": "From metadata"}},  # anyschema metadata have precedence
80 |         )
81 |         in_stock: bool
82 | 
83 |     result = list(dataclass_adapter(Product))  # ty: ignore[invalid-argument-type]
84 |     expected = [
85 |         ("name", str, (), {"anyschema": {"description": "Product name"}}),
86 |         ("price", float, (), {"anyschema": {"description": "From metadata"}}),
87 |         ("in_stock", bool, (), {}),
88 |     ]
89 |     assert result == expected
90 | 
--------------------------------------------------------------------------------
/docs/user-guide/custom-end-to-end-example.md:
--------------------------------------------------------------------------------
1 | 
2 | # End to End Example with Custom Components
3 | 
4 | Let's now bring together the learnings from the [previous section](advanced.md) in an example that combines a custom
5 | parser and a custom adapter.
6 | 
7 | ## 1. Define custom types
8 | 
9 | ```python exec="true" source="above" session="end-to-end"
10 | from typing import Any
11 | 
12 | import narwhals as nw
13 | from narwhals.dtypes import DType
14 | 
15 | from anyschema import AnySchema
16 | from anyschema.parsers import (
17 |     ParserStep,
18 |     ForwardRefStep,
19 |     UnionTypeStep,
20 |     AnnotatedStep,
21 |     PyTypeStep,
22 | )
23 | from anyschema.typing import FieldSpecIterable
24 | 
25 | 
26 | class Email:
27 |     """Email address type."""
28 | 
29 | 
30 | class PhoneNumber:
31 |     """Phone number type."""
32 | 
33 | 
34 | class Currency:
35 |     """Monetary value type."""
36 | ```
37 | 
38 | ## 2. Create custom parser for these types
39 | 
40 | ```python exec="true" source="above" session="end-to-end"
41 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType
42 | 
43 | 
44 | class CustomerTypesStep(ParserStep):
45 |     """Parser for custom types."""
46 | 
47 |     def parse(
48 |         self,
49 |         input_type: FieldType,
50 |         constraints: FieldConstraints,
51 |         metadata: FieldMetadata,
52 |     ) -> DType | None:
53 |         if input_type is Email:
54 |             return nw.String()
55 |         elif input_type is PhoneNumber:
56 |             return nw.String()
57 |         elif input_type is Currency:
58 |             return nw.Float32()
59 |         return None
60 | ```
61 | 
62 | ## 3. 
Define custom schema format 63 | 64 | ```python exec="true" source="above" session="end-to-end" 65 | class CustomerSchema: 66 | """Custom schema format.""" 67 | 68 | def __init__(self, entity_name: str, fields: list[dict]): 69 | self.entity_name = entity_name 70 | self.fields = fields 71 | ``` 72 | 73 | ## 4. Create adapter for the custom format 74 | 75 | ```python exec="true" source="above" session="end-to-end" 76 | def customer_schema_adapter(spec: CustomerSchema) -> FieldSpecIterable: 77 | """Adapter for CustomerSchema format.""" 78 | for field in spec.fields: 79 | field_name = field["name"] 80 | field_type = field["type"] 81 | required = field.get("required", True) 82 | 83 | # Convert required=False to Optional 84 | if not required: 85 | field_type = field_type | None 86 | 87 | yield field_name, field_type, (), {} 88 | ``` 89 | 90 | ## 5. Create pipeline steps with custom parser 91 | 92 | ```python exec="true" source="above" session="end-to-end" 93 | pipeline_steps = [ 94 | ForwardRefStep(), 95 | UnionTypeStep(), 96 | AnnotatedStep(), 97 | CustomerTypesStep(), 98 | PyTypeStep(), 99 | ] 100 | ``` 101 | 102 | ## 6. Use everything together 103 | 104 | ```python exec="true" source="above" result="python" session="end-to-end" 105 | customer_schema = CustomerSchema( 106 | entity_name="Customer", 107 | fields=[ 108 | {"name": "id", "type": int, "required": True}, 109 | {"name": "name", "type": str, "required": True}, 110 | {"name": "email", "type": Email, "required": True}, 111 | {"name": "phone", "type": PhoneNumber, "required": False}, 112 | {"name": "balance", "type": Currency, "required": True}, 113 | ], 114 | ) 115 | 116 | schema = AnySchema( 117 | spec=customer_schema, 118 | pipeline=pipeline_steps, 119 | adapter=customer_schema_adapter, 120 | ) 121 | 122 | print(schema.to_polars()) 123 | ``` 124 | -------------------------------------------------------------------------------- /.github/release-drafter.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for Release Drafter 2 | # Automatically generates release notes from pull requests 3 | 4 | name-template: 'v$RESOLVED_VERSION' 5 | tag-template: 'v$RESOLVED_VERSION' 6 | 7 | # Categories for organizing changes in release notes 8 | categories: 9 | - title: '🚀 Features' 10 | labels: 11 | - 'enhancement' 12 | - 'feature' 13 | - title: '🐛 Bug Fixes' 14 | labels: 15 | - 'bug' 16 | - 'bugfix' 17 | - 'fix' 18 | - title: '📚 Documentation' 19 | labels: 20 | - 'documentation' 21 | - 'docs' 22 | - title: '🧪 Tests' 23 | labels: 24 | - 'tests' 25 | - 'testing' 26 | - title: '⚡ Performance' 27 | labels: 28 | - 'performance' 29 | - 'optimization' 30 | - title: '🔧 Maintenance' 31 | labels: 32 | - 'maintenance' 33 | - 'chore' 34 | - 'refactor' 35 | - title: '🏗️ Build & CI' 36 | labels: 37 | - 'ci-cd' 38 | - 'build' 39 | - 'github-actions' 40 | - title: '📦 Dependencies' 41 | labels: 42 | - 'dependencies' 43 | - 'deps' 44 | - title: '⚠️ Breaking Changes' 45 | labels: 46 | - 'breaking-change' 47 | - 'breaking' 48 | 49 | # Exclude certain labels from release notes 50 | exclude-labels: 51 | - 'skip-changelog' 52 | - 'duplicate' 53 | - 'invalid' 54 | - 'wontfix' 55 | 56 | # Template for the release body 57 | template: | 58 | ## What's Changed 59 | 60 | $CHANGES 61 | 62 | ## Statistics 63 | 64 | **Full Changelog**: https://github.com/fbruzzesi/anyschema/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION 65 | 66 | ## Installation 67 | 68 | ```bash 69 | pip install anyschema==$RESOLVED_VERSION 70 | ``` 71 | 72 | ## 
Contributors 73 | 74 | Thanks to all contributors who made this release possible! 🎉 75 | 76 | $CONTRIBUTORS 77 | 78 | # Change template for each PR 79 | change-template: '- $TITLE (#$NUMBER) @$AUTHOR' 80 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions 81 | 82 | # Template for contributor list 83 | contributors-template: '- @$AUTHOR' 84 | 85 | # Exclude contributors (bots, etc.) 86 | exclude-contributors: 87 | - 'dependabot' 88 | - 'dependabot[bot]' 89 | - 'github-actions' 90 | - 'github-actions[bot]' 91 | 92 | # Automatically label PRs that don't have labels 93 | autolabeler: 94 | - label: 'bug' 95 | title: 96 | - '/fix(es|ed)?/i' 97 | - label: 'enhancement' 98 | title: 99 | - '/feat(ure)?/i' 100 | - label: 'documentation' 101 | title: 102 | - '/docs?/i' 103 | - '/documentation/i' 104 | - label: 'tests' 105 | title: 106 | - '/tests?/i' 107 | - label: 'ci-cd' 108 | title: 109 | - '/ci/i' 110 | - label: 'dependencies' 111 | title: 112 | - '/deps/i' 113 | - label: 'performance' 114 | title: 115 | - '/perf(ormance)?/i' 116 | - label: 'breaking-change' 117 | title: 118 | - '/breaking/i' 119 | - label: 'maintenance' 120 | title: 121 | - '/refactor/i' 122 | - '/chore/i' 123 | 124 | # Replacers for cleaning up titles 125 | replacers: 126 | - search: '/^(feat|feature)(\(.*\))?:\s*/i' 127 | replace: '' 128 | - search: '/^fix(\(.*\))?:\s*/i' 129 | replace: '' 130 | - search: '/^docs?(\(.*\))?:\s*/i' 131 | replace: '' 132 | - search: '/^test(\(.*\))?:\s*/i' 133 | replace: '' 134 | - search: '/^chore(\(.*\))?:\s*/i' 135 | replace: '' 136 | - search: '/^refactor(\(.*\))?:\s*/i' 137 | replace: '' 138 | - search: '/^perf(\(.*\))?:\s*/i' 139 | replace: '' 140 | - search: '/^ci(\(.*\))?:\s*/i' 141 | replace: '' 142 | - search: '/^build(\(.*\))?:\s*/i' 143 | replace: '' 144 | 145 | sort-by: merged_at 146 | sort-direction: ascending 147 | -------------------------------------------------------------------------------- /docs/user-guide/openapi-compatibility.md: -------------------------------------------------------------------------------- 1 | # OpenAPI Compatibility 2 | 3 | `anyschema` supports OpenAPI-compatible metadata through the `x-anyschema` prefix, which is an extension point defined 4 | in the [OpenAPI specification](https://swagger.io/specification/#specification-extensions). 5 | 6 | ## What is OpenAPI? 7 | 8 | OpenAPI (formerly known as Swagger) is a widely-adopted specification for describing REST APIs. 9 | It allows you to define your API's structure, endpoints, request/response formats, and more in a standardized way. 10 | 11 | ## Extension fields in OpenAPI 12 | 13 | The OpenAPI specification allows custom extensions through fields prefixed with `x-`. 14 | These extension fields can contain any valid JSON and are used to add vendor-specific or custom information that's not 15 | part of the core OpenAPI specification. 16 | 17 | ## Using `x-anyschema` prefix 18 | 19 | In `anyschema`, you can use either `"anyschema"` or `"x-anyschema"` as the metadata namespace key. 
20 | Both work identically:
21 | 
22 | ```python exec="true" source="above" result="python" session="openapi-intro"
23 | from pydantic import BaseModel, Field
24 | from anyschema import AnySchema
25 | 
26 | 
27 | class Product(BaseModel):
28 |     # Standard anyschema format
29 |     name: str = Field(json_schema_extra={"anyschema": {"nullable": False}})
30 | 
31 |     # OpenAPI-compatible format (with x- prefix)
32 |     price: float = Field(json_schema_extra={"x-anyschema": {"nullable": True}})
33 | 
34 | 
35 | schema = AnySchema(spec=Product)
36 | 
37 | print(f"name nullable: {schema.fields['name'].nullable}")
38 | print(f"price nullable: {schema.fields['price'].nullable}")
39 | ```
40 | 
41 | ## Why support `x-anyschema`?
42 | 
43 | There are several reasons to support the `x-anyschema` prefix:
44 | 
45 | 1. **OpenAPI Integration**: If you're generating OpenAPI specifications from Pydantic models and want to include
46 |    anyschema metadata, using the `x-` prefix makes it clear that this is an extension field.
47 | 
48 | 2. **Tool Compatibility**: Some OpenAPI tools and validators may flag unknown fields without the `x-` prefix as errors.
49 |    Using `x-anyschema` ensures better compatibility.
50 | 
51 | 3. **Standards Compliance**: Following the OpenAPI convention makes your API documentation more standardized and easier
52 |    for other developers to understand.
53 | 
54 | ## Choosing between `anyschema` and `x-anyschema`
55 | 
56 | Both formats work identically in `anyschema`. Choose based on your needs:
57 | 
58 | * Use `"anyschema"` if:
59 |     * You're only using anyschema internally
60 |     * You want cleaner, shorter metadata keys
61 |     * You're not generating OpenAPI specifications
62 | 
63 | * Use `"x-anyschema"` if:
64 |     * You're generating OpenAPI specifications
65 |     * You want to be explicit that this is an extension field
66 |     * You're integrating with OpenAPI tooling
67 |     * You want maximum standards compliance
68 | 
69 | ## Mixing both formats
70 | 
71 | !!! warning
72 |     You should **not** mix both formats in the same metadata dictionary.
73 | 
74 | If both keys are present, the plain `"anyschema"` key takes precedence and the `"x-anyschema"` entry is ignored.
75 | 
76 | ```python
77 | # ❌ Don't do this - mixing both formats
78 | metadata = {
79 |     "anyschema": {"nullable": True},
80 |     "x-anyschema": {"unique": True},  # This will be ignored!
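81 |     # the "anyschema" namespace is read first, so the "unique" flag above is ignored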
82 | } 83 | 84 | # ✅ Do this - use one format consistently 85 | metadata = { 86 | "x-anyschema": { 87 | "nullable": True, 88 | "unique": True, 89 | } 90 | } 91 | ``` 92 | 93 | ## Further Reading 94 | 95 | * [OpenAPI Specification](https://swagger.io/specification/) 96 | * [OpenAPI Extension Fields](https://swagger.io/specification/#specification-extensions) 97 | * [Pydantic and OpenAPI](https://docs.pydantic.dev/latest/concepts/json_schema/) 98 | -------------------------------------------------------------------------------- /tests/spec_to_schema/typed_dict_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Literal, Mapping, TypedDict 4 | 5 | import narwhals as nw 6 | import pytest 7 | from pydantic import BaseModel, PositiveInt 8 | 9 | from anyschema import AnySchema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.typing import TypedDictType 13 | 14 | 15 | class PersonTypedDict(TypedDict): 16 | """Simple TypedDict for testing.""" 17 | 18 | name: str 19 | age: int 20 | is_active: bool 21 | 22 | 23 | class AddressTypedDict(TypedDict): 24 | """Nested TypedDict for testing.""" 25 | 26 | street: str 27 | city: str 28 | zipcode: int 29 | 30 | 31 | class PersonWithAddressTypedDict(TypedDict): 32 | """TypedDict with nested TypedDict for testing.""" 33 | 34 | name: str 35 | age: int 36 | address: AddressTypedDict 37 | 38 | 39 | class StudentTypedDict(TypedDict): 40 | """TypedDict with list field for testing.""" 41 | 42 | name: str 43 | age: int 44 | classes: list[str] 45 | grades: list[float] 46 | 47 | 48 | class UserTypedDict(TypedDict): 49 | """TypedDict with Literal fields for testing.""" 50 | 51 | username: str 52 | role: Literal["admin", "user", "guest"] 53 | status: Literal["active", "inactive", "pending"] 54 | age: int 55 | 56 | 57 | class ConfigTypedDict(TypedDict): 58 | """TypedDict with mixed Literal types for testing.""" 59 | 60 | name: str 61 | log_level: Literal["debug", "info", "warning", "error"] 62 | max_retries: Literal[1, 2, 3, 5, 10] 63 | enabled: Literal[True, False] 64 | 65 | 66 | class ZipcodeModel(BaseModel): 67 | zipcode: PositiveInt 68 | 69 | 70 | class AddressTypedDictWithZipcodeModel(TypedDict): 71 | """TypedDict with Nested pydantic model for testing.""" 72 | 73 | street: str 74 | city: str 75 | zipcode: ZipcodeModel 76 | 77 | 78 | @pytest.mark.parametrize( 79 | ("spec", "expected_schema"), 80 | [ 81 | (PersonTypedDict, {"name": nw.String(), "age": nw.Int64(), "is_active": nw.Boolean()}), 82 | ( 83 | PersonWithAddressTypedDict, 84 | { 85 | "name": nw.String(), 86 | "age": nw.Int64(), 87 | "address": nw.Struct( 88 | [ 89 | nw.Field("street", nw.String()), 90 | nw.Field("city", nw.String()), 91 | nw.Field("zipcode", nw.Int64()), 92 | ] 93 | ), 94 | }, 95 | ), 96 | ( 97 | StudentTypedDict, 98 | {"name": nw.String(), "age": nw.Int64(), "classes": nw.List(nw.String()), "grades": nw.List(nw.Float64())}, 99 | ), 100 | ( 101 | UserTypedDict, 102 | { 103 | "username": nw.String(), 104 | "role": nw.Enum(["admin", "user", "guest"]), 105 | "status": nw.Enum(["active", "inactive", "pending"]), 106 | "age": nw.Int64(), 107 | }, 108 | ), 109 | ( 110 | ConfigTypedDict, 111 | { 112 | "name": nw.String(), 113 | "log_level": nw.Enum(["debug", "info", "warning", "error"]), 114 | "max_retries": nw.Enum([1, 2, 3, 5, 10]), # type: ignore[list-item] 115 | "enabled": nw.Enum([True, False]), # type: ignore[list-item] 116 | }, 117 | ), 118 | ( 119 | 
AddressTypedDictWithZipcodeModel, 120 | { 121 | "street": nw.String(), 122 | "city": nw.String(), 123 | "zipcode": nw.Struct([nw.Field("zipcode", nw.UInt64())]), 124 | }, 125 | ), 126 | ], 127 | ) 128 | def test_typed_dict(spec: TypedDictType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 129 | schema = AnySchema(spec=spec) 130 | nw_schema = schema._nw_schema 131 | assert nw_schema == nw.Schema(expected_schema) 132 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.lock 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # UV 99 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | #uv.lock 103 | 104 | # poetry 105 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 106 | # This is especially recommended for binary packages to ensure reproducibility, and is more 107 | # commonly ignored for libraries. 108 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 109 | #poetry.lock 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | #pdm.lock 114 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 115 | # in version control. 
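115 | # in version control.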
116 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 117 | .pdm.toml 118 | .pdm-python 119 | .pdm-build/ 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | 171 | # PyPI configuration file 172 | .pypirc 173 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yaml: -------------------------------------------------------------------------------- 1 | name: "PR Checks" 2 | 3 | on: 4 | pull_request: 5 | 6 | env: 7 | DEFAULT_PYTHON: "3.14" 8 | 9 | jobs: 10 | pytest: 11 | strategy: 12 | matrix: 13 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout source code 17 | uses: actions/checkout@v6 18 | - name: Install uv 19 | uses: astral-sh/setup-uv@v7 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | enable-cache: "true" 23 | cache-dependency-glob: "pyproject.toml" 24 | - name: Run pytest 25 | run: uv run --all-extras --group tests pytest tests --cov=anyschema --cov=tests --cov-fail-under=95 26 | - name: Run doctest 27 | if: matrix.python-version == '3.14' 28 | run: uv run --all-extras --group tests pytest anyschema --doctest-modules 29 | 30 | minimum_versions: 31 | strategy: 32 | matrix: 33 | python-version: ["3.10", "3.11"] 34 | runs-on: ubuntu-latest 35 | steps: 36 | - name: Checkout source code 37 | uses: actions/checkout@v6 38 | - name: Install uv 39 | uses: astral-sh/setup-uv@v7 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | enable-cache: "true" 43 | cache-dependency-glob: "pyproject.toml" 44 | - name: Run pytest 45 | run: | 46 | uv run \ 47 | --with narwhals==2.0.0,attrs==22.1.0,pydantic==2.0.0,sqlalchemy==2.0.0 \ 48 | --all-extras --group tests \ 49 | pytest tests --cov=anyschema --cov=tests --cov-fail-under=95 50 | 51 | typing: 52 | runs-on: ubuntu-latest 53 | steps: 54 | - name: Checkout source code 55 | uses: actions/checkout@v6 56 | - name: Install uv 57 | uses: astral-sh/setup-uv@v7 58 | with: 59 | python-version: ${{ env.DEFAULT_PYTHON }} 60 | enable-cache: "true" 61 | cache-dependency-glob: "pyproject.toml" 62 | - name: Run type checking 63 | run: | 64 | uv run --all-extras --group typing ty check anyschema tests --output-format github 65 | uv run --all-extras --group typing pyright anyschema tests 66 | uv run --all-extras --group typing mypy anyschema tests 67 | 68 | 
mkdocs-build: 69 | runs-on: ubuntu-latest 70 | steps: 71 | - name: Checkout source code 72 | uses: actions/checkout@v6 73 | - name: Install uv 74 | uses: astral-sh/setup-uv@v7 75 | with: 76 | python-version: ${{ env.DEFAULT_PYTHON }} 77 | enable-cache: "true" 78 | cache-dependency-glob: "pyproject.toml" 79 | - name: Build docs strict mode 80 | run: uv run --all-extras --group docs mkdocs build --strict 81 | 82 | ruff: 83 | runs-on: ubuntu-latest 84 | steps: 85 | - name: Checkout source code 86 | uses: actions/checkout@v6 87 | - name: Install uv 88 | uses: astral-sh/setup-uv@v7 89 | with: 90 | python-version: ${{ env.DEFAULT_PYTHON }} 91 | enable-cache: "true" 92 | cache-dependency-glob: "pyproject.toml" 93 | - name: Run ruff linter and formatter 94 | run: | 95 | uvx ruff version 96 | uvx ruff format anyschema tests --diff 97 | uvx ruff check anyschema tests --output-format=github 98 | uvx ruff clean 99 | 100 | rumdl: 101 | runs-on: ubuntu-latest 102 | steps: 103 | - name: Checkout source code 104 | uses: actions/checkout@v6 105 | - name: Install uv 106 | uses: astral-sh/setup-uv@v7 107 | with: 108 | python-version: ${{ env.DEFAULT_PYTHON }} 109 | enable-cache: "true" 110 | cache-dependency-glob: "pyproject.toml" 111 | - name: Run rumdl linter and formatter 112 | run: | 113 | uv tool run rumdl version 114 | uv tool run rumdl check --output-format=github . 115 | 116 | check-typos: 117 | runs-on: ubuntu-latest 118 | steps: 119 | - name: Checkout source code 120 | uses: actions/checkout@v6 121 | - name: Check for typos 122 | uses: crate-ci/typos@master 123 | with: 124 | files: . 125 | -------------------------------------------------------------------------------- /tests/anyschema/to_arrow_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any 4 | 5 | import pyarrow as pa 6 | import pytest 7 | from narwhals import Schema 8 | from pydantic import BaseModel 9 | from pydantic import Field as PydanticField 10 | 11 | from anyschema import AnySchema 12 | 13 | if TYPE_CHECKING: 14 | from anyschema.typing import Spec 15 | 16 | 17 | class User(BaseModel): 18 | id: int = PydanticField( 19 | json_schema_extra={ 20 | "anyschema": {"nullable": False}, 21 | "description": "User ID", # Description outside anyschema namespace will end up in Field metadata 22 | } 23 | ) 24 | username: str = PydanticField(json_schema_extra={"anyschema": {"nullable": True}}) 25 | email: str | None 26 | 27 | 28 | class Product(BaseModel): 29 | name: str | None = PydanticField( 30 | json_schema_extra={ 31 | "anyschema": {"nullable": False, "description": "Product name"}, 32 | "max_length": 100, 33 | } 34 | ) 35 | price: float = PydanticField( 36 | json_schema_extra={ 37 | "anyschema": {"nullable": True}, 38 | "currency": "USD", 39 | "min": 0, 40 | } 41 | ) 42 | 43 | 44 | def test_pydantic_to_arrow(pydantic_student_cls: type[BaseModel]) -> None: 45 | anyschema = AnySchema(spec=pydantic_student_cls) 46 | pa_schema = anyschema.to_arrow() 47 | 48 | assert isinstance(pa_schema, pa.Schema) 49 | names_and_types = ( 50 | ("name", pa.string()), 51 | ("date_of_birth", pa.date32()), 52 | ("age", pa.uint64()), 53 | ("classes", pa.list_(pa.string())), 54 | ("has_graduated", pa.bool_()), 55 | ) 56 | fields: tuple[pa.Field[Any], ...] 
= tuple(pa.field(name, dtype, nullable=False) for name, dtype in names_and_types) 57 | assert pa_schema == pa.schema(fields) 58 | 59 | 60 | def test_nw_schema_to_arrow(nw_schema: Schema) -> None: 61 | unsupported_dtypes = {"array", "int128", "uint128", "decimal", "enum", "object", "unknown"} 62 | model = Schema({k: v for k, v in nw_schema.items() if k not in unsupported_dtypes}) 63 | anyschema = AnySchema(spec=model) 64 | pa_schema = anyschema.to_arrow() 65 | 66 | assert isinstance(pa_schema, pa.Schema) 67 | 68 | struct_dtype = pa.struct([("field_1", pa.string()), ("field_2", pa.bool_())]) 69 | names_and_dtypes = ( 70 | ("boolean", pa.bool_()), 71 | ("categorical", pa.dictionary(pa.uint32(), pa.string())), 72 | ("date", pa.date32()), 73 | ("datetime", pa.timestamp(unit="us", tz=None)), 74 | ("duration", pa.duration(unit="us")), 75 | ("float32", pa.float32()), 76 | ("float64", pa.float64()), 77 | ("int8", pa.int8()), 78 | ("int16", pa.int16()), 79 | ("int32", pa.int32()), 80 | ("int64", pa.int64()), 81 | ("list", pa.list_(pa.float32())), 82 | ("string", pa.string()), 83 | ("struct", struct_dtype), 84 | ("uint8", pa.uint8()), 85 | ("uint16", pa.uint16()), 86 | ("uint32", pa.uint32()), 87 | ("uint64", pa.uint64()), 88 | ) 89 | assert pa_schema == pa.schema((pa.field(name, dtype, nullable=False) for name, dtype in names_and_dtypes)) 90 | 91 | 92 | @pytest.mark.parametrize( 93 | ("spec", "expected_nullable"), 94 | [ 95 | ({"id": int, "name": str, "email": None | str}, (False, False, True)), 96 | (User, (False, True, True)), 97 | (Product, (False, True)), 98 | ], 99 | ) 100 | def test_to_arrow_nullable_flags(spec: Spec, expected_nullable: tuple[bool, ...]) -> None: 101 | schema = AnySchema(spec=spec) 102 | pa_schema = schema.to_arrow() 103 | 104 | for field, nullable in zip(pa_schema, expected_nullable, strict=True): 105 | assert field.nullable is nullable 106 | 107 | 108 | @pytest.mark.parametrize( 109 | ("spec", "expected_metadata"), 110 | [ 111 | ({"id": int, "name": str, "email": None | str}, (None, None, None)), 112 | (User, ({b"description": b"User ID"}, None, None)), 113 | (Product, ({b"max_length": b"100"}, {b"currency": b"USD", b"min": b"0"})), 114 | ], 115 | ) 116 | def test_to_arrow_with_metadata(spec: Spec, expected_metadata: tuple[dict[bytes, bytes], ...]) -> None: 117 | schema = AnySchema(spec=spec) 118 | pa_schema = schema.to_arrow() 119 | 120 | for field, _metadata in zip(pa_schema, expected_metadata, strict=True): 121 | assert field.metadata == _metadata 122 | -------------------------------------------------------------------------------- /anyschema/typing.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Callable, Generator, Mapping, Sequence 4 | from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypeAlias, TypedDict 5 | 6 | if TYPE_CHECKING: 7 | from dataclasses import Field as DataclassField 8 | from typing import ClassVar 9 | 10 | from attrs import AttrsInstance 11 | from narwhals.dtypes import DType 12 | from narwhals.schema import Schema 13 | from narwhals.typing import TimeUnit 14 | from pydantic import BaseModel 15 | from sqlalchemy import Table 16 | from sqlalchemy.orm import DeclarativeBase 17 | from sqlalchemy.sql.type_api import TypeEngine 18 | 19 | from anyschema.parsers import ParserStep 20 | 21 | AttrsClassType: TypeAlias = type[AttrsInstance] 22 | SQLAlchemyTableType: TypeAlias = Table | type[DeclarativeBase] 23 | 24 | 25 | 
IntoOrderedDict: TypeAlias = Mapping[str, type] | Sequence[tuple[str, type]]
26 | """An object that can be converted into a python [`OrderedDict`][ordered-dict].
27 | 
28 | We check whether the object is either a mapping or a sequence of 2-tuples.
29 | 
30 | [ordered-dict]: https://docs.python.org/3/library/collections.html#collections.OrderedDict
31 | """
32 | 
33 | IntoParserPipeline: TypeAlias = "Literal['auto'] | Sequence['ParserStep']"
34 | """An object that can be converted into a [`ParserPipeline`][anyschema.parsers.ParserPipeline].
35 | 
36 | Either "auto" or a sequence of [`ParserStep`][anyschema.parsers.ParserStep].
37 | """
38 | 
39 | UnknownSpec: TypeAlias = Any
40 | """An unknown specification."""
41 | 
42 | Spec: TypeAlias = "Schema | IntoOrderedDict | type[BaseModel] | DataclassType | TypedDictType | AttrsClassType | SQLAlchemyTableType | UnknownSpec"  # noqa: E501
43 | """Input specification supported directly by [`AnySchema`][anyschema.AnySchema]."""
44 | 
45 | FieldName: TypeAlias = str
46 | FieldType: TypeAlias = "type[Any] | Annotated[Any, ...] | TypeEngine[Any]"
47 | FieldConstraints: TypeAlias = tuple[Any, ...]
48 | FieldMetadata: TypeAlias = dict[str, Any]
49 | 
50 | FieldSpec: TypeAlias = tuple[FieldName, FieldType, FieldConstraints, FieldMetadata]
51 | """Field specification: alias for a tuple of `(str, type, tuple(constraints, ...), dict(metadata))`."""
52 | 
53 | FieldSpecIterable: TypeAlias = Generator[FieldSpec, None, None]
54 | """Return type of an adapter."""
55 | 
56 | Adapter: TypeAlias = Callable[[Any], FieldSpecIterable]
57 | """Adapter expected signature.
58 | 
59 | An adapter is a callable that adapts a spec into field specifications.
60 | """
61 | 
62 | 
63 | class DataclassInstance(Protocol):
64 |     """Protocol that represents a dataclass in Python."""
65 | 
66 |     # dataclasses are runtime composed entities making them tricky to type; this may not work perfectly
67 |     # with all type checkers
68 |     # code adapted from typeshed:
69 |     # https://github.com/python/typeshed/blob/9ab7fde0a0cd24ed7a72837fcb21093b811b80d8/stdlib/_typeshed/__init__.pyi#L351
70 |     __dataclass_fields__: ClassVar[dict[str, DataclassField[Any]]]
71 | 
72 | 
73 | DataclassType = type[DataclassInstance]
74 | 
75 | 
76 | class TypedDictType(Protocol):
77 |     """Protocol that represents a TypedDict in Python."""
78 | 
79 |     __annotations__: dict[str, type]
80 |     __required_keys__: frozenset[str]
81 |     __optional_keys__: frozenset[str]
82 | 
83 | 
84 | class AnySchemaMetadata(TypedDict, total=False):
85 |     """TypedDict for anyschema-specific metadata keys.
86 | 
87 |     This structure defines the nested metadata format that anyschema recognizes
88 |     for controlling field parsing behavior. All keys are optional.
89 | 
90 |     Attributes:
91 |         description: Human-readable description of the field.
92 |         dtype: Narwhals DType (or its serialized/string representation).
93 |         nullable: Whether the field can contain null values.
94 |         time_zone: Timezone for datetime fields (e.g., "UTC", "Europe/Berlin").
95 |         time_unit: Time precision for datetime fields ("s", "ms", "us", "ns").
96 |         unique: Whether all values in the field must be unique.
97 | 98 | Examples: 99 | >>> metadata: AnySchemaMetadata = {"nullable": True, "time_zone": "UTC"} 100 | >>> metadata["unique"] = False 101 | """ 102 | 103 | description: str | None 104 | dtype: str | DType 105 | nullable: bool 106 | time_zone: str 107 | time_unit: TimeUnit 108 | unique: bool 109 | 110 | 111 | AnySchemaMetadataKey: TypeAlias = Literal["description", "dtype", "nullable", "time_zone", "time_unit", "unique"] 112 | AnySchemaNamespaceKey: TypeAlias = Literal["anyschema", "x-anyschema"] 113 | -------------------------------------------------------------------------------- /tests/parsers/parsers_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Annotated, Optional 4 | 5 | import narwhals as nw 6 | import pytest 7 | from annotated_types import Gt 8 | from pydantic import BaseModel, PositiveInt 9 | 10 | from anyschema.parsers import ( 11 | AnnotatedStep, 12 | ForwardRefStep, 13 | ParserPipeline, 14 | ParserStep, 15 | PyTypeStep, 16 | UnionTypeStep, 17 | make_pipeline, 18 | ) 19 | from anyschema.parsers.annotated_types import AnnotatedTypesStep 20 | from anyschema.parsers.attrs import AttrsTypeStep 21 | from anyschema.parsers.pydantic import PydanticTypeStep 22 | from anyschema.parsers.sqlalchemy import SQLAlchemyTypeStep 23 | 24 | AUTO_PIPELINE_CLS_ORDER = ( 25 | ForwardRefStep, 26 | UnionTypeStep, 27 | AnnotatedStep, 28 | AnnotatedTypesStep, 29 | AttrsTypeStep, 30 | PydanticTypeStep, 31 | SQLAlchemyTypeStep, 32 | PyTypeStep, 33 | ) 34 | 35 | PY_TYPE_STEP = PyTypeStep() 36 | 37 | 38 | class Address(BaseModel): 39 | street: str 40 | city: str 41 | 42 | 43 | class Person(BaseModel): 44 | name: str 45 | address: Address 46 | 47 | 48 | def test_make_pipeline_auto(auto_pipeline: ParserPipeline) -> None: 49 | assert isinstance(auto_pipeline, ParserPipeline) 50 | assert len(auto_pipeline.steps) == len(AUTO_PIPELINE_CLS_ORDER) 51 | 52 | for _parser, _cls in zip(auto_pipeline.steps, AUTO_PIPELINE_CLS_ORDER, strict=True): 53 | assert isinstance(_parser, _cls) 54 | assert _parser.pipeline is auto_pipeline 55 | 56 | 57 | @pytest.mark.parametrize( 58 | "steps", 59 | [ 60 | (PyTypeStep(),), 61 | (UnionTypeStep(), PyTypeStep()), 62 | (UnionTypeStep(), AnnotatedStep(), PyTypeStep()), 63 | ], 64 | ) 65 | def test_make_pipeline_custom(steps: tuple[ParserStep, ...]) -> None: 66 | pipeline = make_pipeline(steps) 67 | assert isinstance(pipeline, ParserPipeline) 68 | assert len(pipeline.steps) == len(steps) 69 | 70 | for _pipeline_parser, _parser in zip(pipeline.steps, steps, strict=True): 71 | assert _parser is _pipeline_parser 72 | assert _parser.pipeline is pipeline 73 | 74 | 75 | def test_make_pipeline_invalid_steps() -> None: 76 | class NotAParserStep: 77 | """A class that is not a ParserStep.""" 78 | 79 | invalid_steps = [PyTypeStep(), NotAParserStep(), "also not a step"] 80 | expected_msg = "Expected a sequence of `ParserStep` instances, found.*NotAParserStep.*str" 81 | 82 | with pytest.raises(TypeError, match=expected_msg): 83 | make_pipeline(invalid_steps) # type: ignore[arg-type] 84 | 85 | 86 | @pytest.mark.parametrize( 87 | ("input_type", "expected"), 88 | [ 89 | (int, nw.Int64()), 90 | (str, nw.String()), 91 | (list[int], nw.List(nw.Int64())), 92 | (Optional[int], nw.Int64()), 93 | (list[str], nw.List(nw.String())), 94 | (Optional[float], nw.Float64()), 95 | (Annotated[int, Gt(0)], nw.UInt64()), 96 | (PositiveInt, nw.UInt64()), 97 | (Optional[str], nw.String()), 98 | (list[list[int]], 
nw.List(nw.List(nw.Int64()))), 99 | (Optional[Annotated[int, Gt(0)]], nw.UInt64()), 100 | (Annotated[Optional[int], "meta"], nw.Int64()), 101 | (Optional[list[int]], nw.List(nw.Int64())), 102 | (list[Optional[int]], nw.List(nw.Int64())), 103 | ], 104 | ) 105 | def test_non_nested_parsing(auto_pipeline: ParserPipeline, input_type: type, expected: nw.dtypes.DType) -> None: 106 | result = auto_pipeline.parse(input_type, (), {}) 107 | assert result == expected 108 | 109 | 110 | @pytest.mark.parametrize( 111 | ("input_type", "expected"), 112 | [ 113 | (Address, nw.Struct([nw.Field(name="street", dtype=nw.String()), nw.Field(name="city", dtype=nw.String())])), 114 | ( 115 | Person, 116 | nw.Struct( 117 | [ 118 | nw.Field(name="name", dtype=nw.String()), 119 | nw.Field( 120 | name="address", 121 | dtype=nw.Struct( 122 | [ 123 | nw.Field(name="street", dtype=nw.String()), 124 | nw.Field(name="city", dtype=nw.String()), 125 | ] 126 | ), 127 | ), 128 | ] 129 | ), 130 | ), 131 | ], 132 | ) 133 | def test_nested_parsing(auto_pipeline: ParserPipeline, input_type: type, expected: nw.dtypes.DType) -> None: 134 | result = auto_pipeline.parse(input_type, (), {}) 135 | assert result == expected 136 | -------------------------------------------------------------------------------- /tests/pydantic/list_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Annotated, Optional 4 | 5 | import narwhals as nw 6 | from annotated_types import Interval, Len 7 | from pydantic import BaseModel 8 | 9 | from tests.pydantic.utils import model_to_nw_schema 10 | 11 | if TYPE_CHECKING: 12 | from anyschema.parsers import ParserPipeline 13 | 14 | 15 | def test_parse_list_optional_outer(auto_pipeline: ParserPipeline) -> None: 16 | class ListModel(BaseModel): 17 | # python list[...] type 18 | py_list: list[int] 19 | py_list_optional: list[str] | None 20 | py_list_or_none: list[float] | None 21 | none_or_py_list: None | list[bool] 22 | 23 | # pydantic conlist type 24 | con_list: Annotated[list[int], Len(min_length=2)] 25 | con_list_optional: Optional[Annotated[list[str], Len(max_length=6)]] 26 | con_list_or_none: Annotated[list[float], Len(0)] | None 27 | none_or_con_list: None | Annotated[list[bool], Len(0)] 28 | 29 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 30 | expected = { 31 | "py_list": nw.List(nw.Int64()), 32 | "py_list_optional": nw.List(nw.String()), 33 | "py_list_or_none": nw.List(nw.Float64()), 34 | "none_or_py_list": nw.List(nw.Boolean()), 35 | "con_list": nw.List(nw.Int64()), 36 | "con_list_optional": nw.List(nw.String()), 37 | "con_list_or_none": nw.List(nw.Float64()), 38 | "none_or_con_list": nw.List(nw.Boolean()), 39 | } 40 | assert schema == expected 41 | 42 | 43 | def test_parse_list_optional_inner(auto_pipeline: ParserPipeline) -> None: 44 | class ListModel(BaseModel): 45 | # python list[...] 
type 46 | py_list_optional: list[str | None] 47 | py_list_or_none: list[float | None] | None 48 | none_or_py_list: list[None | bool] 49 | 50 | # pydantic conlist type 51 | con_list_optional: Annotated[list[Optional[int]], Len(min_length=2)] 52 | con_list_or_none: Annotated[list[str | None], Len(max_length=6)] 53 | none_or_con_list: Annotated[list[None | float], Len(0)] 54 | 55 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 56 | expected = { 57 | "py_list_optional": nw.List(nw.String()), 58 | "py_list_or_none": nw.List(nw.Float64()), 59 | "none_or_py_list": nw.List(nw.Boolean()), 60 | "con_list_optional": nw.List(nw.Int64()), 61 | "con_list_or_none": nw.List(nw.String()), 62 | "none_or_con_list": nw.List(nw.Float64()), 63 | } 64 | assert schema == expected 65 | 66 | 67 | def test_parse_list_optional_outer_and_inner(auto_pipeline: ParserPipeline) -> None: 68 | class ListModel(BaseModel): 69 | # python list[...] type 70 | py_list_optional_optional: list[int | None] | None 71 | py_list_optional_none: list[str | None] | None 72 | py_list_none_optional: list[float | None] | None 73 | py_list_none_none: list[None | bool] | None 74 | 75 | # pydantic conlist type 76 | con_list_optional_optional: Optional[Annotated[list[Optional[int]], Len(min_length=2)]] 77 | con_list_optional_none: Annotated[list[Optional[str]], Len(max_length=6)] | None 78 | con_list_none_optional: Optional[Annotated[list[float | None], Len(0)]] 79 | con_list_none_none: Annotated[list[None | bool], Len(0)] | None 80 | 81 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 82 | expected = { 83 | "py_list_optional_optional": nw.List(nw.Int64()), 84 | "py_list_optional_none": nw.List(nw.String()), 85 | "py_list_none_optional": nw.List(nw.Float64()), 86 | "py_list_none_none": nw.List(nw.Boolean()), 87 | "con_list_optional_optional": nw.List(nw.Int64()), 88 | "con_list_optional_none": nw.List(nw.String()), 89 | "con_list_none_optional": nw.List(nw.Float64()), 90 | "con_list_none_none": nw.List(nw.Boolean()), 91 | } 92 | assert schema == expected 93 | 94 | 95 | def test_parse_conlist_conint(auto_pipeline: ParserPipeline) -> None: 96 | class ListModel(BaseModel): 97 | # python list[...] 
type 98 | py_list_int8: list[Annotated[int, Interval(gt=-64, lt=64)]] | None 99 | py_list_uint8: list[Annotated[int, Interval(gt=0, lt=64)] | None] 100 | 101 | # pydantic conlist type 102 | con_list_int8: Annotated[list[None | Annotated[int, Interval(gt=-64, lt=64)]], Len(0)] 103 | con_list_uint8: Annotated[list[Optional[Annotated[int, Interval(gt=0, lt=64)]]], Len(0)] 104 | 105 | schema = model_to_nw_schema(ListModel, pipeline=auto_pipeline) 106 | expected = { 107 | "py_list_int8": nw.List(nw.Int8()), 108 | "py_list_uint8": nw.List(nw.UInt8()), 109 | "con_list_int8": nw.List(nw.Int8()), 110 | "con_list_uint8": nw.List(nw.UInt8()), 111 | } 112 | assert schema == expected 113 | -------------------------------------------------------------------------------- /tests/spec_to_schema/sqlalchemy_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Mapping 4 | 5 | import narwhals as nw 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | from tests.conftest import ( 10 | ComplexORM, 11 | EventORMWithTimeMetadata, 12 | ProductORM, 13 | SimpleUserORM, 14 | array_fixed_table, 15 | array_list_table, 16 | bigint_table, 17 | complex_table, 18 | event_table_with_time_metadata, 19 | event_table_with_tz_aware, 20 | user_table, 21 | ) 22 | 23 | if TYPE_CHECKING: 24 | from anyschema.typing import SQLAlchemyTableType 25 | 26 | 27 | @pytest.mark.parametrize( 28 | ("spec", "expected_schema"), 29 | [ 30 | # ORM models 31 | ( 32 | SimpleUserORM, 33 | { 34 | "id": nw.Int32(), 35 | "name": nw.String(), 36 | }, 37 | ), 38 | ( 39 | ProductORM, 40 | { 41 | "id": nw.Int32(), 42 | "name": nw.String(), 43 | "price": nw.Float32(), 44 | "in_stock": nw.Boolean(), 45 | }, 46 | ), 47 | ( 48 | ComplexORM, 49 | { 50 | "id": nw.Int32(), 51 | "name": nw.String(), 52 | "description": nw.String(), 53 | "age": nw.Int32(), 54 | "score": nw.Float32(), 55 | "is_active": nw.Boolean(), 56 | "created_at": nw.Datetime(), 57 | "birth_date": nw.Date(), 58 | }, 59 | ), 60 | # Core tables 61 | ( 62 | user_table, 63 | { 64 | "id": nw.Int32(), 65 | "name": nw.String(), 66 | "age": nw.Int32(), 67 | "email": nw.String(), 68 | }, 69 | ), 70 | ( 71 | complex_table, 72 | { 73 | "id": nw.Int32(), 74 | "name": nw.String(), 75 | "description": nw.String(), 76 | "age": nw.Int32(), 77 | "score": nw.Float32(), 78 | "is_active": nw.Boolean(), 79 | "created_at": nw.Datetime(), 80 | "birth_date": nw.Date(), 81 | }, 82 | ), 83 | ( 84 | bigint_table, 85 | { 86 | "id": nw.Int64(), 87 | "count": nw.Int64(), 88 | }, 89 | ), 90 | # Array types - List (no dimensions) 91 | ( 92 | array_list_table, 93 | { 94 | "id": nw.Int32(), 95 | "tags": nw.List(nw.String()), 96 | "scores": nw.List(nw.Float32()), 97 | }, 98 | ), 99 | # Array types - Fixed dimensions (Array) 100 | ( 101 | array_fixed_table, 102 | { 103 | "id": nw.Int32(), 104 | "coordinates": nw.Array(nw.Float32(), shape=(3,)), 105 | "matrix": nw.Array(nw.Int32(), shape=(2,)), 106 | }, 107 | ), 108 | ], 109 | ) 110 | def test_sqlalchemy_spec(spec: SQLAlchemyTableType, expected_schema: Mapping[str, nw.dtypes.DType]) -> None: 111 | schema = AnySchema(spec=spec) 112 | nw_schema = schema._nw_schema 113 | assert nw_schema == nw.Schema(expected_schema) 114 | 115 | 116 | @pytest.mark.parametrize( 117 | ("spec", "expected_schema"), 118 | [ 119 | # Table with time metadata 120 | ( 121 | event_table_with_time_metadata, 122 | { 123 | "id": nw.Int32(), 124 | "name": nw.String(), 125 | "created_at": 
nw.Datetime(), 126 | "scheduled_at": nw.Datetime(time_zone="UTC"), 127 | "started_at": nw.Datetime(time_unit="ms"), 128 | "completed_at": nw.Datetime(time_unit="ns", time_zone="Europe/Berlin"), 129 | }, 130 | ), 131 | # ORM with time metadata 132 | ( 133 | EventORMWithTimeMetadata, 134 | { 135 | "id": nw.Int32(), 136 | "name": nw.String(), 137 | "created_at": nw.Datetime(), 138 | "scheduled_at": nw.Datetime(time_zone="UTC"), 139 | "started_at": nw.Datetime(time_unit="ms"), 140 | "completed_at": nw.Datetime(time_unit="ns", time_zone="Europe/Berlin"), 141 | }, 142 | ), 143 | # Table with timezone-aware datetime 144 | ( 145 | event_table_with_tz_aware, 146 | { 147 | "id": nw.Int32(), 148 | "timestamp_utc": nw.Datetime(time_zone="UTC"), 149 | "timestamp_berlin": nw.Datetime(time_unit="ms", time_zone="Europe/Berlin"), 150 | }, 151 | ), 152 | ], 153 | ) 154 | def test_sqlalchemy_spec_with_time_metadata( 155 | spec: SQLAlchemyTableType, expected_schema: Mapping[str, nw.dtypes.DType] 156 | ) -> None: 157 | """Test that SQLAlchemy specs with time metadata are correctly converted to narwhals schema.""" 158 | schema = AnySchema(spec=spec) 159 | nw_schema = schema._nw_schema 160 | assert nw_schema == nw.Schema(expected_schema) 161 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "anyschema" 7 | version = "0.3.0" 8 | description = "From type specifications to dataframe schemas" 9 | readme = "README.md" 10 | license = {file = "LICENSE"} 11 | authors = [{ name = "Francesco Bruzzesi"}] 12 | requires-python = ">=3.10" 13 | dependencies = [ 14 | "narwhals>=2.0.0", 15 | "typing_extensions>=4.14.0", 16 | ] 17 | 18 | classifiers = [ 19 | "Development Status :: 3 - Alpha", 20 | "License :: OSI Approved :: Apache Software License", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | "Programming Language :: Python :: 3.13", 28 | "Programming Language :: Python :: 3.14", 29 | "Typing :: Typed" 30 | ] 31 | 32 | [project.urls] 33 | Homepage = "https://github.com/fbruzzesi/anyschema" 34 | Documentation = "https://fbruzzesi.github.io/anyschema/" 35 | Repository = "https://github.com/fbruzzesi/anyschema" 36 | "Issue Tracker" = "https://github.com/fbruzzesi/anyschema/issues" 37 | 38 | [project.optional-dependencies] 39 | attrs = ["attrs>=22.1.0"] 40 | pydantic = ["pydantic>=2.0.0"] 41 | sqlalchemy = ["sqlalchemy>=2.0.0"] 42 | 43 | [dependency-groups] 44 | tests = [ 45 | "anyschema[attrs,pydantic,sqlalchemy]", 46 | "covdefaults>=2.3.0", 47 | "hypothesis>=6.0.0", 48 | "narwhals[pandas,polars,pyarrow]", 49 | "pydantic-extra-types[pycountry,phonenumbers]", 50 | "pytest>=8.0.0", 51 | "pytest-cov>=6.0.0", 52 | "pytest-xdist>=3.8.0", 53 | ] 54 | 55 | typing = [ 56 | "mypy", 57 | "pandas-stubs", 58 | "pyarrow-stubs", 59 | "pyright", 60 | "ty", 61 | {include-group = "tests"}, 62 | ] 63 | 64 | docs = [ 65 | "anyschema[attrs,pydantic,sqlalchemy]", 66 | "black", # required by mkdocstrings_handlers 67 | "markdown-exec[ansi]", 68 | "mkdocs", 69 | "mkdocs-material", 70 | "mkdocstrings[python]", 71 | "mkdocstrings-python", 72 | "mkdocs-autorefs", 73 | 
"narwhals[pandas,polars,pyarrow]", 74 | "pytest", 75 | ] 76 | 77 | local-dev = [ 78 | "pre-commit", 79 | "ruff", 80 | {include-group = "tests"}, 81 | {include-group = "typing"}, 82 | {include-group = "docs"} 83 | ] 84 | 85 | [tool.hatch.build.targets.sdist] 86 | include = [ 87 | "anyschema/*", 88 | "tests/*", 89 | ] 90 | 91 | [tool.hatch.build.targets.wheel] 92 | include = [ 93 | "anyschema/*", 94 | ] 95 | 96 | [tool.pytest.ini_options] 97 | testpaths = ["tests"] 98 | filterwarnings = [ 99 | "error", 100 | ] 101 | xfail_strict = true 102 | 103 | [tool.ruff] 104 | line-length = 120 105 | target-version = "py310" 106 | fix = true 107 | 108 | [tool.ruff.lint] 109 | select = ["ALL"] 110 | ignore = [ 111 | "ANN401", 112 | "PLC0415", 113 | "UP045", 114 | "COM812", 115 | "ISC001", 116 | "D100", # Checks for undocumented public module definitions. 117 | "D104", # Checks for undocumented public package definitions. 118 | "D107", # Checks for public __init__ method definitions that are missing docstrings. 119 | "D205", 120 | "RET505", 121 | "FIX002", 122 | "TD003", 123 | "PLR0911", # Too many return statements 124 | ] 125 | 126 | [tool.ruff.lint.per-file-ignores] 127 | "tests/*" = [ 128 | "D", 129 | "S101", 130 | "UP", 131 | "SLF", 132 | ] 133 | 134 | [tool.ruff.lint.isort] 135 | required-imports = ["from __future__ import annotations"] 136 | 137 | [tool.ruff.lint.pydocstyle] 138 | convention = "google" 139 | 140 | [tool.ruff.lint.pylint] 141 | max-args = 6 142 | 143 | [tool.ruff.lint.pyupgrade] 144 | keep-runtime-typing = true 145 | 146 | [tool.ruff.format] 147 | docstring-code-format = true 148 | 149 | [tool.mypy] 150 | disallow_untyped_defs = true 151 | disallow_any_unimported = true 152 | no_implicit_optional = true 153 | check_untyped_defs = true 154 | warn_return_any = true 155 | show_error_codes = true 156 | warn_unused_ignores = true 157 | pretty = true 158 | strict = true 159 | files = ["anyschema", "tests"] 160 | plugins = [ 161 | "sqlalchemy.ext.mypy.plugin", 162 | "pydantic.mypy", 163 | ] 164 | 165 | 166 | [tool.pyright] 167 | pythonPlatform = "All" 168 | pythonVersion = "3.10" 169 | reportMissingTypeStubs = "none" 170 | reportPrivateUsage = "none" 171 | reportUnknownVariableType = "none" 172 | reportUnknownArgumentType = "none" 173 | reportUnknownMemberType = "none" 174 | reportUnknownLambdaType = "none" 175 | reportUnusedFunction = "none" 176 | enableExperimentalFeatures = true 177 | typeCheckingMode = "strict" 178 | 179 | include = ["anyschema", "tests"] 180 | 181 | [tool.coverage.run] 182 | plugins = ["covdefaults"] 183 | 184 | [tool.coverage.report] 185 | fail_under = 95 186 | omit = [ 187 | "anyschema/typing.py", 188 | ] 189 | 190 | [tool.rumdl] 191 | line-length = 120 192 | flavor = "mkdocs" 193 | include = ["docs/**", "README.md", "AGENTS.md"] 194 | disable = [ 195 | # NOTE: These look like either false positives or unsupported case 196 | # TODO(FBruzzesi): Report upstream 197 | # No blank line after admonition block 198 | "MD031", 199 | # Use fenced code blocks 200 | "MD046", 201 | ] 202 | 203 | [tool.rumdl.MD007] 204 | # Keep list indentation consistent 205 | indent = 4 206 | start-indented = false 207 | -------------------------------------------------------------------------------- /tests/field/metadata_mutation_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass, field 4 | from typing import Optional 5 | 6 | import attrs 7 | from pydantic import BaseModel 8 
| from pydantic import Field as PydanticField 9 | 10 | from anyschema import AnySchema 11 | 12 | 13 | def test_pydantic_field_metadata_not_mutated_by_optional() -> None: 14 | """Test that parsing Optional fields doesn't mutate Pydantic Field metadata.""" 15 | 16 | class User(BaseModel): 17 | name: str = PydanticField(json_schema_extra={"anyschema": {"description": "User name"}}) 18 | email: Optional[str] = PydanticField(json_schema_extra={"format": "email"}) 19 | 20 | name_metadata_before = {"anyschema": {"description": "User name"}} # literal snapshot, not a reference 21 | email_metadata_before = {"format": "email"} # comparing a live reference to itself could never detect in-place mutation 22 | 23 | schema = AnySchema(spec=User) 24 | 25 | assert schema.fields["name"].nullable is False 26 | assert schema.fields["email"].nullable is True 27 | 28 | name_metadata_after = User.model_fields["name"].json_schema_extra 29 | email_metadata_after = User.model_fields["email"].json_schema_extra 30 | 31 | # !NOTE: Ensure original metadata was not mutated 32 | assert name_metadata_before == name_metadata_after 33 | assert email_metadata_before == email_metadata_after 34 | 35 | 36 | def test_pydantic_field_metadata_with_explicit_anyschema_keys() -> None: 37 | """Test that existing anyschema/* keys in Pydantic metadata are not modified.""" 38 | 39 | class Product(BaseModel): 40 | id: int = PydanticField( 41 | json_schema_extra={ 42 | "anyschema": {"nullable": False, "unique": True, "description": "Product ID"}, 43 | } 44 | ) 45 | name: Optional[str] = PydanticField( 46 | json_schema_extra={ 47 | "anyschema": {"nullable": True}, 48 | "max_length": 100, 49 | } 50 | ) 51 | 52 | schema = AnySchema(spec=Product) 53 | 54 | assert schema.fields["id"].nullable is False 55 | assert schema.fields["id"].unique is True 56 | 57 | id_metadata_after = Product.model_fields["id"].json_schema_extra 58 | name_metadata_after = Product.model_fields["name"].json_schema_extra 59 | 60 | assert id_metadata_after == {"anyschema": {"nullable": False, "unique": True, "description": "Product ID"}} 61 | assert name_metadata_after == {"anyschema": {"nullable": True}, "max_length": 100} 62 | 63 | 64 | def test_dataclass_field_metadata_not_mutated() -> None: 65 | """Test that parsing doesn't mutate dataclass field metadata.""" 66 | 67 | @dataclass 68 | class Person: 69 | name: str = field(metadata={"description": "Person name"}) 70 | email: Optional[str] = field(metadata={"format": "email"}) 71 | 72 | # Get original metadata (dataclass fields are in __dataclass_fields__) 73 | name_field_before = Person.__dataclass_fields__["name"] 74 | email_field_before = Person.__dataclass_fields__["email"] 75 | name_metadata_before = dict(name_field_before.metadata) 76 | email_metadata_before = dict(email_field_before.metadata) 77 | 78 | schema = AnySchema(spec=Person) 79 | 80 | assert schema.fields["name"].nullable is False 81 | assert schema.fields["email"].nullable is True 82 | 83 | name_field_after = Person.__dataclass_fields__["name"] 84 | email_field_after = Person.__dataclass_fields__["email"] 85 | 86 | # !NOTE: Original metadata should not be mutated 87 | assert dict(name_field_after.metadata) == name_metadata_before 88 | assert dict(email_field_after.metadata) == email_metadata_before 89 | 90 | 91 | def test_attrs_field_metadata_not_mutated() -> None: 92 | """Test that parsing doesn't mutate attrs field metadata.""" 93 | 94 | @attrs.define 95 | class Book: 96 | title: str = attrs.field(metadata={"anyschema": {"description": "Book title"}}) 97 | isbn: Optional[str] = attrs.field(metadata={"format": "isbn"}) 98 | 99 | # Get
original metadata 100 | attrs_fields = attrs.fields(Book) 101 | title_field_before = next(f for f in attrs_fields if f.name == "title") 102 | isbn_field_before = next(f for f in attrs_fields if f.name == "isbn") 103 | title_metadata_before = dict(title_field_before.metadata) 104 | isbn_metadata_before = dict(isbn_field_before.metadata) 105 | 106 | schema = AnySchema(spec=Book) 107 | 108 | assert schema.fields["title"].nullable is False 109 | assert schema.fields["isbn"].nullable is True 110 | 111 | # Get metadata after 112 | attrs_fields_after = attrs.fields(Book) 113 | title_field_after = next(f for f in attrs_fields_after if f.name == "title") 114 | isbn_field_after = next(f for f in attrs_fields_after if f.name == "isbn") 115 | 116 | # !NOTE: Original metadata should not be mutated 117 | assert dict(title_field_after.metadata) == title_metadata_before 118 | assert dict(isbn_field_after.metadata) == isbn_metadata_before 119 | 120 | 121 | def test_dict_spec_is_safe() -> None: 122 | """Test that dict specs work correctly (they don't share metadata).""" 123 | spec = {"id": int, "name": Optional[str]} 124 | 125 | schema1 = AnySchema(spec=spec) 126 | schema2 = AnySchema(spec=spec) 127 | 128 | assert schema1.fields["name"].nullable is True 129 | assert schema2.fields["name"].nullable is True 130 | 131 | assert spec == {"id": int, "name": Optional[str]} 132 | -------------------------------------------------------------------------------- /anyschema/parsers/pydantic.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | from pydantic import AwareDatetime, BaseModel, FutureDate, FutureDatetime, NaiveDatetime, PastDate, PastDatetime 7 | 8 | from anyschema._dependencies import is_pydantic_base_model 9 | from anyschema._metadata import get_anyschema_value_by_key 10 | from anyschema.exceptions import UnsupportedDTypeError 11 | from anyschema.parsers._base import ParserStep 12 | 13 | if TYPE_CHECKING: 14 | from narwhals.dtypes import DType 15 | 16 | from anyschema.typing import FieldConstraints, FieldMetadata, FieldType 17 | 18 | __all__ = ("PydanticTypeStep",) 19 | 20 | 21 | class PydanticTypeStep(ParserStep): 22 | """Parser for Pydantic-specific types. 23 | 24 | Handles: 25 | 26 | - Pydantic datetime types (`AwareDatetime`, `NaiveDatetime`, etc.) 27 | - Pydantic date types (`PastDate`, `FutureDate`) 28 | - Pydantic `BaseModel` (Struct types) 29 | 30 | Warning: 31 | It requires [pydantic](https://docs.pydantic.dev/latest/) to be installed. 32 | """ 33 | 34 | def parse( 35 | self, 36 | input_type: FieldType, 37 | constraints: FieldConstraints, # noqa: ARG002 38 | metadata: FieldMetadata, 39 | ) -> DType | None: 40 | """Parse Pydantic-specific types into Narwhals dtypes. 41 | 42 | Arguments: 43 | input_type: The type to parse. 44 | constraints: Constraints associated with the type. 45 | metadata: Custom metadata dictionary. 46 | 47 | Returns: 48 | A Narwhals DType if this parser can handle the type, None otherwise. 49 | """ 50 | # Check if it's a type/class first (not a generic alias or other special form) 51 | if not isinstance(input_type, type): 52 | return None 53 | 54 | # Handle AwareDatetime 55 | if issubclass(input_type, AwareDatetime): # pyright: ignore[reportArgumentType] # ty: ignore[invalid-argument-type] 56 | # Pydantic AwareDatetime does not fix a single timezone, but any timezone would work. 
57 | # See https://github.com/pydantic/pydantic/issues/5829 58 | # Unless a timezone is specified via {"anyschema": {"time_zone": ...}}, we raise an error. 59 | if (time_zone := get_anyschema_value_by_key(metadata, key="time_zone")) is None: 60 | msg = ( 61 | "pydantic AwareDatetime does not specify a fixed timezone.\n\n" 62 | "Hint: You can specify a timezone via " 63 | "`Field(..., json_schema_extra={'anyschema': {'time_zone': 'UTC'}})`" 64 | ) 65 | raise UnsupportedDTypeError(msg) 66 | 67 | return nw.Datetime( 68 | time_unit=get_anyschema_value_by_key(metadata, key="time_unit", default="us"), time_zone=time_zone 69 | ) 70 | 71 | if issubclass(input_type, NaiveDatetime): # pyright: ignore[reportArgumentType] # ty: ignore[invalid-argument-type] 72 | # Pydantic NaiveDatetime should not receive a timezone. 73 | # If a timezone is specified via {"anyschema": {"time_zone": ...}}, we raise an error. 74 | if (time_zone := get_anyschema_value_by_key(metadata, key="time_zone")) is not None: 75 | msg = f"pydantic NaiveDatetime should not specify a timezone, found {time_zone}." 76 | raise UnsupportedDTypeError(msg) 77 | 78 | return nw.Datetime( 79 | time_unit=get_anyschema_value_by_key(metadata, key="time_unit", default="us"), time_zone=None 80 | ) 81 | 82 | # Handle datetime types 83 | if issubclass(input_type, (PastDatetime, FutureDatetime)): # pyright: ignore[reportArgumentType] 84 | # PastDatetime and FutureDatetime accept both aware and naive datetimes. 85 | return nw.Datetime( 86 | time_unit=get_anyschema_value_by_key(metadata, key="time_unit", default="us"), 87 | time_zone=get_anyschema_value_by_key(metadata, key="time_zone"), 88 | ) 89 | 90 | # Handle date types 91 | if issubclass(input_type, (PastDate, FutureDate)): # pyright: ignore[reportArgumentType] 92 | return nw.Date() 93 | 94 | # Handle Pydantic models (Struct types) 95 | if is_pydantic_base_model(input_type): 96 | return self._parse_pydantic_model(input_type) 97 | 98 | # TODO(FBruzzesi): Add support for more pydantic types. See https://github.com/FBruzzesi/anyschema/issues/45 99 | 100 | # This parser doesn't handle this type 101 | return None 102 | 103 | def _parse_pydantic_model(self, model: type[BaseModel]) -> DType: 104 | """Parse a Pydantic model into a Struct type. 105 | 106 | Arguments: 107 | model: The Pydantic model class or instance. 108 | 109 | Returns: 110 | A Narwhals Struct dtype. 
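Examples: For illustration only (hypothetical model, and assuming the default pipeline maps plain `int` to `nw.Int64()`): parsing `class Point(BaseModel): x: int` would yield `nw.Struct([nw.Field(name="x", dtype=nw.Int64())])`.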
111 | """ 112 | from anyschema.adapters import pydantic_adapter 113 | 114 | return nw.Struct( 115 | [ 116 | nw.Field( 117 | name=field_name, 118 | dtype=self.pipeline.parse(field_info, field_constraints, field_metadata, strict=True), 119 | ) 120 | for field_name, field_info, field_constraints, field_metadata in pydantic_adapter(model) 121 | ] 122 | ) 123 | -------------------------------------------------------------------------------- /tests/parsers/sqlalchemy_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from enum import Enum 5 | from typing import TYPE_CHECKING, Any 6 | 7 | import narwhals as nw 8 | import pytest 9 | from sqlalchemy import types as sqltypes 10 | 11 | from anyschema.exceptions import UnsupportedDTypeError 12 | from anyschema.parsers import make_pipeline 13 | from anyschema.parsers.sqlalchemy import SQLAlchemyTypeStep 14 | 15 | if TYPE_CHECKING: 16 | from narwhals.typing import TimeUnit 17 | 18 | 19 | @pytest.fixture 20 | def sqlalchemy_step() -> SQLAlchemyTypeStep: 21 | """Create a SQLAlchemyTypeStep with pipeline.""" 22 | step = SQLAlchemyTypeStep() 23 | _ = make_pipeline(steps=[step]) 24 | return step 25 | 26 | 27 | class Color(Enum): 28 | RED = 1 29 | BLUE = 2 30 | 31 | 32 | @pytest.mark.parametrize( 33 | ("input_type", "expected"), 34 | # SQLAlchemy types have incomplete generic parameters 35 | [ 36 | (sqltypes.Boolean(), nw.Boolean()), 37 | (sqltypes.SmallInteger(), nw.Int16()), 38 | (sqltypes.Integer(), nw.Int32()), 39 | (sqltypes.BigInteger(), nw.Int64()), 40 | (sqltypes.String(50), nw.String()), 41 | (sqltypes.Text(), nw.String()), 42 | (sqltypes.Unicode(50), nw.String()), 43 | (sqltypes.UnicodeText(), nw.String()), 44 | (sqltypes.Float(), nw.Float32()), 45 | (sqltypes.Double(), nw.Float64()), 46 | (sqltypes.Numeric(10, 2), nw.Float64()), 47 | (sqltypes.DECIMAL(10, 2), nw.Decimal()), 48 | (sqltypes.Date(), nw.Date()), 49 | (sqltypes.DateTime(), nw.Datetime()), 50 | (sqltypes.TIMESTAMP(), nw.Datetime()), 51 | (sqltypes.Time(), nw.Time()), 52 | (sqltypes.Interval(), nw.Duration()), 53 | (sqltypes.LargeBinary(), nw.Binary()), 54 | (sqltypes.BINARY(), nw.Binary()), 55 | (sqltypes.VARBINARY(), nw.Binary()), 56 | (sqltypes.JSON(), nw.String()), 57 | (sqltypes.Uuid(), nw.String()), 58 | (sqltypes.Enum("red", "green", "blue"), nw.Enum(["red", "green", "blue"])), 59 | (sqltypes.Enum(Color), nw.Enum(Color)), 60 | (sqltypes.ARRAY(sqltypes.Float()), nw.List(nw.Float32())), 61 | (sqltypes.ARRAY(sqltypes.Float(), dimensions=3), nw.Array(nw.Float32(), shape=(3,))), 62 | (int, None), 63 | ("not a sqlalchemy type", None), 64 | ], 65 | ) 66 | def test_sqlalchemy_parse_step( 67 | sqlalchemy_step: SQLAlchemyTypeStep, input_type: Any, expected: nw.dtypes.DType | None 68 | ) -> None: 69 | result = sqlalchemy_step.parse(input_type=input_type, constraints=(), metadata={}) 70 | assert result == expected 71 | 72 | 73 | @pytest.mark.parametrize("time_unit", ["s", "ms", "ns", "us"]) 74 | def test_sqlalchemy_datetime_naive_with_time_unit_metadata( 75 | sqlalchemy_step: SQLAlchemyTypeStep, time_unit: TimeUnit 76 | ) -> None: 77 | result = sqlalchemy_step.parse( 78 | input_type=sqltypes.DateTime(), constraints=(), metadata={"anyschema": {"time_unit": time_unit}} 79 | ) 80 | assert result == nw.Datetime(time_unit) 81 | 82 | 83 | def test_sqlalchemy_datetime_tz_aware_without_metadata_raises(sqlalchemy_step: SQLAlchemyTypeStep) -> None: 84 | msg = re.escape("SQLAlchemy 
`DateTime(timezone=True)` does not specify a fixed timezone.") 85 | with pytest.raises(UnsupportedDTypeError, match=msg): 86 | sqlalchemy_step.parse(input_type=sqltypes.DateTime(timezone=True), constraints=(), metadata={}) 87 | 88 | 89 | @pytest.mark.parametrize( 90 | ("metadata", "expected"), 91 | [ 92 | ({"anyschema": {"time_zone": "UTC"}}, nw.Datetime("us", time_zone="UTC")), 93 | ({"anyschema": {"time_zone": "Europe/Rome"}}, nw.Datetime("us", time_zone="Europe/Rome")), 94 | ({"anyschema": {"time_unit": "ms", "time_zone": "UTC"}}, nw.Datetime("ms", time_zone="UTC")), 95 | ( 96 | {"anyschema": {"time_unit": "ns", "time_zone": "America/New_York"}}, 97 | nw.Datetime("ns", time_zone="America/New_York"), 98 | ), 99 | ], 100 | ) 101 | def test_sqlalchemy_datetime_tz_aware_with_metadata( 102 | sqlalchemy_step: SQLAlchemyTypeStep, metadata: dict[str, Any], expected: nw.dtypes.DType 103 | ) -> None: 104 | result = sqlalchemy_step.parse( 105 | input_type=sqltypes.DateTime(timezone=True), 106 | constraints=(), 107 | metadata=metadata, 108 | ) 109 | assert result == expected 110 | 111 | 112 | def test_sqlalchemy_datetime_naive_with_timezone_raises(sqlalchemy_step: SQLAlchemyTypeStep) -> None: 113 | msg = re.escape("SQLAlchemy `DateTime(timezone=False)` should not specify a fixed timezone, found UTC") 114 | with pytest.raises(UnsupportedDTypeError, match=msg): 115 | sqlalchemy_step.parse( 116 | input_type=sqltypes.DateTime(timezone=False), 117 | constraints=(), 118 | metadata={"anyschema": {"time_zone": "UTC"}}, 119 | ) 120 | 121 | 122 | @pytest.mark.parametrize("input_type", [int, str, list[int], dict]) 123 | def test_sqlalchemy_non_sqlalchemy_types_return_none(sqlalchemy_step: SQLAlchemyTypeStep, input_type: Any) -> None: 124 | result = sqlalchemy_step.parse(input_type=input_type, constraints=(), metadata={}) 125 | assert result is None 126 | 127 | 128 | @pytest.mark.parametrize( 129 | "input_type", 130 | [ 131 | sqltypes.PickleType(), 132 | sqltypes.NullType(), 133 | ], 134 | ) 135 | def test_sqlalchemy_unhandled_types_return_none(sqlalchemy_step: SQLAlchemyTypeStep, input_type: Any) -> None: 136 | result = sqlalchemy_step.parse(input_type=input_type, constraints=(), metadata={}) 137 | assert result is None 138 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community.
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | https://www.linkedin.com/in/francesco-bruzzesi/. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. 
This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /tests/field/field_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import asdict 4 | from typing import TYPE_CHECKING, Any, TypedDict, cast 5 | 6 | import narwhals as nw 7 | import pytest 8 | 9 | from anyschema import AnyField 10 | 11 | if TYPE_CHECKING: 12 | from collections.abc import Mapping 13 | 14 | class IntoAnyField(TypedDict, total=False): 15 | """Arguments required to create a `AnyField` object.""" 16 | 17 | name: str 18 | dtype: nw.dtypes.DType 19 | nullable: bool 20 | unique: bool 21 | description: str | None 22 | metadata: Mapping[str, Any] 23 | 24 | 25 | @pytest.mark.parametrize("dtype", [nw.String(), nw.Int32(), nw.Array(nw.Int32(), shape=(3, 2))]) 26 | @pytest.mark.parametrize("nullable", [True, False, None]) 27 | @pytest.mark.parametrize("unique", [True, False, None]) 28 | @pytest.mark.parametrize("description", ["some description", None]) 29 | @pytest.mark.parametrize("metadata", [{"min": 0, "max": 150}, None]) 30 | def test_anyfield( 31 | dtype: nw.dtypes.DType, 32 | *, 33 | nullable: bool | None, 34 | unique: bool | None, 35 | description: str | None, 36 | metadata: Mapping[str, Any] | None, 37 | ) -> None: 38 | kwargs = { 39 | "name": "id", 40 | "dtype": dtype, 41 | "nullable": nullable, 42 | "unique": unique, 43 | "description": description, 44 | "metadata": metadata, 45 | } 46 | expected: IntoAnyField = { 47 | "name": "id", 48 | "dtype": dtype, 49 | "nullable": nullable if nullable is not None else False, 50 | "unique": unique if unique is not None else False, 51 | "description": description, 52 | "metadata": metadata if metadata is not None 
else {}, 53 | } 54 | into_field = cast("IntoAnyField", {k: v for k, v in kwargs.items() if v is not None}) 55 | field = AnyField(**into_field) 56 | assert asdict(field) == expected 57 | 58 | field2 = AnyField(**into_field) 59 | 60 | assert field == field2 61 | assert hash(field) == hash(field2) 62 | 63 | 64 | @pytest.mark.parametrize( 65 | ("field1_kwargs", "field2_kwargs"), 66 | [ 67 | ( 68 | {"name": "id", "dtype": nw.Int64()}, 69 | {"name": "user_id", "dtype": nw.Int64()}, 70 | ), 71 | ( 72 | {"name": "age", "dtype": nw.Int64()}, 73 | {"name": "age", "dtype": nw.Int32()}, 74 | ), 75 | ( 76 | {"name": "email", "dtype": nw.String(), "nullable": True}, 77 | {"name": "email", "dtype": nw.String(), "nullable": False}, 78 | ), 79 | ( 80 | {"name": "username", "dtype": nw.String(), "unique": False}, 81 | {"name": "username", "dtype": nw.String(), "unique": True}, 82 | ), 83 | ( 84 | {"name": "score", "dtype": nw.Float64(), "metadata": {"min": 0}}, 85 | {"name": "score", "dtype": nw.Float64(), "metadata": {"max": 100}}, 86 | ), 87 | ], 88 | ) 89 | def test_field_unequal_fields(field1_kwargs: IntoAnyField, field2_kwargs: IntoAnyField) -> None: 90 | field1, field2 = AnyField(**field1_kwargs), AnyField(**field2_kwargs) 91 | assert field1 != field2 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "other_value", 96 | [ 97 | "not a field", 98 | 42, 99 | None, 100 | {"name": "test"}, 101 | [], 102 | nw.String(), 103 | ], 104 | ) 105 | def test_field_equality_with_non_field(other_value: object) -> None: 106 | """Test that Field is not equal to non-Field objects.""" 107 | field = AnyField(name="test", dtype=nw.String()) 108 | assert field != other_value 109 | 110 | 111 | @pytest.mark.parametrize( 112 | ("field_configs", "expected_unique_count"), 113 | [ 114 | ( 115 | [ 116 | {"name": "id", "dtype": nw.Int64()}, 117 | {"name": "id", "dtype": nw.Int64()}, # Duplicate 118 | {"name": "name", "dtype": nw.String()}, 119 | ], 120 | 2, 121 | ), 122 | ( 123 | [ 124 | {"name": "a", "dtype": nw.String()}, 125 | {"name": "b", "dtype": nw.String()}, 126 | {"name": "c", "dtype": nw.String()}, 127 | ], 128 | 3, 129 | ), 130 | ( 131 | [ 132 | {"name": "id", "dtype": nw.Int64(), "nullable": True}, 133 | {"name": "id", "dtype": nw.Int64(), "nullable": True}, # Duplicate 134 | {"name": "id", "dtype": nw.Int64(), "nullable": False}, # Different 135 | ], 136 | 2, 137 | ), 138 | ], 139 | ) 140 | def test_field_use_in_set(field_configs: list[IntoAnyField], expected_unique_count: int) -> None: 141 | """Test that Field instances work correctly in sets.""" 142 | fields = [AnyField(**config) for config in field_configs] 143 | field_set = set(fields) 144 | assert len(field_set) == expected_unique_count 145 | 146 | 147 | @pytest.mark.parametrize( 148 | ("field_kwargs", "expected_description"), 149 | [ 150 | ({"name": "user_id", "dtype": nw.Int64(), "description": "Unique user identifier"}, "Unique user identifier"), 151 | ({"name": "user_id", "dtype": nw.Int64(), "description": None}, None), 152 | ({"name": "test", "dtype": nw.String()}, None), # Default 153 | ({"name": "email", "dtype": nw.String(), "description": ""}, ""), # Empty string 154 | ], 155 | ) 156 | def test_field_description_values(field_kwargs: IntoAnyField, expected_description: str | None) -> None: 157 | """Test Field creation with various description values.""" 158 | field = AnyField(**field_kwargs) 159 | assert field.description == expected_description 160 | -------------------------------------------------------------------------------- 
/tests/adapters/custom_adapters_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, Generator, TypedDict, cast 4 | 5 | import pyarrow as pa 6 | import pytest 7 | 8 | from anyschema import AnySchema 9 | 10 | if TYPE_CHECKING: 11 | from anyschema.typing import FieldSpec 12 | 13 | 14 | class SimpleSchema: 15 | """A simple schema format for testing.""" 16 | 17 | def __init__(self, fields: dict[str, type[Any]]) -> None: 18 | self.fields = fields 19 | 20 | 21 | def simple_dict_adapter(spec: SimpleSchema) -> Generator[FieldSpec, None, None]: 22 | """Adapter for SimpleSchema format. 23 | 24 | Arguments: 25 | spec: A SimpleSchema instance. 26 | 27 | Yields: 28 | Tuples of (field_name, field_type, constraints, metadata). 29 | """ 30 | for field_name, field_type in spec.fields.items(): 31 | yield field_name, field_type, (), {} 32 | 33 | 34 | class NestedSchema: 35 | """A schema that can contain nested schemas.""" 36 | 37 | def __init__(self, fields: dict[str, Any]) -> None: 38 | self.fields = fields 39 | 40 | 41 | def nested_adapter(spec: NestedSchema) -> Generator[FieldSpec, None, None]: 42 | """Adapter for nested schema structures. 43 | 44 | For nested schemas, we dynamically create a TypedDict so the parser 45 | can properly extract the field structure. 46 | 47 | Arguments: 48 | spec: A NestedSchema instance. 49 | 50 | Yields: 51 | Tuples of (field_name, field_type, constraints, metadata). 52 | """ 53 | for field_name, field_value in spec.fields.items(): 54 | if isinstance(field_value, NestedSchema): 55 | # For nested schemas, create a TypedDict with the proper structure 56 | nested_dict = {name: type_ for name, type_, *_ in nested_adapter(field_value)} 57 | # Create a dynamic TypedDict with the nested fields 58 | nested_typed_dict = TypedDict( # type: ignore[misc] 59 | f"{field_name.title()}TypedDict", # Generate a unique name 60 | nested_dict, # Field name -> type mapping 61 | ) 62 | yield field_name, nested_typed_dict, (), {} 63 | else: 64 | yield field_name, field_value, (), {} 65 | 66 | 67 | def test_simple_dict_spec() -> None: 68 | """Test that dict types are converted to Struct.""" 69 | fields = {"id": int, "metadata": dict} 70 | schema_spec = SimpleSchema(fields=fields) 71 | 72 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 73 | arrow_schema = schema.to_arrow() 74 | 75 | assert len(arrow_schema) == len(fields) 76 | assert arrow_schema.names == ["id", "metadata"] 77 | assert "struct" in str(arrow_schema.types[1]).lower() 78 | 79 | 80 | def test_typed_dict_spec() -> None: 81 | """Test that TypedDict is converted to Struct with fields.""" 82 | 83 | class PersonTypedDict(TypedDict): 84 | name: str 85 | age: int 86 | 87 | fields = {"person": PersonTypedDict} 88 | schema_spec = SimpleSchema(fields=fields) 89 | 90 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 91 | arrow_schema = schema.to_arrow() 92 | 93 | assert len(arrow_schema) == len(fields) 94 | assert arrow_schema.names == ["person"] 95 | # Should be a struct with name and age fields 96 | assert "struct" in str(arrow_schema.types[0]).lower() 97 | 98 | 99 | def test_nested_schema_adapter() -> None: 100 | """Test the nested schema adapter from the advanced documentation.""" 101 | fields = { 102 | "id": int, 103 | "profile": NestedSchema( 104 | fields={ 105 | "name": str, 106 | "age": int, 107 | } 108 | ), 109 | } 110 | schema_spec = NestedSchema(fields=fields) 111 | schema = 
AnySchema(spec=schema_spec, adapter=nested_adapter) 112 | arrow_schema = schema.to_arrow() 113 | 114 | assert len(arrow_schema) == len(fields) 115 | assert arrow_schema.names == ["id", "profile"] 116 | assert "struct" in str(arrow_schema.types[1]).lower() 117 | # Check that the nested struct has the correct fields 118 | profile_type = arrow_schema.types[1] 119 | assert profile_type.num_fields == len(cast("NestedSchema", fields["profile"]).fields) # Should have 2 fields 120 | assert pa.types.is_struct(profile_type) 121 | assert profile_type.names == ["name", "age"] 122 | 123 | 124 | def test_polars_schema_with_dict() -> None: 125 | """Test that dict types work with Polars schema conversion.""" 126 | fields = {"id": int, "metadata": dict, "name": str} 127 | schema_spec = SimpleSchema(fields=fields) 128 | 129 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 130 | polars_schema = schema.to_polars() 131 | 132 | assert len(polars_schema) == len(fields) 133 | # Polars schema items are DType classes, not instances 134 | assert str(polars_schema["id"]) == "Int64" 135 | # Polars represents empty struct as {} instead of [] 136 | assert "Struct" in str(polars_schema["metadata"]) 137 | assert str(polars_schema["name"]) == "String" 138 | 139 | 140 | @pytest.mark.parametrize( 141 | "dict_type", 142 | [dict, dict[str, int], dict[str, str]], 143 | ) 144 | def test_various_dict_types(dict_type: type) -> None: 145 | """Test that various dict type annotations are handled.""" 146 | fields = {"data": dict_type} 147 | schema_spec = SimpleSchema(fields=fields) 148 | 149 | schema = AnySchema(spec=schema_spec, adapter=simple_dict_adapter) 150 | arrow_schema = schema.to_arrow() 151 | 152 | assert len(arrow_schema) == len(fields) 153 | assert arrow_schema.names == ["data"] 154 | # All dict types should become structs 155 | assert "struct" in str(arrow_schema.types[0]).lower() 156 | -------------------------------------------------------------------------------- /anyschema/_metadata.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Literal, overload 4 | 5 | if TYPE_CHECKING: 6 | from narwhals.dtypes import DType 7 | from narwhals.typing import TimeUnit 8 | 9 | from anyschema.typing import AnySchemaMetadata, AnySchemaMetadataKey, AnySchemaNamespaceKey, FieldMetadata 10 | 11 | 12 | def _get_anyschema_metadata(metadata: FieldMetadata) -> AnySchemaMetadata: 13 | """Get the nested anyschema metadata dictionary from field metadata. 14 | 15 | Supports both "anyschema" and "x-anyschema" keys (OpenAPI convention). 16 | Returns an empty dict if neither key exists or if the stored value is empty. 17 | 18 | Arguments: 19 | metadata: The field metadata dictionary. 20 | 21 | Returns: 22 | The anyschema metadata dictionary, or empty dict if not found. 23 | 24 | Notes: 25 | This function tries "x-anyschema" (OpenAPI convention) first, then "anyschema". 26 | """ 27 | for key in ("x-anyschema", "anyschema"): 28 | if anyschema_meta := metadata.get(key): 29 | return anyschema_meta # type: ignore[no-any-return] 30 | return {} 31 | 32 | 33 | @overload 34 | def get_anyschema_value_by_key( 35 | metadata: FieldMetadata, *, key: Literal["nullable", "unique"], default: bool 36 | ) -> bool: ... 37 | 38 | 39 | @overload 40 | def get_anyschema_value_by_key( 41 | metadata: FieldMetadata, *, key: Literal["time_unit"], default: Literal["us"] 42 | ) -> TimeUnit: ...
43 | 44 | 45 | @overload 46 | def get_anyschema_value_by_key( 47 | metadata: FieldMetadata, *, key: Literal["nullable", "unique"], default: None = None 48 | ) -> bool | None: ... 49 | 50 | 51 | @overload 52 | def get_anyschema_value_by_key( 53 | metadata: FieldMetadata, *, key: Literal["time_unit"], default: Literal["us"] | None 54 | ) -> TimeUnit | None: ... 55 | 56 | 57 | @overload 58 | def get_anyschema_value_by_key( 59 | metadata: FieldMetadata, *, key: Literal["description", "time_zone"], default: str | None = None 60 | ) -> str | None: ... 61 | 62 | 63 | @overload 64 | def get_anyschema_value_by_key( 65 | metadata: FieldMetadata, *, key: Literal["dtype"], default: None = None 66 | ) -> DType | str | None: ... 67 | 68 | 69 | def get_anyschema_value_by_key( 70 | metadata: FieldMetadata, 71 | *, 72 | key: AnySchemaMetadataKey, 73 | default: bool | str | None = None, 74 | ) -> bool | str | DType | TimeUnit | None: 75 | """Get a specific anyschema metadata value with fallback to default. 76 | 77 | Supports both "anyschema" and "x-anyschema" keys (OpenAPI convention). 78 | 79 | Arguments: 80 | metadata: The field metadata dictionary. 81 | key: The anyschema metadata key to retrieve. 82 | default: Default value to return if key is not found. 83 | 84 | Returns: 85 | The metadata value, or default if not found. 86 | 87 | Examples: 88 | >>> metadata = {"anyschema": {"nullable": True, "unique": False}} 89 | >>> get_anyschema_value_by_key(metadata, key="nullable") 90 | True 91 | >>> get_anyschema_value_by_key(metadata, key="time_zone", default="UTC") 92 | 'UTC' 93 | >>> metadata_openapi = {"x-anyschema": {"nullable": True}} 94 | >>> get_anyschema_value_by_key(metadata_openapi, key="nullable") 95 | True 96 | """ 97 | return _get_anyschema_metadata(metadata).get(key, default) 98 | 99 | 100 | @overload 101 | def set_anyschema_meta(metadata: FieldMetadata, *, key: Literal["nullable", "unique"], value: bool) -> None: ... 102 | 103 | 104 | @overload 105 | def set_anyschema_meta( 106 | metadata: FieldMetadata, *, key: Literal["description", "time_zone"], value: str | None 107 | ) -> None: ... 108 | 109 | 110 | @overload 111 | def set_anyschema_meta(metadata: FieldMetadata, *, key: Literal["time_unit"], value: TimeUnit) -> None: ... 112 | 113 | 114 | def set_anyschema_meta( 115 | metadata: FieldMetadata, *, key: AnySchemaMetadataKey, value: bool | str | TimeUnit | None 116 | ) -> None: 117 | """Set a specific anyschema metadata value in the nested structure. 118 | 119 | Creates the nested dictionary if it doesn't exist. Modifies the metadata dict in-place. 120 | 121 | Arguments: 122 | metadata: The field metadata dictionary to modify. 123 | key: The anyschema metadata key to set. 124 | value: The value to set. 125 | 126 | Examples: 127 | >>> metadata: dict = {} 128 | >>> set_anyschema_meta(metadata, key="nullable", value=True) 129 | >>> metadata 130 | {'anyschema': {'nullable': True}} 131 | >>> set_anyschema_meta(metadata, key="unique", value=False) 132 | >>> metadata 133 | {'anyschema': {'nullable': True, 'unique': False}} 134 | 135 | Notes: 136 | If "x-anyschema" already exists in the metadata, it will be used; 137 | otherwise "anyschema" is used (the default). This preserves the user's 138 | choice of namespace key. 
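For instance (hypothetical input): `set_anyschema_meta({"x-anyschema": {}}, key="nullable", value=True)` updates the existing namespace in place, yielding `{"x-anyschema": {"nullable": True}}` rather than adding a separate "anyschema" key.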
139 | """ 140 | # Preserve existing key if present, otherwise default to "anyschema" 141 | anyschema_key: AnySchemaNamespaceKey = "x-anyschema" if "x-anyschema" in metadata else "anyschema" 142 | if anyschema_key not in metadata: 143 | namespace: AnySchemaMetadata = {} 144 | metadata[anyschema_key] = namespace 145 | 146 | metadata[anyschema_key][key] = value 147 | 148 | 149 | def filter_anyschema_metadata(metadata: FieldMetadata) -> FieldMetadata: 150 | """Filter out anyschema-specific metadata keys, returning only custom metadata. 151 | 152 | Removes both "anyschema" and "x-anyschema" keys to support both conventions. 153 | 154 | Arguments: 155 | metadata: The field metadata dictionary. 156 | 157 | Returns: 158 | A new dictionary with anyschema keys removed. 159 | 160 | Examples: 161 | >>> metadata = {"anyschema": {"nullable": True}, "custom": "value", "x-anyschema": {"unique": False}} 162 | >>> filter_anyschema_metadata(metadata) 163 | {'custom': 'value'} 164 | """ 165 | return {key: value for key, value in metadata.items() if key not in ("anyschema", "x-anyschema")} 166 | -------------------------------------------------------------------------------- /tests/anyschema/to_pandas_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import narwhals as nw 6 | import pandas as pd 7 | import pyarrow as pa 8 | import pytest 9 | 10 | from anyschema import AnySchema 11 | 12 | if TYPE_CHECKING: 13 | from narwhals.typing import DTypeBackend 14 | from pydantic import BaseModel 15 | 16 | 17 | @pytest.mark.parametrize( 18 | ("dtype_backend", "expected"), 19 | [ 20 | ( 21 | None, 22 | { 23 | "name": str, 24 | "date_of_birth": "date32[pyarrow]", 25 | "age": "uint64", 26 | "classes": pd.ArrowDtype(pa.list_(pa.string())), 27 | "has_graduated": "bool", 28 | }, 29 | ), 30 | ( 31 | "numpy_nullable", 32 | { 33 | "name": "string", 34 | "date_of_birth": "date32[pyarrow]", 35 | "age": "UInt64", 36 | "classes": pd.ArrowDtype(pa.list_(pa.string())), 37 | "has_graduated": "boolean", 38 | }, 39 | ), 40 | ( 41 | "pyarrow", 42 | { 43 | "name": "string[pyarrow]", 44 | "date_of_birth": "date32[pyarrow]", 45 | "age": "UInt64[pyarrow]", 46 | "classes": pd.ArrowDtype(pa.list_(pa.string())), 47 | "has_graduated": "boolean[pyarrow]", 48 | }, 49 | ), 50 | ], 51 | ) 52 | def test_pydantic_to_pandas( 53 | pydantic_student_cls: type[BaseModel], 54 | dtype_backend: DTypeBackend, 55 | expected: dict[str, str | pd.ArrowDtype | type], 56 | ) -> None: 57 | anyschema = AnySchema(spec=pydantic_student_cls) 58 | pd_schema = anyschema.to_pandas(dtype_backend=dtype_backend) 59 | assert isinstance(pd_schema, dict) 60 | assert pd_schema == expected 61 | 62 | 63 | @pytest.mark.parametrize( 64 | ("dtype_backend", "expected"), 65 | [ 66 | ( 67 | None, 68 | { 69 | "boolean": "bool", 70 | "categorical": "category", 71 | "date": "date32[pyarrow]", 72 | "datetime": "datetime64[us]", 73 | "duration": "timedelta64[us]", 74 | "float32": "float32", 75 | "float64": "float64", 76 | "int8": "int8", 77 | "int16": "int16", 78 | "int32": "int32", 79 | "int64": "int64", 80 | "list": pd.ArrowDtype(pa.list_(pa.float32())), 81 | "string": str, 82 | "struct": pd.ArrowDtype( 83 | pa.struct( 84 | [ 85 | ("field_1", pa.string()), 86 | ("field_2", pa.bool_()), 87 | ] 88 | ) 89 | ), 90 | "uint8": "uint8", 91 | "uint16": "uint16", 92 | "uint32": "uint32", 93 | "uint64": "uint64", 94 | }, 95 | ), 96 | ( 97 | "numpy_nullable", 98 | { 99 | 
"boolean": "boolean", 100 | "categorical": "category", 101 | "date": "date32[pyarrow]", 102 | "datetime": "datetime64[us]", 103 | "duration": "timedelta64[us]", 104 | "float32": "Float32", 105 | "float64": "Float64", 106 | "int8": "Int8", 107 | "int16": "Int16", 108 | "int32": "Int32", 109 | "int64": "Int64", 110 | "list": pd.ArrowDtype(pa.list_(pa.float32())), 111 | "string": "string", 112 | "struct": pd.ArrowDtype( 113 | pa.struct( 114 | [ 115 | ("field_1", pa.string()), 116 | ("field_2", pa.bool_()), 117 | ] 118 | ) 119 | ), 120 | "uint8": "UInt8", 121 | "uint16": "UInt16", 122 | "uint32": "UInt32", 123 | "uint64": "UInt64", 124 | }, 125 | ), 126 | ( 127 | "pyarrow", 128 | { 129 | "boolean": "boolean[pyarrow]", 130 | "categorical": "category", 131 | "date": "date32[pyarrow]", 132 | "datetime": "timestamp[us][pyarrow]", 133 | "duration": "duration[us][pyarrow]", 134 | "float32": "Float32[pyarrow]", 135 | "float64": "Float64[pyarrow]", 136 | "int8": "Int8[pyarrow]", 137 | "int16": "Int16[pyarrow]", 138 | "int32": "Int32[pyarrow]", 139 | "int64": "Int64[pyarrow]", 140 | "list": pd.ArrowDtype(pa.list_(pa.float32())), 141 | "string": "string[pyarrow]", 142 | "struct": pd.ArrowDtype( 143 | pa.struct( 144 | [ 145 | ("field_1", pa.string()), 146 | ("field_2", pa.bool_()), 147 | ] 148 | ) 149 | ), 150 | "uint8": "UInt8[pyarrow]", 151 | "uint16": "UInt16[pyarrow]", 152 | "uint32": "UInt32[pyarrow]", 153 | "uint64": "UInt64[pyarrow]", 154 | }, 155 | ), 156 | ], 157 | ) 158 | def test_nw_schema_to_arrow( 159 | nw_schema: nw.Schema, 160 | dtype_backend: DTypeBackend, 161 | expected: dict[str, str | pd.ArrowDtype | type], 162 | ) -> None: 163 | unsupported_dtypes = {"array", "enum", "uint128", "int128", "decimal", "object", "unknown"} 164 | model = nw.Schema({k: v for k, v in nw_schema.items() if k not in unsupported_dtypes}) 165 | anyschema = AnySchema(spec=model) 166 | pd_schema = anyschema.to_pandas(dtype_backend=dtype_backend) 167 | 168 | assert isinstance(pd_schema, dict) 169 | assert pd_schema == expected 170 | --------------------------------------------------------------------------------