├── .coveragerc ├── .github └── workflows │ ├── ci.yml │ ├── codeql.yml │ ├── docs-deploy.yml │ └── release.yml ├── .gitignore ├── .markdownlinkcheck.json ├── .markdownlint.yaml ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── api │ ├── adapters.md │ ├── core.md │ ├── extras.md │ ├── fields.md │ ├── migrations.md │ └── protocols.md ├── async_mongo_tutorial.md ├── changelog.md ├── ci.md ├── contributing.md ├── error_handling.md ├── getting_started.md ├── guides │ ├── architecture.md │ ├── async-patterns.md │ ├── creating-adapters.md │ ├── end-to-end-backend.md │ ├── field-families.md │ ├── fields-and-protocols-patterns.md │ ├── fields.md │ ├── index.md │ ├── protocols.md │ └── testing-strategies.md ├── index.md ├── migration_guide.md ├── migrations.md ├── neo4j_adapter.md ├── postgres_adapter.md ├── protocols.md ├── qdrant_adapter.md ├── sql_model_adapter.md ├── testing.md └── tutorials │ ├── using_migrations.md │ └── using_protocols.md ├── mkdocs.yml ├── notebooks ├── get_started │ ├── config.toml │ ├── employees.csv │ ├── getting_started.ipynb │ ├── inventory.json │ ├── inventory.toml │ ├── products.csv │ └── students.xlsx ├── using_async_mongo │ ├── async_mongo_tutorial.ipynb │ ├── docker-compose.yml │ └── mongo-init.js ├── using_neo4j │ ├── movie_recommendation_system.ipynb │ ├── neo4j_adapter.ipynb │ └── social_network_analysis.ipynb └── using_qdrant │ ├── docker-compose.yml │ └── qdrant_adapter.ipynb ├── pyproject.toml ├── pytest.ini ├── scripts └── ci.py ├── src └── pydapter │ ├── __init__.py │ ├── adapters │ ├── __init__.py │ ├── csv_.py │ ├── json_.py │ └── toml_.py │ ├── async_core.py │ ├── core.py │ ├── exceptions.py │ ├── extras │ ├── README.md │ ├── __init__.py │ ├── async_mongo_.py │ ├── async_neo4j_.py │ ├── async_postgres_.py │ ├── async_qdrant_.py │ ├── async_sql_.py │ ├── async_weaviate_.py │ ├── excel_.py │ ├── mongo_.py │ ├── neo4j_.py │ ├── pandas_.py │ ├── postgres_.py │ ├── qdrant_.py │ ├── sql_.py │ └── weaviate_.py │ ├── fields │ ├── __init__.py │ ├── builder.py │ ├── common_templates.py │ ├── dts.py │ ├── embedding.py │ ├── execution.py │ ├── families.py │ ├── ids.py │ ├── params.py │ ├── protocol_families.py │ ├── template.py │ ├── types.py │ └── validation_patterns.py │ ├── migrations │ ├── __init__.py │ ├── base.py │ ├── exceptions.py │ ├── protocols.py │ ├── registry.py │ └── sql │ │ ├── __init__.py │ │ └── alembic_adapter.py │ ├── model_adapters │ ├── __init__.py │ ├── config.py │ ├── pg_vector_model.py │ ├── postgres_model.py │ ├── sql_model.py │ ├── sql_vector_model.py │ └── type_registry.py │ ├── protocols │ ├── __init__.py │ ├── auditable.py │ ├── base_model.py │ ├── constants.py │ ├── cryptographical.py │ ├── embeddable.py │ ├── event.py │ ├── factory.py │ ├── identifiable.py │ ├── invokable.py │ ├── registry.py │ ├── soft_deletable.py │ ├── temporal.py │ ├── types.py │ └── utils.py │ ├── py.typed │ └── utils │ ├── __init__.py │ └── dependencies.py ├── tests ├── conftest.py ├── test_adapters │ ├── test_async_adapters.py │ ├── test_async_neo4j_adapter.py │ ├── test_async_neo4j_context_manager.py │ ├── test_async_postgres_adapter.py │ ├── test_async_postgres_adapter_extended.py │ ├── test_async_sql_adapter.py │ ├── test_async_sql_adapter_extended.py │ ├── test_async_weaviate_adapter.py │ ├── test_core_adapters.py │ ├── test_excel_adapter.py │ ├── test_integration_async_neo4j.py │ ├── test_integration_mongodb.py │ ├── test_integration_neo4j.py │ ├── test_integration_postgres.py │ ├── 
test_integration_qdrant.py │ ├── test_integration_weaviate.py │ ├── test_mongo_adapter.py │ ├── test_mongo_adapter_extended.py │ ├── test_neo4j_adapter.py │ ├── test_neo4j_adapter_extended.py │ ├── test_pandas_adapter.py │ ├── test_postgres_adapter.py │ ├── test_property_based.py │ ├── test_qdrant_adapter.py │ ├── test_qdrant_adapter_extended.py │ ├── test_sql_adapter.py │ ├── test_sql_adapter_extended.py │ └── test_weaviate_adapter.py ├── test_benchmark │ ├── test_bench_json.py │ └── test_benchmarks.py ├── test_dependencies.py ├── test_error_handling │ ├── test_async_error_handling.py │ ├── test_db_error_handling.py │ └── test_error_handling.py ├── test_fields │ ├── test_domain_model_builder.py │ ├── test_field_core.py │ ├── test_field_families.py │ ├── test_field_templates.py │ ├── test_protocol_behavioral_methods.py │ ├── test_protocol_field_families.py │ ├── test_utils.py │ └── test_validation_patterns.py ├── test_migrations │ ├── test_async_migrations_registry.py │ ├── test_migrations_base.py │ ├── test_migrations_exceptions.py │ ├── test_migrations_integration.py │ ├── test_migrations_protocols.py │ ├── test_migrations_registry.py │ ├── test_migrations_registry_error_handling.py │ └── test_migrations_sql_adapters.py ├── test_model_adapters │ ├── test_enhanced_sql_model_adapter.py │ ├── test_model_adapter_config.py │ ├── test_model_adapter_enhancements.py │ ├── test_pg_vector_model_adapter.py │ ├── test_pg_vector_model_adapter_additional.py │ ├── test_pg_vector_model_adapter_extended.py │ ├── test_pg_vector_model_adapter_relationships.py │ ├── test_postgres_model_adapter.py │ ├── test_postgres_model_adapter_extended.py │ ├── test_sql_model_adapter_additional.py │ ├── test_sql_model_adapter_extended.py │ ├── test_sql_model_adapters.py │ ├── test_sql_vector_model_adapter.py │ └── test_type_registry.py └── test_protocols │ ├── test_async_protocol.py │ ├── test_event_protocol.py │ ├── test_protocol_compliance.py │ ├── test_protocol_factory.py │ └── test_protocols.py └── uv.lock /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = src/pydapter 3 | omit = 4 | tests/* 5 | .github/khive_modes.json 6 | 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | def __repr__ 11 | raise NotImplementedError 12 | if __name__ == .__main__.: 13 | pass 14 | raise ImportError 15 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | workflow_dispatch: # Allow manual triggering 9 | 10 | jobs: 11 | test: 12 | name: Test Python ${{ matrix.python-version }} 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.10", "3.11", "3.12", "3.13"] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 # Fetch all history for proper versioning 23 | 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | cache: "pip" 29 | 30 | - name: Install uv 31 | run: | 32 | pip install uv 33 | 34 | - name: Set up Node.js 35 | uses: actions/setup-node@v4 36 | with: 37 | node-version: '18' 38 | 39 | - name: Install markdown tools 40 | run: | 41 | npm install -g markdownlint-cli markdown-link-check 42 | 43 | - name: Create virtual environment 44 | run: | 45 | uv 
venv .venv 46 | echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> $GITHUB_ENV 47 | echo "$GITHUB_WORKSPACE/.venv/bin" >> $GITHUB_PATH 48 | 49 | - name: Install dependencies 50 | run: | 51 | source .venv/bin/activate 52 | uv sync --extra all 53 | 54 | - name: Run CI script 55 | run: | 56 | uv run scripts/ci.py --python-version ${{ matrix.python-version }} 57 | 58 | - name: Upload coverage reports to Codecov 59 | uses: codecov/codecov-action@v5 60 | with: 61 | token: ${{ secrets.CODECOV_TOKEN }} 62 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [main, develop] 8 | schedule: 9 | - cron: "0 0 * * 0" 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: ["python"] 24 | 25 | steps: 26 | - name: Checkout repository 27 | uses: actions/checkout@v4 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v3 31 | with: 32 | languages: ${{ matrix.language }} 33 | 34 | - name: Autobuild 35 | uses: github/codeql-action/autobuild@v3 36 | 37 | - name: Perform CodeQL Analysis 38 | uses: github/codeql-action/analyze@v3 39 | -------------------------------------------------------------------------------- /.github/workflows/docs-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "docs/**" 9 | - "mkdocs.yml" 10 | - ".github/workflows/docs-deploy.yml" 11 | workflow_dispatch: 12 | 13 | permissions: 14 | contents: write 15 | 16 | jobs: 17 | deploy: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.10" 28 | cache: "pip" 29 | 30 | - name: Install dependencies 31 | run: | 32 | pip install uv 33 | uv venv 34 | source .venv/bin/activate 35 | echo "VIRTUAL_ENV=$GITHUB_WORKSPACE/.venv" >> $GITHUB_ENV 36 | echo "$GITHUB_WORKSPACE/.venv/bin" >> $GITHUB_PATH 37 | uv sync 38 | 39 | - name: Deploy documentation 40 | run: uv run mkdocs gh-deploy --force 41 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.10", "3.11", "3.12", "3.13"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: Install uv 24 | run: | 25 | curl -LsSf https://astral.sh/uv/install.sh | sh 26 | echo "$HOME/.cargo/bin" >> $GITHUB_PATH 27 | 28 | - name: Install dependencies 29 | run: | 30 | uv sync --extra all 31 | - name: Run tests 32 | run: uv run pytest -q --cov=pydapter --cov-report=xml 33 | 34 | deploy: 35 | needs: test 36 | runs-on: ubuntu-latest 37 | steps: 38
| - uses: actions/checkout@v4 39 | - name: Set up Python 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: "3.12" 43 | 44 | - name: Install uv 45 | run: | 46 | curl -LsSf https://astral.sh/uv/install.sh | sh 47 | echo "$HOME/.cargo/bin" >> $GITHUB_PATH 48 | 49 | - name: Install dependencies 50 | run: | 51 | uv venv 52 | uv pip install twine 53 | 54 | - name: Build package 55 | run: uv build 56 | 57 | - name: Publish package to PyPI 58 | env: 59 | TWINE_USERNAME: __token__ 60 | TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }} 61 | run: uv run twine upload dist/* 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | *.DS_Store 177 | .roomodes 178 | .roo 179 | .khive/ 180 | -------------------------------------------------------------------------------- /.markdownlinkcheck.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [ 3 | { 4 | "pattern": "^http://localhost" 5 | }, 6 | { 7 | "pattern": "^https://localhost" 8 | }, 9 | { 10 | "pattern": "^http://127.0.0.1" 11 | }, 12 | { 13 | "pattern": "^https://127.0.0.1" 14 | } 15 | ], 16 | "replacementPatterns": [ 17 | { 18 | "pattern": "^/", 19 | "replacement": "{{BASEURL}}/" 20 | } 21 | ], 22 | "httpHeaders": [ 23 | { 24 | "urls": [ 25 | "https://github.com", 26 | "https://api.github.com" 27 | ], 28 | "headers": { 29 | "Accept": "application/vnd.github.v3+json", 30 | "User-Agent": "pydapter-docs-link-checker" 31 | } 32 | } 33 | ], 34 | "timeout": "20s", 35 | "retryOn429": true, 36 | "retryCount": 3, 37 | "fallbackRetryDelay": "30s", 38 | "aliveStatusCodes": [ 39 | 200, 40 | 206, 41 | 301, 42 | 302, 43 | 303, 44 | 307, 45 | 308, 46 | 400, 47 | 401, 48 | 403, 49 | 405, 50 | 999 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | # Markdownlint configuration for pydapter documentation 2 | # See https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md 3 | 4 | # Default state for all rules 5 | default: true 6 | 7 | # Rule customizations 8 | MD003: # Header style 9 | style: "atx" 10 | 11 | MD007: # Unordered list indentation 12 | indent: 2 13 | 14 | MD013: # Line length 15 | line_length: 88 16 | heading_line_length: 100 17 | code_block_line_length: 120 18 | tables: false 19 | 20 | MD024: # Multiple headers with the same content 21 | siblings_only: true 22 | 23 | MD033: # Inline HTML 24 | allowed_elements: ["br", "sub", "sup", "kbd", "details", "summary"] 25 | 26 | MD041: # First line in file should 
be a top level header 27 | front_matter_title: "^\\s*title\\s*[:=]" 28 | 29 | # Disable rules that conflict with our documentation style 30 | MD026: false # Trailing punctuation in header 31 | MD034: false # Bare URL used (we use bare URLs in some contexts) 32 | MD036: false # Emphasis used instead of header (we use emphasis for API elements) 33 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # .pre-commit-config.yaml 2 | # See https://pre-commit.com for more information 3 | # See https://pre-commit.com/hooks.html for more hooks 4 | 5 | repos: 6 | # General file formatting and checks 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.6.0 # Use a recent stable version 9 | hooks: 10 | - id: trailing-whitespace # Removes trailing whitespace 11 | - id: end-of-file-fixer # Ensures files end with a single newline 12 | - id: check-yaml # Checks yaml files for parseable syntax 13 | - id: check-toml # Checks toml files for parseable syntax 14 | - id: pretty-format-json # Formats JSON files consistently 15 | args: [ 16 | "--autofix", 17 | "--no-sort-keys", 18 | ] # Autofix formatting, don't sort keys 19 | 20 | # Markdownlint for Markdown validation 21 | - repo: local 22 | hooks: 23 | - id: markdownlint 24 | name: Markdown lint 25 | entry: markdownlint 26 | language: node 27 | files: \.(md|markdown)$ 28 | additional_dependencies: ['markdownlint-cli@0.45.0'] 29 | - id: markdown-link-check 30 | name: Markdown link check 31 | entry: markdown-link-check 32 | language: node 33 | files: \.(md|markdown)$ 34 | additional_dependencies: ['markdown-link-check@3.13.7'] 35 | args: ['--config', '.markdownlinkcheck.json'] 36 | 37 | - repo: https://github.com/pycqa/isort 38 | rev: 5.13.2 39 | hooks: 40 | - id: isort 41 | args: ["--profile", "black"] 42 | 43 | # Ruff for Python linting and formatting 44 | - repo: https://github.com/astral-sh/ruff-pre-commit 45 | # Use a recent version; v0.5.1+ automatically detects pyproject.toml 46 | rev: v0.5.1 # Or specify your desired Ruff version 47 | hooks: 48 | # Run the linter first, with autofix enabled. 49 | # Place linter before formatter as recommended. 50 | - id: ruff 51 | args: [ 52 | --fix, 53 | --exit-non-zero-on-fix, 54 | ] # Apply fixes and report if changes were made 55 | - id: ruff-format 56 | 57 | - repo: https://github.com/asottile/pyupgrade 58 | rev: v3.15.2 59 | hooks: 60 | - id: pyupgrade 61 | args: [--py39-plus] 62 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.2.3 - 2025-05-29 4 | 5 | ### Added 6 | 7 | - **Field Families and Common Patterns Library** (Issue #114): Introduced a 8 | comprehensive field system with: 9 | - `FieldTemplate`: Reusable field definitions with flexible naming 10 | - `FieldFamilies`: Core database pattern collections (ENTITY, SOFT_DELETE, AUDIT) 11 | - `DomainModelBuilder`: Fluent API for building models with method chaining 12 | - `ProtocolFieldFamilies`: Field sets that ensure protocol compliance 13 | - `ValidationPatterns`: Common regex patterns and constraint builders 14 | - `create_protocol_model()`: Function to create protocol-compliant models (structure only) 15 | - **Protocol Enhancements**: 16 | - Added protocol constants (`IDENTIFIABLE`, `TEMPORAL`, etc.)
for type-safe protocol selection 17 | - Added `create_protocol_model_class()`: Factory function that creates models with both 18 | structural fields AND behavioral methods in one step 19 | - Added `combine_with_mixins()`: Helper to easily add protocol behaviors to existing models 20 | 21 | ### Changed 22 | 23 | - **BREAKING**: Removed "event" from the protocol system in `ProtocolFieldFamilies`. 24 | The `Event` class remains available but is no longer part of the protocol 25 | selection system since it's a concrete class, not a protocol interface. 26 | 27 | ### Fixed 28 | 29 | - Fixed import organization issues (E402 errors) 30 | - Updated tests to reflect simplified field families 31 | - Fixed email validation test expectations 32 | - Updated documentation to align with new architecture 33 | - Fixed SQLAlchemy primary key mapping issue in test_model_adapter_enhancements.py 34 | 35 | ## 0.2.0 - 2025-05-24 36 | 37 | ### Highlights 38 | 39 | This release introduces two major foundational modules: **Fields** and 40 | **Protocols**. These modules provide a robust and extensible framework for 41 | defining data structures and behaviors within `pydapter`. 42 | 43 | - **Fields Module (`pydapter.fields`)**: A powerful system for defining typed, 44 | validated, and serializable fields. It includes pre-defined field types for 45 | common use cases like IDs, datetimes, embeddings, and execution tracking, 46 | along with a flexible `Field` class for custom definitions and a 47 | `create_model` utility for dynamic Pydantic model creation. 48 | - **Protocols Module (`pydapter.protocols`)**: A set of composable interfaces 49 | (e.g., `Identifiable`, `Temporal`, `Embeddable`, `Invokable`, 50 | `Cryptographical`) that define standard behaviors for Pydantic models. The 51 | `Event` protocol combines these to offer comprehensive event tracking 52 | capabilities, enhanced by the `@as_event` decorator for easily instrumenting 53 | functions. 54 | 55 | These additions significantly enhance `pydapter`'s ability to model complex data 56 | interactions and workflows in a standardized and maintainable way. 57 | 58 | ### Added 59 | 60 | - **New `pydapter.fields` module**: Introduced a robust system for defining 61 | typed, validated, and serializable fields (e.g., IDs, datetimes, embeddings, 62 | execution tracking) and a `create_model` utility for dynamic Pydantic model 63 | creation. (Related to Issue #100, PR #99) 64 | - **New `pydapter.protocols` module**: Added composable protocol interfaces 65 | (`Identifiable`, `Temporal`, `Embeddable`, `Invokable`, `Cryptographical`) and 66 | an `Event` protocol with an `@as_event` decorator for comprehensive event 67 | modeling and function instrumentation. (Related to Issue #100, PR #99) 68 | - **Hybrid Documentation System**: Implemented a new documentation system 69 | combining auto-generated API skeletons with rich manual content and automated 70 | validation (markdown linting, link checking). (Issue #103, PR #104) 71 | - Updated CI to install documentation validation tools. (Issue #105) 72 | 73 | ### Fixed 74 | 75 | - Resolved Python 3.10 compatibility issues related to `datetime.timezone.utc`. 76 | (Part of PR #99 fixes) 77 | - Addressed various `mkdocs` build warnings and broken links in the 78 | documentation. 
(Part of PR #104 fixes) 79 | 80 | ## 0.1.5 - 2025-05-14 81 | 82 | ### Added 83 | 84 | - New adapter implementations: 85 | - `AsyncNeo4jAdapter` - Asynchronous adapter for Neo4j graph database with 86 | comprehensive error handling 87 | - `WeaviateAdapter` - Synchronous adapter for Weaviate vector database with 88 | vector search capabilities 89 | - `AsyncWeaviateAdapter` - Asynchronous adapter for Weaviate vector database 90 | using aiohttp for REST API calls 91 | 92 | ## 0.1.1 - 2025-05-04 93 | 94 | ### Added 95 | 96 | - Integration tests for database adapters using TestContainers 97 | - PostgreSQL integration tests 98 | - MongoDB integration tests 99 | - Neo4j integration tests 100 | - Qdrant vector database integration tests 101 | 102 | ### Fixed 103 | 104 | - Neo4j adapter now supports authentication 105 | - Qdrant adapter improved connection error handling 106 | - SQL adapter enhanced error handling for connection issues 107 | - Improved error handling in core adapter classes 108 | 109 | ## 0.1.0 - 2025-05-03 110 | 111 | - Initial public release. 112 | - `core.Adapter`, `AdapterRegistry`, `Adaptable` 113 | - Built-in JSON adapter 114 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Pydapter 2 | 3 | Thank you for your interest in contributing to Pydapter! This document provides 4 | guidelines and instructions for contributing to the project. 5 | 6 | ## Development Environment Setup 7 | 8 | 1. Fork the repository on GitHub 9 | 2. Clone your fork locally: 10 | 11 | ```bash 12 | git clone https://github.com/your-username/pydapter.git 13 | cd pydapter 14 | ``` 15 | 16 | 3. Set up a development environment: 17 | 18 | ```bash 19 | # Using uv (recommended) 20 | uv pip install -e ".[dev,all]" 21 | 22 | # Or using pip 23 | pip install -e ".[dev,all]" 24 | ``` 25 | 26 | 4. Install pre-commit hooks: 27 | 28 | ```bash 29 | pre-commit install 30 | ``` 31 | 32 | ## Development Workflow 33 | 34 | 1. Create a new branch for your feature or bugfix: 35 | 36 | ```bash 37 | git checkout -b feature/your-feature-name 38 | ``` 39 | 40 | 2. Make your changes, following the project's coding standards 41 | 42 | 3. Run the CI script locally to ensure all tests pass: 43 | 44 | ```bash 45 | python scripts/ci.py 46 | ``` 47 | 48 | 4. Commit your changes using conventional commit messages: 49 | 50 | ```bash 51 | git commit -m "feat: add new feature" 52 | ``` 53 | 54 | 5. Push your branch to your fork: 55 | 56 | ```bash 57 | git push origin feature/your-feature-name 58 | ``` 59 | 60 | 6. 
Open a pull request on GitHub 61 | 62 | ## Continuous Integration 63 | 64 | The project uses a comprehensive CI system that runs: 65 | 66 | - Linting checks (using ruff) 67 | - Code formatting checks (using ruff format) 68 | - Type checking (using mypy) 69 | - Unit tests (using pytest) 70 | - Integration tests (using pytest) 71 | - Coverage reporting 72 | - Documentation validation (using markdownlint and markdown-link-check) 73 | 74 | You can run the CI script locally with various options: 75 | 76 | ```bash 77 | # Run all checks 78 | python scripts/ci.py 79 | 80 | # Skip integration tests (which require Docker) 81 | python scripts/ci.py --skip-integration 82 | 83 | # Run only documentation validation 84 | python scripts/ci.py --only docs 85 | 86 | # Run only linting and formatting checks 87 | python scripts/ci.py --skip-unit --skip-integration --skip-coverage --skip-docs 88 | 89 | # Run tests in parallel 90 | python scripts/ci.py --parallel 4 91 | ``` 92 | 93 | For more information, see [the CI documentation](ci.md). 94 | 95 | ## Code Style 96 | 97 | This project follows these coding standards: 98 | 99 | - Code formatting with [ruff format](https://docs.astral.sh/ruff/formatter/) 100 | - Linting with [ruff](https://docs.astral.sh/ruff/) 101 | - Type annotations for all functions and classes 102 | - Comprehensive docstrings in 103 | [Google style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) 104 | - Test coverage for all new features 105 | 106 | ## Testing 107 | 108 | All new features and bug fixes should include tests. The project uses pytest for 109 | testing: 110 | 111 | ```bash 112 | # Run all tests 113 | uv run pytest 114 | 115 | # Run specific tests 116 | uv run pytest tests/test_specific_file.py 117 | 118 | # Run with coverage 119 | uv run pytest --cov=pydapter 120 | ``` 121 | 122 | ## Documentation 123 | 124 | Documentation is written in Markdown and built with MkDocs using a hybrid 125 | approach that combines auto-generated API references with enhanced manual 126 | content. 127 | 128 | ### Documentation Standards 129 | 130 | All documentation must follow these standards: 131 | 132 | 1. **Markdown Quality**: All markdown files must pass `markdownlint` validation 133 | 2. **Link Integrity**: All internal and external links must be valid 134 | 3. **API Documentation**: Use the hybrid approach with enhanced manual content 135 | 4. **Code Examples**: Include working code examples with proper syntax 136 | highlighting 137 | 5. **Cross-References**: Link related concepts and maintain navigation 138 | consistency 139 | 140 | ### Validation Tools 141 | 142 | The project uses automated validation tools: 143 | 144 | - **markdownlint**: Ensures consistent markdown formatting 145 | - **markdown-link-check**: Validates all links in documentation 146 | - **Pre-commit hooks**: Automatic validation before commits 147 | 148 | ### Writing Documentation 149 | 150 | When contributing documentation: 151 | 152 | 1. **API Reference**: Follow the pattern established in `docs/api/protocols.md` 153 | and `docs/api/core.md` 154 | 2. **Manual Enhancement**: Add examples, best practices, and cross-references 155 | beyond basic API extraction 156 | 3. **User Personas**: Consider different user needs (new users, API users, 157 | contributors) 158 | 4. **Code Examples**: Provide complete, runnable examples 159 | 5. 
**Navigation**: Ensure proper cross-linking between related sections 160 | 161 | ### Documentation Workflow 162 | 163 | ```bash 164 | # Preview documentation locally 165 | uv run mkdocs serve 166 | 167 | # Validate documentation 168 | python scripts/ci.py --only docs 169 | 170 | # Check specific files 171 | markdownlint docs/**/*.md 172 | markdown-link-check docs/api/core.md --config .markdownlinkcheck.json 173 | 174 | # Fix common issues automatically (when possible) 175 | markdownlint --fix docs/**/*.md 176 | ``` 177 | 178 | Then open http://127.0.0.1:8000/ in your browser to preview changes. 179 | 180 | ### Documentation Structure 181 | 182 | - `docs/api/`: API reference documentation (hybrid approach) 183 | - `docs/tutorials/`: Step-by-step guides 184 | - `docs/`: General guides and concepts 185 | - Examples should be complete and testable 186 | - Cross-references should use relative links 187 | 188 | ## Pull Request Process 189 | 190 | 1. Ensure your code passes all CI checks 191 | 2. Update documentation if necessary 192 | 3. Add tests for new features 193 | 4. Make sure your PR description clearly describes the changes and their purpose 194 | 5. Wait for review and address any feedback 195 | 196 | ## License 197 | 198 | By contributing to Pydapter, you agree that your contributions will be licensed 199 | under the project's MIT License. 200 | -------------------------------------------------------------------------------- /docs/api/adapters.md: -------------------------------------------------------------------------------- 1 | # Adapters API 2 | 3 | This page documents the built-in adapters provided by pydapter. 4 | 5 | ## CSV Adapter 6 | 7 | ::: pydapter.adapters.csv_ 8 | options: 9 | show_root_heading: true 10 | show_source: true 11 | 12 | ## JSON Adapter 13 | 14 | ::: pydapter.adapters.json_ 15 | options: 16 | show_root_heading: true 17 | show_source: true 18 | 19 | ## TOML Adapter 20 | 21 | ::: pydapter.adapters.toml_ 22 | options: 23 | show_root_heading: true 24 | show_source: true 25 | -------------------------------------------------------------------------------- /docs/api/extras.md: -------------------------------------------------------------------------------- 1 | # Extras API 2 | 3 | This page documents the extra adapters provided by pydapter.
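All extras adapters expose the same stateless `from_obj`/`to_obj` class methods as the core adapters. A minimal sketch for the Excel adapter (the `ExcelAdapter` class name, the workbook path, and the exact `from_obj` parameters are illustrative assumptions, not confirmed API; see the module references below for the actual signatures):

```python
from pydantic import BaseModel

from pydapter.extras.excel_ import ExcelAdapter  # assumed class name

class Employee(BaseModel):
    name: str
    age: int

# Hypothetical workbook path; each row is validated into an Employee instance
employees = ExcelAdapter.from_obj(Employee, "employees.xlsx", many=True)
```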
4 | 5 | ## Excel Adapter 6 | 7 | ::: pydapter.extras.excel_ 8 | options: 9 | show_root_heading: true 10 | show_source: true 11 | 12 | ## Pandas Adapter 13 | 14 | ::: pydapter.extras.pandas_ 15 | options: 16 | show_root_heading: true 17 | show_source: true 18 | 19 | ## SQL Adapter 20 | 21 | ::: pydapter.extras.sql_ 22 | options: 23 | show_root_heading: true 24 | show_source: true 25 | 26 | ## PostgreSQL Adapter 27 | 28 | ::: pydapter.extras.postgres_ 29 | options: 30 | show_root_heading: true 31 | show_source: true 32 | 33 | ## MongoDB Adapter 34 | 35 | ::: pydapter.extras.mongo_ 36 | options: 37 | show_root_heading: true 38 | show_source: true 39 | 40 | ## Neo4j Adapter 41 | 42 | ::: pydapter.extras.neo4j_ 43 | options: 44 | show_root_heading: true 45 | show_source: true 46 | 47 | ## Qdrant Adapter 48 | 49 | ::: pydapter.extras.qdrant_ 50 | options: 51 | show_root_heading: true 52 | show_source: true 53 | 54 | ## Async SQL Adapter 55 | 56 | ::: pydapter.extras.async_sql_ 57 | options: 58 | show_root_heading: true 59 | show_source: true 60 | 61 | ## Async PostgreSQL Adapter 62 | 63 | ::: pydapter.extras.async_postgres_ 64 | options: 65 | show_root_heading: true 66 | show_source: true 67 | 68 | ## Async MongoDB Adapter 69 | 70 | ::: pydapter.extras.async_mongo_ 71 | options: 72 | show_root_heading: true 73 | show_source: true 74 | 75 | ## Async Qdrant Adapter 76 | 77 | ::: pydapter.extras.async_qdrant_ 78 | options: 79 | show_root_heading: true 80 | show_source: true 81 |

## Weaviate Adapter

::: pydapter.extras.weaviate_
    options:
      show_root_heading: true
      show_source: true

## Async Neo4j Adapter

::: pydapter.extras.async_neo4j_
    options:
      show_root_heading: true
      show_source: true

## Async Weaviate Adapter

::: pydapter.extras.async_weaviate_
    options:
      show_root_heading: true
      show_source: true
-------------------------------------------------------------------------------- /docs/api/fields.md: -------------------------------------------------------------------------------- 1 | # Fields API Reference 2 | 3 | This page provides API documentation for the `pydapter.fields` module.
4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install pydapter 9 | ``` 10 | 11 | ## Overview 12 | 13 | The fields module provides tools for building robust, reusable model definitions: 14 | 15 | ```text 16 | ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ 17 | │ Field │ │ FieldTemplate │ │ FieldFamilies │ 18 | │ (Descriptor) │ │ (Reusable) │ │ (Collections) │ 19 | └─────────────────┘ └─────────────────┘ └─────────────────┘ 20 | 21 | ┌─────────────────┐ ┌─────────────────┐ 22 | │DomainModelBuilder│ │ValidationPatterns│ 23 | │ (Fluent API) │ │ (Validators) │ 24 | └─────────────────┘ └─────────────────┘ 25 | ``` 26 | 27 | ## Quick Start 28 | 29 | ```python 30 | from pydapter.fields import DomainModelBuilder, FieldTemplate 31 | 32 | # Build models with field families 33 | User = ( 34 | DomainModelBuilder("User") 35 | .with_entity_fields() # id, created_at, updated_at 36 | .with_audit_fields() # created_by, updated_by, version 37 | .add_field("name", FieldTemplate(base_type=str)) 38 | .add_field("email", FieldTemplate(base_type=str)) 39 | .build() 40 | ) 41 | ``` 42 | 43 | ## Field Templates 44 | 45 | ```python 46 | from pydapter.fields import FieldTemplate 47 | 48 | # Reusable field configuration 49 | email_template = FieldTemplate( 50 | base_type=str, 51 | description="Email address", 52 | validator=lambda cls, v: v.lower() 53 | ) 54 | 55 | # Create variations 56 | user_email = email_template.create_field("user_email") 57 | optional_email = email_template.as_nullable() 58 | email_list = email_template.as_listable() 59 | ``` 60 | 61 | ## Field Families 62 | 63 | ```python 64 | from pydapter.fields import FieldFamilies 65 | 66 | # Pre-defined collections 67 | entity_fields = FieldFamilies.ENTITY # id, created_at, updated_at 68 | audit_fields = FieldFamilies.AUDIT # created_by, updated_by, version 69 | soft_delete_fields = FieldFamilies.SOFT_DELETE # deleted_at, is_deleted 70 | ``` 71 | 72 | ## Model Creation 73 | 74 | ```python 75 | from pydapter.fields import create_model, Field 76 | 77 | # Create models with field lists 78 | fields = [ 79 | Field(name="id", annotation=str), 80 | Field(name="name", annotation=str), 81 | Field(name="email", annotation=str) 82 | ] 83 | 84 | User = create_model("User", fields=fields) 85 | ``` 86 | 87 | --- 88 | 89 | ## API Reference 90 | 91 | ### Core Types 92 | 93 | ::: pydapter.fields.types 94 | options: 95 | show_root_heading: true 96 | show_source: true 97 | 98 | ::: pydapter.fields.template 99 | options: 100 | show_root_heading: true 101 | show_source: true 102 | 103 | ### Specialized Fields 104 | 105 | ::: pydapter.fields.ids 106 | options: 107 | show_root_heading: true 108 | show_source: true 109 | 110 | ::: pydapter.fields.dts 111 | options: 112 | show_root_heading: true 113 | show_source: true 114 | 115 | ::: pydapter.fields.embedding 116 | options: 117 | show_root_heading: true 118 | show_source: true 119 | 120 | ::: pydapter.fields.execution 121 | options: 122 | show_root_heading: true 123 | show_source: true 124 | 125 | ::: pydapter.fields.params 126 | options: 127 | show_root_heading: true 128 | show_source: true 129 | 130 | ### Field Collections 131 | 132 | ::: pydapter.fields.common_templates 133 | options: 134 | show_root_heading: true 135 | show_source: true 136 | 137 | ::: pydapter.fields.families 138 | options: 139 | show_root_heading: true 140 | show_source: true 141 | 142 | ::: pydapter.fields.protocol_families 143 | options: 144 | show_root_heading: true 145 | show_source: true 146 | 147 | ### Builders and Utilities 148 | 149 | 
::: pydapter.fields.builder 150 | options: 151 | show_root_heading: true 152 | show_source: true 153 | 154 | ::: pydapter.fields.validation_patterns 155 | options: 156 | show_root_heading: true 157 | show_source: true 158 | -------------------------------------------------------------------------------- /docs/api/migrations.md: -------------------------------------------------------------------------------- 1 | # Migrations API Reference 2 | 3 | This page provides detailed API documentation for the `pydapter.migrations` 4 | module. 5 | 6 | ## Installation 7 | 8 | The migrations module is available as optional dependencies: 9 | 10 | ```bash 11 | # Core migrations functionality 12 | pip install "pydapter[migrations-core]" 13 | 14 | # SQL migrations with Alembic support 15 | pip install "pydapter[migrations-sql]" 16 | 17 | # All migrations components 18 | pip install "pydapter[migrations]" 19 | ``` 20 | 21 | ## Module Overview 22 | 23 | The migrations module provides a framework for managing database schema changes, 24 | following the adapter pattern: 25 | 26 | ```text 27 | MigrationProtocol 28 | │ 29 | ▼ 30 | BaseMigrationAdapter 31 | │ 32 | ├─────────────────────┐ 33 | │ │ 34 | ▼ ▼ 35 | SyncMigrationAdapter AsyncMigrationAdapter 36 | │ │ 37 | ▼ ▼ 38 | AlembicAdapter AsyncAlembicAdapter 39 | ``` 40 | 41 | ## Protocols 42 | 43 | ### MigrationProtocol 44 | 45 | ::: pydapter.migrations.protocols.MigrationProtocol 46 | options: 47 | show_root_heading: true 48 | show_source: true 49 | 50 | ### AsyncMigrationProtocol 51 | 52 | ::: pydapter.migrations.protocols.AsyncMigrationProtocol 53 | options: 54 | show_root_heading: true 55 | show_source: true 56 | 57 | ## Base Classes 58 | 59 | ### BaseMigrationAdapter 60 | 61 | ::: pydapter.migrations.base.BaseMigrationAdapter 62 | options: 63 | show_root_heading: true 64 | show_source: true 65 | 66 | ### SyncMigrationAdapter 67 | 68 | ::: pydapter.migrations.base.SyncMigrationAdapter 69 | options: 70 | show_root_heading: true 71 | show_source: true 72 | 73 | ### AsyncMigrationAdapter 74 | 75 | ::: pydapter.migrations.base.AsyncMigrationAdapter 76 | options: 77 | show_root_heading: true 78 | show_source: true 79 | 80 | ## SQL Adapters 81 | 82 | ### AlembicAdapter 83 | 84 | ::: pydapter.migrations.sql.alembic_adapter.AlembicAdapter 85 | options: 86 | show_root_heading: true 87 | show_source: true 88 | 89 | ### AsyncAlembicAdapter 90 | 91 | ::: pydapter.migrations.sql.alembic_adapter.AsyncAlembicAdapter 92 | options: 93 | show_root_heading: true 94 | show_source: true 95 | 96 | ## Registry 97 | 98 | ::: pydapter.migrations.registry.MigrationRegistry 99 | options: 100 | show_root_heading: true 101 | show_source: true 102 | 103 | ## Exceptions 104 | 105 | ### MigrationError 106 | 107 | ::: pydapter.migrations.exceptions.MigrationError 108 | options: 109 | show_root_heading: true 110 | show_source: true 111 | 112 | ### MigrationInitError 113 | 114 | ::: pydapter.migrations.exceptions.MigrationInitError 115 | options: 116 | show_root_heading: true 117 | show_source: true 118 | 119 | ### MigrationCreationError 120 | 121 | ::: pydapter.migrations.exceptions.MigrationCreationError 122 | options: 123 | show_root_heading: true 124 | show_source: true 125 | 126 | ### MigrationUpgradeError 127 | 128 | ::: pydapter.migrations.exceptions.MigrationUpgradeError 129 | options: 130 | show_root_heading: true 131 | show_source: true 132 | 133 | ### MigrationDowngradeError 134 | 135 | ::: pydapter.migrations.exceptions.MigrationDowngradeError 136 | options: 137 | 
show_root_heading: true 138 | show_source: true 139 | 140 | ### MigrationNotFoundError 141 | 142 | ::: pydapter.migrations.exceptions.MigrationNotFoundError 143 | options: 144 | show_root_heading: true 145 | show_source: true 146 | -------------------------------------------------------------------------------- /docs/api/protocols.md: -------------------------------------------------------------------------------- 1 | # Protocols API Reference 2 | 3 | This page provides API documentation for the `pydapter.protocols` module. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install pydapter 9 | ``` 10 | 11 | ## Overview 12 | 13 | The protocols module provides composable interfaces for specialized model behavior: 14 | 15 | ```text 16 | ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ 17 | │ Identifiable │ │ Temporal │ │ Embeddable │ 18 | │ (id: UUID) │ │ (timestamps) │ │ (content + │ 19 | │ │ │ │ │ embedding) │ 20 | └─────────────────┘ └─────────────────┘ └─────────────────┘ 21 | 22 | ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ 23 | │ Invokable │ │ Cryptographical │ │ Auditable │ 24 | │ (execution) │ │ (hashing) │ │ (tracking) │ 25 | └─────────────────┘ └─────────────────┘ └─────────────────┘ 26 | 27 | ┌─────────────────┐ 28 | │ SoftDeletable │ 29 | │ (soft delete) │ 30 | └─────────────────┘ 31 | ``` 32 | 33 | ## Quick Start 34 | 35 | ```python
from uuid import UUID, uuid4
from datetime import datetime, timezone
from pydantic import BaseModel, Field
from pydapter.protocols import IdentifiableMixin, TemporalMixin

# The mixins contribute behavior; the protocol fields themselves are
# declared on the model (see the protocols guide for this pattern)
class User(BaseModel, IdentifiableMixin, TemporalMixin):
    id: UUID = Field(default_factory=uuid4)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    name: str
    email: str

user = User(name="John", email="john@example.com")
user.update_timestamp()  # Temporal behavior
print(user.id)  # UUID from Identifiable
46 | ``` 47 | 48 | ## Event System 49 | 50 | ```python 51 | from pydapter.protocols.event import as_event 52 | 53 | @as_event(event_type="api_call") 54 | async def process_request(data: dict) -> dict: 55 | return {"result": "processed", "input": data} 56 | 57 | event = await process_request({"user_id": 123}) 58 | print(event.event_type) # "api_call" 59 | ``` 60 | 61 | ## Protocol Factory 62 | 63 | ```python 64 | from pydapter.protocols.factory import create_protocol_model_class 65 | from pydapter.protocols.constants import IDENTIFIABLE, TEMPORAL
from pydapter.fields import FieldTemplate
66 | 67 | User = create_protocol_model_class( 68 | "User", 69 | IDENTIFIABLE, 70 | TEMPORAL, 71 | name=FieldTemplate(base_type=str), 72 | email=FieldTemplate(base_type=str) 73 | ) 74 | ``` 75 | 76 | --- 77 | 78 | ## API Reference 79 | 80 | ### Core Protocols 81 | 82 | ::: pydapter.protocols.identifiable 83 | options: 84 | show_root_heading: true 85 | show_source: true 86 | 87 | ::: pydapter.protocols.temporal 88 | options: 89 | show_root_heading: true 90 | show_source: true 91 | 92 | ::: pydapter.protocols.embeddable 93 | options: 94 | show_root_heading: true 95 | show_source: true 96 | 97 | ::: pydapter.protocols.invokable 98 | options: 99 | show_root_heading: true 100 | show_source: true 101 | 102 | ::: pydapter.protocols.cryptographical 103 | options: 104 | show_root_heading: true 105 | show_source: true 106 | 107 | ::: pydapter.protocols.auditable 108 | options: 109 | show_root_heading: true 110 | show_source: true 111 | 112 | ::: pydapter.protocols.soft_deletable 113 | options: 114 | show_root_heading: true 115 | show_source: true 116 | 117 | ### Event System 118 | 119 | ::: pydapter.protocols.event 120 | options: 121 | show_root_heading: true 122 | show_source: true 123 | 124 | ### Factory and Utilities 125 | 126 | :::
pydapter.protocols.factory 127 | options: 128 | show_root_heading: true 129 | show_source: true 130 | 131 | ::: pydapter.protocols.registry 132 | options: 133 | show_root_heading: true 134 | show_source: true 135 | 136 | ::: pydapter.protocols.constants 137 | options: 138 | show_root_heading: true 139 | show_source: true 140 | 141 | ::: pydapter.protocols.types 142 | options: 143 | show_root_heading: true 144 | show_source: true 145 | 146 | ::: pydapter.protocols.utils 147 | options: 148 | show_root_heading: true 149 | show_source: true 150 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.md -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/guides/architecture.md: -------------------------------------------------------------------------------- 1 | # Pydapter Architecture and Design Philosophy 2 | 3 | ## Core Principles 4 | 5 | Pydapter follows **protocol-driven architecture** with **stateless 6 | transformations** and **composition over inheritance**. 7 | 8 | ### 1. Protocol + Mixin Pattern 9 | 10 | ```python 11 | @runtime_checkable 12 | class Identifiable(Protocol): 13 | id: UUID 14 | 15 | class IdentifiableMixin: 16 | def __hash__(self) -> int: 17 | return hash(self.id) 18 | ``` 19 | 20 | **Key Benefits:** 21 | 22 | - Type safety without inheritance coupling 23 | - Runtime validation when needed 24 | - Composable behaviors 25 | 26 | ### 2. Stateless Class Methods 27 | 28 | ```python 29 | class Adapter(Protocol[T]): 30 | obj_key: ClassVar[str] 31 | 32 | @classmethod 33 | def from_obj(cls, subj_cls: type[T], obj: Any, /, *, many=False, **kw) -> T | list[T]: ... 34 | 35 | @classmethod 36 | def to_obj(cls, subj: T | list[T], /, *, many=False, **kw) -> Any: ... 37 | ``` 38 | 39 | **Why Class Methods:** 40 | 41 | - Thread safety (no shared state) 42 | - No instantiation overhead 43 | - Simple testing 44 | - Clear interfaces 45 | 46 | ### 3. 
Dual Sync/Async APIs 47 | 48 | Separate implementations without mixing concerns: 49 | 50 | - **Sync**: `Adapter`, `AdapterRegistry`, `Adaptable` 51 | - **Async**: `AsyncAdapter`, `AsyncAdapterRegistry`, `AsyncAdaptable` 52 | 53 | **Benefits:** 54 | 55 | - No async overhead in sync code 56 | - Clear separation of concerns 57 | - Type safety in both contexts 58 | 59 | ## Component Layers 60 | 61 | ### Core Layer 62 | 63 | - **`Adapter`**: Transformation protocol 64 | - **`AdapterRegistry`**: Adapter management 65 | - **`Adaptable`**: Model mixin for adapter access 66 | 67 | ### Protocol Layer 68 | 69 | - **`Identifiable`**: UUID-based identity 70 | - **`Temporal`**: Timestamp management 71 | - **`Embeddable`**: Vector embeddings 72 | - **`Event`**: Event-driven patterns 73 | 74 | ### Field System 75 | 76 | - **`Field`**: Advanced field descriptors 77 | - **Pre-configured fields**: `ID_FROZEN`, `DATETIME`, `EMBEDDING` 78 | - **Composition methods**: `as_nullable()`, `as_listable()` 79 | 80 | ### Adapter Ecosystem 81 | 82 | - **Built-in**: JSON, CSV, TOML 83 | - **Extended**: PostgreSQL, MongoDB, Neo4j, Qdrant, Weaviate 84 | 85 | ## Design Philosophy 86 | 87 | ### Composition Over Inheritance 88 | 89 | ```python 90 | class Document(BaseModel, IdentifiableMixin, TemporalMixin): 91 | title: str 92 | content: str 93 | ``` 94 | 95 | ### Progressive Complexity 96 | 97 | ```python 98 | # Simple: Direct usage 99 | person = JsonAdapter.from_obj(Person, json_data) 100 | 101 | # Advanced: Registry-based 102 | registry.adapt_from(Person, data, obj_key="json") 103 | ``` 104 | 105 | ### Explicit Configuration 106 | 107 | ```python 108 | # Clear interfaces, explicit parameters 109 | person = JsonAdapter.from_obj(Person, data, many=False, strict=True) 110 | ``` 111 | 112 | ## Extension Points 113 | 114 | 1. **Custom Adapters**: Implement `Adapter`/`AsyncAdapter` protocol 115 | 2. **Custom Protocols**: Extend existing or create new protocols 116 | 3. **Field Descriptors**: Domain-specific fields with `Field` 117 | 4. **Migration Adapters**: Schema evolution support 118 | 5. **Registry Extensions**: Specialized adapter collections 119 | 120 | This architecture enables both simple use cases and complex production systems 121 | through clear abstractions and composable components. 122 | -------------------------------------------------------------------------------- /docs/guides/protocols.md: -------------------------------------------------------------------------------- 1 | # Working with Pydapter Protocols 2 | 3 | ## Protocol + Mixin Pattern 4 | 5 | Each protocol provides: 6 | 7 | 1. **Protocol**: Interface for type checking 8 | 2. 
**Mixin**: Implementation with behavior 9 | 10 | ```python 11 | @runtime_checkable 12 | class Identifiable(Protocol): 13 | id: UUID 14 | 15 | class IdentifiableMixin: 16 | def __hash__(self) -> int: 17 | return hash(self.id) 18 | ``` 19 | 20 | ## Available Protocols 21 | 22 | ### Identifiable 23 | 24 | - **Purpose**: UUID-based identity management 25 | - **Fields**: `id: UUID` 26 | - **Methods**: `__hash__()`, UUID serialization 27 | - **Usage**: Base for all tracked entities 28 | 29 | ### Temporal 30 | 31 | - **Purpose**: Timestamp management 32 | - **Fields**: `created_at: datetime`, `updated_at: datetime` 33 | - **Methods**: `update_timestamp()`, ISO datetime serialization 34 | - **Usage**: Audit trails, versioning 35 | 36 | ### Embeddable 37 | 38 | - **Purpose**: Vector embeddings for ML/AI 39 | - **Fields**: `content: str | None`, `embedding: list[float]` 40 | - **Methods**: Content processing, embedding validation 41 | - **Usage**: RAG systems, semantic search 42 | 43 | ### Event 44 | 45 | - **Purpose**: Comprehensive event tracking 46 | - **Inherits**: Combines Identifiable, Temporal, Embeddable 47 | - **Usage**: Event sourcing, audit logs 48 | 49 | ## Composition Patterns 50 | 51 | ### Basic Composition 52 | 53 | ```python 54 | class User(BaseModel, IdentifiableMixin, TemporalMixin): 55 | id: UUID 56 | created_at: datetime 57 | updated_at: datetime 58 | name: str 59 | email: str 60 | ``` 61 | 62 | ### Inheritance Order Matters 63 | 64 | ```python 65 | # ✓ Correct: BaseModel first, dependency order for mixins 66 | class Document(BaseModel, IdentifiableMixin, TemporalMixin, EmbeddableMixin): 67 | # Protocol fields first 68 | id: UUID 69 | created_at: datetime 70 | updated_at: datetime 71 | content: str | None = None 72 | embedding: list[float] = Field(default_factory=list) 73 | 74 | # Domain fields 75 | title: str 76 | ``` 77 | 78 | ## Custom Protocol Creation 79 | 80 | ```python 81 | @runtime_checkable 82 | class Versionable(Protocol): 83 | version: int 84 | version_history: list[int] 85 | 86 | class VersionableMixin: 87 | def increment_version(self) -> None: 88 | if hasattr(self, 'version'): 89 | self.version_history.append(self.version) 90 | self.version += 1 91 | ``` 92 | 93 | ## Type Checking 94 | 95 | ### Static Type Checking 96 | 97 | ```python 98 | def process_identifiable_items(items: list[Identifiable]) -> list[UUID]: 99 | return [item.id for item in items] 100 | ``` 101 | 102 | ### Runtime Type Checking 103 | 104 | ```python 105 | def safe_get_id(obj: object) -> UUID | None: 106 | if isinstance(obj, Identifiable): 107 | return obj.id 108 | return None 109 | ``` 110 | 111 | ## Integration with Fields 112 | 113 | ```python 114 | from pydapter.fields import ID_FROZEN, DATETIME 115 | 116 | class AdvancedModel(BaseModel, IdentifiableMixin, TemporalMixin): 117 | id: UUID = ID_FROZEN.field_info 118 | created_at: datetime = DATETIME.field_info 119 | updated_at: datetime = DATETIME.field_info 120 | ``` 121 | 122 | ## Key Tips for LLM Developers 123 | 124 | ### 1. Protocol Contract Compliance 125 | 126 | - Always implement all required protocol fields 127 | - Use proper type annotations 128 | - Test protocol compliance with `isinstance()` 129 | 130 | ### 2. Mixin Order 131 | 132 | - `BaseModel` first 133 | - Protocol mixins in dependency order 134 | - Custom mixins last 135 | 136 | ### 3. Automatic Serialization 137 | 138 | - `IdentifiableMixin`: UUID → string 139 | - `TemporalMixin`: datetime → ISO string 140 | - Use `model_dump_json()` for proper serialization 141 | 142 | ### 4. 
Common Patterns 143 | 144 | ```python 145 | # Standard composition for entities 146 | class Entity(BaseModel, IdentifiableMixin, TemporalMixin): 147 | pass 148 | 149 | # Standard composition for ML content 150 | class MLContent(BaseModel, IdentifiableMixin, EmbeddableMixin): 151 | pass 152 | 153 | # Standard composition for events 154 | class EventRecord(BaseModel, Event): # Event includes all protocols 155 | pass 156 | ``` 157 | 158 | ### 5. Testing Protocol Implementation 159 | 160 | ```python 161 | def test_protocol_compliance(model_instance): 162 | assert isinstance(model_instance, Identifiable) 163 | assert hasattr(model_instance, 'id') 164 | assert callable(getattr(model_instance, '__hash__')) 165 | ``` 166 | 167 | This protocol system enables consistent, type-safe behavior composition across 168 | your models while maintaining clean separation of concerns. 169 | -------------------------------------------------------------------------------- /docs/guides/testing-strategies.md: -------------------------------------------------------------------------------- 1 | # Testing Strategies for Pydapter 2 | 3 | ## Protocol Testing 4 | 5 | ### Protocol Compliance 6 | 7 | ```python 8 | from pydapter.protocols import Identifiable, Temporal 9 | 10 | def test_protocol_compliance(): 11 | model = MyModel(id=uuid4(), created_at=datetime.now(), updated_at=datetime.now()) 12 | 13 | # Runtime protocol checks 14 | assert isinstance(model, Identifiable) 15 | assert isinstance(model, Temporal) 16 | 17 | # Test mixin functionality 18 | original_updated = model.updated_at 19 | model.update_timestamp() 20 | assert model.updated_at > original_updated 21 | ``` 22 | 23 | ## Adapter Testing 24 | 25 | ### Roundtrip Testing 26 | 27 | ```python 28 | def test_adapter_roundtrip(): 29 | """Test data survives roundtrip conversion""" 30 | original = MyModel(name="test", value=42) 31 | 32 | external = MyAdapter.to_obj(original) 33 | restored = MyAdapter.from_obj(MyModel, external) 34 | 35 | assert restored.name == original.name 36 | assert restored.value == original.value 37 | ``` 38 | 39 | ### Error Handling 40 | 41 | ```python 42 | def test_adapter_error_handling(): 43 | """Test error scenarios""" 44 | with pytest.raises(ParseError, match="Invalid format"): 45 | MyAdapter.from_obj(MyModel, "invalid_data") 46 | 47 | with pytest.raises(ValidationError): 48 | MyAdapter.from_obj(MyModel, {"missing": "required_fields"}) 49 | ``` 50 | 51 | ### Async Testing 52 | 53 | ```python 54 | @pytest.mark.asyncio 55 | async def test_async_adapter(respx_mock): 56 | """Test async adapters with mocked HTTP""" 57 | respx_mock.get("http://api.example.com/data").mock( 58 | return_value=httpx.Response(200, json={"name": "test"}) 59 | ) 60 | 61 | result = await MyAsyncAdapter.from_obj(MyModel, {"url": "http://api.example.com/data"}) 62 | assert result.name == "test" 63 | ``` 64 | 65 | ## Registry Testing 66 | 67 | ```python 68 | def test_registry_operations(): 69 | """Test adapter registry functionality""" 70 | registry = AdapterRegistry() 71 | registry.register(MyAdapter) 72 | 73 | # Test retrieval 74 | adapter = registry.get("my_adapter") 75 | assert adapter == MyAdapter 76 | 77 | # Test missing adapter 78 | with pytest.raises(AdapterNotFoundError): 79 | registry.get("nonexistent") 80 | ``` 81 | 82 | ## Property-Based Testing 83 | 84 | ```python 85 | from hypothesis import given, strategies as st 86 | 87 | @given(st.text(min_size=1)) 88 | def test_field_validation_robustness(text_value): 89 | """Test field validators with random data""" 90 | 
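    # `Field` comes from pydapter.fields (the same import used in the fields
    # API docs); a function-local import keeps this sketch self-contained.
    from pydapter.fields import Field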
field = Field(name="test", validator=lambda cls, v: v.strip()) 91 | # Test edge cases with generated data 92 | ``` 93 | 94 | ## Key Testing Patterns for LLM Developers 95 | 96 | ### 1. Test Fixtures 97 | 98 | ```python 99 | @pytest.fixture 100 | def sample_user(): 101 | return User( 102 | id=uuid4(), 103 | created_at=datetime.now(), 104 | updated_at=datetime.now(), 105 | name="Test User", 106 | email="test@example.com" 107 | ) 108 | 109 | @pytest.fixture 110 | def user_registry(): 111 | registry = AdapterRegistry() 112 | registry.register(JsonAdapter) 113 | registry.register(CsvAdapter) 114 | return registry 115 | ``` 116 | 117 | ### 2. Mock External Dependencies 118 | 119 | ```python 120 | # HTTP APIs 121 | @pytest.fixture 122 | def mock_api(respx_mock): 123 | respx_mock.get("http://api.example.com/users").mock( 124 | return_value=httpx.Response(200, json=[{"name": "John", "age": 30}]) 125 | ) 126 | 127 | # Database connections 128 | @pytest.fixture 129 | def mock_db(): 130 | with patch('asyncpg.connect') as mock_connect: 131 | mock_conn = AsyncMock() 132 | mock_connect.return_value = mock_conn 133 | yield mock_conn 134 | ``` 135 | 136 | ### 3. Error Path Testing 137 | 138 | ```python 139 | def test_all_error_scenarios(): 140 | """Comprehensive error testing""" 141 | # Empty input 142 | with pytest.raises(ParseError, match="Empty.*content"): 143 | MyAdapter.from_obj(MyModel, "") 144 | 145 | # Invalid format 146 | with pytest.raises(ParseError, match="Invalid.*format"): 147 | MyAdapter.from_obj(MyModel, "invalid_format") 148 | 149 | # Validation failure 150 | with pytest.raises(ValidationError): 151 | MyAdapter.from_obj(MyModel, {"missing_required_field": True}) 152 | ``` 153 | 154 | ### 4. Async Testing Patterns 155 | 156 | ```python 157 | @pytest.mark.asyncio 158 | class TestAsyncOperations: 159 | async def test_concurrent_operations(self): 160 | """Test concurrent adapter operations""" 161 | tasks = [ 162 | MyAsyncAdapter.from_obj(MyModel, {"id": i}) 163 | for i in range(10) 164 | ] 165 | results = await asyncio.gather(*tasks) 166 | assert len(results) == 10 167 | 168 | async def test_timeout_handling(self): 169 | """Test timeout scenarios""" 170 | with pytest.raises(ParseError, match="timed out"): 171 | await MyAsyncAdapter.from_obj(MyModel, config, timeout=0.01) 172 | ``` 173 | 174 | ## Common Testing Caveats 175 | 176 | ### 1. Async Context 177 | 178 | - Use `pytest.mark.asyncio` for async tests 179 | - Mock external services (HTTP, database) 180 | - Test timeout and retry logic 181 | 182 | ### 2. Protocol Mixins 183 | 184 | - Test both interface and implementation 185 | - Verify field serializers 186 | - Check inheritance order effects 187 | 188 | ### 3. Registry Isolation 189 | 190 | - Use fresh registries per test 191 | - Clean up registered adapters 192 | - Test adapter precedence 193 | 194 | ### 4. 
Error Context 195 | 196 | - Verify specific exception types 197 | - Check error message content 198 | - Test error data preservation 199 | 200 | ## Testing Tips 201 | 202 | - **Fixtures**: Use pytest fixtures for common setups 203 | - **Mocking**: Mock external dependencies consistently 204 | - **Error paths**: Test failures as thoroughly as success 205 | - **Property-based**: Use Hypothesis for edge case discovery 206 | - **Type safety**: Run mypy in CI to catch type errors 207 | - **Isolation**: Ensure tests don't affect each other 208 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # pydapter 2 | 3 | [![PyPI version](https://badge.fury.io/py/pydapter.svg)](https://badge.fury.io/py/pydapter) 4 | [![Python Versions](https://img.shields.io/pypi/pyversions/pydapter.svg)](https://pypi.org/project/pydapter/) 5 | [![License](https://img.shields.io/github/license/agenticsorg/pydapter.svg)](https://github.com/agenticsorg/pydapter/blob/main/LICENSE) 6 | 7 | **pydapter** is a powerful trait + adapter toolkit for pydantic models, 8 | featuring a comprehensive field system and protocol-based design patterns. 9 | 10 | ## Overview 11 | 12 | pydapter provides a lightweight, flexible way to adapt Pydantic models to 13 | various data sources and sinks. It enables seamless data transfer between 14 | different formats and storage systems while maintaining the type safety and 15 | validation that Pydantic provides. 16 | 17 | ## ✨ Key Features 18 | 19 | ### 🏗️ **Field System** (New in v0.3.0) 20 | 21 | - **Field Templates**: Reusable field definitions with flexible naming 22 | - **Field Families**: Pre-defined collections for common patterns (Entity, 23 | Audit, Soft Delete) 24 | - **Domain Model Builder**: Fluent API for composing models 25 | - **Validation Patterns**: Built-in regex patterns and constraints 26 | 27 | ### 🔌 **Protocol System** 28 | 29 | - **Type-Safe Constants**: Use `IDENTIFIABLE`, `TEMPORAL` instead of strings 30 | - **Behavioral Mixins**: Add methods like `update_timestamp()` to your models 31 | - **One-Step Creation**: `create_protocol_model_class()` for fields + behaviors 32 | 33 | ### 🔄 **Adapters** 34 | 35 | - **Unified Interface**: Consistent API across different data sources 36 | - **Type Safety**: Full Pydantic validation support 37 | - **Async Support**: Both synchronous and asynchronous interfaces 38 | - **Extensible**: Easy to create custom adapters 39 | 40 | ### 🚀 **Additional Features** 41 | 42 | - **Migrations**: Database schema migration tools 43 | - **Minimal Dependencies**: Core functionality has minimal requirements 44 | - **Production Ready**: Battle-tested in real applications 45 | 46 | ## Installation 47 | 48 | ```bash 49 | pip install pydapter 50 | ``` 51 | 52 | With optional dependencies: 53 | 54 | ```bash 55 | # Database adapters 56 | pip install "pydapter[postgres]" 57 | pip install "pydapter[mongo]" 58 | pip install "pydapter[neo4j]" 59 | 60 | # File formats 61 | pip install "pydapter[excel]" 62 | 63 | # New modules 64 | pip install "pydapter[protocols]" # Standardized model interfaces 65 | pip install "pydapter[migrations-sql]" # Database schema migrations with 66 | # SQLAlchemy/Alembic 67 | 68 | # Combined packages 69 | pip install "pydapter[migrations]" # All migration components 70 | pip install "pydapter[migrations-all]" # Migrations with protocols support 71 | 72 | # For all extras 73 | pip install "pydapter[all]" 74 | ``` 
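A quick sanity check that the install resolved correctly (the package exports its version at the root, as shown in `src/pydapter/__init__.py` later in this dump):

```bash
python -c "import pydapter; print(pydapter.__version__)"
```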
75 | 76 | ## Quick Examples 77 | 78 | ### 🏗️ Using the Field System 79 | 80 | ```python 81 | from pydapter.fields import DomainModelBuilder, FieldTemplate 82 | from pydapter.protocols import ( 83 | create_protocol_model_class, 84 | IDENTIFIABLE, 85 | TEMPORAL 86 | ) 87 | 88 | # Build a model with field families 89 | User = ( 90 | DomainModelBuilder("User") 91 | .with_entity_fields(timezone_aware=True) # id, created_at, updated_at 92 | .with_audit_fields() # created_by, updated_by, version 93 | .add_field("username", FieldTemplate(base_type=str, max_length=50)) 94 | .add_field("email", FieldTemplate(base_type=str)) 95 | .build() 96 | ) 97 | 98 | # Or create a protocol-compliant model with behaviors 99 | User = create_protocol_model_class( 100 | "User", 101 | IDENTIFIABLE, # Adds id field 102 | TEMPORAL, # Adds created_at, updated_at fields + methods 103 | username=FieldTemplate(base_type=str), 104 | email=FieldTemplate(base_type=str) 105 | ) 106 | 107 | # Use the model 108 | user = User(username="alice", email="alice@example.com") 109 | user.update_timestamp() # Method from TemporalMixin 110 | ``` 111 | 112 | ### 🔄 Using Adapters 113 | 114 | ```python 115 | from pydapter.adapters.json_ import JsonAdapter 116 | 117 | # Serialize a model instance to a JSON string 118 | json_text = JsonAdapter.to_obj(user) 119 | 120 | # Parse JSON back into a validated model instance 121 | user = JsonAdapter.from_obj(User, json_text) 122 | 123 | # Lists of models round-trip with many=True 124 | json_list = JsonAdapter.to_obj([user], many=True) 125 | ``` 126 | 127 | ## 📚 Documentation 128 | 129 | ### Getting Started 130 | 131 | - 🚀 [**Getting Started Guide**](getting_started.md) - Your first steps with 132 | pydapter 133 | - 🏗️ [**Field System Overview**](guides/fields.md) - Learn about the powerful 134 | field system 135 | - 🔌 [**Protocols Overview**](protocols.md) - Understand protocol-based design 136 | 137 | ### Core Concepts 138 | 139 | - 📋 [**Field Families**](guides/field-families.md) - Pre-built field 140 | collections 141 | - 🎯 [**Best Practices**](guides/fields-and-protocols-patterns.md) - Field and 142 | protocol patterns 143 | - ⚡ [**Error Handling**](error_handling.md) - Robust error management 144 | 145 | ### Tutorials & Guides 146 | 147 | - 🔧 [**End-to-End Backend**](guides/end-to-end-backend.md) - Build a complete 148 | backend 149 | - 📖 [**Using Protocols**](tutorials/using_protocols.md) - Protocol tutorial 150 | - 🔄 [**Using Migrations**](tutorials/using_migrations.md) - Database migrations 151 | 152 | ### Adapters 153 | 154 | - 🐘 [**PostgreSQL**](postgres_adapter.md) - PostgreSQL adapter guide 155 | - 🔗 [**Neo4j**](neo4j_adapter.md) - Graph database integration 156 | - 🔍 [**Qdrant**](qdrant_adapter.md) - Vector database support 157 | 158 | ## 🤝 Contributing 159 | 160 | We welcome contributions! See our [Contributing Guide](contributing.md) for 161 | details. 162 | 163 | ## 📄 License 164 | 165 | pydapter is released under the Apache-2.0 License. See the 166 | [LICENSE](https://github.com/agenticsorg/pydapter/blob/main/LICENSE) file for 167 | details.
168 | -------------------------------------------------------------------------------- /docs/sql_model_adapter.md: -------------------------------------------------------------------------------- 1 | # pydapter 0.1.4 Tutorial 2 | 3 | _Bridge Pydantic ⇆ SQLAlchemy (with optional pgvector)_ 4 | 5 | --- 6 | 7 | ## 1 Installation 8 | 9 | ```bash 10 | # core features (quote version specifiers so the shell doesn't treat >= as a redirect) 11 | pip install "pydapter>=0.1.4" "sqlalchemy>=2.0" alembic 12 | 13 | # add pgvector support and drivers 14 | pip install "pydapter[pgvector]" "psycopg[binary]" pgvector 15 | ``` 16 | 17 | --- 18 | 19 | ## 2 Quick-start (scalar models) 20 | 21 | ### 2.1 Define your validation model 22 | 23 | ```python 24 | from pydantic import BaseModel 25 | 26 | class UserSchema(BaseModel): 27 | id: int | None = None # promoted to PK 28 | name: str 29 | email: str | None = None 30 | active: bool = True 31 | ``` 32 | 33 | ### 2.2 Generate the ORM class 34 | 35 | ```python 36 | from pydapter.model_adapters import SQLModelAdapter 37 | 38 | UserSQL = SQLModelAdapter.pydantic_model_to_sql(UserSchema) 39 | ``` 40 | 41 | `UserSQL` is a fully-mapped SQLAlchemy declarative model—Alembic will pick it up 42 | automatically. 43 | 44 | ### 2.3 Round-trip back to Pydantic (optional) 45 | 46 | ```python 47 | RoundTrip = SQLModelAdapter.sql_model_to_pydantic(UserSQL) 48 | user_json = RoundTrip.model_validate(UserSQL(name="Ann")).model_dump() 49 | ``` 50 | 51 | --- 52 | 53 | ## 3 Embeddings with `pgvector` 54 | 55 | ### 3.1 Validation layer 56 | 57 | ```python 58 | from pydantic import BaseModel, Field 59 | 60 | class DocSchema(BaseModel): 61 | id: int | None = None 62 | text: str 63 | embedding: list[float] = Field(..., json_schema_extra={"vector_dim": 768}) 64 | ``` 65 | 66 | ### 3.2 Generate vector-aware model 67 | 68 | ```python 69 | from pydapter.model_adapters import SQLVectorModelAdapter 70 | 71 | DocSQL = SQLVectorModelAdapter.pydantic_model_to_sql(DocSchema) 72 | ``` 73 | 74 | Result: 75 | 76 | ```text 77 | Column('embedding', Vector(768), nullable=False) 78 | ``` 79 | 80 | ### 3.3 Reverse conversion 81 | 82 | ```python 83 | DocSchemaRT = SQLVectorModelAdapter.sql_model_to_pydantic(DocSQL) 84 | assert DocSchemaRT.model_fields["embedding"].json_schema_extra["vector_dim"] == 768 85 | ``` 86 | 87 | --- 88 | 89 | ## 4 Alembic integration 90 | 91 | 1. **Add pgvector extension (first migration only)** 92 | 93 | ```python 94 | # env.py or an initial upgrade() block (the extension is named "vector") 95 | op.execute("CREATE EXTENSION IF NOT EXISTS vector") 96 | ``` 97 | 98 | 2. **Autogenerate migrations** 99 | 100 | ```bash 101 | alembic revision --autogenerate -m "init tables" 102 | ``` 103 | 104 | All columns—including `Vector(dim)`—appear in the diff. 105 | 106 | --- 107 | 108 | ## 5 Advanced options 109 | 110 | | Need | How | 111 | | ----------------------- | ----------------------------------------------------------------------------- | 112 | | Custom table name | `SQLModelAdapter.pydantic_model_to_sql(UserSchema, table_name="users")` | 113 | | Alternate PK field | `…, pk_field="uuid"` | 114 | | Cache generated classes | Wrap the call in your own memoization layer; generation runs once per import. | 115 | | Unsupported types | Extend `_PY_TO_SQL` / `_SQL_TO_PY` dictionaries or subclass the adapter. | 116 | 117 | --- | 118 | 119 | ## 6 Testing & CI 120 | 121 | Unit tests rely only on SQLAlchemy inspection—no database spin-up.
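A minimal sketch of such an inspection-only test, assuming the `UserSQL` class generated in section 2 (the test name and expected column set here are illustrative):

```python
from sqlalchemy import inspect

def test_user_sql_mapping():
    # Inspect the mapped class without opening any database connection
    mapper = inspect(UserSQL)
    columns = {c.key for c in mapper.columns}
    assert {"id", "name", "email", "active"} <= columns
    # The field promoted to PK should be marked as such
    assert mapper.columns["id"].primary_key
```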
122 | 123 | ```bash 124 | pytest -q 125 | ``` 126 | 127 | To skip or run the pgvector-marked tests: 128 | 129 | ```bash 130 | pytest -q -m "not pgvector" # skip 131 | pytest -q # run all (pgvector installed) 132 | ``` 133 | 134 | --- 135 | 136 | ## 7 Troubleshooting 137 | 138 | | Symptom | Fix | 139 | | ------------------------------- | -------------------------------------------------------------------------- | 140 | | `TypeError: Unsupported type …` | Add a mapping in the adapter or exclude the field. | 141 | | Alembic shows no changes | Ensure generated classes share `metadata` or are imported in `env.py`. | 142 | | Vector dim missing | Provide `vector_dim` in `json_schema_extra`, or accept flexible dimension. | 143 | 144 | --- 145 | 146 | ## 8 Wrap-up 147 | 148 | pydapter 0.1.4 lets you: 149 | 150 | - Keep **one source of truth**—your Pydantic models. 151 | - **Ship migrations** without hand-writing ORM classes. 152 | - **Store embeddings** directly in Postgres with pgvector. 153 | 154 | Update, generate, migrate—done. Happy coding! 🚀 155 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: pydapter 2 | site_description: Tiny trait + adapter toolkit for pydantic models 3 | site_url: https://agenticsorg.github.io/pydapter/ 4 | repo_url: https://github.com/agenticsorg/pydapter 5 | repo_name: agenticsorg/pydapter 6 | 7 | theme: 8 | name: material 9 | palette: 10 | - media: "(prefers-color-scheme: light)" 11 | scheme: default 12 | primary: indigo 13 | accent: indigo 14 | toggle: 15 | icon: material/toggle-switch-off-outline 16 | name: Switch to dark mode 17 | - media: "(prefers-color-scheme: dark)" 18 | scheme: slate 19 | primary: indigo 20 | accent: indigo 21 | toggle: 22 | icon: material/toggle-switch 23 | name: Switch to light mode 24 | features: 25 | - navigation.instant 26 | - navigation.tracking 27 | - navigation.expand 28 | - navigation.indexes 29 | - navigation.top 30 | - content.code.copy 31 | - content.code.annotate 32 | - toc.follow 33 | 34 | markdown_extensions: 35 | - admonition 36 | - attr_list 37 | - def_list 38 | - footnotes 39 | - md_in_html 40 | - toc: 41 | permalink: true 42 | - pymdownx.highlight: 43 | anchor_linenums: true 44 | line_spans: __span 45 | pygments_lang_class: true 46 | - pymdownx.inlinehilite 47 | - pymdownx.snippets 48 | - pymdownx.superfences 49 | - pymdownx.tabbed: 50 | alternate_style: true 51 | - pymdownx.tasklist: 52 | custom_checkbox: true 53 | 54 | plugins: 55 | - search 56 | - autorefs 57 | - mkdocstrings: 58 | handlers: 59 | python: 60 | options: 61 | show_root_heading: true 62 | show_source: true 63 | show_category_heading: true 64 | show_submodules: true 65 | 66 | nav: 67 | - Home: index.md 68 | - Getting Started: getting_started.md 69 | - Core Concepts: 70 | - Fields System: 71 | - Overview: guides/fields.md 72 | - Field Families: guides/field-families.md 73 | - Best Practices: guides/fields-and-protocols-patterns.md 74 | - Protocols: 75 | - Overview: protocols.md 76 | - Protocol Guide: guides/protocols.md 77 | - Error Handling: error_handling.md 78 | - Adapters: 79 | - PostgreSQL: postgres_adapter.md 80 | - Neo4j: neo4j_adapter.md 81 | - Qdrant: qdrant_adapter.md 82 | - SQL Model: sql_model_adapter.md 83 | - Advanced Topics: 84 | - Architecture: guides/architecture.md 85 | - Async Patterns: guides/async-patterns.md 86 | - Creating Adapters: guides/creating-adapters.md 87 | - Testing Strategies: guides/testing-strategies.md
88 | - Migrations: migrations.md 89 | - Tutorials: 90 | - End-to-End Backend: guides/end-to-end-backend.md 91 | - Using Protocols: tutorials/using_protocols.md 92 | - Using Migrations: tutorials/using_migrations.md 93 | - API Reference: 94 | - Core: api/core.md 95 | - Fields: api/fields.md 96 | - Adapters: api/adapters.md 97 | - Extras: api/extras.md 98 | - Protocols: api/protocols.md 99 | - Migrations: api/migrations.md 100 | - Contributing: 101 | - Contributing Guide: contributing.md 102 | - CI/CD: ci.md 103 | - Resources: 104 | - Migration Guide: migration_guide.md 105 | - Testing: testing.md 106 | - Changelog: changelog.md 107 | 108 | extra: 109 | social: 110 | - icon: fontawesome/brands/github 111 | link: https://github.com/agenticsorg/pydapter 112 | -------------------------------------------------------------------------------- /notebooks/get_started/config.toml: -------------------------------------------------------------------------------- 1 | app_name = "MyApp" 2 | version = "1.0.0" 3 | debug = true 4 | allowed_hosts = [ "localhost", "example.com",] 5 | 6 | [database] 7 | host = "localhost" 8 | port = "5432" 9 | name = "myapp" 10 | -------------------------------------------------------------------------------- /notebooks/get_started/employees.csv: -------------------------------------------------------------------------------- 1 | id,name,department,salary,hire_date 2 | 1,Alice,Engineering,85000.0,2020-01-15 3 | 2,Bob,Marketing,75000.0,2021-03-20 4 | 3,Charlie,Finance,95000.0,2019-11-01 5 | -------------------------------------------------------------------------------- /notebooks/get_started/inventory.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_updated": "2025-05-04T17:56:06.722449", 3 | "products": [ 4 | { 5 | "attributes": { 6 | "brand": "TechX", 7 | "color": "silver", 8 | "ram": "16GB" 9 | }, 10 | "categories": [ 11 | "electronics", 12 | "computers" 13 | ], 14 | "description": "Powerful laptop for developers", 15 | "id": 1, 16 | "name": "Laptop", 17 | "price": 1299.99, 18 | "stock": 10 19 | }, 20 | { 21 | "attributes": { 22 | "brand": "ViewClear", 23 | "resolution": "4K", 24 | "size": "27in" 25 | }, 26 | "categories": [ 27 | "electronics", 28 | "accessories" 29 | ], 30 | "description": "27-inch 4K monitor", 31 | "id": 2, 32 | "name": "Monitor", 33 | "price": 499.99, 34 | "stock": 15 35 | }, 36 | { 37 | "attributes": { 38 | "backlight": "RGB", 39 | "brand": "GameMaster", 40 | "type": "mechanical" 41 | }, 42 | "categories": [ 43 | "electronics", 44 | "accessories", 45 | "gaming" 46 | ], 47 | "description": "Mechanical gaming keyboard", 48 | "id": 3, 49 | "name": "Keyboard", 50 | "price": 149.99, 51 | "stock": 30 52 | } 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /notebooks/get_started/inventory.toml: -------------------------------------------------------------------------------- 1 | last_updated = "2025-05-04T17:56:06.722449" 2 | [[products]] 3 | id = 1 4 | name = "Laptop" 5 | description = "Powerful laptop for developers" 6 | price = 1299.99 7 | stock = 10 8 | categories = [ "electronics", "computers",] 9 | 10 | [products.attributes] 11 | brand = "TechX" 12 | color = "silver" 13 | ram = "16GB" 14 | [[products]] 15 | id = 2 16 | name = "Monitor" 17 | description = "27-inch 4K monitor" 18 | price = 499.99 19 | stock = 15 20 | categories = [ "electronics", "accessories",] 21 | 22 | [products.attributes] 23 | brand = "ViewClear" 24 | resolution = "4K" 25 | size = 
"27in" 26 | [[products]] 27 | id = 3 28 | name = "Keyboard" 29 | description = "Mechanical gaming keyboard" 30 | price = 149.99 31 | stock = 30 32 | categories = [ "electronics", "accessories", "gaming",] 33 | 34 | [products.attributes] 35 | brand = "GameMaster" 36 | type = "mechanical" 37 | backlight = "RGB" 38 | -------------------------------------------------------------------------------- /notebooks/get_started/products.csv: -------------------------------------------------------------------------------- 1 | id,name,description,price,stock,categories,attributes 2 | 1,Laptop,Powerful laptop for developers,1299.99,10,"['electronics', 'computers']","{'brand': 'TechX', 'color': 'silver', 'ram': '16GB'}" 3 | 2,Monitor,27-inch 4K monitor,499.99,15,"['electronics', 'accessories']","{'brand': 'ViewClear', 'resolution': '4K', 'size': '27in'}" 4 | 3,Keyboard,Mechanical gaming keyboard,149.99,30,"['electronics', 'accessories', 'gaming']","{'brand': 'GameMaster', 'type': 'mechanical', 'backlight': 'RGB'}" 5 | -------------------------------------------------------------------------------- /notebooks/get_started/students.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khive-ai/pydapter/6bb2a9b2951232251b4f581030254181fc0fa85e/notebooks/get_started/students.xlsx -------------------------------------------------------------------------------- /notebooks/using_async_mongo/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | mongodb: 5 | image: mongo:7.0 6 | container_name: pydapter_mongo_tutorial 7 | restart: unless-stopped 8 | ports: 9 | - "27017:27017" 10 | environment: 11 | MONGO_INITDB_DATABASE: tutorial_db 12 | volumes: 13 | - mongodb_data:/data/db 14 | - ./mongo-init.js:/docker-entrypoint-initdb.d/mongo-init.js:ro 15 | networks: 16 | - pydapter_network 17 | 18 | mongo-express: 19 | image: mongo-express:1.0.0-alpha 20 | container_name: pydapter_mongo_express 21 | restart: unless-stopped 22 | ports: 23 | - "8081:8081" 24 | environment: 25 | ME_CONFIG_MONGODB_URL: mongodb://mongodb:27017/ 26 | ME_CONFIG_BASICAUTH_USERNAME: admin 27 | ME_CONFIG_BASICAUTH_PASSWORD: admin 28 | depends_on: 29 | - mongodb 30 | networks: 31 | - pydapter_network 32 | 33 | volumes: 34 | mongodb_data: 35 | 36 | networks: 37 | pydapter_network: 38 | driver: bridge -------------------------------------------------------------------------------- /notebooks/using_async_mongo/mongo-init.js: -------------------------------------------------------------------------------- 1 | // MongoDB initialization script 2 | // This script will run when the MongoDB container starts for the first time 3 | 4 | // Switch to the tutorial database 5 | db = db.getSiblingDB('tutorial_db'); 6 | 7 | // Create collections with some initial configuration 8 | db.createCollection('users'); 9 | db.createCollection('products'); 10 | db.createCollection('orders'); 11 | 12 | // Add indexes for better performance 13 | db.users.createIndex({ "id": 1 }, { unique: true }); 14 | db.users.createIndex({ "email": 1 }, { unique: true }); 15 | db.users.createIndex({ "username": 1 }, { unique: true }); 16 | 17 | db.products.createIndex({ "id": 1 }, { unique: true }); 18 | db.products.createIndex({ "category": 1 }); 19 | db.products.createIndex({ "tags": 1 }); 20 | 21 | db.orders.createIndex({ "id": 1 }, { unique: true }); 22 | db.orders.createIndex({ "user_id": 1 }); 23 | db.orders.createIndex({ "order_date": 1 
}); 24 | 25 | print('Database initialization completed successfully!'); -------------------------------------------------------------------------------- /notebooks/using_qdrant/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | qdrant: 4 | image: qdrant/qdrant:latest 5 | ports: 6 | - "6333:6333" 7 | - "6334:6334" 8 | volumes: 9 | - qdrant_storage:/qdrant/storage 10 | 11 | volumes: 12 | qdrant_storage: 13 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pydapter" 3 | version = "0.3.1" 4 | description = "Tiny trait + adapter toolkit for pydantic models" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | authors = [{ name = "HaiyangLi", email = "quantocean.li@gmail.com" }] 8 | dependencies = [ 9 | "orjson>=3.10.18", 10 | "pydantic>=2.6", 11 | "toml>=0.10.0", 12 | ] 13 | 14 | [dependency-groups] 15 | dev = [ 16 | "pydapter[docs, lint, test, performance, utils]", 17 | ] 18 | 19 | [project.optional-dependencies] 20 | docs = [ 21 | "mkdocs>=1.5.0", 22 | "mkdocs-material>=9.5.0", 23 | "mkdocstrings[python]>=0.24.0", 24 | "pymdown-extensions>=10.7.0", 25 | "mkdocs-autorefs>=1.4.0", 26 | "types-toml>=0.10.8", 27 | "pandas-stubs>=2.2.3", 28 | ] 29 | 30 | lint = [ 31 | "black>=25.1.0", 32 | "isort>=6.0.1", 33 | "pre-commit>=4.2.0", 34 | "ruff>=0.11.11", 35 | "bandit>=1.7.5", 36 | ] 37 | 38 | test = [ 39 | "pytest>=8.3.5", 40 | "pytest-cov>=6.0.0", 41 | "pytest-asyncio>=1.0.0", 42 | "pytest-benchmark>=5.0.0", 43 | "pytest-timeout>=2.1.0", 44 | "pytest-mock>=3.10.0", 45 | "pytest-xdist>=3.3.0", 46 | "pytest-watch>=4.2.0", 47 | "testcontainers[mongodb,postgres,qdrant, weaviate]>=3.7.0", 48 | "hypothesis>=6.130.0", 49 | "hypothesis-jsonschema>=0.23.0", 50 | "mutmut>=3.3.0", 51 | "faker>=37.3.0", 52 | "factory-boy>=3.3.0", 53 | "anyio>=4.9.0", 54 | "trio>=0.22.0", 55 | ] 56 | 57 | performance = [ 58 | "py-spy>=0.4.0", 59 | "memory-profiler>=0.61.0", 60 | ] 61 | 62 | utils = [ 63 | "python-dotenv>=1.1.0", 64 | "colorama>=0.4.0", # Colored test output 65 | "ipykernel>=6.0.0", 66 | ] 67 | 68 | pandas = [ 69 | "pandas>=2.0", 70 | ] 71 | 72 | excel = [ 73 | "pydapter[pandas]", 74 | "xlsxwriter>=3.0", 75 | "openpyxl>=3.0.0", 76 | ] 77 | 78 | sql = [ 79 | "sqlalchemy[asyncio]>=2.0", 80 | ] 81 | 82 | postgres = [ 83 | "pydapter[sql]", 84 | "psycopg[binary]>=3", 85 | "greenlet>=3.0.0", 86 | "psycopg2-binary>=2.8.0", 87 | "asyncpg>=0.28", 88 | ] 89 | 90 | pgvector = [ 91 | "pydapter[postgres]", 92 | "pgvector>=0.4.0", 93 | ] 94 | 95 | mongo = [ 96 | "pymongo>=4.10", 97 | ] 98 | 99 | weaviate = [ 100 | "weaviate-client>=4.10", 101 | "weaviate>=0.1.2", 102 | ] 103 | 104 | neo4j = [ 105 | "neo4j>=5.20", 106 | ] 107 | 108 | motor = [ 109 | "motor>=3", 110 | ] 111 | 112 | qdrant = [ 113 | "qdrant-client>=1.10", 114 | ] 115 | 116 | aiohttp = [ 117 | "aiohttp>=3.12.0", 118 | ] 119 | 120 | email = [ 121 | "email-validator>=2.0.0", 122 | ] 123 | 124 | migrations-sql = [ 125 | "pydapter[sql]", 126 | "alembic>=1.10.0", 127 | ] 128 | 129 | # Combined dependencies 130 | migrations = [ 131 | "pydapter[migrations-sql]", 132 | ] 133 | 134 | all = [ 135 | "pydapter[excel]", 136 | "pydapter[pgvector]", 137 | "pydapter[mongo]", 138 | "pydapter[weaviate]", 139 | "pydapter[neo4j]", 140 | "pydapter[motor]", 141 | "pydapter[qdrant]", 142 | "pydapter[aiohttp]", 143 | "pydapter[email]", 144 
| "pydapter[migrations]", 145 | ] 146 | 147 | [tool.black] 148 | line-length = 88 149 | target-version = ['py310'] 150 | 151 | [tool.isort] 152 | profile = "black" 153 | line_length = 88 154 | 155 | [build-system] 156 | requires = ["hatchling"] 157 | build-backend = "hatchling.build" 158 | 159 | [tool.hatch.build.targets.wheel] 160 | packages = ["src/pydapter"] 161 | 162 | [tool.hatch.build] 163 | exclude = [ 164 | "dev/*", 165 | "data/*", 166 | "notebooks/*", 167 | "tests/*", 168 | "*.pyc", 169 | "__pycache__", 170 | "temp_logs/*", 171 | "logs/*" 172 | ] 173 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_default_fixture_loop_scope = function 3 | testpaths = ["tests"] 4 | 5 | # Filter warnings 6 | filterwarnings = 7 | # Ignore SQLVectorModelAdapter deprecation warning 8 | ignore:SQLVectorModelAdapter is deprecated and will be removed in a future version:DeprecationWarning 9 | # Ignore qdrant_client deprecation warnings 10 | ignore:.*recreate_collection.*:DeprecationWarning 11 | ignore:.*search method is deprecated.*:DeprecationWarning 12 | ignore:.*search.*:DeprecationWarning 13 | # Ignore qdrant_client compatibility warnings 14 | ignore:Qdrant client version.*incompatible with server version.*:UserWarning 15 | # Ignore invalid escape sequence warnings from qdrant_client 16 | ignore:invalid escape sequence.*:DeprecationWarning 17 | # Ignore shadow attribute warnings 18 | ignore:Field name "schema" in "PostgresAdapterConfig" shadows an attribute in parent "BaseModel":UserWarning 19 | # Ignore test collection warnings for classes with __init__ constructors 20 | ignore:cannot collect test class 'TestModel' because it has a __init__ constructor:Warning 21 | -------------------------------------------------------------------------------- /src/pydapter/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | pydapter - tiny trait + adapter toolkit. 3 | """ 4 | 5 | from .async_core import AsyncAdaptable, AsyncAdapter, AsyncAdapterRegistry 6 | from .core import Adaptable, Adapter, AdapterRegistry 7 | from .fields import ( 8 | ID, 9 | Embedding, 10 | Execution, 11 | Field, 12 | Undefined, 13 | UndefinedType, 14 | create_model, 15 | ) 16 | from .protocols import Event, as_event 17 | 18 | __all__ = ( 19 | "Adaptable", 20 | "Adapter", 21 | "AdapterRegistry", 22 | "AsyncAdaptable", 23 | "AsyncAdapter", 24 | "AsyncAdapterRegistry", 25 | "Field", 26 | "create_model", 27 | "Execution", 28 | "Embedding", 29 | "ID", 30 | "Undefined", 31 | "UndefinedType", 32 | "Event", 33 | "as_event", 34 | ) 35 | 36 | __version__ = "0.3.1" 37 | -------------------------------------------------------------------------------- /src/pydapter/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | from .csv_ import CsvAdapter 2 | from .json_ import JsonAdapter 3 | from .toml_ import TomlAdapter 4 | 5 | __all__ = ["JsonAdapter", "CsvAdapter", "TomlAdapter"] 6 | -------------------------------------------------------------------------------- /src/pydapter/adapters/json_.py: -------------------------------------------------------------------------------- 1 | """ 2 | JSON Adapter for Pydantic Models. 3 | 4 | This module provides the JsonAdapter class for converting between Pydantic models 5 | and JSON data formats. 
It supports reading from JSON files, strings, or bytes 6 | and writing Pydantic models to JSON format. 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | import json 12 | from pathlib import Path 13 | from typing import TypeVar 14 | 15 | from pydantic import BaseModel, ValidationError 16 | 17 | from ..core import Adapter 18 | from ..exceptions import ParseError 19 | from ..exceptions import ValidationError as AdapterValidationError 20 | 21 | T = TypeVar("T", bound=BaseModel) 22 | 23 | 24 | class JsonAdapter(Adapter[T]): 25 | """ 26 | Adapter for converting between Pydantic models and JSON data. 27 | 28 | This adapter handles JSON files, strings, and byte data, providing methods to: 29 | - Parse JSON data into Pydantic model instances 30 | - Convert Pydantic models to JSON format 31 | - Handle both single objects and arrays of objects 32 | 33 | Attributes: 34 | obj_key: The key identifier for this adapter type ("json") 35 | 36 | Example: 37 | ```python 38 | from pydantic import BaseModel 39 | from pydapter.adapters.json_ import JsonAdapter 40 | 41 | class Person(BaseModel): 42 | name: str 43 | age: int 44 | 45 | # Parse JSON data 46 | json_data = '{"name": "John", "age": 30}' 47 | person = JsonAdapter.from_obj(Person, json_data) 48 | 49 | # Parse JSON array 50 | json_array = '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' 51 | people = JsonAdapter.from_obj(Person, json_array, many=True) 52 | 53 | # Convert to JSON 54 | json_output = JsonAdapter.to_obj(person) 55 | ``` 56 | """ 57 | 58 | obj_key = "json" 59 | 60 | # ---------------- incoming 61 | @classmethod 62 | def from_obj( 63 | cls, subj_cls: type[T], obj: str | bytes | Path, /, *, many=False, **kw 64 | ): 65 | try: 66 | # Handle file path 67 | if isinstance(obj, Path): 68 | try: 69 | text = Path(obj).read_text() 70 | except Exception as e: 71 | raise ParseError(f"Failed to read JSON file: {e}", source=str(obj)) 72 | else: 73 | text = obj.decode("utf-8") if isinstance(obj, bytes) else obj 74 | # Check for empty input 75 | if not text or (isinstance(text, str) and not text.strip()): 76 | raise ParseError( 77 | "Empty JSON content", 78 | source=str(obj)[:100] if isinstance(obj, str) else str(obj), 79 | ) 80 | 81 | # Parse JSON 82 | try: 83 | data = json.loads(text) 84 | except json.JSONDecodeError as e: 85 | raise ParseError( 86 | f"Invalid JSON: {e}", 87 | source=str(text)[:100] if isinstance(text, str) else str(text), 88 | position=e.pos, 89 | line=e.lineno, 90 | column=e.colno, 91 | ) 92 | 93 | # Validate against model 94 | try: 95 | if many: 96 | if not isinstance(data, list): 97 | raise AdapterValidationError( 98 | "Expected JSON array for many=True", data=data 99 | ) 100 | return [subj_cls.model_validate(i) for i in data] 101 | return subj_cls.model_validate(data) 102 | except ValidationError as e: 103 | raise AdapterValidationError( 104 | f"Validation error: {e}", 105 | data=data, 106 | errors=e.errors(), 107 | ) 108 | 109 | except (ParseError, AdapterValidationError): 110 | # Re-raise our custom exceptions 111 | raise 112 | except Exception as e: 113 | # Wrap other exceptions 114 | raise ParseError( 115 | f"Unexpected error parsing JSON: {e}", 116 | source=str(obj)[:100] if isinstance(obj, str) else str(obj), 117 | ) 118 | 119 | # ---------------- outgoing 120 | @classmethod 121 | def to_obj(cls, subj: T | list[T], /, *, many=False, **kw) -> str: 122 | try: 123 | items = subj if isinstance(subj, list) else [subj] 124 | 125 | if not items: 126 | return "[]" if many else "{}" 127 | 128 | # Extract JSON 
serialization options from kwargs 129 | json_kwargs = { 130 | "indent": kw.pop("indent", 2), 131 | "sort_keys": kw.pop("sort_keys", True), 132 | "ensure_ascii": kw.pop("ensure_ascii", False), 133 | } 134 | 135 | payload = [i.model_dump() for i in items] if many else items[0].model_dump() 136 | return json.dumps(payload, **json_kwargs) 137 | 138 | except Exception as e: 139 | # Wrap exceptions 140 | raise ParseError(f"Error generating JSON: {e}") 141 | -------------------------------------------------------------------------------- /src/pydapter/adapters/toml_.py: -------------------------------------------------------------------------------- 1 | """ 2 | TOML Adapter for Pydantic Models. 3 | 4 | This module provides the TomlAdapter class for converting between Pydantic models 5 | and TOML data formats. It supports reading from TOML files or strings and writing 6 | Pydantic models to TOML format. 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | from pathlib import Path 12 | from typing import TypeVar 13 | 14 | import toml 15 | from pydantic import BaseModel, ValidationError 16 | 17 | from ..core import Adapter 18 | from ..exceptions import ParseError 19 | from ..exceptions import ValidationError as AdapterValidationError 20 | 21 | T = TypeVar("T", bound=BaseModel) 22 | 23 | 24 | def _ensure_list(d): 25 | """ 26 | Helper function to ensure data is in list format when many=True. 27 | 28 | This handles TOML's structure where arrays might be nested in sections. 29 | """ 30 | if isinstance(d, list): 31 | return d 32 | if isinstance(d, dict) and len(d) == 1 and isinstance(next(iter(d.values())), list): 33 | return next(iter(d.values())) 34 | return [d] 35 | 36 | 37 | class TomlAdapter(Adapter[T]): 38 | """ 39 | Adapter for converting between Pydantic models and TOML data. 
40 | 41 | This adapter handles TOML files and strings, providing methods to: 42 | - Parse TOML data into Pydantic model instances 43 | - Convert Pydantic models to TOML format 44 | - Handle both single objects and arrays of objects 45 | 46 | Attributes: 47 | obj_key: The key identifier for this adapter type ("toml") 48 | 49 | Example: 50 | ```python 51 | from pydantic import BaseModel 52 | from pydapter.adapters.toml_ import TomlAdapter 53 | 54 | class Person(BaseModel): 55 | name: str 56 | age: int 57 | 58 | # Parse TOML data 59 | toml_data = ''' 60 | name = "John" 61 | age = 30 62 | ''' 63 | person = TomlAdapter.from_obj(Person, toml_data) 64 | 65 | # Parse TOML array 66 | toml_array = ''' 67 | [[people]] 68 | name = "John" 69 | age = 30 70 | 71 | [[people]] 72 | name = "Jane" 73 | age = 25 74 | ''' 75 | people = TomlAdapter.from_obj(Person, toml_array, many=True) 76 | 77 | # Convert to TOML 78 | toml_output = TomlAdapter.to_obj(person) 79 | ``` 80 | """ 81 | 82 | obj_key = "toml" 83 | 84 | @classmethod 85 | def from_obj(cls, subj_cls: type[T], obj: str | Path, /, *, many=False, **kw): 86 | try: 87 | # Handle file path 88 | if isinstance(obj, Path): 89 | try: 90 | text = Path(obj).read_text() 91 | except Exception as e: 92 | raise ParseError(f"Failed to read TOML file: {e}", source=str(obj)) 93 | else: 94 | text = obj 95 | 96 | # Check for empty input 97 | if not text or (isinstance(text, str) and not text.strip()): 98 | raise ParseError( 99 | "Empty TOML content", 100 | source=str(obj)[:100] if isinstance(obj, str) else str(obj), 101 | ) 102 | 103 | # Parse TOML 104 | try: 105 | parsed = toml.loads(text, **kw) 106 | except toml.TomlDecodeError as e: 107 | raise ParseError( 108 | f"Invalid TOML: {e}", 109 | source=str(text)[:100] if isinstance(text, str) else str(text), 110 | ) 111 | 112 | # Validate against model 113 | try: 114 | if many: 115 | return [subj_cls.model_validate(x) for x in _ensure_list(parsed)] 116 | return subj_cls.model_validate(parsed) 117 | except ValidationError as e: 118 | raise AdapterValidationError( 119 | f"Validation error: {e}", 120 | data=parsed, 121 | errors=e.errors(), 122 | ) 123 | 124 | except (ParseError, AdapterValidationError): 125 | # Re-raise our custom exceptions 126 | raise 127 | except Exception as e: 128 | # Wrap other exceptions 129 | raise ParseError( 130 | f"Unexpected error parsing TOML: {e}", 131 | source=str(obj)[:100] if isinstance(obj, str) else str(obj), 132 | ) 133 | 134 | @classmethod 135 | def to_obj(cls, subj: T | list[T], /, *, many=False, **kw) -> str: 136 | try: 137 | items = subj if isinstance(subj, list) else [subj] 138 | 139 | if not items: 140 | return "" 141 | 142 | payload = ( 143 | {"items": [i.model_dump() for i in items]} 144 | if many 145 | else items[0].model_dump() 146 | ) 147 | return toml.dumps(payload, **kw) 148 | 149 | except Exception as e: 150 | # Wrap exceptions 151 | raise ParseError(f"Error generating TOML: {e}") 152 | -------------------------------------------------------------------------------- /src/pydapter/async_core.py: -------------------------------------------------------------------------------- 1 | """ 2 | pydapter.async_core - async counterparts to the sync Adapter stack 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from typing import Any, ClassVar, Protocol, TypeVar, runtime_checkable 8 | 9 | from .exceptions import ( 10 | PYDAPTER_PYTHON_ERRORS, 11 | AdapterError, 12 | AdapterNotFoundError, 13 | ConfigurationError, 14 | ) 15 | 16 | T = TypeVar("T") 17 | 18 | 19 | # 
----------------------------------------------------------------- AsyncAdapter 20 | @runtime_checkable 21 | class AsyncAdapter(Protocol[T]): 22 | """Stateless, **async** conversion helper.""" 23 | 24 | obj_key: ClassVar[str] 25 | 26 | @classmethod 27 | async def from_obj( 28 | cls, subj_cls: type[T], obj: Any, /, *, many: bool = False, **kw 29 | ) -> T | list[T]: ... 30 | 31 | @classmethod 32 | async def to_obj(cls, subj: T | list[T], /, *, many: bool = False, **kw) -> Any: ... 33 | 34 | 35 | # ------------------------------------------------------ AsyncAdapterRegistry 36 | class AsyncAdapterRegistry: 37 | def __init__(self) -> None: 38 | self._reg: dict[str, type[AsyncAdapter]] = {} 39 | 40 | def register(self, adapter_cls: type[AsyncAdapter]) -> None: 41 | key = getattr(adapter_cls, "obj_key", None) 42 | if not key: 43 | raise ConfigurationError( 44 | "AsyncAdapter must define 'obj_key'", adapter_cls=adapter_cls.__name__ 45 | ) 46 | self._reg[key] = adapter_cls 47 | 48 | def get(self, obj_key: str) -> type[AsyncAdapter]: 49 | try: 50 | return self._reg[obj_key] 51 | except KeyError as exc: 52 | raise AdapterNotFoundError( 53 | f"No async adapter for '{obj_key}'", obj_key=obj_key 54 | ) from exc 55 | 56 | # convenience helpers 57 | async def adapt_from(self, subj_cls: type[T], obj, *, obj_key: str, **kw): 58 | try: 59 | result = await self.get(obj_key).from_obj(subj_cls, obj, **kw) 60 | if result is None: 61 | raise AdapterError( 62 | f"Async adapter {obj_key} returned None", adapter=obj_key 63 | ) 64 | return result 65 | except Exception as exc: 66 | if isinstance(exc, AdapterError) or isinstance(exc, PYDAPTER_PYTHON_ERRORS): 67 | raise 68 | 69 | # Wrap other exceptions with context 70 | raise AdapterError( 71 | f"Error in async adapt_from for {obj_key}", original_error=str(exc) 72 | ) from exc 73 | 74 | async def adapt_to(self, subj, *, obj_key: str, **kw): 75 | try: 76 | result = await self.get(obj_key).to_obj(subj, **kw) 77 | if result is None: 78 | raise AdapterError( 79 | f"Async adapter {obj_key} returned None", adapter=obj_key 80 | ) 81 | return result 82 | except Exception as exc: 83 | if isinstance(exc, AdapterError) or isinstance(exc, PYDAPTER_PYTHON_ERRORS): 84 | raise 85 | 86 | raise AdapterError( 87 | f"Error in async adapt_to for {obj_key}", original_error=str(exc) 88 | ) from exc 89 | 90 | 91 | # -------------------------------------------------------------- AsyncAdaptable 92 | class AsyncAdaptable: 93 | """ 94 | Mixin that endows any Pydantic model with async adapt-from / adapt-to. 
95 | """ 96 | 97 | _async_registry: ClassVar[AsyncAdapterRegistry | None] = None 98 | 99 | # registry access 100 | @classmethod 101 | def _areg(cls) -> AsyncAdapterRegistry: 102 | if cls._async_registry is None: 103 | cls._async_registry = AsyncAdapterRegistry() 104 | return cls._async_registry 105 | 106 | @classmethod 107 | def register_async_adapter(cls, adapter_cls: type[AsyncAdapter]) -> None: 108 | cls._areg().register(adapter_cls) 109 | 110 | # helpers 111 | @classmethod 112 | async def adapt_from_async(cls, obj, *, obj_key: str, **kw): 113 | return await cls._areg().adapt_from(cls, obj, obj_key=obj_key, **kw) 114 | 115 | async def adapt_to_async(self, *, obj_key: str, **kw): 116 | return await self._areg().adapt_to(self, obj_key=obj_key, **kw) 117 | -------------------------------------------------------------------------------- /src/pydapter/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | pydapter.exceptions - Custom exception hierarchy for pydapter. 3 | """ 4 | 5 | from typing import Any, Optional 6 | 7 | PYDAPTER_PYTHON_ERRORS = (KeyError, ImportError, AttributeError, ValueError) 8 | 9 | 10 | class AdapterError(Exception): 11 | """Base exception for all pydapter errors.""" 12 | 13 | def __init__(self, message: str, **context: Any): 14 | super().__init__(message) 15 | self.message = message 16 | self.context = context 17 | 18 | def __str__(self) -> str: 19 | context_str = ", ".join(f"{k}={v!r}" for k, v in self.context.items()) 20 | if context_str: 21 | return f"{self.message} ({context_str})" 22 | return self.message 23 | 24 | 25 | class ValidationError(AdapterError): 26 | """Exception raised when data validation fails.""" 27 | 28 | def __init__(self, message: str, data: Optional[Any] = None, **context: Any): 29 | super().__init__(message, **context) 30 | self.data = data 31 | 32 | 33 | class TypeConversionError(ValidationError): 34 | """Exception raised when type conversion fails.""" 35 | 36 | def __init__( 37 | self, 38 | message: str, 39 | source_type: Optional[type] = None, 40 | target_type: Optional[type] = None, 41 | field_name: Optional[str] = None, 42 | model_name: Optional[str] = None, 43 | **context: Any, 44 | ): 45 | super().__init__(message, **context) 46 | self.source_type = source_type 47 | self.target_type = target_type 48 | self.field_name = field_name 49 | self.model_name = model_name 50 | 51 | 52 | class ParseError(AdapterError): 53 | """Exception raised when data parsing fails.""" 54 | 55 | def __init__(self, message: str, source: Optional[str] = None, **context: Any): 56 | super().__init__(message, **context) 57 | self.source = source 58 | 59 | 60 | class ConnectionError(AdapterError): 61 | """Exception raised when a connection to a data source fails.""" 62 | 63 | def __init__( 64 | self, 65 | message: str, 66 | adapter: Optional[str] = None, 67 | url: Optional[str] = None, 68 | **context: Any, 69 | ): 70 | super().__init__(message, **context) 71 | self.adapter = adapter 72 | self.url = url 73 | 74 | 75 | class QueryError(AdapterError): 76 | """Exception raised when a query to a data source fails.""" 77 | 78 | def __init__( 79 | self, 80 | message: str, 81 | query: Optional[Any] = None, 82 | adapter: Optional[str] = None, 83 | **context: Any, 84 | ): 85 | super().__init__(message, **context) 86 | self.query = query 87 | self.adapter = adapter 88 | 89 | 90 | class ResourceError(AdapterError): 91 | """Exception raised when a resource (file, database, etc.) 
cannot be accessed.""" 92 | 93 | def __init__(self, message: str, resource: Optional[str] = None, **context: Any): 94 | super().__init__(message, **context) 95 | self.resource = resource 96 | 97 | 98 | class ConfigurationError(AdapterError): 99 | """Exception raised when adapter configuration is invalid.""" 100 | 101 | def __init__( 102 | self, message: str, config: Optional[dict[str, Any]] = None, **context: Any 103 | ): 104 | super().__init__(message, **context) 105 | self.config = config 106 | 107 | 108 | class AdapterNotFoundError(AdapterError): 109 | """Exception raised when an adapter is not found.""" 110 | 111 | def __init__(self, message: str, obj_key: Optional[str] = None, **context: Any): 112 | super().__init__(message, **context) 113 | self.obj_key = obj_key 114 | -------------------------------------------------------------------------------- /src/pydapter/extras/README.md: -------------------------------------------------------------------------------- 1 | # pydapter extras 2 | 3 | This directory contains additional adapters for various data sources and 4 | formats. 5 | 6 | ## Available Adapters 7 | 8 | - **Database Adapters**: SQL, PostgreSQL, MongoDB, Neo4j, Qdrant 9 | - **Async Database Adapters**: AsyncSQL, AsyncPostgres, AsyncMongo, AsyncQdrant 10 | - **Other Formats**: Excel, Pandas 11 | 12 | ## Error Handling 13 | 14 | All adapters in this directory implement robust error handling using the 15 | pydapter exception hierarchy: 16 | 17 | - `ConnectionError`: Raised when connection to a data source fails 18 | - `QueryError`: Raised when a query to a data source fails 19 | - `ResourceError`: Raised when a resource (table, collection, etc.) cannot be 20 | accessed 21 | - `ValidationError`: Raised when data validation fails 22 | - `ConfigurationError`: Raised when adapter configuration is invalid 23 | 24 | See the [Error Handling Documentation](../../../docs/error_handling.md) for more 25 | details. 26 | 27 | ## Template for AsyncAdapter 28 | 29 | ```python 30 | class AsyncMongoAdapter(AsyncAdapter[T]): 31 | obj_key = "async_mongo" 32 | 33 | @classmethod 34 | async def from_obj(cls, subj_cls, obj, /, **kw): 35 | try: 36 | # Validate required parameters 37 | if "url" not in obj: 38 | raise ValidationError("Missing required parameter 'url'") 39 | if "db" not in obj: 40 | raise ValidationError("Missing required parameter 'db'") 41 | if "collection" not in obj: 42 | raise ValidationError("Missing required parameter 'collection'") 43 | 44 | # Connect to database 45 | client = motor.motor_asyncio.AsyncIOMotorClient(obj["url"]) 46 | 47 | # Execute query 48 | docs = await client[obj["db"]][obj["collection"]].find(obj.get("filter", {})).to_list(length=None) 49 | 50 | # Handle empty result set 51 | if not docs and not kw.get("many", True): 52 | raise ResourceError( 53 | "No documents found matching the query", 54 | resource=f"{obj['db']}.{obj['collection']}", 55 | filter=obj.get("filter", {}) 56 | ) 57 | 58 | # Process results 59 | return [subj_cls(**doc) for doc in docs] if kw.get("many", True) else subj_cls(**docs[0]) 60 | 61 | except motor.errors.ConnectionFailure as e: 62 | raise ConnectionError( 63 | f"MongoDB connection failed: {e}", 64 | adapter="async_mongo", 65 | url=obj.get("url") 66 | ) 67 | except motor.errors.OperationFailure as e: 68 | raise QueryError( 69 | f"MongoDB query error: {e}", 70 | query=obj.get("filter"), 71 | adapter="async_mongo" 72 | ) 73 | 74 | @classmethod 75 | async def to_obj(cls, subj, /, **kw): 76 | # Similar error handling for to_obj method 77 | ... 
78 | ``` 79 | -------------------------------------------------------------------------------- /src/pydapter/extras/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This sub-package holds adapters that require heavy optional deps. 3 | Import only what you need, e.g.: 4 | 5 | from pydapter.extras.pandas_ import DataFrameAdapter 6 | """ 7 | -------------------------------------------------------------------------------- /src/pydapter/extras/excel_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Excel adapter (requires pandas + xlsxwriter engine). 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import io 8 | from pathlib import Path 9 | from typing import Any, TypeVar 10 | 11 | import pandas as pd 12 | from pydantic import BaseModel 13 | 14 | from ..core import Adapter 15 | from ..exceptions import AdapterError, ResourceError 16 | from .pandas_ import DataFrameAdapter 17 | 18 | T = TypeVar("T", bound=BaseModel) 19 | 20 | 21 | class ExcelAdapter(Adapter[T]): 22 | """ 23 | Adapter for converting between Pydantic models and Excel files. 24 | 25 | This adapter handles Excel (.xlsx) files, providing methods to: 26 | - Read Excel files into Pydantic model instances 27 | - Write Pydantic models to Excel files 28 | - Support for different sheets and pandas read_excel options 29 | 30 | Attributes: 31 | obj_key: The key identifier for this adapter type ("xlsx") 32 | 33 | Example: 34 | ```python 35 | from pathlib import Path 36 | from pydantic import BaseModel 37 | from pydapter.extras.excel_ import ExcelAdapter 38 | 39 | class Person(BaseModel): 40 | name: str 41 | age: int 42 | 43 | # Read from Excel file 44 | excel_file = Path("people.xlsx") 45 | people = ExcelAdapter.from_obj(Person, excel_file, many=True) 46 | 47 | # Write to Excel file 48 | output_bytes = ExcelAdapter.to_obj(people, many=True) 49 | with open("output.xlsx", "wb") as f: 50 | f.write(output_bytes) 51 | ``` 52 | """ 53 | 54 | obj_key = "xlsx" 55 | 56 | @classmethod 57 | def from_obj( 58 | cls, 59 | subj_cls: type[T], 60 | obj: str | Path | bytes, 61 | /, 62 | *, 63 | many: bool = True, 64 | sheet_name: str | int = 0, 65 | **kw: Any, 66 | ) -> T | list[T]: 67 | """ 68 | Convert Excel data to Pydantic model instances. 
69 | 70 | Args: 71 | subj_cls: The Pydantic model class to instantiate 72 | obj: Excel file path, file-like object, or bytes 73 | many: If True, convert all rows; if False, convert only first row 74 | sheet_name: Sheet name or index to read (default: 0) 75 | **kw: Additional arguments passed to pandas.read_excel 76 | 77 | Returns: 78 | List of model instances if many=True, single instance if many=False 79 | 80 | Raises: 81 | ResourceError: If the Excel file cannot be read 82 | AdapterError: If the data cannot be converted to models 83 | """ 84 | try: 85 | if isinstance(obj, bytes): 86 | df = pd.read_excel(io.BytesIO(obj), sheet_name=sheet_name, **kw) 87 | else: 88 | df = pd.read_excel(obj, sheet_name=sheet_name, **kw) 89 | return DataFrameAdapter.from_obj(subj_cls, df, many=many) 90 | except FileNotFoundError as e: 91 | raise ResourceError(f"File not found: {e}", resource=str(obj)) from e 92 | except ValueError as e: 93 | raise AdapterError( 94 | f"Error adapting from xlsx (original_error='{e}')", adapter="xlsx" 95 | ) from e 96 | except Exception as e: 97 | raise AdapterError( 98 | f"Unexpected error in Excel adapter: {e}", adapter="xlsx" 99 | ) from e 100 | 101 | # outgoing 102 | @classmethod 103 | def to_obj( 104 | cls, 105 | subj: T | list[T], 106 | /, 107 | *, 108 | many=True, 109 | sheet_name="Sheet1", 110 | **kw, 111 | ) -> bytes: 112 | df = DataFrameAdapter.to_obj(subj, many=many) 113 | buf = io.BytesIO() 114 | with pd.ExcelWriter(buf, engine="xlsxwriter") as wr: 115 | df.to_excel(wr, sheet_name=sheet_name, index=False) 116 | return buf.getvalue() 117 | -------------------------------------------------------------------------------- /src/pydapter/extras/pandas_.py: -------------------------------------------------------------------------------- 1 | """ 2 | DataFrame & Series adapters (require `pandas`). 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from typing import Any, TypeVar 8 | 9 | import pandas as pd 10 | from pydantic import BaseModel 11 | 12 | from ..core import Adapter 13 | 14 | T = TypeVar("T", bound=BaseModel) 15 | 16 | 17 | class DataFrameAdapter(Adapter[T]): 18 | """ 19 | Adapter for converting between Pydantic models and pandas DataFrames. 20 | 21 | This adapter handles pandas DataFrame objects, providing methods to: 22 | - Convert DataFrame rows to Pydantic model instances 23 | - Convert Pydantic models to DataFrame rows 24 | - Handle both single records and multiple records 25 | 26 | Attributes: 27 | obj_key: The key identifier for this adapter type ("pd.DataFrame") 28 | 29 | Example: 30 | ```python 31 | import pandas as pd 32 | from pydantic import BaseModel 33 | from pydapter.extras.pandas_ import DataFrameAdapter 34 | 35 | class Person(BaseModel): 36 | name: str 37 | age: int 38 | 39 | # Create DataFrame 40 | df = pd.DataFrame([ 41 | {"name": "John", "age": 30}, 42 | {"name": "Jane", "age": 25} 43 | ]) 44 | 45 | # Convert to Pydantic models 46 | people = DataFrameAdapter.from_obj(Person, df, many=True) 47 | 48 | # Convert back to DataFrame 49 | df_output = DataFrameAdapter.to_obj(people, many=True) 50 | ``` 51 | """ 52 | 53 | obj_key = "pd.DataFrame" 54 | 55 | @classmethod 56 | def from_obj( 57 | cls, subj_cls: type[T], obj: pd.DataFrame, /, *, many: bool = True, **kw: Any 58 | ) -> T | list[T]: 59 | """ 60 | Convert DataFrame to Pydantic model instances. 
61 | 62 | Args: 63 | subj_cls: The Pydantic model class to instantiate 64 | obj: The pandas DataFrame to convert 65 | many: If True, convert all rows; if False, convert only first row 66 | **kw: Additional arguments passed to model_validate 67 | 68 | Returns: 69 | List of model instances if many=True, single instance if many=False 70 | """ 71 | if many: 72 | return [subj_cls.model_validate(r) for r in obj.to_dict(orient="records")] 73 | return subj_cls.model_validate(obj.iloc[0].to_dict(), **kw) 74 | 75 | @classmethod 76 | def to_obj( 77 | cls, subj: T | list[T], /, *, many: bool = True, **kw: Any 78 | ) -> pd.DataFrame: 79 | """ 80 | Convert Pydantic model instances to pandas DataFrame. 81 | 82 | Args: 83 | subj: Single model instance or list of instances 84 | many: If True, handle as multiple instances 85 | **kw: Additional arguments passed to DataFrame constructor 86 | 87 | Returns: 88 | pandas DataFrame with model data 89 | """ 90 | items = subj if isinstance(subj, list) else [subj] 91 | return pd.DataFrame([i.model_dump() for i in items], **kw) 92 | 93 | 94 | class SeriesAdapter(Adapter[T]): 95 | """ 96 | Adapter for converting between Pydantic models and pandas Series. 97 | 98 | This adapter handles pandas Series objects, providing methods to: 99 | - Convert Series to a single Pydantic model instance 100 | - Convert Pydantic model to Series 101 | - Only supports single records (many=False) 102 | 103 | Attributes: 104 | obj_key: The key identifier for this adapter type ("pd.Series") 105 | 106 | Example: 107 | ```python 108 | import pandas as pd 109 | from pydantic import BaseModel 110 | from pydapter.extras.pandas_ import SeriesAdapter 111 | 112 | class Person(BaseModel): 113 | name: str 114 | age: int 115 | 116 | # Create Series 117 | series = pd.Series({"name": "John", "age": 30}) 118 | 119 | # Convert to Pydantic model 120 | person = SeriesAdapter.from_obj(Person, series) 121 | 122 | # Convert back to Series 123 | series_output = SeriesAdapter.to_obj(person) 124 | ``` 125 | """ 126 | 127 | obj_key = "pd.Series" 128 | 129 | @classmethod 130 | def from_obj( 131 | cls, subj_cls: type[T], obj: pd.Series, /, *, many: bool = False, **kw: Any 132 | ) -> T: 133 | """ 134 | Convert pandas Series to Pydantic model instance. 135 | 136 | Args: 137 | subj_cls: The Pydantic model class to instantiate 138 | obj: The pandas Series to convert 139 | many: Must be False (Series only supports single records) 140 | **kw: Additional arguments passed to model_validate 141 | 142 | Returns: 143 | Single model instance 144 | 145 | Raises: 146 | ValueError: If many=True is specified 147 | """ 148 | if many: 149 | raise ValueError("SeriesAdapter supports single records only.") 150 | return subj_cls.model_validate(obj.to_dict(), **kw) 151 | 152 | @classmethod 153 | def to_obj( 154 | cls, subj: T | list[T], /, *, many: bool = False, **kw: Any 155 | ) -> pd.Series: 156 | if many or isinstance(subj, list): 157 | raise ValueError("SeriesAdapter supports single records only.") 158 | return pd.Series(subj.model_dump(), **kw) 159 | -------------------------------------------------------------------------------- /src/pydapter/extras/postgres_.py: -------------------------------------------------------------------------------- 1 | """ 2 | PostgresAdapter - thin preset over SQLAdapter (pgvector-ready if you add vec column). 
3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from typing import TypeVar 8 | 9 | from pydantic import BaseModel 10 | 11 | from ..exceptions import ConnectionError 12 | from .sql_ import SQLAdapter 13 | 14 | T = TypeVar("T", bound=BaseModel) 15 | 16 | 17 | class PostgresAdapter(SQLAdapter[T]): 18 | """ 19 | PostgreSQL-specific adapter extending SQLAdapter with PostgreSQL optimizations. 20 | 21 | This adapter provides: 22 | - PostgreSQL-specific connection handling and error messages 23 | - Default PostgreSQL connection string 24 | - Enhanced error handling for common PostgreSQL issues 25 | - Support for pgvector when vector columns are present 26 | 27 | Attributes: 28 | obj_key: The key identifier for this adapter type ("postgres") 29 | DEFAULT: Default PostgreSQL connection string 30 | 31 | Example: 32 | ```python 33 | from pydantic import BaseModel 34 | from pydapter.extras.postgres_ import PostgresAdapter 35 | 36 | class User(BaseModel): 37 | id: int 38 | name: str 39 | email: str 40 | 41 | # Query with custom connection 42 | query_config = { 43 | "query": "SELECT id, name, email FROM users WHERE active = true", 44 | "engine_url": "postgresql+psycopg://user:pass@localhost/mydb" 45 | } 46 | users = PostgresAdapter.from_obj(User, query_config, many=True) 47 | 48 | # Insert with default connection 49 | insert_config = { 50 | "table": "users", 51 | "engine_url": "postgresql+psycopg://user:pass@localhost/mydb" 52 | } 53 | new_users = [User(id=1, name="John", email="john@example.com")] 54 | PostgresAdapter.to_obj(new_users, insert_config, many=True) 55 | ``` 56 | """ 57 | 58 | obj_key = "postgres" 59 | DEFAULT = "postgresql+psycopg://user:pass@localhost/db" 60 | 61 | @classmethod 62 | def from_obj(cls, subj_cls, obj: dict, /, **kw): 63 | try: 64 | # Set default connection string if not provided 65 | obj.setdefault("engine_url", cls.DEFAULT) 66 | 67 | # Add PostgreSQL-specific error handling 68 | try: 69 | return super().from_obj(subj_cls, obj, **kw) 70 | except Exception as e: 71 | # Check for common PostgreSQL-specific errors 72 | error_str = str(e).lower() 73 | if "authentication" in error_str: 74 | raise ConnectionError( 75 | f"PostgreSQL authentication failed: {e}", 76 | adapter="postgres", 77 | url=obj["engine_url"], 78 | ) from e 79 | elif "connection" in error_str and "refused" in error_str: 80 | raise ConnectionError( 81 | f"PostgreSQL connection refused: {e}", 82 | adapter="postgres", 83 | url=obj["engine_url"], 84 | ) from e 85 | elif "does not exist" in error_str and "database" in error_str: 86 | raise ConnectionError( 87 | f"PostgreSQL database does not exist: {e}", 88 | adapter="postgres", 89 | url=obj["engine_url"], 90 | ) from e 91 | # Re-raise the original exception 92 | raise 93 | 94 | except ConnectionError: 95 | # Re-raise ConnectionError 96 | raise 97 | except Exception as e: 98 | # Wrap other exceptions 99 | raise ConnectionError( 100 | f"Unexpected error in PostgreSQL adapter: {e}", 101 | adapter="postgres", 102 | url=obj.get("engine_url", cls.DEFAULT), 103 | ) from e 104 | 105 | @classmethod 106 | def to_obj(cls, subj, /, **kw): 107 | try: 108 | # Set default connection string if not provided 109 | kw.setdefault("engine_url", cls.DEFAULT) 110 | 111 | # Add PostgreSQL-specific error handling 112 | try: 113 | return super().to_obj(subj, **kw) 114 | except Exception as e: 115 | # Check for common PostgreSQL-specific errors 116 | error_str = str(e).lower() 117 | if "authentication" in error_str: 118 | raise ConnectionError( 119 | f"PostgreSQL authentication 
failed: {e}", 120 | adapter="postgres", 121 | url=kw["engine_url"], 122 | ) from e 123 | elif "connection" in error_str and "refused" in error_str: 124 | raise ConnectionError( 125 | f"PostgreSQL connection refused: {e}", 126 | adapter="postgres", 127 | url=kw["engine_url"], 128 | ) from e 129 | elif "does not exist" in error_str and "database" in error_str: 130 | raise ConnectionError( 131 | f"PostgreSQL database does not exist: {e}", 132 | adapter="postgres", 133 | url=kw["engine_url"], 134 | ) from e 135 | # Re-raise the original exception 136 | raise 137 | 138 | except ConnectionError: 139 | # Re-raise ConnectionError 140 | raise 141 | except Exception as e: 142 | # Wrap other exceptions 143 | raise ConnectionError( 144 | f"Unexpected error in PostgreSQL adapter: {e}", 145 | adapter="postgres", 146 | url=kw.get("engine_url", cls.DEFAULT), 147 | ) from e 148 | -------------------------------------------------------------------------------- /src/pydapter/fields/__init__.py: -------------------------------------------------------------------------------- 1 | from pydapter.fields.builder import DomainModelBuilder 2 | from pydapter.fields.common_templates import ( 3 | CREATED_AT_TEMPLATE, 4 | CREATED_AT_TZ_TEMPLATE, 5 | DELETED_AT_TEMPLATE, 6 | DELETED_AT_TZ_TEMPLATE, 7 | DESCRIPTION_TEMPLATE, 8 | EMAIL_TEMPLATE, 9 | ID_TEMPLATE, 10 | JSON_TEMPLATE, 11 | METADATA_TEMPLATE, 12 | NAME_TEMPLATE, 13 | NONNEGATIVE_INT_TEMPLATE, 14 | PERCENTAGE_TEMPLATE, 15 | PHONE_TEMPLATE, 16 | POSITIVE_FLOAT_TEMPLATE, 17 | POSITIVE_INT_TEMPLATE, 18 | STRING_TEMPLATE, 19 | TAGS_TEMPLATE, 20 | UPDATED_AT_TEMPLATE, 21 | UPDATED_AT_TZ_TEMPLATE, 22 | URL_TEMPLATE, 23 | USERNAME_TEMPLATE, 24 | ) 25 | from pydapter.fields.dts import ( 26 | DATETIME, 27 | DATETIME_NULLABLE, 28 | datetime_serializer, 29 | validate_datetime, 30 | ) 31 | from pydapter.fields.embedding import EMBEDDING, validate_embedding 32 | from pydapter.fields.execution import EXECUTION, Execution 33 | from pydapter.fields.families import FieldFamilies, create_field_dict 34 | from pydapter.fields.ids import ( 35 | ID_FROZEN, 36 | ID_MUTABLE, 37 | ID_NULLABLE, 38 | serialize_uuid, 39 | validate_uuid, 40 | ) 41 | from pydapter.fields.params import ( 42 | PARAM_TYPE, 43 | PARAM_TYPE_NULLABLE, 44 | PARAMS, 45 | validate_model_to_params, 46 | validate_model_to_type, 47 | ) 48 | from pydapter.fields.protocol_families import ( 49 | ProtocolFieldFamilies, 50 | create_protocol_model, 51 | ) 52 | from pydapter.fields.template import FieldTemplate 53 | from pydapter.fields.types import ( 54 | ID, 55 | Embedding, 56 | Field, 57 | Metadata, 58 | Undefined, 59 | UndefinedType, 60 | create_model, 61 | ) 62 | from pydapter.fields.validation_patterns import ( 63 | ValidationPatterns, 64 | create_pattern_template, 65 | create_range_template, 66 | ) 67 | 68 | __all__ = ( 69 | "DATETIME", 70 | "DATETIME_NULLABLE", 71 | "validate_datetime", 72 | "datetime_serializer", 73 | "ID_FROZEN", 74 | "ID_MUTABLE", 75 | "ID_NULLABLE", 76 | "validate_uuid", 77 | "serialize_uuid", 78 | "PARAMS", 79 | "PARAM_TYPE", 80 | "PARAM_TYPE_NULLABLE", 81 | "validate_model_to_params", 82 | "validate_model_to_type", 83 | "EMBEDDING", 84 | "validate_embedding", 85 | "UndefinedType", 86 | "Undefined", 87 | "Field", 88 | "create_model", 89 | "Execution", 90 | "EXECUTION", 91 | "ID", 92 | "Embedding", 93 | "Metadata", 94 | "FieldTemplate", 95 | "ID_TEMPLATE", 96 | "STRING_TEMPLATE", 97 | "EMAIL_TEMPLATE", 98 | "USERNAME_TEMPLATE", 99 | "CREATED_AT_TEMPLATE", 100 | "UPDATED_AT_TEMPLATE", 101 | 
"DELETED_AT_TEMPLATE", 102 | "CREATED_AT_TZ_TEMPLATE", 103 | "UPDATED_AT_TZ_TEMPLATE", 104 | "DELETED_AT_TZ_TEMPLATE", 105 | "NAME_TEMPLATE", 106 | "DESCRIPTION_TEMPLATE", 107 | "URL_TEMPLATE", 108 | "PHONE_TEMPLATE", 109 | "POSITIVE_INT_TEMPLATE", 110 | "NONNEGATIVE_INT_TEMPLATE", 111 | "POSITIVE_FLOAT_TEMPLATE", 112 | "PERCENTAGE_TEMPLATE", 113 | "JSON_TEMPLATE", 114 | "TAGS_TEMPLATE", 115 | "METADATA_TEMPLATE", 116 | "FieldFamilies", 117 | "create_field_dict", 118 | "DomainModelBuilder", 119 | "ProtocolFieldFamilies", 120 | "create_protocol_model", 121 | "ValidationPatterns", 122 | "create_pattern_template", 123 | "create_range_template", 124 | ) 125 | -------------------------------------------------------------------------------- /src/pydapter/fields/common_templates.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import uuid 4 | from datetime import datetime, timezone 5 | from typing import Annotated, Any 6 | 7 | from pydantic import AwareDatetime, EmailStr 8 | from pydantic import Field as PydanticField 9 | from pydantic import HttpUrl, NaiveDatetime, confloat, conint, constr 10 | 11 | from pydapter.fields.template import FieldTemplate 12 | 13 | __all__ = ( 14 | "ID_TEMPLATE", 15 | "STRING_TEMPLATE", 16 | "EMAIL_TEMPLATE", 17 | "USERNAME_TEMPLATE", 18 | "CREATED_AT_TEMPLATE", 19 | "UPDATED_AT_TEMPLATE", 20 | "DELETED_AT_TEMPLATE", 21 | "CREATED_AT_TZ_TEMPLATE", 22 | "UPDATED_AT_TZ_TEMPLATE", 23 | "DELETED_AT_TZ_TEMPLATE", 24 | "NAME_TEMPLATE", 25 | "DESCRIPTION_TEMPLATE", 26 | "URL_TEMPLATE", 27 | "PHONE_TEMPLATE", 28 | "POSITIVE_INT_TEMPLATE", 29 | "NONNEGATIVE_INT_TEMPLATE", 30 | "POSITIVE_FLOAT_TEMPLATE", 31 | "PERCENTAGE_TEMPLATE", 32 | "JSON_TEMPLATE", 33 | "TAGS_TEMPLATE", 34 | "METADATA_TEMPLATE", 35 | ) 36 | 37 | 38 | # ID Templates 39 | ID_TEMPLATE = FieldTemplate( 40 | base_type=uuid.UUID, 41 | description="Unique identifier", 42 | default_factory=uuid.uuid4, 43 | ) 44 | 45 | # String Templates 46 | STRING_TEMPLATE = FieldTemplate( 47 | base_type=str, 48 | description="String field", 49 | ) 50 | 51 | # Using Pydantic v2 EmailStr for email validation 52 | EMAIL_TEMPLATE = FieldTemplate( 53 | base_type=EmailStr, 54 | description="Email address", 55 | ) 56 | 57 | # Username with pattern constraint using constr 58 | USERNAME_TEMPLATE = FieldTemplate( 59 | base_type=constr(pattern=r"^[a-zA-Z0-9_-]{3,32}$"), 60 | description="Username", 61 | ) 62 | 63 | NAME_TEMPLATE = FieldTemplate( 64 | base_type=Annotated[str, PydanticField(min_length=1, max_length=255)], 65 | description="Name field", 66 | ) 67 | 68 | DESCRIPTION_TEMPLATE = FieldTemplate( 69 | base_type=str, 70 | description="Description field", 71 | default="", 72 | ) 73 | 74 | # Using Pydantic v2 HttpUrl for URL validation 75 | URL_TEMPLATE = FieldTemplate( 76 | base_type=HttpUrl, 77 | description="URL field", 78 | ) 79 | 80 | # Phone number with pattern constraint 81 | PHONE_TEMPLATE = FieldTemplate( 82 | base_type=constr(pattern=r"^\+?[0-9\s\-\(\)]{10,20}$"), 83 | description="Phone number", 84 | ) 85 | 86 | # Datetime Templates - Naive (without timezone) 87 | CREATED_AT_TEMPLATE = FieldTemplate( 88 | base_type=NaiveDatetime, 89 | description="Creation timestamp (naive)", 90 | default_factory=datetime.utcnow, 91 | frozen=True, 92 | ) 93 | 94 | UPDATED_AT_TEMPLATE = FieldTemplate( 95 | base_type=NaiveDatetime, 96 | description="Last update timestamp (naive)", 97 | default_factory=datetime.utcnow, 98 | ) 99 | 100 | DELETED_AT_TEMPLATE = 
FieldTemplate( 101 | base_type=NaiveDatetime, 102 | description="Deletion timestamp (naive)", 103 | ).as_nullable() 104 | 105 | # Datetime Templates - Timezone Aware (recommended) 106 | CREATED_AT_TZ_TEMPLATE = FieldTemplate( 107 | base_type=AwareDatetime, 108 | description="Creation timestamp (timezone-aware)", 109 | default_factory=lambda: datetime.now(timezone.utc), 110 | frozen=True, 111 | ) 112 | 113 | UPDATED_AT_TZ_TEMPLATE = FieldTemplate( 114 | base_type=AwareDatetime, 115 | description="Last update timestamp (timezone-aware)", 116 | default_factory=lambda: datetime.now(timezone.utc), 117 | ) 118 | 119 | DELETED_AT_TZ_TEMPLATE = FieldTemplate( 120 | base_type=AwareDatetime, 121 | description="Deletion timestamp (timezone-aware)", 122 | ).as_nullable() 123 | 124 | # Numeric Templates using Pydantic v2 constraints 125 | POSITIVE_INT_TEMPLATE = FieldTemplate( 126 | base_type=conint(gt=0), 127 | description="Positive integer", 128 | ) 129 | 130 | NONNEGATIVE_INT_TEMPLATE = FieldTemplate( 131 | base_type=conint(ge=0), 132 | description="Non-negative integer", 133 | default=0, 134 | ) 135 | 136 | POSITIVE_FLOAT_TEMPLATE = FieldTemplate( 137 | base_type=confloat(gt=0), 138 | description="Positive float", 139 | ) 140 | 141 | PERCENTAGE_TEMPLATE = FieldTemplate( 142 | base_type=confloat(ge=0, le=100), 143 | description="Percentage value (0-100)", 144 | default=0.0, 145 | ) 146 | 147 | # JSON/Dict Templates for JSONB support 148 | JSON_TEMPLATE = FieldTemplate( 149 | base_type=dict, 150 | description="JSON data", 151 | default_factory=dict, 152 | json_schema_extra={"db_type": "jsonb"}, 153 | ) 154 | 155 | # Common collection templates 156 | TAGS_TEMPLATE = FieldTemplate( 157 | base_type=list[str], 158 | description="List of tags", 159 | default_factory=list, 160 | ) 161 | 162 | METADATA_TEMPLATE = FieldTemplate( 163 | base_type=dict[str, Any], 164 | description="Metadata dictionary", 165 | default_factory=dict, 166 | json_schema_extra={"db_type": "jsonb"}, 167 | ) 168 | -------------------------------------------------------------------------------- /src/pydapter/fields/dts.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from datetime import datetime, timezone 3 | 4 | from pydapter.exceptions import ValidationError 5 | from pydapter.fields.types import Field 6 | 7 | __all__ = ( 8 | "DATETIME", 9 | "DATETIME_NULLABLE", 10 | "validate_datetime", 11 | "datetime_serializer", 12 | ) 13 | 14 | 15 | def validate_datetime( 16 | v: datetime | str, 17 | /, 18 | nullable: bool = False, 19 | ) -> datetime | None: 20 | if not v and nullable: 21 | return None 22 | if isinstance(v, datetime): 23 | return v 24 | if isinstance(v, str): 25 | with contextlib.suppress(ValueError): 26 | return datetime.fromisoformat(v) 27 | raise ValidationError( 28 | "Invalid datetime format, must be ISO 8601 or datetime object" 29 | ) 30 | 31 | 32 | def datetime_serializer(v: datetime, /) -> str: 33 | return v.isoformat() 34 | 35 | 36 | def datetime_validator(cls, v): 37 | return validate_datetime(v) 38 | 39 | 40 | def nullable_datetime_validator(cls, v): 41 | return validate_datetime(v, nullable=True) 42 | 43 | 44 | DATETIME = Field( 45 | name="datetime_field", 46 | annotation=datetime, 47 | default_factory=lambda: datetime.now(tz=timezone.utc), 48 | validator=datetime_validator, 49 | immutable=True, 50 | ) 51 | 52 | DATETIME_NULLABLE = Field( 53 | name="nullable_datetime_field", 54 | annotation=type(None), # Simplified to avoid UnionType issues 55 | default=None, 
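    # The attached validator accepts None, ISO 8601 strings, or datetime objects.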
56 |     validator=nullable_datetime_validator,
57 |     immutable=True,
58 | )
59 | 
--------------------------------------------------------------------------------
/src/pydapter/fields/embedding.py:
--------------------------------------------------------------------------------
1 | import orjson
2 | 
3 | from pydapter.fields.types import Field
4 | 
5 | __all__ = (
6 |     "EMBEDDING",
7 |     "validate_embedding",
8 | )
9 | 
10 | 
11 | def validate_embedding(value: list[float] | str | None) -> list[float]:
12 |     if value is None:
13 |         return []
14 |     if isinstance(value, str):
15 |         try:
16 |             loaded = orjson.loads(value)
17 |             return [float(x) for x in loaded]
18 |         except Exception as e:
19 |             raise ValueError("Invalid embedding string.") from e
20 |     if isinstance(value, list):
21 |         try:
22 |             return [float(x) for x in value]
23 |         except Exception as e:
24 |             raise ValueError("Invalid embedding list.") from e
25 |     raise ValueError("Invalid embedding type; must be list or JSON-encoded string.")
26 | 
27 | 
28 | def embedding_validator(cls, v):
29 |     return validate_embedding(v)
30 | 
31 | 
32 | EMBEDDING = Field(
33 |     name="embedding",
34 |     annotation=list[float],
35 |     default_factory=list,
36 |     title="Embedding",
37 |     description="List of floats representing the embedding vector.",
38 |     validator=embedding_validator,  # parses JSON strings and coerces items to float
39 |     immutable=True,
40 | )
--------------------------------------------------------------------------------
/src/pydapter/fields/execution.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timezone
2 | from enum import Enum
3 | from typing import Any
4 | 
5 | from pydantic import BaseModel, ConfigDict
6 | from pydantic import Field as PydanticField
7 | from pydantic import field_validator
8 | 
9 | from pydapter.exceptions import ValidationError
10 | from pydapter.fields.params import validate_model_to_params
11 | from pydapter.fields.types import Field
12 | 
13 | __all__ = ("EXECUTION",)
14 | 
15 | 
16 | class ExecutionStatus(str, Enum):
17 |     """Status states for tracking action execution progress."""
18 | 
19 |     PENDING = "pending"
20 |     PROCESSING = "processing"
21 |     COMPLETED = "completed"
22 |     FAILED = "failed"
23 | 
24 | 
25 | class Execution(BaseModel):
26 |     """Represents the execution state of an event."""
27 | 
28 |     model_config = ConfigDict(
29 |         use_enum_values=True,
30 |         arbitrary_types_allowed=True,
31 |     )
32 |     duration: float | None = None
33 |     response: dict | None = None
34 |     status: ExecutionStatus = ExecutionStatus.PENDING
35 |     error: str | None = None
36 |     response_obj: Any = PydanticField(None, exclude=True)
37 |     updated_at: datetime | None = PydanticField(
38 |         default_factory=lambda: datetime.now(tz=timezone.utc),
39 |         exclude=True,
40 |     )
41 | 
42 |     @field_validator("response", mode="before")
43 |     def _validate_response(cls, v: BaseModel | dict | None):
44 |         return validate_model_to_params(v)
45 | 
46 |     def validate_response(self):
47 |         if self.response is None and self.response_obj is None:
48 |             raise ValidationError("Response and response_obj are both None")
49 |         if not isinstance(self.response, dict):
50 |             self.response = validate_model_to_params(self.response_obj)
51 | 
52 | 
53 | EXECUTION = Field(
54 |     name="execution",
55 |     annotation=Execution,
56 |     default_factory=Execution,
57 |     validator=lambda cls, v: v or Execution(),
58 |     validator_kwargs={"mode": "before"},
59 |     immutable=True,
60 | )
--------------------------------------------------------------------------------
/src/pydapter/fields/families.py:
--------------------------------------------------------------------------------
1 | """Field Families - Predefined collections of field templates for core database patterns.
2 | 
3 | This module provides pre-configured field families that group commonly used fields
4 | together for database models. These families focus on core abstractions like
5 | entity tracking, soft deletion, and audit trails.
6 | """
7 | 
8 | from __future__ import annotations
9 | 
10 | from typing import TYPE_CHECKING
11 | 
12 | from pydapter.fields.common_templates import (
13 |     CREATED_AT_TEMPLATE,
14 |     CREATED_AT_TZ_TEMPLATE,
15 |     DELETED_AT_TEMPLATE,
16 |     DELETED_AT_TZ_TEMPLATE,
17 |     ID_TEMPLATE,
18 |     UPDATED_AT_TEMPLATE,
19 |     UPDATED_AT_TZ_TEMPLATE,
20 | )
21 | from pydapter.fields.template import FieldTemplate
22 | 
23 | if TYPE_CHECKING:
24 |     from pydapter.fields.types import Field
25 | 
26 | 
27 | __all__ = (
28 |     "FieldFamilies",
29 |     "create_field_dict",
30 | )
31 | 
32 | 
33 | class FieldFamilies:
34 |     """Collection of predefined field template groups for core database patterns.
35 | 
36 |     This class provides field families that represent common database patterns
37 |     like entity tracking, soft deletion, and audit trails. These are foundational
38 |     patterns that align with pydapter's core abstractions.
39 |     """
40 | 
41 |     # Basic entity fields (id, created_at, updated_at)
42 |     # Maps to Identifiable + Temporal protocols
43 |     ENTITY: dict[str, FieldTemplate] = {
44 |         "id": ID_TEMPLATE,
45 |         "created_at": CREATED_AT_TEMPLATE,
46 |         "updated_at": UPDATED_AT_TEMPLATE,
47 |     }
48 | 
49 |     # Entity fields with timezone-aware timestamps
50 |     ENTITY_TZ: dict[str, FieldTemplate] = {
51 |         "id": ID_TEMPLATE,
52 |         "created_at": CREATED_AT_TZ_TEMPLATE,
53 |         "updated_at": UPDATED_AT_TZ_TEMPLATE,
54 |     }
55 | 
56 |     # Soft delete support - common database pattern
57 |     SOFT_DELETE: dict[str, FieldTemplate] = {
58 |         "deleted_at": DELETED_AT_TEMPLATE,
59 |         "is_deleted": None,  # Will be defined below
60 |     }
61 | 
62 |     # Soft delete with timezone-aware timestamp
63 |     SOFT_DELETE_TZ: dict[str, FieldTemplate] = {
64 |         "deleted_at": DELETED_AT_TZ_TEMPLATE,
65 |         "is_deleted": None,  # Will be defined below
66 |     }
67 | 
68 |     # Audit/tracking fields - common pattern for tracking changes
69 |     AUDIT: dict[str, FieldTemplate] = {
70 |         "created_by": None,  # Will be defined below
71 |         "updated_by": None,  # Will be defined below
72 |         "version": None,  # Will be defined below
73 |     }
74 | 
75 | 
76 | # Define core field templates
77 | _BOOLEAN_TEMPLATE = FieldTemplate(
78 |     base_type=bool,
79 |     description="Boolean flag",
80 |     default=False,
81 | )
82 | 
83 | _UUID_NULLABLE_TEMPLATE = ID_TEMPLATE.as_nullable()
84 | 
85 | _VERSION_TEMPLATE = FieldTemplate(
86 |     base_type=int,
87 |     description="Version number for optimistic locking",
88 |     default=1,
89 | )
90 | 
91 | 
92 | # Update the field families with actual templates
93 | FieldFamilies.SOFT_DELETE["is_deleted"] = _BOOLEAN_TEMPLATE
94 | FieldFamilies.SOFT_DELETE_TZ["is_deleted"] = _BOOLEAN_TEMPLATE
95 | 
96 | FieldFamilies.AUDIT.update(
97 |     {
98 |         "created_by": _UUID_NULLABLE_TEMPLATE,
99 |         "updated_by": _UUID_NULLABLE_TEMPLATE,
100 |         "version": _VERSION_TEMPLATE,
101 |     }
102 | )
103 | 
104 | 
105 | def create_field_dict(
106 |     *families: dict[str, FieldTemplate], **overrides: FieldTemplate
107 | ) -> dict[str, Field]:
108 |     """Create a field dictionary by merging multiple field families.
109 | 
110 |     This function takes multiple field families and merges them into a single
111 |     dictionary of Pydantic fields.
Later families override fields from earlier 112 | ones if there are naming conflicts. 113 | 114 | Args: 115 | *families: Variable number of field family dictionaries to merge 116 | **overrides: Individual field templates to add or override 117 | 118 | Returns: 119 | Dict[str, Field]: A dictionary mapping field names to Pydantic Field instances 120 | 121 | Example: 122 | ```python 123 | # Combine entity and audit fields 124 | fields = create_field_dict( 125 | FieldFamilies.ENTITY, 126 | FieldFamilies.AUDIT, 127 | name=FieldTemplate(base_type=str, description="Entity name") 128 | ) 129 | 130 | # Create a model with the combined fields 131 | AuditedEntity = create_model("AuditedEntity", fields=fields) 132 | ``` 133 | """ 134 | 135 | result: dict[str, Field] = {} 136 | 137 | # Process field families in order 138 | for family in families: 139 | for field_name, template in family.items(): 140 | if template is not None: 141 | result[field_name] = template.create_field(field_name) 142 | 143 | # Process individual overrides 144 | for field_name, template in overrides.items(): 145 | if template is not None: 146 | result[field_name] = template.create_field(field_name) 147 | 148 | return result 149 | -------------------------------------------------------------------------------- /src/pydapter/fields/ids.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | from typing import Union 3 | from uuid import UUID, uuid4 4 | 5 | from pydapter.exceptions import ValidationError 6 | from pydapter.fields.types import Field 7 | 8 | __all__ = ( 9 | "ID_FROZEN", 10 | "ID_MUTABLE", 11 | "ID_NULLABLE", 12 | "validate_uuid", 13 | "serialize_uuid", 14 | ) 15 | 16 | 17 | def validate_uuid(v: UUID | str, /, nullable: bool = False) -> UUID | None: 18 | if not v and nullable: 19 | return None 20 | if isinstance(v, UUID): 21 | return v 22 | with contextlib.suppress(ValueError): 23 | return UUID(str(v)) 24 | raise ValidationError("id must be a valid UUID or UUID string") 25 | 26 | 27 | def serialize_uuid(v: UUID, /) -> str: 28 | return str(v) 29 | 30 | 31 | def uuid_validator(cls, v) -> UUID | None: 32 | return validate_uuid(v) 33 | 34 | 35 | def nullable_uuid_validator(cls, v) -> UUID | None: 36 | return validate_uuid(v, nullable=True) 37 | 38 | 39 | ID_FROZEN = Field( 40 | name="id", 41 | annotation=UUID, 42 | default_factory=uuid4, 43 | frozen=True, 44 | title="ID", 45 | validator=uuid_validator, 46 | description="Frozen Unique identifier", 47 | immutable=True, 48 | ) 49 | 50 | ID_MUTABLE = Field( 51 | name="id", 52 | annotation=UUID, 53 | default_factory=uuid4, 54 | title="ID", 55 | validator=lambda cls, v: validate_uuid(v), 56 | immutable=True, 57 | ) 58 | 59 | ID_NULLABLE = Field( 60 | name="nullable_id", 61 | annotation=Union[UUID, None], # Use Union to avoid UnionType issues 62 | default=None, 63 | validator=lambda cls, v: validate_uuid(v, nullable=True), 64 | immutable=True, 65 | ) 66 | -------------------------------------------------------------------------------- /src/pydapter/fields/params.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from pydantic_core import PydanticUndefined 3 | 4 | from pydapter.exceptions import ValidationError 5 | from pydapter.fields.types import Field, Undefined 6 | 7 | __all__ = ( 8 | "PARAMS", 9 | "PARAM_TYPE", 10 | "PARAM_TYPE_NULLABLE", 11 | ) 12 | 13 | 14 | def validate_model_to_params(v, /) -> dict: 15 | if v in [None, {}, [], Undefined, 
PydanticUndefined]: 16 | return {} 17 | if isinstance(v, dict): 18 | return v 19 | if isinstance(v, BaseModel): 20 | return v.model_dump() 21 | raise ValidationError( 22 | "Invalid params input, must be a dictionary or BaseModel instance" 23 | ) 24 | 25 | 26 | PARAMS = Field( 27 | name="params", 28 | annotation=dict, 29 | default_factory=dict, 30 | validator=lambda cls, v: validate_model_to_params(v), 31 | validator_kwargs={"mode": "before"}, 32 | immutable=True, 33 | ) 34 | 35 | 36 | def validate_model_to_type(v, /, nullable: bool = False) -> type | None: 37 | if not v: 38 | if nullable: 39 | return None 40 | raise ValidationError("Model type cannot be None or empty") 41 | if v is BaseModel: 42 | return v 43 | if isinstance(v, type) and issubclass(v, BaseModel): 44 | return v 45 | if isinstance(v, BaseModel): 46 | return v.__class__ 47 | raise ValidationError( 48 | "Invalid model type, must be a pydantic class or BaseModel instance" 49 | ) 50 | 51 | 52 | PARAM_TYPE = Field( 53 | name="param_type", 54 | annotation=type, # Simplified annotation to avoid GenericAlias issues 55 | validator=lambda cls, v: validate_model_to_type(v), 56 | validator_kwargs={"mode": "before"}, 57 | immutable=True, 58 | ) 59 | 60 | PARAM_TYPE_NULLABLE = Field( 61 | name="param_type_nullable", 62 | annotation=type, # Simplified annotation to avoid UnionType issues 63 | default=None, 64 | validator=lambda cls, v: validate_model_to_type(v, nullable=True), 65 | validator_kwargs={"mode": "before"}, 66 | immutable=True, 67 | ) 68 | -------------------------------------------------------------------------------- /src/pydapter/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.util import find_spec 2 | from typing import TYPE_CHECKING 3 | 4 | if TYPE_CHECKING: 5 | # Type checking imports 6 | from .base import AsyncMigrationAdapter, BaseMigrationAdapter, SyncMigrationAdapter 7 | from .exceptions import ( 8 | MigrationCreationError, 9 | MigrationDowngradeError, 10 | MigrationError, 11 | MigrationInitError, 12 | MigrationNotFoundError, 13 | MigrationUpgradeError, 14 | ) 15 | from .protocols import AsyncMigrationProtocol, MigrationProtocol 16 | from .registry import MigrationRegistry 17 | else: 18 | try: 19 | # Runtime imports 20 | from .base import ( 21 | AsyncMigrationAdapter, 22 | BaseMigrationAdapter, 23 | SyncMigrationAdapter, 24 | ) 25 | from .exceptions import ( 26 | MigrationCreationError, 27 | MigrationDowngradeError, 28 | MigrationError, 29 | MigrationInitError, 30 | MigrationNotFoundError, 31 | MigrationUpgradeError, 32 | ) 33 | from .protocols import AsyncMigrationProtocol, MigrationProtocol 34 | from .registry import MigrationRegistry 35 | except ImportError: 36 | # Import error handling 37 | from ..utils.dependencies import check_migrations_dependencies 38 | 39 | def __getattr__(name): 40 | check_migrations_dependencies() 41 | raise ImportError(f"Cannot import {name} because dependencies are missing") 42 | 43 | 44 | __all__ = [ 45 | "BaseMigrationAdapter", 46 | "SyncMigrationAdapter", 47 | "AsyncMigrationAdapter", 48 | "MigrationProtocol", 49 | "AsyncMigrationProtocol", 50 | "MigrationError", 51 | "MigrationInitError", 52 | "MigrationCreationError", 53 | "MigrationUpgradeError", 54 | "MigrationDowngradeError", 55 | "MigrationNotFoundError", 56 | "MigrationRegistry", 57 | ] 58 | 59 | # Optional imports based on available dependencies 60 | if find_spec("sqlalchemy") is not None and find_spec("alembic") is not None: 61 | try: 62 | from 
.sql.alembic_adapter import ( # noqa: F401 63 | AlembicAdapter, 64 | AsyncAlembicAdapter, 65 | ) 66 | 67 | __all__.extend(["AlembicAdapter", "AsyncAlembicAdapter"]) 68 | except ImportError: 69 | pass 70 | -------------------------------------------------------------------------------- /src/pydapter/migrations/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | pydapter.migrations.exceptions - Custom exceptions for migration operations. 3 | """ 4 | 5 | from typing import Any, Optional 6 | 7 | from pydapter.exceptions import AdapterError 8 | 9 | 10 | class MigrationError(AdapterError): 11 | """Base exception for all migration-related errors.""" 12 | 13 | def __init__( 14 | self, 15 | message: str, 16 | original_error: Optional[Exception] = None, 17 | adapter: Optional[str] = None, 18 | **context: Any, 19 | ): 20 | super().__init__(message, **context) 21 | self.original_error = original_error 22 | self.adapter = adapter 23 | 24 | def __str__(self) -> str: 25 | """Return a string representation of the error.""" 26 | result = super().__str__() 27 | if hasattr(self, "original_error") and self.original_error is not None: 28 | result += f" (original_error='{self.original_error}')" 29 | return result 30 | 31 | 32 | class MigrationInitError(MigrationError): 33 | """Exception raised when migration initialization fails.""" 34 | 35 | def __init__( 36 | self, 37 | message: str, 38 | directory: Optional[str] = None, 39 | adapter: Optional[str] = None, 40 | **context: Any, 41 | ): 42 | super().__init__(message, directory=directory, adapter=adapter, **context) 43 | self.directory = directory 44 | self.adapter = adapter 45 | # Ensure original_error is set even if not passed through super().__init__ 46 | if "original_error" in context: 47 | self.original_error = context["original_error"] 48 | 49 | 50 | class MigrationCreationError(MigrationError): 51 | """Exception raised when migration creation fails.""" 52 | 53 | def __init__( 54 | self, 55 | message: str, 56 | message_text: Optional[str] = None, 57 | autogenerate: Optional[bool] = None, 58 | adapter: Optional[str] = None, 59 | **context: Any, 60 | ): 61 | super().__init__( 62 | message, 63 | message_text=message_text, 64 | autogenerate=autogenerate, 65 | adapter=adapter, 66 | **context, 67 | ) 68 | self.message_text = message_text 69 | self.autogenerate = autogenerate 70 | self.adapter = adapter 71 | # Ensure original_error is set even if not passed through super().__init__ 72 | if "original_error" in context: 73 | self.original_error = context["original_error"] 74 | 75 | 76 | class MigrationUpgradeError(MigrationError): 77 | """Exception raised when migration upgrade fails.""" 78 | 79 | def __init__( 80 | self, 81 | message: str, 82 | revision: Optional[str] = None, 83 | adapter: Optional[str] = None, 84 | **context: Any, 85 | ): 86 | super().__init__(message, revision=revision, adapter=adapter, **context) 87 | self.revision = revision 88 | self.adapter = adapter 89 | # Ensure original_error is set even if not passed through super().__init__ 90 | if "original_error" in context: 91 | self.original_error = context["original_error"] 92 | 93 | 94 | class MigrationDowngradeError(MigrationError): 95 | """Exception raised when migration downgrade fails.""" 96 | 97 | def __init__( 98 | self, 99 | message: str, 100 | revision: Optional[str] = None, 101 | adapter: Optional[str] = None, 102 | **context: Any, 103 | ): 104 | super().__init__(message, revision=revision, adapter=adapter, **context) 105 | 
self.revision = revision 106 | self.adapter = adapter 107 | # Ensure original_error is set even if not passed through super().__init__ 108 | if "original_error" in context: 109 | self.original_error = context["original_error"] 110 | 111 | 112 | class MigrationNotFoundError(MigrationError): 113 | """Exception raised when a migration is not found.""" 114 | 115 | def __init__( 116 | self, 117 | message: str, 118 | revision: Optional[str] = None, 119 | adapter: Optional[str] = None, 120 | **context: Any, 121 | ): 122 | super().__init__(message, revision=revision, adapter=adapter, **context) 123 | self.revision = revision 124 | self.adapter = adapter 125 | # Ensure original_error is set even if not passed through super().__init__ 126 | if "original_error" in context: 127 | self.original_error = context["original_error"] 128 | -------------------------------------------------------------------------------- /src/pydapter/migrations/protocols.py: -------------------------------------------------------------------------------- 1 | """ 2 | pydapter.migrations.protocols - Protocol definitions for migration adapters. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from typing import Any, ClassVar, Protocol, TypeVar, runtime_checkable 8 | 9 | T = TypeVar("T", covariant=True) 10 | 11 | 12 | @runtime_checkable 13 | class MigrationProtocol(Protocol[T]): 14 | """Protocol defining synchronous migration operations.""" 15 | 16 | migration_key: ClassVar[str] 17 | 18 | @classmethod 19 | def init_migrations(cls, directory: str, **kwargs: Any) -> None: 20 | """ 21 | Initialize migration environment in the specified directory. 22 | 23 | Args: 24 | directory: Path to the directory where migrations will be stored 25 | **kwargs: Additional adapter-specific arguments 26 | """ 27 | ... 28 | 29 | @classmethod 30 | def create_migration( 31 | cls, message: str, autogenerate: bool = True, **kwargs: Any 32 | ) -> str: 33 | """ 34 | Create a new migration. 35 | 36 | Args: 37 | message: Description of the migration 38 | autogenerate: Whether to auto-generate the migration based on model changes 39 | **kwargs: Additional adapter-specific arguments 40 | 41 | Returns: 42 | The revision identifier of the created migration 43 | """ 44 | ... 45 | 46 | @classmethod 47 | def upgrade(cls, revision: str = "head", **kwargs: Any) -> None: 48 | """ 49 | Upgrade to the specified revision. 50 | 51 | Args: 52 | revision: The target revision to upgrade to (default: "head") 53 | **kwargs: Additional adapter-specific arguments 54 | """ 55 | ... 56 | 57 | @classmethod 58 | def downgrade(cls, revision: str, **kwargs: Any) -> None: 59 | """ 60 | Downgrade to the specified revision. 61 | 62 | Args: 63 | revision: The target revision to downgrade to 64 | **kwargs: Additional adapter-specific arguments 65 | """ 66 | ... 67 | 68 | @classmethod 69 | def get_current_revision(cls, **kwargs: Any) -> str | None: 70 | """ 71 | Get the current migration revision. 72 | 73 | Args: 74 | **kwargs: Additional adapter-specific arguments 75 | 76 | Returns: 77 | The current revision identifier, or None if no migrations have been applied 78 | """ 79 | ... 80 | 81 | @classmethod 82 | def get_migration_history(cls, **kwargs: Any) -> list[dict]: 83 | """ 84 | Get the migration history. 85 | 86 | Args: 87 | **kwargs: Additional adapter-specific arguments 88 | 89 | Returns: 90 | A list of dictionaries containing migration information 91 | """ 92 | ... 
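Any class that provides ``migration_key`` and these classmethods satisfies the
protocol structurally; no inheritance is required. A minimal sketch
(hypothetical in-memory adapter, not shipped with pydapter):

```python
from pydapter.migrations.protocols import MigrationProtocol


class InMemoryMigrations:
    """Toy adapter that tracks revisions in a list."""

    migration_key = "memory"
    _revisions: list[str] = []

    @classmethod
    def init_migrations(cls, directory: str, **kwargs) -> None:
        cls._revisions = []

    @classmethod
    def create_migration(cls, message: str, autogenerate: bool = True, **kw) -> str:
        revision = f"rev_{len(cls._revisions) + 1}"
        cls._revisions.append(revision)
        return revision

    @classmethod
    def upgrade(cls, revision: str = "head", **kwargs) -> None:
        pass  # nothing to apply in this toy example

    @classmethod
    def downgrade(cls, revision: str, **kwargs) -> None:
        pass

    @classmethod
    def get_current_revision(cls, **kwargs) -> str | None:
        return cls._revisions[-1] if cls._revisions else None

    @classmethod
    def get_migration_history(cls, **kwargs) -> list[dict]:
        return [{"revision": r} for r in cls._revisions]


assert isinstance(InMemoryMigrations(), MigrationProtocol)
```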
93 | 94 | 95 | @runtime_checkable 96 | class AsyncMigrationProtocol(Protocol[T]): 97 | """Protocol defining asynchronous migration operations.""" 98 | 99 | migration_key: ClassVar[str] 100 | 101 | @classmethod 102 | async def init_migrations(cls, directory: str, **kwargs: Any) -> None: 103 | """ 104 | Initialize migration environment in the specified directory. 105 | 106 | Args: 107 | directory: Path to the directory where migrations will be stored 108 | **kwargs: Additional adapter-specific arguments 109 | """ 110 | ... 111 | 112 | @classmethod 113 | async def create_migration( 114 | cls, message: str, autogenerate: bool = True, **kwargs: Any 115 | ) -> str: 116 | """ 117 | Create a new migration. 118 | 119 | Args: 120 | message: Description of the migration 121 | autogenerate: Whether to auto-generate the migration based on model changes 122 | **kwargs: Additional adapter-specific arguments 123 | 124 | Returns: 125 | The revision identifier of the created migration 126 | """ 127 | ... 128 | 129 | @classmethod 130 | async def upgrade(cls, revision: str = "head", **kwargs: Any) -> None: 131 | """ 132 | Upgrade to the specified revision. 133 | 134 | Args: 135 | revision: The target revision to upgrade to (default: "head") 136 | **kwargs: Additional adapter-specific arguments 137 | """ 138 | ... 139 | 140 | @classmethod 141 | async def downgrade(cls, revision: str, **kwargs: Any) -> None: 142 | """ 143 | Downgrade to the specified revision. 144 | 145 | Args: 146 | revision: The target revision to downgrade to 147 | **kwargs: Additional adapter-specific arguments 148 | """ 149 | ... 150 | 151 | @classmethod 152 | async def get_current_revision(cls, **kwargs: Any) -> str | None: 153 | """ 154 | Get the current migration revision. 155 | 156 | Args: 157 | **kwargs: Additional adapter-specific arguments 158 | 159 | Returns: 160 | The current revision identifier, or None if no migrations have been applied 161 | """ 162 | ... 163 | 164 | @classmethod 165 | async def get_migration_history(cls, **kwargs: Any) -> list[dict]: 166 | """ 167 | Get the migration history. 168 | 169 | Args: 170 | **kwargs: Additional adapter-specific arguments 171 | 172 | Returns: 173 | A list of dictionaries containing migration information 174 | """ 175 | ... 176 | -------------------------------------------------------------------------------- /src/pydapter/migrations/sql/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | if TYPE_CHECKING: 4 | # Type checking imports 5 | from .alembic_adapter import AlembicAdapter, AsyncAlembicAdapter 6 | else: 7 | try: 8 | # Runtime imports 9 | from .alembic_adapter import AlembicAdapter, AsyncAlembicAdapter 10 | 11 | __all__ = ["AlembicAdapter", "AsyncAlembicAdapter"] 12 | except ImportError: 13 | # Import error handling 14 | from ...utils.dependencies import check_migrations_sql_dependencies 15 | 16 | def __getattr__(name): 17 | check_migrations_sql_dependencies() 18 | raise ImportError(f"Cannot import {name} because dependencies are missing") 19 | 20 | __all__ = [] 21 | -------------------------------------------------------------------------------- /src/pydapter/model_adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model adapters for converting between Pydantic and SQLAlchemy models. 
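The package exposes ``SQLModelAdapter``, ``PostgresModelAdapter``, and
``PGVectorModelAdapter`` (with ``SQLVectorModelAdapter`` kept as a deprecated
alias), plus ``TypeRegistry`` and the configuration models. ``TypeRegistry``
maps Python types to SQL types; a minimal registration sketch (illustrative
only, assuming no conflicting mapping is already installed):

```python
from sqlalchemy import String

from pydapter.model_adapters import TypeRegistry

# Register a (hypothetical) mapping from Python str to SQLAlchemy String.
TypeRegistry.register(python_type=str, sql_type_factory=lambda: String())

assert TypeRegistry.get_sql_type(str) is not None
assert TypeRegistry.get_python_type(String()) is str
```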
3 | """ 4 | 5 | from .config import PostgresAdapterConfig, VectorIndexConfig 6 | from .pg_vector_model import PGVectorModelAdapter 7 | from .postgres_model import PostgresModelAdapter 8 | from .sql_model import SQLModelAdapter 9 | 10 | # For backward compatibility 11 | from .sql_vector_model import SQLVectorModelAdapter 12 | from .type_registry import TypeRegistry 13 | 14 | __all__ = [ 15 | "SQLModelAdapter", 16 | "PGVectorModelAdapter", 17 | "PostgresModelAdapter", 18 | "SQLVectorModelAdapter", # Deprecated, use PGVectorModelAdapter instead 19 | "TypeRegistry", 20 | "VectorIndexConfig", 21 | "PostgresAdapterConfig", 22 | ] 23 | -------------------------------------------------------------------------------- /src/pydapter/model_adapters/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | from __future__ import annotations 3 | 4 | from typing import Any, Literal 5 | 6 | from pydantic import BaseModel, Field, field_validator 7 | 8 | 9 | class VectorIndexConfig(BaseModel): 10 | """Configuration for vector indexing.""" 11 | 12 | index_type: Literal["hnsw", "ivfflat", "exact"] = "hnsw" 13 | params: dict[str, Any] = Field(default_factory=dict) 14 | 15 | # HNSW parameters 16 | m: int = 16 # HNSW parameter: max number of connections per node 17 | ef_construction: int = 64 # HNSW build-time parameter 18 | 19 | # IVFFlat parameters 20 | lists: int = 100 # Number of IVF lists (clusters) 21 | probes: int = 10 # Number of lists to search at query time 22 | 23 | @field_validator("index_type") 24 | @classmethod 25 | def validate_index_type(cls, v): 26 | """Validate that the index type is supported.""" 27 | if v not in ["hnsw", "ivfflat", "exact"]: 28 | raise ValueError(f"Unsupported index type: {v}") 29 | return v 30 | 31 | def get_params(self) -> dict[str, Any]: 32 | """ 33 | Get the parameters for the specified index type. 34 | 35 | Returns: 36 | A dictionary of parameters for the index 37 | """ 38 | params = self.params.copy() 39 | 40 | if self.index_type == "hnsw" and not params: 41 | params = { 42 | "m": self.m, 43 | "ef_construction": self.ef_construction, 44 | } 45 | elif self.index_type == "ivfflat" and not params: 46 | params = { 47 | "lists": self.lists, 48 | } 49 | 50 | return params 51 | 52 | 53 | class PostgresAdapterConfig(BaseModel): 54 | """Configuration for PostgreSQL adapters.""" 55 | 56 | db_schema: str = Field(default="public", description="Database schema name") 57 | batch_size: int = Field(default=1000, gt=0) 58 | vector_index_config: VectorIndexConfig = Field(default_factory=VectorIndexConfig) 59 | validate_vector_dimensions: bool = True 60 | 61 | @field_validator("batch_size") 62 | @classmethod 63 | def validate_batch_size(cls, v): 64 | """Validate that the batch size is positive.""" 65 | if v <= 0: 66 | raise ValueError("Batch size must be positive") 67 | return v 68 | -------------------------------------------------------------------------------- /src/pydapter/model_adapters/sql_vector_model.py: -------------------------------------------------------------------------------- 1 | # sql_vector_model.py 2 | """ 3 | This module is deprecated. Use pg_vector_model.py instead. 4 | """ 5 | 6 | import warnings 7 | 8 | from .pg_vector_model import PGVectorModelAdapter 9 | 10 | warnings.warn( 11 | "SQLVectorModelAdapter is deprecated and will be removed in a future version. 
" 12 | "Use PGVectorModelAdapter instead.", 13 | DeprecationWarning, 14 | stacklevel=2, 15 | ) 16 | 17 | # Re-export PGVectorModelAdapter as SQLVectorModelAdapter for backward compatibility 18 | SQLVectorModelAdapter = PGVectorModelAdapter 19 | -------------------------------------------------------------------------------- /src/pydapter/model_adapters/type_registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Type registry for mapping between Python and SQL types. 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | from typing import Any, Callable, TypeVar 8 | 9 | T = TypeVar("T") 10 | 11 | 12 | class TypeRegistry: 13 | """Registry for type mappings between Python and SQL types.""" 14 | 15 | _PY_TO_SQL: dict[type, Callable[[], Any]] = {} 16 | _SQL_TO_PY: dict[type, type] = {} 17 | _PY_TO_SQL_CONVERTERS: dict[type, Callable[[Any], Any]] = {} 18 | _SQL_TO_PY_CONVERTERS: dict[type, Callable[[Any], Any]] = {} 19 | 20 | @classmethod 21 | def register( 22 | cls, 23 | python_type: type, 24 | sql_type_factory: Callable[[], Any], 25 | python_to_sql: Callable[[Any], Any] | None = None, 26 | sql_to_python: Callable[[Any], Any] | None = None, 27 | ) -> None: 28 | """ 29 | Register a type mapping between Python and SQL types. 30 | 31 | Args: 32 | python_type: The Python type to map from/to. 33 | sql_type_factory: A factory function that creates the corresponding SQL type. 34 | python_to_sql: Optional function to convert from Python to SQL value. 35 | sql_to_python: Optional function to convert from SQL to Python value. 36 | """ 37 | cls._PY_TO_SQL[python_type] = sql_type_factory 38 | sql_type = sql_type_factory() 39 | cls._SQL_TO_PY[type(sql_type)] = python_type 40 | 41 | if python_to_sql: 42 | cls._PY_TO_SQL_CONVERTERS[python_type] = python_to_sql 43 | if sql_to_python: 44 | cls._SQL_TO_PY_CONVERTERS[type(sql_type)] = sql_to_python 45 | 46 | @classmethod 47 | def get_sql_type(cls, python_type: type) -> Callable[[], Any] | None: 48 | """ 49 | Get the SQL type factory for a Python type. 50 | 51 | Args: 52 | python_type: The Python type to get the SQL type for. 53 | 54 | Returns: 55 | A factory function that creates the corresponding SQL type, or None if not found. 56 | """ 57 | if python_type in cls._PY_TO_SQL: 58 | return cls._PY_TO_SQL[python_type] 59 | 60 | # Try to find a compatible type 61 | for registered_type, sql_type in cls._PY_TO_SQL.items(): 62 | try: 63 | if isinstance(python_type, type) and issubclass( 64 | python_type, registered_type 65 | ): 66 | return sql_type 67 | except TypeError: 68 | # Skip parameterized generics that can't be used with issubclass 69 | continue 70 | 71 | return None 72 | 73 | @classmethod 74 | def get_python_type(cls, sql_type: Any) -> type | None: 75 | """ 76 | Get the Python type for an SQL type. 77 | 78 | Args: 79 | sql_type: The SQL type to get the Python type for. 80 | 81 | Returns: 82 | The corresponding Python type, or None if not found. 83 | """ 84 | sql_type_class = type(sql_type) 85 | if sql_type_class in cls._SQL_TO_PY: 86 | return cls._SQL_TO_PY[sql_type_class] 87 | 88 | # Try to find a compatible type 89 | for registered_type, py_type in cls._SQL_TO_PY.items(): 90 | if isinstance(sql_type, registered_type): 91 | return py_type 92 | 93 | return None 94 | 95 | @classmethod 96 | def convert_to_sql(cls, value: Any, python_type: type) -> Any: 97 | """ 98 | Convert a Python value to an SQL value. 99 | 100 | Args: 101 | value: The Python value to convert. 
102 |             python_type: The Python type of the value.
103 | 
104 |         Returns:
105 |             The converted SQL value.
106 |         """
107 |         if value is None:
108 |             return None
109 | 
110 |         converter = cls._PY_TO_SQL_CONVERTERS.get(python_type)
111 |         if converter:
112 |             return converter(value)
113 | 
114 |         # Try to find a compatible converter
115 |         for registered_type, conv in cls._PY_TO_SQL_CONVERTERS.items():
116 |             try:
117 |                 if isinstance(python_type, type) and issubclass(
118 |                     python_type, registered_type
119 |                 ):
120 |                     return conv(value)
121 |             except TypeError:
122 |                 # Skip parameterized generics that can't be used with issubclass
123 |                 continue
124 | 
125 |         # No converter found, return as is
126 |         return value
127 | 
128 |     @classmethod
129 |     def convert_to_python(cls, value: Any, sql_type: Any) -> Any:
130 |         """
131 |         Convert an SQL value to a Python value.
132 | 
133 |         Args:
134 |             value: The SQL value to convert.
135 |             sql_type: The SQL type of the value.
136 | 
137 |         Returns:
138 |             The converted Python value.
139 |         """
140 |         if value is None:
141 |             return None
142 | 
143 |         sql_type_class = type(sql_type)
144 |         converter = cls._SQL_TO_PY_CONVERTERS.get(sql_type_class)
145 |         if converter:
146 |             return converter(value)
147 | 
148 |         # Try to find a compatible converter
149 |         for registered_type, conv in cls._SQL_TO_PY_CONVERTERS.items():
150 |             if isinstance(sql_type, registered_type):
151 |                 return conv(value)
152 | 
153 |         # No converter found, return as is
154 |         return value
155 | 
--------------------------------------------------------------------------------
/src/pydapter/protocols/__init__.py:
--------------------------------------------------------------------------------
1 | from pydapter.protocols.constants import (
2 |     CRYPTOGRAPHICAL,
3 |     EMBEDDABLE,
4 |     IDENTIFIABLE,
5 |     INVOKABLE,
6 |     PROTOCOL_MIXINS,
7 |     TEMPORAL,
8 |     ProtocolType,
9 | )
10 | from pydapter.protocols.cryptographical import (
11 |     Cryptographical,
12 |     CryptographicalMixin,
13 |     sha256_of_obj,
14 | )
15 | from pydapter.protocols.embeddable import Embeddable, EmbeddableMixin
16 | from pydapter.protocols.event import Event, as_event
17 | from pydapter.protocols.factory import combine_with_mixins, create_protocol_model_class
18 | from pydapter.protocols.identifiable import Identifiable, IdentifiableMixin
19 | from pydapter.protocols.invokable import Invokable, InvokableMixin
20 | from pydapter.protocols.registry import get_mixin_registry, register_mixin
21 | from pydapter.protocols.temporal import Temporal, TemporalMixin
22 | 
23 | __all__ = (
24 |     # Protocol classes
25 |     "Identifiable",
26 |     "IdentifiableMixin",
27 |     "Invokable",
28 |     "InvokableMixin",
29 |     "Embeddable",
30 |     "EmbeddableMixin",
31 |     "Event",
32 |     "as_event",
33 |     "Temporal",
34 |     "TemporalMixin",
35 |     "Cryptographical",
36 |     "CryptographicalMixin",
37 |     "sha256_of_obj",
38 |     # Protocol constants
39 |     "ProtocolType",
40 |     "IDENTIFIABLE",
41 |     "TEMPORAL",
42 |     "EMBEDDABLE",
43 |     "INVOKABLE",
44 |     "CRYPTOGRAPHICAL",
45 |     "PROTOCOL_MIXINS",
46 |     # Factory functions
47 |     "create_protocol_model_class",
48 |     "combine_with_mixins",
49 |     # Registry functions
50 |     "get_mixin_registry",
51 |     "register_mixin",
52 | )
53 | 
--------------------------------------------------------------------------------
/src/pydapter/protocols/auditable.py:
--------------------------------------------------------------------------------
1 | """Auditable protocol mixin."""
2 | 
3 | 
4 | class AuditableMixin:
5 |     """Mixin for audit functionality"""
6 | 
7 |     def mark_updated_by(self, user_id: str):
8 |         """Mark entity as updated by user"""
9 |         self.updated_by = user_id
10 |         self.version += 1
11 |         if hasattr(self,
"update_timestamp"): 10 | self.update_timestamp() 11 | -------------------------------------------------------------------------------- /src/pydapter/protocols/base_model.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict 2 | 3 | 4 | # Export configured BaseModel for tests and direct use 5 | class BasePydapterModel(BaseModel): 6 | """Base model with standard configuration""" 7 | 8 | model_config = ConfigDict( 9 | from_attributes=True, 10 | use_enum_values=True, 11 | extra="forbid", 12 | ) 13 | -------------------------------------------------------------------------------- /src/pydapter/protocols/constants.py: -------------------------------------------------------------------------------- 1 | """Protocol constants for type-safe protocol selection.""" 2 | 3 | from typing import Literal 4 | 5 | # Protocol type literals for type checking 6 | ProtocolType = Literal[ 7 | "identifiable", 8 | "temporal", 9 | "embeddable", 10 | "invokable", 11 | "cryptographical", 12 | "soft_deletable", 13 | "auditable", 14 | ] 15 | 16 | # Protocol constants 17 | IDENTIFIABLE: ProtocolType = "identifiable" 18 | TEMPORAL: ProtocolType = "temporal" 19 | EMBEDDABLE: ProtocolType = "embeddable" 20 | INVOKABLE: ProtocolType = "invokable" 21 | CRYPTOGRAPHICAL: ProtocolType = "cryptographical" 22 | AUDITABLE: ProtocolType = "auditable" 23 | SOFT_DELETABLE: ProtocolType = "soft_deletable" 24 | 25 | # Map protocol names to their corresponding mixin classes 26 | PROTOCOL_MIXINS = { 27 | "identifiable": "IdentifiableMixin", 28 | "temporal": "TemporalMixin", 29 | "embeddable": "EmbeddableMixin", 30 | "invokable": "InvokableMixin", 31 | "cryptographical": "CryptographicalMixin", 32 | "auditable": "AuditableMixin", 33 | "soft_deletable": "SoftDeletableMixin", 34 | } 35 | 36 | # Export all constants 37 | __all__ = [ 38 | "ProtocolType", 39 | "IDENTIFIABLE", 40 | "TEMPORAL", 41 | "EMBEDDABLE", 42 | "INVOKABLE", 43 | "CRYPTOGRAPHICAL", 44 | "AUDITABLE", 45 | "SOFT_DELETABLE", 46 | "PROTOCOL_MIXINS", 47 | ] 48 | -------------------------------------------------------------------------------- /src/pydapter/protocols/cryptographical.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Protocol, Union, runtime_checkable 2 | 3 | from pydantic import JsonValue 4 | 5 | 6 | @runtime_checkable 7 | class Cryptographical(Protocol): 8 | """An object that can be hashed with a cryptographic hash function""" 9 | 10 | content: JsonValue 11 | sha256: str | None = None 12 | 13 | 14 | class CryptographicalMixin: 15 | if TYPE_CHECKING: 16 | content: JsonValue 17 | sha256: str | None 18 | 19 | def hash_content(self) -> None: 20 | if self.content is None: 21 | raise ValueError("Content is not set.") 22 | self.sha256 = sha256_of_obj(self.content) 23 | 24 | 25 | def sha256_of_obj(obj: Union[dict, str, JsonValue]) -> str: 26 | """Deterministic SHA-256 of an arbitrary mapping.""" 27 | import hashlib 28 | 29 | if isinstance(obj, str): 30 | return hashlib.sha256(memoryview(obj.encode())).hexdigest() 31 | 32 | from .utils import sha256_of_dict 33 | 34 | return sha256_of_dict(obj) 35 | -------------------------------------------------------------------------------- /src/pydapter/protocols/embeddable.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable 2 | 3 | from pydantic import BaseModel 4 | 5 | from 
pydapter.fields.types import Embedding 6 | 7 | if TYPE_CHECKING: 8 | pass 9 | 10 | 11 | @runtime_checkable 12 | class Embeddable(Protocol): 13 | content: str | None 14 | embedding: Embedding 15 | 16 | 17 | class EmbeddableMixin: 18 | """Mixin class for embedding functionality.""" 19 | 20 | if TYPE_CHECKING: 21 | content: str | None 22 | embedding: Embedding 23 | 24 | @property 25 | def n_dim(self) -> int: 26 | """Get the number of dimensions of the embedding.""" 27 | return len(self.embedding) 28 | 29 | @staticmethod 30 | def parse_embedding_response( 31 | x: dict | list | tuple | BaseModel, 32 | ) -> Embedding: 33 | """Parse the embedding response from OpenAI or other sources.""" 34 | return parse_embedding_response(x) 35 | 36 | 37 | def parse_embedding_response(x) -> list[float] | Any: 38 | # parse openai response 39 | if ( 40 | isinstance(x, BaseModel) 41 | and hasattr(x, "data") 42 | and len(x.data) > 0 43 | and hasattr(x.data[0], "embedding") 44 | ): 45 | return x.data[0].embedding 46 | 47 | if isinstance(x, (list, tuple)): 48 | if len(x) > 0 and all(isinstance(i, float) for i in x): 49 | return x # type: ignore[return-value] 50 | if len(x) == 1 and isinstance(x[0], (dict, BaseModel)): 51 | return parse_embedding_response(x[0]) 52 | 53 | # parse dict response 54 | if isinstance(x, dict): 55 | # parse openai format response 56 | 57 | if "data" in x: 58 | data = x.get("data") 59 | if data is not None and len(data) > 0 and isinstance(data[0], dict): 60 | return parse_embedding_response(data[0]) 61 | 62 | # parse {"embedding": []} response 63 | if "embedding" in x: 64 | return parse_embedding_response(x["embedding"]) 65 | 66 | return x # type: ignore[return-value] 67 | -------------------------------------------------------------------------------- /src/pydapter/protocols/factory.py: -------------------------------------------------------------------------------- 1 | """Factory functions for creating protocol-compliant models.""" 2 | 3 | from typing import Any, Union 4 | 5 | from pydantic import BaseModel 6 | 7 | from pydapter.protocols.constants import ProtocolType 8 | from pydapter.protocols.registry import get_mixin_registry 9 | 10 | 11 | def create_protocol_model_class( 12 | name: str, 13 | *protocols: Union[ProtocolType, str], 14 | base_model: type[BaseModel] = BaseModel, 15 | **namespace: Any, 16 | ) -> type[BaseModel]: 17 | """Create a model class with both structural fields and behavioral methods. 18 | 19 | This is a convenience function that combines create_protocol_model (for fields) 20 | with the appropriate protocol mixins (for behavior) to create a fully functional 21 | protocol-compliant model class. 
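    Keyword values that are ``FieldTemplate`` instances become model fields;
    any other keyword values are attached to the class as plain attributes.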
22 | 
23 |     Args:
24 |         name: Name for the generated model class
25 |         *protocols: Protocol names to implement (e.g., IDENTIFIABLE, TEMPORAL)
26 |         base_model: Base model class to inherit from (default: BaseModel)
27 |         **namespace: Additional class attributes/methods to include
28 | 
29 |     Returns:
30 |         A new model class with both protocol fields and behaviors
31 | 
32 |     Example:
33 |         ```python
34 |         from pydapter.protocols import create_protocol_model_class, IDENTIFIABLE, TEMPORAL
35 |         from pydapter.fields import FieldTemplate
36 | 
37 |         # Create a model with both fields and behaviors
38 |         User = create_protocol_model_class(
39 |             "User",
40 |             IDENTIFIABLE,
41 |             TEMPORAL,
42 |             username=FieldTemplate(base_type=str),
43 |             email=FieldTemplate(base_type=str)
44 |         )
45 | 
46 |         # Now you can use it
47 |         user = User(username="john", email="john@example.com")
48 |         user.update_timestamp()  # Method from TemporalMixin
49 |         ```
50 |     """
51 |     from pydapter.fields import create_protocol_model
52 | 
53 |     # Extract field templates from namespace
54 |     field_templates = {}
55 |     class_attrs = {}
56 | 
57 |     for key, value in namespace.items():
58 |         # Check if it's a FieldTemplate (avoid circular import)
59 |         if hasattr(value, "create_field") and hasattr(value, "base_type"):
60 |             field_templates[key] = value
61 |         else:
62 |             class_attrs[key] = value
63 | 
64 |     # Create the structural model with fields
65 |     structural_model = create_protocol_model(
66 |         f"_{name}Structure", *protocols, **field_templates
67 |     )
68 | 
69 |     # Collect the mixin classes
70 |     mixins = []
71 |     for protocol in protocols:
72 |         protocol_str = str(protocol).lower()
73 |         if protocol_str in get_mixin_registry():
74 |             mixins.append(get_mixin_registry()[protocol_str])
75 | 
76 |     # Create the final class with mixins
77 |     # Order: structural_model -> mixins -> base_model
78 |     bases = (structural_model, *mixins, base_model)
79 | 
80 |     return type(name, bases, class_attrs)
81 | 
82 | 
83 | def combine_with_mixins(
84 |     model_class: type[BaseModel],
85 |     *protocols: Union[ProtocolType, str],
86 |     name: str | None = None,
87 | ) -> type[BaseModel]:
88 |     """Add protocol mixins to an existing model class.
89 | 
90 |     This is useful when you already have a model with the required fields
91 |     (e.g., from create_protocol_model) and want to add behavioral methods.
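    The mixins are placed after the original class in the method resolution
    order, so fields and methods already defined on the model take precedence.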
92 | 93 | Args: 94 | model_class: The model class to enhance with mixins 95 | *protocols: Protocol names whose mixins to add 96 | name: Optional name for the new class (defaults to original name) 97 | 98 | Returns: 99 | A new model class with the added behavioral mixins 100 | 101 | Example: 102 | ```python 103 | from pydapter.fields import FieldTemplate, create_protocol_model 104 | from pydapter.protocols import combine_with_mixins, IDENTIFIABLE, TEMPORAL 105 | 106 | # First create structure 107 | UserStructure = create_protocol_model( 108 | "UserStructure", 109 | IDENTIFIABLE, 110 | TEMPORAL, 111 | username=FieldTemplate(base_type=str) 112 | ) 113 | 114 | # Then add behaviors 115 | User = combine_with_mixins(UserStructure, IDENTIFIABLE, TEMPORAL) 116 | ``` 117 | """ 118 | # Collect the mixin classes 119 | mixins = [] 120 | for protocol in protocols: 121 | protocol_str = str(protocol).lower() 122 | if protocol_str in get_mixin_registry(): 123 | mixins.append(get_mixin_registry()[protocol_str]) 124 | 125 | # Determine the new class name 126 | class_name = name or model_class.__name__ 127 | 128 | # Create new class with mixins 129 | return type(class_name, (model_class, *mixins), {}) 130 | 131 | 132 | __all__ = [ 133 | "create_protocol_model_class", 134 | "combine_with_mixins", 135 | ] 136 | -------------------------------------------------------------------------------- /src/pydapter/protocols/identifiable.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Protocol, runtime_checkable 2 | from uuid import UUID 3 | 4 | from pydantic import field_serializer 5 | 6 | if TYPE_CHECKING: 7 | pass 8 | 9 | __all__ = ("Identifiable",) 10 | 11 | 12 | @runtime_checkable 13 | class Identifiable(Protocol): 14 | id: UUID 15 | 16 | 17 | class IdentifiableMixin: 18 | """Base class for objects with a unique identifier""" 19 | 20 | if TYPE_CHECKING: 21 | id: UUID 22 | 23 | @field_serializer("id") 24 | def _serialize_ids(self, v: UUID) -> str: 25 | return str(v) 26 | 27 | def __hash__(self) -> int: 28 | """Returns the hash of the object.""" 29 | return hash(self.id) 30 | -------------------------------------------------------------------------------- /src/pydapter/protocols/invokable.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from asyncio.log import logger 3 | from collections.abc import Callable 4 | from datetime import datetime, timezone 5 | from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable 6 | 7 | from pydantic import PrivateAttr 8 | 9 | from pydapter.fields.execution import Execution, ExecutionStatus 10 | 11 | from .utils import validate_model_to_dict 12 | 13 | 14 | @runtime_checkable 15 | class Invokable(Protocol): 16 | """An object that can be invoked with a request""" 17 | 18 | request: dict | None 19 | execution: Execution 20 | _handler: Callable | None 21 | _handler_args: tuple[Any, ...] 22 | _handler_kwargs: dict[str, Any] 23 | 24 | 25 | class InvokableMixin: 26 | """Mixin that allows an object to be invoked with a stored handler""" 27 | 28 | _handler: Callable | None = PrivateAttr(None) 29 | _handler_args: tuple[Any, ...]
= PrivateAttr(()) 30 | _handler_kwargs: dict[str, Any] = PrivateAttr({}) 31 | 32 | if TYPE_CHECKING: 33 | request: dict | None 34 | execution: Execution 35 | 36 | @property 37 | def has_invoked(self) -> bool: 38 | return self.execution.status in [ 39 | ExecutionStatus.COMPLETED, 40 | ExecutionStatus.FAILED, 41 | ] 42 | 43 | async def _invoke(self): 44 | if self._handler is None: 45 | raise ValueError("Event invoke function is not set.") 46 | if asyncio.iscoroutinefunction(self._handler): 47 | return await self._handler(*self._handler_args, **self._handler_kwargs) 48 | return self._handler(*self._handler_args, **self._handler_kwargs) 49 | 50 | async def invoke(self) -> None: 51 | start = asyncio.get_running_loop().time() 52 | response = None 53 | e1 = None 54 | 55 | try: 56 | # Delegate to the configured handler (sync or async) 57 | response = await self._invoke() 58 | 59 | except asyncio.CancelledError as ce: 60 | e1 = ce 61 | logger.warning("invoke() canceled by external request.") 62 | raise 63 | except Exception as ex: 64 | e1 = ex # type: ignore 65 | 66 | finally: 67 | self.execution.duration = asyncio.get_running_loop().time() - start 68 | if response is None and e1 is not None: 69 | self.execution.error = str(e1) 70 | self.execution.status = ExecutionStatus.FAILED 71 | logger.error(f"invoke() failed for event {str(self.id)[:6]}...") # `id` comes from the Identifiable protocol 72 | else: 73 | self.execution.response_obj = response 74 | self.execution.response = validate_model_to_dict(response) 75 | self.execution.status = ExecutionStatus.COMPLETED 76 | self.execution.updated_at = datetime.now(tz=timezone.utc) 77 | -------------------------------------------------------------------------------- /src/pydapter/protocols/registry.py: -------------------------------------------------------------------------------- 1 | from pydapter.protocols.auditable import AuditableMixin 2 | from pydapter.protocols.cryptographical import CryptographicalMixin 3 | from pydapter.protocols.embeddable import EmbeddableMixin 4 | from pydapter.protocols.identifiable import IdentifiableMixin 5 | from pydapter.protocols.invokable import InvokableMixin 6 | from pydapter.protocols.soft_deletable import SoftDeletableMixin 7 | from pydapter.protocols.temporal import TemporalMixin 8 | 9 | # Mapping of protocol names to actual mixin classes 10 | _MIXIN_CLASSES = { 11 | "identifiable": IdentifiableMixin, 12 | "temporal": TemporalMixin, 13 | "embeddable": EmbeddableMixin, 14 | "invokable": InvokableMixin, 15 | "cryptographical": CryptographicalMixin, 16 | "auditable": AuditableMixin, 17 | "soft_deletable": SoftDeletableMixin, 18 | } 19 | 20 | 21 | def register_mixin(protocol_name: str, mixin_class: type) -> None: 22 | """ 23 | Register a new mixin class for a protocol. 24 | 25 | Args: 26 | protocol_name: The name of the protocol (e.g., "identifiable"). 27 | mixin_class: The mixin class to register. 28 | """ 29 | _MIXIN_CLASSES[protocol_name.lower()] = mixin_class 30 | 31 | 32 | def get_mixin_registry() -> dict[str, type]: 33 | """ 34 | Get the registry of mixin classes for protocols. 35 | 36 | Returns: 37 | A dictionary mapping protocol names to their corresponding mixin classes.
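Example:
    A minimal sketch of extending the registry with a custom protocol; the
    ``ArchivableMixin`` below is hypothetical, not part of pydapter, and
    follows the same attribute-setting pattern as SoftDeletableMixin:

    ```python
    class ArchivableMixin:
        def archive(self) -> None:
            self.archived = True

    register_mixin("archivable", ArchivableMixin)
    assert get_mixin_registry()["archivable"] is ArchivableMixin
    ```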
38 | """ 39 | return _MIXIN_CLASSES 40 | 41 | 42 | __all__ = ( 43 | "register_mixin", 44 | "get_mixin_registry", 45 | ) 46 | -------------------------------------------------------------------------------- /src/pydapter/protocols/soft_deletable.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | 3 | 4 | class SoftDeletableMixin: 5 | """Mixin for soft delete functionality""" 6 | 7 | def soft_delete(self): 8 | """Mark entity as deleted""" 9 | self.deleted_at = datetime.now(timezone.utc) 10 | self.is_deleted = True 11 | 12 | def restore(self): 13 | """Restore soft-deleted entity""" 14 | self.deleted_at = None 15 | self.is_deleted = False 16 | -------------------------------------------------------------------------------- /src/pydapter/protocols/temporal.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from typing import TYPE_CHECKING, Protocol, runtime_checkable 3 | 4 | from pydantic import field_serializer 5 | 6 | __all__ = ( 7 | "Temporal", 8 | "TemporalMixin", 9 | ) 10 | 11 | 12 | @runtime_checkable 13 | class Temporal(Protocol): 14 | created_at: datetime 15 | updated_at: datetime 16 | 17 | 18 | class TemporalMixin: 19 | if TYPE_CHECKING: 20 | created_at: datetime 21 | updated_at: datetime 22 | 23 | def update_timestamp(self) -> None: 24 | """Update the last updated timestamp to the current time.""" 25 | self.updated_at = datetime.now(timezone.utc) 26 | 27 | @field_serializer("updated_at", "created_at") 28 | def _serialize_datetime(self, v: datetime) -> str: 29 | return v.isoformat() 30 | -------------------------------------------------------------------------------- /src/pydapter/protocols/types.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic types for protocols - maintained for backwards compatibility. 
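A minimal sketch of constructing a Log entry (field values are illustrative; only ``id`` and ``event_type`` are required):

    log = Log(id="some-uuid-string", event_type="example_event")
    payload = log.model_dump(exclude_none=True)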
3 | """ 4 | 5 | from typing import Any 6 | 7 | from pydantic import BaseModel, ConfigDict 8 | 9 | 10 | class Log(BaseModel): 11 | """Base Log model""" 12 | 13 | model_config = ConfigDict( 14 | extra="forbid", 15 | frozen=True, 16 | validate_assignment=True, 17 | arbitrary_types_allowed=True, 18 | json_schema_extra={ 19 | "example": { 20 | "id": "some-uuid-string", 21 | "event_type": "example_event", 22 | "content": "This is an example log entry.", 23 | "embedding": [0.1, 0.2, 0.3], 24 | "metadata": {"key": "value"}, 25 | "created_at": "2023-10-01T12:00:00Z", 26 | "updated_at": "2023-10-01T12:00:00Z", 27 | "duration": 1.23, 28 | "status": "success", 29 | "error": None, 30 | "sha256": "abc123def456...", 31 | }, 32 | }, 33 | ) 34 | 35 | id: str 36 | event_type: str 37 | content: str | None = None 38 | embedding: list[float] | None = None 39 | metadata: dict[str, Any] | None = None 40 | created_at: str | None = None # ISO format string 41 | updated_at: str | None = None # ISO format string 42 | duration: float | None = None 43 | status: str | None = None 44 | error: str | None = None 45 | sha256: str | None = None 46 | 47 | 48 | __all__ = ("Log",) 49 | -------------------------------------------------------------------------------- /src/pydapter/protocols/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import functools 4 | from collections.abc import Awaitable, Callable 5 | from concurrent.futures import ThreadPoolExecutor 6 | from datetime import datetime 7 | from functools import cache 8 | from typing import Any, Protocol, TypeVar 9 | from uuid import UUID 10 | 11 | from pydantic import BaseModel 12 | 13 | Imp = TypeVar("Imp") 14 | 15 | 16 | class HasLen(Protocol): 17 | def __len__(self) -> int: ... 18 | 19 | 20 | Bin = list[int] 21 | T = TypeVar("T") 22 | 23 | __all__ = ( 24 | "get_bins", 25 | "import_module", 26 | "sha256_of_dict", 27 | "convert_to_datetime", 28 | "validate_uuid", 29 | "validate_model_to_dict", 30 | "is_package_installed", 31 | "is_coroutine_function", 32 | "as_async_fn", 33 | ) 34 | 35 | 36 | def import_module( 37 | package_name: str, 38 | module_name: str | None = None, 39 | import_name: str | list | None = None, 40 | ) -> Imp | list[Imp] | Any: 41 | """Import a module by its path.""" 42 | try: 43 | full_import_path = ( 44 | f"{package_name}.{module_name}" if module_name else package_name 45 | ) 46 | 47 | if import_name: 48 | import_name = ( 49 | [import_name] if not isinstance(import_name, list) else import_name 50 | ) 51 | a = __import__( 52 | full_import_path, 53 | fromlist=import_name, 54 | ) 55 | if len(import_name) == 1: 56 | return getattr(a, import_name[0]) 57 | return [getattr(a, name) for name in import_name] 58 | return __import__(full_import_path) 59 | 60 | except ImportError as e: 61 | error_msg = f"Failed to import module {full_import_path}: {e}" 62 | raise ImportError(error_msg) from e 63 | 64 | 65 | def is_package_installed(package_name: str) -> bool: 66 | from importlib.util import find_spec 67 | 68 | return find_spec(package_name) is not None 69 | 70 | 71 | def get_bins(input_: list[HasLen], /, upper: int) -> list[Bin]: 72 | """Organizes indices of items into bins based on a cumulative length limit. 73 | 74 | Args: 75 | input_ (list[HasLen]): The sized items to be binned. 76 | upper (int): The exclusive upper bound on the cumulative length of each bin. 77 | 78 | Returns: 79 | list[list[int]]: A list of bins, where each bin is a list of indices into the input list.
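Example:
    An illustrative call, binning strings by length (strings satisfy HasLen):

    >>> get_bins(["a", "bb", "ccc", "dddd"], upper=5)
    [[0, 1], [2], [3]]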
80 | """ 81 | current = 0 82 | bins = [] 83 | current_bin = [] 84 | for idx, item in enumerate(input_): 85 | if current + len(item) < upper: 86 | current_bin.append(idx) 87 | current += len(item) 88 | else: 89 | bins.append(current_bin) 90 | current_bin = [idx] 91 | current = len(item) 92 | if current_bin: 93 | bins.append(current_bin) 94 | return bins 95 | 96 | 97 | def sha256_of_dict(obj: dict) -> str: 98 | """Deterministic SHA-256 of an arbitrary mapping.""" 99 | import hashlib 100 | 101 | import orjson 102 | 103 | payload: bytes = orjson.dumps( 104 | obj, 105 | option=( 106 | orjson.OPT_SORT_KEYS # canonical ordering 107 | | orjson.OPT_NON_STR_KEYS # allow int / enum keys if you need them 108 | ), 109 | ) 110 | return hashlib.sha256(memoryview(payload)).hexdigest() 111 | 112 | 113 | def convert_to_datetime(v) -> datetime: 114 | if isinstance(v, datetime): 115 | return v 116 | if isinstance(v, str): 117 | with contextlib.suppress(ValueError): 118 | return datetime.fromisoformat(v) 119 | 120 | error_msg = "Input value for field should be a `datetime.datetime` object or `isoformat` string" 121 | raise ValueError(error_msg) 122 | 123 | 124 | def validate_uuid(v: str | UUID) -> UUID: 125 | if isinstance(v, UUID): 126 | return v 127 | try: 128 | return UUID(str(v)) 129 | except Exception as e: 130 | error_msg = "Input value for field should be a `uuid.UUID` object or a valid `uuid` representation" 131 | raise ValueError(error_msg) from e 132 | 133 | 134 | def validate_model_to_dict(v): 135 | """Serialize a Pydantic model (or an existing dict/None) to a dictionary.""" 136 | 137 | if isinstance(v, BaseModel): 138 | return v.model_dump() 139 | if v is None: 140 | return {} 141 | if isinstance(v, dict): 142 | return v 143 | 144 | error_msg = "Input value for field should be a `pydantic.BaseModel` object or a `dict`" 145 | raise ValueError(error_msg) 146 | 147 | 148 | @cache 149 | def is_coroutine_function(fn, /) -> bool: 150 | """Check if a function is a coroutine function.""" 151 | return asyncio.iscoroutinefunction(fn) 152 | 153 | 154 | def force_async(fn: Callable[..., T], /) -> Callable[..., Awaitable[T]]: 155 | """Force a synchronous function to be awaitable by running it in a thread pool.""" 156 | pool = ThreadPoolExecutor() 157 | 158 | @functools.wraps(fn) 159 | def wrapper(*args, **kwargs): 160 | future = pool.submit(fn, *args, **kwargs) 161 | return asyncio.wrap_future(future) # Make it awaitable 162 | 163 | return wrapper 164 | 165 | 166 | @cache 167 | def as_async_fn(fn, /): 168 | """Return an awaitable version of a function, wrapping sync functions if needed.""" 169 | if is_coroutine_function(fn): 170 | return fn 171 | return force_async(fn) 172 | -------------------------------------------------------------------------------- /src/pydapter/py.typed: -------------------------------------------------------------------------------- 1 | pydapter, by HaiyangLi 2 | -------------------------------------------------------------------------------- /src/pydapter/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # pydapter.utils package 2 | -------------------------------------------------------------------------------- /src/pydapter/utils/dependencies.py: -------------------------------------------------------------------------------- 1 | from importlib.util import find_spec 2 | 3 | 4 | def check_dependency(package_name: str, feature_name: str) -> None: 5 | """ 6 | Check if an optional dependency is installed.
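For example, an optional adapter can guard its imports with a call such as
``check_dependency("neo4j", "neo4j")`` before loading the driver (the
package and feature names here are illustrative).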
7 | 8 | Args: 9 | package_name: The name of the package to check for 10 | feature_name: The name of the feature requiring this package 11 | 12 | Raises: 13 | ImportError: If the package is not installed 14 | """ 15 | if find_spec(package_name) is None: 16 | raise ImportError( 17 | f"The '{feature_name}' feature requires the '{package_name}' package. " 18 | f"Install it with: pip install pydapter[{feature_name}]" 19 | ) 20 | 21 | 22 | def check_migrations_dependencies() -> None: 23 | """Check if core migrations dependencies are installed.""" 24 | pass # Core migrations only depend on pydantic, which is already a dependency 25 | 26 | 27 | def check_migrations_sql_dependencies() -> None: 28 | """Check if SQL migrations dependencies are installed.""" 29 | check_dependency("sqlalchemy", "migrations-sql") 30 | check_dependency("alembic", "migrations-sql") 31 | -------------------------------------------------------------------------------- /tests/test_adapters/test_async_adapters.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pydapter.extras.async_mongo_ import AsyncMongoAdapter 4 | from pydapter.extras.async_postgres_ import AsyncPostgresAdapter 5 | from pydapter.extras.async_qdrant_ import AsyncQdrantAdapter 6 | 7 | # Define the async adapters to test 8 | ASYNC_KEYS = { 9 | "async_pg": AsyncPostgresAdapter, 10 | "async_mongo": AsyncMongoAdapter, 11 | "async_qdrant": AsyncQdrantAdapter, 12 | } 13 | 14 | 15 | 16 | # Plain helper, not a test; called directly from test_async_roundtrip. 17 | def skip_if_pg(adapter_key): 18 | """Skip PostgreSQL tests due to SQLAlchemy async inspection issues.""" 19 | if adapter_key == "async_pg": 20 | # We've installed greenlet, but there are still issues with SQLAlchemy's async support 21 | # The error is: "Inspection on an AsyncConnection is currently not supported" 22 | # This would require a more complex fix to the async_sql_ adapter 23 | pytest.skip("PostgreSQL async tests require additional SQLAlchemy fixes") 24 | 25 | 26 | @pytest.mark.asyncio 27 | @pytest.mark.parametrize("adapter_key", list(ASYNC_KEYS)) 28 | async def test_async_roundtrip( 29 | async_sample, adapter_key, pg_url, mongo_url, qdrant_url 30 | ): 31 | """Test roundtrip serialization/deserialization for async adapters.""" 32 | skip_if_pg(adapter_key) 33 | adapter_cls = ASYNC_KEYS[adapter_key] 34 | async_sample.__class__.register_async_adapter(adapter_cls) 35 | 36 | # Configure kwargs based on adapter type 37 | kwargs_out = {} 38 | if adapter_key == "async_pg": 39 | # Convert the URL to use asyncpg instead of psycopg2 40 | async_pg_url = pg_url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") 41 | kwargs_out = {"dsn": async_pg_url, "table": "trades"} 42 | elif adapter_key == "async_mongo": 43 | kwargs_out = {"url": mongo_url, "db": "testdb", "collection": "test_collection"} 44 | elif adapter_key == "async_qdrant": 45 | kwargs_out = {"collection": "test", "url": qdrant_url} 46 | 47 | # Adapt to the target format 48 | await async_sample.adapt_to_async(obj_key=adapter_key, **kwargs_out) 49 | 50 | # Configure kwargs for retrieving the data 51 | kwargs_in = kwargs_out.copy() 52 | if adapter_key == "async_pg": 53 | # Convert the URL to use asyncpg instead of psycopg2 54 | async_pg_url = pg_url.replace("postgresql+psycopg2://", "postgresql+asyncpg://") 55 | kwargs_in = { 56 | "dsn": async_pg_url, 57 | "table": "trades", 58 | "selectors": {"id": async_sample.id}, 59 | } 60 | elif adapter_key == "async_mongo": 61 |
kwargs_in = { 62 | "url": mongo_url, 63 | "db": "testdb", 64 | "collection": "test_collection", 65 | "filter": {"id": async_sample.id}, 66 | } 67 | elif adapter_key == "async_qdrant": 68 | kwargs_in = { 69 | "collection": "test", 70 | "query_vector": async_sample.embedding, 71 | "url": qdrant_url, 72 | "top_k": 1, 73 | } 74 | 75 | # Retrieve the data and verify it matches the original 76 | fetched = await async_sample.__class__.adapt_from_async( 77 | kwargs_in, obj_key=adapter_key, many=False 78 | ) 79 | 80 | assert fetched == async_sample 81 | -------------------------------------------------------------------------------- /tests/test_adapters/test_core_adapters.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.parametrize("adapter_key", ["json", "toml", "csv"]) 5 | def test_text_roundtrip(sample, adapter_key): 6 | dumped = sample.adapt_to(obj_key=adapter_key) 7 | # For CSV adapter, we need to specify many=False to get a single object 8 | if adapter_key == "csv": 9 | restored = sample.__class__.adapt_from(dumped, obj_key=adapter_key, many=False) 10 | else: 11 | restored = sample.__class__.adapt_from(dumped, obj_key=adapter_key) 12 | assert restored == sample 13 | -------------------------------------------------------------------------------- /tests/test_adapters/test_integration_postgres.py: -------------------------------------------------------------------------------- 1 | """ 2 | Integration tests for PostgreSQL adapter using TestContainers. 3 | """ 4 | 5 | import pytest 6 | import sqlalchemy as sa 7 | 8 | from pydapter.exceptions import ConnectionError 9 | from pydapter.extras.postgres_ import PostgresAdapter 10 | 11 | 12 | def is_docker_available(): 13 | """Check if Docker is available.""" 14 | import subprocess 15 | 16 | try: 17 | subprocess.run(["docker", "info"], check=True, capture_output=True) 18 | return True 19 | except (subprocess.SubprocessError, FileNotFoundError): 20 | return False 21 | 22 | 23 | # Skip tests if Docker is not available 24 | pytestmark = pytest.mark.skipif( 25 | not is_docker_available(), reason="Docker is not available" 26 | ) 27 | 28 | 29 | @pytest.fixture 30 | def postgres_table(pg_url): 31 | """Create a test table in PostgreSQL.""" 32 | engine = sa.create_engine(pg_url) 33 | with engine.begin() as conn: 34 | conn.execute( 35 | sa.text( 36 | """ 37 | CREATE TABLE IF NOT EXISTS test_table ( 38 | id INTEGER PRIMARY KEY, 39 | name TEXT, 40 | value FLOAT 41 | ) 42 | """ 43 | ) 44 | ) 45 | 46 | yield 47 | 48 | # Cleanup 49 | with engine.begin() as conn: 50 | conn.execute(sa.text("DROP TABLE IF EXISTS test_table")) 51 | 52 | 53 | class TestPostgresIntegration: 54 | """Integration tests for PostgreSQL adapter.""" 55 | 56 | def test_postgres_single_record(self, pg_url, sync_model_factory, postgres_table): 57 | """Test PostgreSQL adapter with a single record.""" 58 | # Create test instance 59 | test_model = sync_model_factory(id=42, name="test_postgres", value=12.34) 60 | 61 | # Register adapter 62 | test_model.__class__.register_adapter(PostgresAdapter) 63 | 64 | # Store in database 65 | test_model.adapt_to(obj_key="postgres", engine_url=pg_url, table="test_table") 66 | 67 | # Retrieve from database 68 | retrieved = test_model.__class__.adapt_from( 69 | {"engine_url": pg_url, "table": "test_table", "selectors": {"id": 42}}, 70 | obj_key="postgres", 71 | many=False, 72 | ) 73 | 74 | # Verify data integrity 75 | assert retrieved.id == test_model.id 76 | assert retrieved.name == 
test_model.name 77 | assert retrieved.value == test_model.value 78 | 79 | def test_postgres_batch_operations( 80 | self, pg_url, sync_model_factory, postgres_table 81 | ): 82 | """Test batch operations with PostgreSQL.""" 83 | model_cls = sync_model_factory(id=1, name="test", value=1.0).__class__ 84 | 85 | # Register adapter 86 | model_cls.register_adapter(PostgresAdapter) 87 | 88 | # Create multiple test instances 89 | models = [ 90 | model_cls(id=i, name=f"batch_{i}", value=i * 1.5) for i in range(1, 11) 91 | ] 92 | 93 | # Store batch in database 94 | PostgresAdapter.to_obj(models, engine_url=pg_url, table="test_table", many=True) 95 | 96 | # Retrieve all from database 97 | retrieved = model_cls.adapt_from( 98 | {"engine_url": pg_url, "table": "test_table"}, obj_key="postgres", many=True 99 | ) 100 | 101 | # Verify all records were stored and retrieved correctly 102 | assert len(retrieved) == 10 103 | 104 | # Sort by ID for consistent comparison 105 | retrieved_sorted = sorted(retrieved, key=lambda m: m.id) 106 | for i, model in enumerate(retrieved_sorted, 1): 107 | assert model.id == i 108 | assert model.name == f"batch_{i}" 109 | assert model.value == i * 1.5 110 | 111 | def test_postgres_connection_error(self, sync_model_factory): 112 | """Test handling of PostgreSQL connection errors.""" 113 | test_model = sync_model_factory(id=42, name="test_postgres", value=12.34) 114 | 115 | # Register adapter 116 | test_model.__class__.register_adapter(PostgresAdapter) 117 | 118 | # Test with invalid connection string 119 | with pytest.raises(ConnectionError): 120 | test_model.adapt_to( 121 | obj_key="postgres", 122 | engine_url="postgresql://invalid:invalid@localhost:5432/nonexistent", 123 | table="test_table", 124 | ) 125 | 126 | def test_postgres_update_record(self, pg_url, sync_model_factory, postgres_table): 127 | """Test updating an existing record in PostgreSQL.""" 128 | # Create test instance 129 | test_model = sync_model_factory(id=99, name="original", value=100.0) 130 | 131 | # Register adapter 132 | test_model.__class__.register_adapter(PostgresAdapter) 133 | 134 | # Store in database 135 | test_model.adapt_to(obj_key="postgres", engine_url=pg_url, table="test_table") 136 | 137 | # Create updated model with same ID 138 | updated_model = sync_model_factory(id=99, name="updated", value=200.0) 139 | 140 | # Register adapter for updated model 141 | updated_model.__class__.register_adapter(PostgresAdapter) 142 | 143 | # Update in database 144 | updated_model.adapt_to( 145 | obj_key="postgres", engine_url=pg_url, table="test_table" 146 | ) 147 | 148 | # Retrieve from database 149 | retrieved = test_model.__class__.adapt_from( 150 | {"engine_url": pg_url, "table": "test_table", "selectors": {"id": 99}}, 151 | obj_key="postgres", 152 | many=False, 153 | ) 154 | 155 | # Verify data was updated 156 | assert retrieved.id == 99 157 | assert retrieved.name == "updated" 158 | assert retrieved.value == 200.0 159 | -------------------------------------------------------------------------------- /tests/test_benchmark/test_bench_json.py: -------------------------------------------------------------------------------- 1 | from pydapter.adapters import JsonAdapter 2 | 3 | 4 | def test_json_perf(benchmark, sample): 5 | """Benchmark the performance of JsonAdapter.to_obj.""" 6 | benchmark(JsonAdapter.to_obj, sample) 7 | -------------------------------------------------------------------------------- /tests/test_dependencies.py: -------------------------------------------------------------------------------- 
1 | import importlib 2 | from importlib.util import find_spec 3 | 4 | import pytest 5 | 6 | from pydapter.utils.dependencies import ( 7 | check_dependency, 8 | check_migrations_dependencies, 9 | check_migrations_sql_dependencies, 10 | ) 11 | 12 | 13 | def test_check_dependency_installed(): 14 | """Test check_dependency with an installed package.""" 15 | # This should not raise an exception 16 | check_dependency("importlib", "test") 17 | 18 | 19 | def test_check_dependency_not_installed(): 20 | """Test check_dependency with a non-existent package.""" 21 | # This should raise an ImportError 22 | with pytest.raises(ImportError) as excinfo: 23 | check_dependency("non_existent_package_12345", "test") 24 | 25 | # Check that the error message contains the expected text 26 | assert ( 27 | "The 'test' feature requires the 'non_existent_package_12345' package" 28 | in str(excinfo.value) 29 | ) 30 | assert "pip install pydapter[test]" in str(excinfo.value) 31 | 32 | 33 | def test_migrations_dependencies(): 34 | """Test check_migrations_dependencies.""" 35 | # This should not raise an exception as migrations core has no additional dependencies 36 | check_migrations_dependencies() 37 | 38 | 39 | def test_migrations_sql_dependencies(): 40 | """Test check_migrations_sql_dependencies.""" 41 | # If sqlalchemy and alembic are installed, this should not raise an exception 42 | if find_spec("sqlalchemy") is not None and find_spec("alembic") is not None: 43 | check_migrations_sql_dependencies() 44 | else: 45 | # If either package is not installed, this should raise an ImportError 46 | with pytest.raises(ImportError): 47 | check_migrations_sql_dependencies() 48 | 49 | 50 | def test_lazy_import_protocols(): 51 | """Test lazy import of protocols.""" 52 | # Try importing a protocol 53 | try: 54 | importlib.import_module("pydapter.protocols") 55 | # If we get here, the import succeeded 56 | assert True 57 | except ImportError: 58 | # If typing_extensions is not installed, this should fail 59 | assert find_spec("typing_extensions") is None 60 | 61 | 62 | def test_lazy_import_migrations(): 63 | """Test lazy import of migrations.""" 64 | # Try importing a migration class 65 | try: 66 | importlib.import_module("pydapter.migrations") 67 | # If we get here, the import succeeded 68 | assert True 69 | except ImportError: 70 | # This should not fail as migrations core has no additional dependencies 71 | assert False, "Migration import failed unexpectedly" 72 | 73 | 74 | def test_lazy_import_migrations_sql(): 75 | """Test lazy import of SQL migrations.""" 76 | # Try importing a SQL migration class 77 | try: 78 | importlib.import_module("pydapter.migrations.sql") 79 | # If we get here, the import succeeded 80 | assert True 81 | except ImportError: 82 | # If sqlalchemy or alembic is not installed, this should fail 83 | assert find_spec("sqlalchemy") is None or find_spec("alembic") is None 84 | -------------------------------------------------------------------------------- /tests/test_model_adapters/test_model_adapter_config.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pydantic import ValidationError 3 | 4 | from pydapter.model_adapters.config import PostgresAdapterConfig, VectorIndexConfig 5 | 6 | 7 | def test_vector_index_config_defaults(): 8 | """Test VectorIndexConfig default values.""" 9 | config = VectorIndexConfig() 10 | 11 | assert config.index_type == "hnsw" 12 | assert isinstance(config.params, dict) 13 | assert len(config.params) == 0 14 | assert 
config.m == 16 15 | assert config.ef_construction == 64 16 | assert config.lists == 100 17 | assert config.probes == 10 18 | 19 | 20 | def test_vector_index_config_custom_values(): 21 | """Test VectorIndexConfig with custom values.""" 22 | config = VectorIndexConfig( 23 | index_type="ivfflat", 24 | m=32, 25 | ef_construction=128, 26 | lists=200, 27 | probes=20, 28 | ) 29 | 30 | assert config.index_type == "ivfflat" 31 | assert config.m == 32 32 | assert config.ef_construction == 128 33 | assert config.lists == 200 34 | assert config.probes == 20 35 | 36 | 37 | def test_vector_index_config_params(): 38 | """Test VectorIndexConfig with custom params.""" 39 | config = VectorIndexConfig( 40 | index_type="hnsw", 41 | params={"m": 24, "ef_construction": 96}, 42 | ) 43 | 44 | assert config.index_type == "hnsw" 45 | assert config.params == {"m": 24, "ef_construction": 96} 46 | 47 | # get_params should return the custom params 48 | params = config.get_params() 49 | assert params == {"m": 24, "ef_construction": 96} 50 | 51 | 52 | def test_vector_index_config_get_params(): 53 | """Test VectorIndexConfig.get_params method.""" 54 | # Test HNSW params 55 | hnsw_config = VectorIndexConfig(index_type="hnsw", m=32, ef_construction=128) 56 | hnsw_params = hnsw_config.get_params() 57 | 58 | assert hnsw_params == {"m": 32, "ef_construction": 128} 59 | 60 | # Test IVFFlat params 61 | ivf_config = VectorIndexConfig(index_type="ivfflat", lists=200) 62 | ivf_params = ivf_config.get_params() 63 | 64 | assert ivf_params == {"lists": 200} 65 | 66 | # Test exact params (should be empty) 67 | exact_config = VectorIndexConfig(index_type="exact") 68 | exact_params = exact_config.get_params() 69 | 70 | assert exact_params == {} 71 | 72 | 73 | def test_vector_index_config_validation(): 74 | """Test VectorIndexConfig validation.""" 75 | # Test valid index types 76 | VectorIndexConfig(index_type="hnsw") 77 | VectorIndexConfig(index_type="ivfflat") 78 | VectorIndexConfig(index_type="exact") 79 | 80 | # Test invalid index type 81 | with pytest.raises(ValidationError): 82 | VectorIndexConfig(index_type="invalid") 83 | 84 | 85 | def test_postgres_adapter_config_defaults(): 86 | """Test PostgresAdapterConfig default values.""" 87 | config = PostgresAdapterConfig() 88 | 89 | assert config.db_schema == "public" 90 | assert config.batch_size == 1000 91 | assert isinstance(config.vector_index_config, VectorIndexConfig) 92 | assert config.validate_vector_dimensions is True 93 | 94 | 95 | def test_postgres_adapter_config_custom_values(): 96 | """Test PostgresAdapterConfig with custom values.""" 97 | config = PostgresAdapterConfig( 98 | db_schema="custom_schema", 99 | batch_size=500, 100 | validate_vector_dimensions=False, 101 | vector_index_config=VectorIndexConfig( 102 | index_type="ivfflat", 103 | lists=200, 104 | ), 105 | ) 106 | 107 | assert config.db_schema == "custom_schema" 108 | assert config.batch_size == 500 109 | assert config.validate_vector_dimensions is False 110 | assert config.vector_index_config.index_type == "ivfflat" 111 | assert config.vector_index_config.lists == 200 112 | 113 | 114 | def test_postgres_adapter_config_validation(): 115 | """Test PostgresAdapterConfig validation.""" 116 | # Test valid batch size 117 | PostgresAdapterConfig(batch_size=1) 118 | PostgresAdapterConfig(batch_size=10000) 119 | 120 | # Test invalid batch size (must be positive) 121 | with pytest.raises(ValidationError): 122 | PostgresAdapterConfig(batch_size=0) 123 | 124 | with pytest.raises(ValidationError): 125 | 
PostgresAdapterConfig(batch_size=-1) 126 | -------------------------------------------------------------------------------- /tests/test_model_adapters/test_pg_vector_model_adapter_additional.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | from pydantic import BaseModel, Field 5 | 6 | # Try to import pgvector, skip tests if not available 7 | try: 8 | from pgvector.sqlalchemy import Vector 9 | from sqlalchemy import Column, Integer, String 10 | 11 | from pydapter.model_adapters.pg_vector_model import PGVectorModelAdapter 12 | from pydapter.model_adapters.sql_model import create_base 13 | 14 | VECTOR_AVAILABLE = True 15 | except ImportError: 16 | VECTOR_AVAILABLE = False 17 | 18 | # Skip all tests in this module if pgvector is not available 19 | pytestmark = pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 20 | 21 | 22 | class EmbeddingSchema(BaseModel): 23 | id: int | None = None 24 | text: str 25 | embedding: list[float] = Field(..., json_schema_extra={"vector_dim": 1536}) 26 | 27 | 28 | class RelatedItem(BaseModel): 29 | id: int | None = None 30 | name: str 31 | 32 | 33 | class EmbeddingWithRelationship(BaseModel): 34 | id: int | None = None 35 | text: str 36 | embedding: list[float] = Field(..., json_schema_extra={"vector_dim": 1536}) 37 | related_items: list[dict[str, Any]] = Field( 38 | default_factory=list, 39 | json_schema_extra={ 40 | "relationship": { 41 | "type": "one_to_many", 42 | "model": "RelatedItem", 43 | "back_populates": "embedding", 44 | } 45 | }, 46 | ) 47 | 48 | 49 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 50 | def test_sql_model_to_pydantic_simple(): 51 | """Test conversion of SQLAlchemy model to Pydantic model.""" 52 | # Create a base class for our models 53 | Base = create_base() 54 | 55 | # Define a simple model with a vector field 56 | class EmbeddingSQL(Base): 57 | __tablename__ = "embeddings" 58 | 59 | id = Column(Integer, primary_key=True) 60 | text = Column(String) 61 | embedding = Column(Vector(1536)) 62 | 63 | # Convert back to Pydantic 64 | EmbSchemaRT = PGVectorModelAdapter.sql_model_to_pydantic(EmbeddingSQL) 65 | 66 | # Check that the vector field is correctly mapped 67 | field = EmbSchemaRT.model_fields["embedding"] 68 | assert field.json_schema_extra and field.json_schema_extra["vector_dim"] == 1536 69 | 70 | # Check model config 71 | assert EmbSchemaRT.model_config["from_attributes"] is True 72 | assert EmbSchemaRT.model_config["orm_mode"] is True 73 | 74 | 75 | 76 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 77 | def test_validate_vector_dimensions_with_none_expected_dim(): 78 | """Test vector dimension validation with None expected dimension.""" 79 | # Test with None expected dimension (should pass any dimension) 80 | vector = [0.1] * 100 81 | result = PGVectorModelAdapter.validate_vector_dimensions(vector, None) 82 | assert result is vector 83 | 84 | # Test with a different dimension 85 | vector = [0.1] * 768 86 | result = PGVectorModelAdapter.validate_vector_dimensions(vector, None) 87 | assert result is vector 88 | 89 | 90 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 91 | def test_create_index_with_default_params(): 92 | """Test create_index with default parameters.""" 93 | EmbSQL = PGVectorModelAdapter.pydantic_model_to_sql(EmbeddingSchema) 94 | 95 | # Test HNSW index with default params
96 | hnsw_index = PGVectorModelAdapter.create_index( 97 | EmbSQL, "embedding", index_type="hnsw" 98 | ) 99 | 100 | assert hnsw_index.name == "idx_embedding_hnsw" 101 | assert hnsw_index.columns[0].name == "embedding" 102 | assert hnsw_index.kwargs["postgresql_using"] == "hnsw" 103 | assert hnsw_index.kwargs["postgresql_with"] == {} 104 | 105 | # Test IVFFlat index with default params 106 | ivf_index = PGVectorModelAdapter.create_index( 107 | EmbSQL, "embedding", index_type="ivfflat" 108 | ) 109 | 110 | assert ivf_index.name == "idx_embedding_ivfflat" 111 | assert ivf_index.columns[0].name == "embedding" 112 | assert ivf_index.kwargs["postgresql_using"] == "ivfflat" 113 | assert ivf_index.kwargs["postgresql_with"] == {} 114 | 115 | 116 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 117 | def test_batch_insert_with_small_batch(mocker): 118 | """Test batch_insert with a small batch.""" 119 | EmbSQL = PGVectorModelAdapter.pydantic_model_to_sql(EmbeddingSchema) 120 | 121 | # Mock session 122 | mock_session = mocker.Mock() 123 | 124 | # Create test data - 5 items 125 | items = [{"text": f"Item {i}", "embedding": [float(i)] * 1536} for i in range(1, 6)] 126 | 127 | # Call batch_insert with default batch_size (1000) 128 | PGVectorModelAdapter.batch_insert(mock_session, EmbSQL, items) 129 | 130 | # Verify add_all was called once (5 items < 1000 batch size = 1 batch) 131 | assert mock_session.add_all.call_count == 1 132 | 133 | # Verify flush was called once 134 | assert mock_session.flush.call_count == 1 135 | 136 | # Verify commit was called once 137 | assert mock_session.commit.call_count == 1 138 | -------------------------------------------------------------------------------- /tests/test_model_adapters/test_sql_vector_model_adapter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pydantic import BaseModel, Field 3 | from sqlalchemy import inspect 4 | 5 | # Try to import pgvector, skip tests if not available 6 | try: 7 | from pgvector.sqlalchemy import Vector 8 | 9 | from pydapter.model_adapters.sql_vector_model import SQLVectorModelAdapter 10 | 11 | VECTOR_AVAILABLE = True 12 | except ImportError: 13 | VECTOR_AVAILABLE = False 14 | 15 | # Skip all tests in this module if pgvector is not available 16 | pytestmark = pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 17 | 18 | 19 | # ---------- Sample Pydantic models with vector fields ----------------------------------- 20 | class EmbeddingSchema(BaseModel): 21 | id: int | None = None 22 | text: str 23 | embedding: list[float] = Field(..., json_schema_extra={"vector_dim": 1536}) 24 | 25 | 26 | class OptionalEmbeddingSchema(BaseModel): 27 | id: int | None = None 28 | text: str 29 | embedding: list[float] | None = Field(None, json_schema_extra={"vector_dim": 768}) 30 | 31 | 32 | # ---------- Tests for SQLVectorModelAdapter --------------------------------------------- 33 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 34 | def test_vector_column_mapping(): 35 | """Test conversion of Pydantic model with vector field to SQLAlchemy model""" 36 | EmbSQL = SQLVectorModelAdapter.pydantic_model_to_sql(EmbeddingSchema) 37 | mapper = inspect(EmbSQL) 38 | emb_col = mapper.columns["embedding"] 39 | 40 | assert isinstance(emb_col.type, Vector) 41 | assert emb_col.type.dim == 1536 42 | assert emb_col.nullable is False 43 | 44 | 45 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 46 | def 
test_optional_vector_column_mapping(): 47 | """Test conversion of Pydantic model with optional vector field""" 48 | EmbSQL = SQLVectorModelAdapter.pydantic_model_to_sql(OptionalEmbeddingSchema) 49 | mapper = inspect(EmbSQL) 50 | emb_col = mapper.columns["embedding"] 51 | 52 | assert isinstance(emb_col.type, Vector) 53 | assert emb_col.type.dim == 768 54 | assert emb_col.nullable is True 55 | 56 | 57 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 58 | def test_vector_round_trip_metadata(): 59 | """Test that vector dimension metadata is preserved in round-trip conversion""" 60 | EmbSQL = SQLVectorModelAdapter.pydantic_model_to_sql(EmbeddingSchema) 61 | EmbSchemaRT = SQLVectorModelAdapter.sql_model_to_pydantic(EmbSQL) 62 | 63 | field = EmbSchemaRT.model_fields["embedding"] 64 | # extra metadata about dimension should survive 65 | assert field.json_schema_extra and field.json_schema_extra["vector_dim"] == 1536 66 | 67 | 68 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 69 | def test_vector_without_dimension(): 70 | """Test handling of vector fields without explicit dimension""" 71 | 72 | class SimpleVectorSchema(BaseModel): 73 | id: int | None = None 74 | embedding: list[float] 75 | 76 | EmbSQL = SQLVectorModelAdapter.pydantic_model_to_sql(SimpleVectorSchema) 77 | mapper = inspect(EmbSQL) 78 | emb_col = mapper.columns["embedding"] 79 | 80 | assert isinstance(emb_col.type, Vector) 81 | assert not hasattr(emb_col.type, "dim") or emb_col.type.dim is None 82 | 83 | 84 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 85 | def test_mixed_model_conversion(): 86 | """Test model with both vector and scalar fields""" 87 | 88 | class MixedSchema(BaseModel): 89 | id: int | None = None 90 | name: str 91 | description: str | None = None 92 | embedding: list[float] = Field(..., json_schema_extra={"vector_dim": 384}) 93 | 94 | MixedSQL = SQLVectorModelAdapter.pydantic_model_to_sql(MixedSchema) 95 | mapper = inspect(MixedSQL) 96 | cols = {c.key: c for c in mapper.columns} 97 | 98 | # Check vector field 99 | assert isinstance(cols["embedding"].type, Vector) 100 | assert cols["embedding"].type.dim == 384 101 | 102 | # Check scalar fields 103 | assert cols["id"].primary_key # Check it's a primary key instead of type 104 | assert cols["name"].nullable is False # Check it's not nullable 105 | assert cols["description"].nullable is True 106 | 107 | # Round-trip 108 | MixedSchemaRT = SQLVectorModelAdapter.sql_model_to_pydantic(MixedSQL) 109 | fields = MixedSchemaRT.model_fields 110 | 111 | assert ( 112 | fields["embedding"].json_schema_extra 113 | and fields["embedding"].json_schema_extra["vector_dim"] == 384 114 | ) 115 | assert fields["name"].is_required() 116 | assert not fields["description"].is_required() 117 | 118 | 119 | @pytest.mark.skipif(not VECTOR_AVAILABLE, reason="pgvector not installed") 120 | def test_inheritance_from_base_adapter(): 121 | """Test that SQLVectorModelAdapter inherits and extends SQLModelAdapter functionality""" 122 | 123 | # Should handle regular scalar types just like the base adapter 124 | class UserSchema(BaseModel): 125 | id: int | None = None 126 | name: str 127 | email: str | None = None 128 | 129 | UserSQL = SQLVectorModelAdapter.pydantic_model_to_sql(UserSchema) 130 | mapper = inspect(UserSQL) 131 | cols = {c.key: c for c in mapper.columns} 132 | 133 | assert cols["id"].primary_key # Check it's a primary key 134 | assert cols["name"].nullable is False 135 | assert cols["email"].nullable is 
True 136 | -------------------------------------------------------------------------------- /tests/test_model_adapters/test_type_registry.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Integer, String 2 | 3 | from pydapter.model_adapters.type_registry import TypeRegistry 4 | 5 | 6 | def test_register_and_get_sql_type(): 7 | """Test registering and retrieving SQL types.""" 8 | # Clear existing registrations for this test 9 | original_py_to_sql = TypeRegistry._PY_TO_SQL.copy() 10 | original_sql_to_py = TypeRegistry._SQL_TO_PY.copy() 11 | 12 | try: 13 | TypeRegistry._PY_TO_SQL = {} 14 | TypeRegistry._SQL_TO_PY = {} 15 | 16 | # Register a type mapping 17 | TypeRegistry.register( 18 | python_type=int, 19 | sql_type_factory=lambda: Integer(), 20 | ) 21 | 22 | # Get the SQL type 23 | sql_type_factory = TypeRegistry.get_sql_type(int) 24 | assert sql_type_factory is not None 25 | assert isinstance(sql_type_factory(), Integer) 26 | 27 | # Get the Python type 28 | py_type = TypeRegistry.get_python_type(Integer()) 29 | assert py_type is int 30 | finally: 31 | # Restore original registrations 32 | TypeRegistry._PY_TO_SQL = original_py_to_sql 33 | TypeRegistry._SQL_TO_PY = original_sql_to_py 34 | 35 | 36 | def test_register_with_converters(): 37 | """Test registering type mappings with converters.""" 38 | # Clear existing registrations for this test 39 | original_py_to_sql = TypeRegistry._PY_TO_SQL.copy() 40 | original_sql_to_py = TypeRegistry._SQL_TO_PY.copy() 41 | original_py_to_sql_converters = TypeRegistry._PY_TO_SQL_CONVERTERS.copy() 42 | original_sql_to_py_converters = TypeRegistry._SQL_TO_PY_CONVERTERS.copy() 43 | 44 | try: 45 | TypeRegistry._PY_TO_SQL = {} 46 | TypeRegistry._SQL_TO_PY = {} 47 | TypeRegistry._PY_TO_SQL_CONVERTERS = {} 48 | TypeRegistry._SQL_TO_PY_CONVERTERS = {} 49 | 50 | # Register a type mapping with converters 51 | TypeRegistry.register( 52 | python_type=bool, 53 | sql_type_factory=lambda: String(1), 54 | python_to_sql=lambda x: "Y" if x else "N", 55 | sql_to_python=lambda x: x == "Y", 56 | ) 57 | 58 | # Convert Python to SQL 59 | sql_value = TypeRegistry.convert_to_sql(True, bool) 60 | assert sql_value == "Y" 61 | 62 | # Convert SQL to Python 63 | py_value = TypeRegistry.convert_to_python("Y", String(1)) 64 | assert py_value is True 65 | finally: 66 | # Restore original registrations 67 | TypeRegistry._PY_TO_SQL = original_py_to_sql 68 | TypeRegistry._SQL_TO_PY = original_sql_to_py 69 | TypeRegistry._PY_TO_SQL_CONVERTERS = original_py_to_sql_converters 70 | TypeRegistry._SQL_TO_PY_CONVERTERS = original_sql_to_py_converters 71 | 72 | 73 | def test_get_sql_type_inheritance(): 74 | """Test getting SQL type for a subclass.""" 75 | # Clear existing registrations for this test 76 | original_py_to_sql = TypeRegistry._PY_TO_SQL.copy() 77 | 78 | try: 79 | TypeRegistry._PY_TO_SQL = {} 80 | 81 | # Register a type mapping for a base class 82 | class Base: 83 | pass 84 | 85 | class Derived(Base): 86 | pass 87 | 88 | TypeRegistry.register( 89 | python_type=Base, 90 | sql_type_factory=lambda: String(), 91 | ) 92 | 93 | # Get the SQL type for the derived class 94 | sql_type_factory = TypeRegistry.get_sql_type(Derived) 95 | assert sql_type_factory is not None 96 | assert isinstance(sql_type_factory(), String) 97 | finally: 98 | # Restore original registrations 99 | TypeRegistry._PY_TO_SQL = original_py_to_sql 100 | 101 | 102 | def test_get_python_type_inheritance(): 103 | """Test getting Python type for a subclass of SQL 
type.""" 104 | # Clear existing registrations for this test 105 | original_sql_to_py = TypeRegistry._SQL_TO_PY.copy() 106 | 107 | try: 108 | TypeRegistry._SQL_TO_PY = {} 109 | 110 | # Create a custom SQL type 111 | class CustomInteger(Integer): 112 | pass 113 | 114 | # Register a type mapping 115 | TypeRegistry.register( 116 | python_type=int, 117 | sql_type_factory=lambda: Integer(), 118 | ) 119 | 120 | # Get the Python type for the custom SQL type 121 | py_type = TypeRegistry.get_python_type(CustomInteger()) 122 | assert py_type is int 123 | finally: 124 | # Restore original registrations 125 | TypeRegistry._SQL_TO_PY = original_sql_to_py 126 | --------------------------------------------------------------------------------