├── .gitattributes ├── .github └── workflows │ └── publish-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml ├── src └── pandas_to_pydantic │ ├── __about__.py │ ├── __init__.py │ ├── annotation_utils.py │ └── to_pydantic.py ├── taplo.toml └── tests ├── __init__.py ├── config.py ├── data ├── bookData.csv └── library_data │ ├── library_data.csv │ ├── library_data.json │ ├── library_types.py │ ├── multilist_detail_library.json │ ├── multilist_library.json │ └── nested_library.json ├── test_annotation_utils.py └── test_to_pydantic.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/workflows/publish-package.yml: -------------------------------------------------------------------------------- 1 | name: Publish Package 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | id-token: write 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: | 18 | 3.9 19 | 3.10 20 | 3.11 21 | 3.12 22 | - name: Install Hatch 23 | run: pipx install hatch 24 | - name: Run tests 25 | run: hatch run all:test 26 | - name: Build dist 27 | run: hatch build 28 | - name: Publish package distributions to PyPI 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .hatch 2 | .obsidian 3 | .vscode 4 | 5 | dev-notebooks 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | 
downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | # pytype static type analyzer 148 | .pytype/ 149 | 150 | # Cython debug symbols 151 | cython_debug/ 152 | 153 | # PyCharm 154 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 155 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 156 | # and can be added to the global gitignore or merged into this file. For a more nuclear 157 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
158 | #.idea/ 159 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 puffins 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pandas-to-pydantic 2 | 3 | **WARNING: Library is currently unstable and in beta.** 4 | 5 | This library provides functions for converting Pandas Dataframes to Pydantic Models. This allows you to easily transform data in a table-like format into a json-like format. Pydantic Model annotations are matched with Pandas Dataframe columns. Supports models nested in lists. 
6 | 7 | [![PyPI - Version](https://img.shields.io/pypi/v/pandas-to-pydantic.svg)](https://pypi.org/project/pandas-to-pydantic) 8 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pandas-to-pydantic.svg)](https://pypi.org/project/pandas-to-pydantic) 9 | 10 | --- 11 | 12 | **Table of Contents** 13 | 14 | - [Installation](#installation) 15 | - [License](/LICENSE) 16 | - [Example 1](#example-1) 17 | - [dataframe_to_pydantic](#dataframe_to_pydantic) 18 | - [Example 2](#example-2) 19 | 20 | ## Installation 21 | 22 | ```console 23 | pip install pandas-to-pydantic 24 | ``` 25 | 26 | ## Example 1 27 | 28 | This example will show how to convert data from a flat structure (.csv file, pandas dataframe) to a hierarchical structure (json file, pydantic models) 29 | 30 | [Example Book Data](https://github.com/magicalpuffin/pandas-to-pydantic/blob/main/tests/data/bookData.csv) 31 | 32 | | BookID | Title | AuthorName | Genre | PublishedYear | 33 | | ------ | ---------------------------------------- | --------------- | ----------------- | ------------- | 34 | | 1 | Harry Potter and the Philosopher's Stone | J.K. Rowling | Fantasy | 1997 | 35 | | 2 | Harry Potter and the Chamber of Secrets | J.K. Rowling | Fantasy | 1998 | 36 | | 3 | 1984 | George Orwell | Dystopian Fiction | 1949 | 37 | | 4 | Animal Farm | George Orwell | Political Satire | 1945 | 38 | | 5 | Pride and Prejudice | Jane Austen | Romance | 1813 | 39 | | 7 | Murder on the Orient Express | Agatha Christie | Mystery | 1934 | 40 | | 9 | Adventures of Huckleberry Finn | Mark Twain | Adventure | 1884 | 41 | | 10 | The Adventures of Tom Sawyer | Mark Twain | Adventure | 1876 | 42 | | 11 | The Hobbit | J.R.R. Tolkien | Fantasy | 1937 | 43 | | 12 | The Lord of the Rings | J.R.R. 
Tolkien | Fantasy | 1954 | 44 | 45 | ```python 46 | import pandas as pd 47 | from pydantic import BaseModel 48 | from pandas_to_pydantic import dataframe_to_pydantic 49 | 50 | # Declare pydantic models 51 | class Book(BaseModel): 52 | BookID: int 53 | Title: str 54 | AuthorName: str 55 | Genre: str 56 | PublishedYear: int 57 | 58 | # Update this to your file path 59 | book_data = pd.read_csv(FILE_PATH) 60 | 61 | # Convert pandas dataframe to a pydantic root model 62 | book_list_root = dataframe_to_pydantic(book_data, Book) 63 | ``` 64 | 65 | `dataframe_to_pydantic` returns a pydantic `RootModel`. Data can be accessed using its attributes and methods. https://docs.pydantic.dev/latest/api/root_model/ 66 | 67 | For example: 68 | 69 | ```python 70 | # Access data as a list of pydantic models 71 | book_list_root.root 72 | ``` 73 | 74 | Returns (output shortened): 75 | 76 | ``` 77 | [Book(BookID=1, Title="Harry Potter and the Philosopher's Stone", AuthorName='J.K. Rowling', Genre='Fantasy', PublishedYear=1997), 78 | Book(BookID=2, Title='Harry Potter and the Chamber of Secrets', AuthorName='J.K. Rowling', Genre='Fantasy', PublishedYear=1998), 79 | Book(BookID=3, Title='1984', AuthorName='George Orwell', Genre='Dystopian Fiction', PublishedYear=1949), 80 | ...] 81 | ``` 82 | 83 | For example: 84 | 85 | ```python 86 | # Access data as a list of dict 87 | book_list_root.model_dump() 88 | ``` 89 | 90 | Returns (output shortened): 91 | 92 | ``` 93 | [{'BookID': 1, 94 | 'Title': "Harry Potter and the Philosopher's Stone", 95 | 'AuthorName': 'J.K. Rowling', 96 | 'Genre': 'Fantasy', 97 | 'PublishedYear': 1997}, 98 | {'BookID': 2, 99 | 'Title': 'Harry Potter and the Chamber of Secrets', 100 | 'AuthorName': 'J.K. Rowling', 101 | 'Genre': 'Fantasy', 102 | 'PublishedYear': 1998}, 103 | {'BookID': 3, 104 | 'Title': '1984', 105 | 'AuthorName': 'George Orwell', 106 | 'Genre': 'Dystopian Fiction', 107 | 'PublishedYear': 1949}, 108 | ...] 
109 | ``` 110 | 111 | ## Example 2 112 | 113 | In this example, Pydantic models are nested using the `list` type annotation. When there are multiple layers of nesting, unique id fields should be provided for each list field with a child model using `id_column_map`. 114 | 115 | Here, the unique id column for the `Genre` model is `Genre`, and the unique id column for the `Author` model is `AuthorName`. Keys in `id_column_map` can be the model name or field name. Values in `id_column_map` are the unique column name. 116 | 117 | For example: 118 | 119 | ```python 120 | class Book(BaseModel): 121 | BookID: int 122 | Title: str 123 | PublishedYear: int 124 | 125 | class Author(BaseModel): 126 | AuthorName: str 127 | BookList: list[Book] 128 | 129 | class Genre(BaseModel): 130 | Genre: str 131 | AuthorList: list[Author] 132 | 133 | dataframe_to_pydantic( 134 | data=bookData, 135 | model=Genre, 136 | id_column_map={"Genre": "Genre", "AuthorList": "AuthorName"}, 137 | ).model_dump() 138 | ``` 139 | 140 | Returns (output shortened) 141 | 142 | ``` 143 | [{'Genre': 'Fantasy', 144 | 'AuthorList': [{'AuthorName': 'J.K. Rowling', 145 | 'BookList': [{'BookID': 1, 146 | 'Title': "Harry Potter and the Philosopher's Stone", 147 | 'PublishedYear': 1997}, 148 | {'BookID': 2, 149 | 'Title': 'Harry Potter and the Chamber of Secrets', 150 | 'PublishedYear': 1998}]}, 151 | {'AuthorName': 'J.R.R. Tolkien', 152 | 'BookList': [{'BookID': 11, 'Title': 'The Hobbit', 'PublishedYear': 1937}, 153 | {'BookID': 12, 154 | 'Title': 'The Lord of the Rings', 155 | 'PublishedYear': 1954}]}]}, 156 | {'Genre': 'Dystopian Fiction', 157 | 'AuthorList': [{'AuthorName': 'George Orwell', 158 | 'BookList': [{'BookID': 3, 'Title': '1984', 'PublishedYear': 1949}]}]}, 159 | ...] 
160 | ``` 161 | 162 | ## dataframe_to_pydantic 163 | 164 | ### Args 165 | 166 | - data (`pandas.DataFrame`) 167 | - Dataframe with columns matching fields in the pydantic model 168 | - When the pydantic model includes nested models, the unique id column for each level is read from `id_column_map`. See [Example 2](#example-2) 169 | - model (`pydantic._internal._model_construction.ModelMetaclass`) 170 | - Accepts classes created with pydantic.BaseModel 171 | - Supports nested models in lists 172 | - Annotation names must match columns in the dataframe 173 | - id_column_map (`dict[str, str]`) 174 | - Required when nesting Pydantic models 175 | - Each key corresponds with field name or model name 176 | - Each value corresponds with the unique id column for the nested Pydantic model 177 | - For the parent level model, use the model name as key 178 | 179 | ### Returns 180 | 181 | - model_list (`pydantic.RootModel`) 182 | - Pydantic root model created as a list of the input model 183 | - https://docs.pydantic.dev/latest/api/root_model/ 184 | 185 | ## Advanced Example 186 | 187 | This example uses a larger data set with additional nesting. 
188 | 189 | [Example Library Data](https://github.com/magicalpuffin/pandas-to-pydantic/blob/main/tests/data/library_data/library_data.csv) 190 | 191 | ```python 192 | import pandas as pd 193 | from pydantic import BaseModel 194 | from pandas_to_pydantic import dataframe_to_pydantic 195 | 196 | # Declare pydantic models 197 | class LibraryDetail(BaseModel): 198 | LibraryName: str 199 | Location: str 200 | EstablishedYear: int 201 | BookCollectionSize: int 202 | 203 | class Author(BaseModel): 204 | AuthorID: int 205 | AuthorName: str 206 | AuthorBirthdate: str 207 | 208 | class Book(BaseModel): 209 | BookID: int 210 | Title: str 211 | Genre: str 212 | PublishedYear: int 213 | 214 | class Library(BaseModel): 215 | LibraryID: int 216 | Detail: LibraryDetail 217 | AuthorList: list[Author] 218 | BookList: list[Book] 219 | 220 | # Input data is a pandas dataframe 221 | data = pd.read_csv(FILE_PATH) 222 | 223 | # Convert pandas dataframe to a pydantic root model 224 | library_list_root = dataframe_to_pydantic( 225 | data, 226 | Library, 227 | { 228 | "Library": "LibraryID", 229 | "BookList": "BookID", 230 | "AuthorList": "AuthorID", 231 | }, 232 | ) 233 | 234 | # Access data as a list of pydantic models 235 | library_list_root.root 236 | 237 | # Access data as a list of dict 238 | library_list_root.model_dump() 239 | ``` 240 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pandas-to-pydantic" 7 | dynamic = ["version"] 8 | description = 'Library for converting pandas dataframes to pydantic models' 9 | readme = "README.md" 10 | requires-python = ">=3.9" 11 | license = "MIT" 12 | keywords = [] 13 | authors = [ 14 | { name = "magicalpuffin", email = "36088648+magicalpuffin@users.noreply.github.com" }, 15 | ] 16 | classifiers = [ 17 | 
"Development Status :: 4 - Beta", 18 | "Programming Language :: Python", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Programming Language :: Python :: 3.13", 24 | "Programming Language :: Python :: Implementation :: CPython", 25 | "Programming Language :: Python :: Implementation :: PyPy", 26 | ] 27 | dependencies = ["pandas>=2.0.0", "pydantic>=2.0.1"] 28 | 29 | [project.urls] 30 | Documentation = "https://github.com/magicalpuffin/pandas-to-pydantic#readme" 31 | Issues = "https://github.com/magicalpuffin/pandas-to-pydantic/issues" 32 | Source = "https://github.com/magicalpuffin/pandas-to-pydantic" 33 | 34 | 35 | [tool.pyright] 36 | venvPath = "./.hatch/" 37 | venv = "pandas-to-pydantic" 38 | 39 | [tool.hatch.version] 40 | path = "src/pandas_to_pydantic/__about__.py" 41 | 42 | [tool.hatch.envs.default] 43 | dependencies = ["coverage[toml]>=6.5", "pytest", "pandas-stubs"] 44 | [tool.hatch.envs.default.scripts] 45 | test = "pytest {args:tests}" 46 | test-cov = "coverage run -m pytest {args:tests}" 47 | cov-report = ["- coverage combine", "coverage report"] 48 | cov = ["test-cov", "cov-report"] 49 | 50 | [[tool.hatch.envs.all.matrix]] 51 | python = ["3.9", "3.10", "3.11", "3.12", "3.13"] 52 | 53 | [tool.hatch.envs.dev] 54 | dependencies = ["ipykernel"] 55 | 56 | [tool.hatch.envs.lint] 57 | detached = true 58 | dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] 59 | [tool.hatch.envs.lint.scripts] 60 | typing = "mypy --install-types --non-interactive {args:src/pandas_to_pydantic tests}" 61 | style = ["ruff {args:.}", "black --check --diff {args:.}"] 62 | fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] 63 | all = ["style", "typing"] 64 | 65 | [tool.black] 66 | target-version = ["py37"] 67 | line-length = 120 68 | skip-string-normalization = true 69 | 70 | [tool.ruff] 71 | target-version = "py37" 72 | 
line-length = 120 73 | select = [ 74 | "A", 75 | "ARG", 76 | "B", 77 | "C", 78 | "DTZ", 79 | "E", 80 | "EM", 81 | "F", 82 | "FBT", 83 | "I", 84 | "ICN", 85 | "ISC", 86 | "N", 87 | "PLC", 88 | "PLE", 89 | "PLR", 90 | "PLW", 91 | "Q", 92 | "RUF", 93 | "S", 94 | "T", 95 | "TID", 96 | "UP", 97 | "W", 98 | "YTT", 99 | ] 100 | ignore = [ 101 | # Allow non-abstract empty methods in abstract base classes 102 | "B027", 103 | # Allow boolean positional values in function calls, like `dict.get(... True)` 104 | "FBT003", 105 | # Ignore checks for possible passwords 106 | "S105", 107 | "S106", 108 | "S107", 109 | # Ignore complexity 110 | "C901", 111 | "PLR0911", 112 | "PLR0912", 113 | "PLR0913", 114 | "PLR0915", 115 | ] 116 | unfixable = [ 117 | # Don't touch unused imports 118 | "F401", 119 | ] 120 | 121 | [tool.ruff.isort] 122 | known-first-party = ["pandas_to_pydantic"] 123 | 124 | [tool.ruff.flake8-tidy-imports] 125 | ban-relative-imports = "all" 126 | 127 | [tool.ruff.per-file-ignores] 128 | # Tests can use magic values, assertions, and relative imports 129 | "tests/**/*" = ["PLR2004", "S101", "TID252"] 130 | 131 | [tool.coverage.run] 132 | source_pkgs = ["pandas_to_pydantic", "tests"] 133 | branch = true 134 | parallel = true 135 | omit = ["src/pandas_to_pydantic/__about__.py"] 136 | 137 | [tool.coverage.paths] 138 | pandas_to_pydantic = [ 139 | "src/pandas_to_pydantic", 140 | "*/pandas-to-pydantic/src/pandas_to_pydantic", 141 | ] 142 | tests = ["tests", "*/pandas-to-pydantic/tests"] 143 | 144 | [tool.coverage.report] 145 | exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] 146 | -------------------------------------------------------------------------------- /src/pandas_to_pydantic/__about__.py: -------------------------------------------------------------------------------- 1 | VERSION = "0.1.5" 2 | -------------------------------------------------------------------------------- /src/pandas_to_pydantic/__init__.py: 
-------------------------------------------------------------------------------- 1 | from pandas_to_pydantic.annotation_utils import ModelColumns, get_annotations, get_model_columns # noqa: F401 2 | from pandas_to_pydantic.to_pydantic import dataframe_to_pydantic, get_root_list, serialize_dataframe # noqa: F401 3 | -------------------------------------------------------------------------------- /src/pandas_to_pydantic/annotation_utils.py: -------------------------------------------------------------------------------- 1 | import types 2 | from typing import Optional 3 | 4 | from pydantic import BaseModel 5 | from pydantic._internal._model_construction import ModelMetaclass 6 | 7 | 8 | class ModelColumns(BaseModel): 9 | """ 10 | Describes model fields. Used when mapping Dataframe columns to fields. 11 | 12 | Args: 13 | BaseModel (_type_): Pydantic BaseModel 14 | """ 15 | 16 | name: str 17 | id_column: Optional[str] 18 | base_columns: list[str] 19 | list_columns: list["ModelColumns"] 20 | child_columns: list["ModelColumns"] 21 | 22 | 23 | def get_annotations(model: ModelMetaclass) -> dict: 24 | """ 25 | Gets annotations of model, including inherited BaseModel 26 | 27 | Args: 28 | model (ModelMetaclass): Pydantic BaseModel class 29 | 30 | Returns: 31 | dict: key as annotation name, value as type 32 | """ 33 | annotations = {} 34 | for base_model in model.mro(): 35 | if issubclass(base_model, BaseModel) and base_model is not BaseModel: 36 | annotations.update(base_model.__annotations__.copy()) 37 | return annotations 38 | 39 | 40 | def get_model_columns( 41 | model: ModelMetaclass, id_column_map: Optional[dict[str, str]] = None, name: Optional[str] = None 42 | ) -> ModelColumns: 43 | """ 44 | Creates ModelColumns for a Pydantic BaseModel 45 | 46 | Args: 47 | model (ModelMetaclass): Pydantic BaseModel class 48 | id_column_map (Optional[dict[str, str]], optional): Map of field names and unique ID. Necessary for identifying 49 | and structuring nested objects. 
Defaults to None. 50 | name (Optional[str], optional): For name field in ModelColumns. If None, uses model.__name__. Defaults to None. 51 | 52 | Raises: 53 | TypeError: Error if model is not a Pydantic BaseModel 54 | 55 | Returns: 56 | ModelColumns: ModelColumns generated for the model. 57 | """ 58 | # TODO consider returning field name 59 | if not issubclass(model, BaseModel): 60 | error_message = f"{model} is not a BaseModel" 61 | raise TypeError(error_message) 62 | 63 | if id_column_map is None: 64 | id_column_map = {} 65 | if name is None: 66 | name = model.__name__ 67 | 68 | # Fallback to model name if passed in name field not in column map 69 | id_column = id_column_map.get(name) 70 | if id_column is None: 71 | id_column = id_column_map.get(model.__name__) 72 | 73 | annotations = get_annotations(model) 74 | 75 | base_columns = [] 76 | list_columns = [] 77 | child_columns = [] 78 | 79 | for field_name, field_type in annotations.items(): 80 | if isinstance(field_type, types.GenericAlias): 81 | if field_type.__origin__ is list: 82 | # TODO reevaluate passed in field name 83 | list_columns.append(get_model_columns(field_type.__args__[0], id_column_map, field_name)) 84 | elif isinstance(field_type, ModelMetaclass): 85 | if issubclass(field_type, BaseModel): 86 | child_columns.append(get_model_columns(field_type, id_column_map, field_name)) 87 | else: 88 | base_columns.append(field_name) 89 | 90 | return ModelColumns( 91 | name=name, 92 | id_column=id_column, 93 | base_columns=base_columns, 94 | list_columns=list_columns, 95 | child_columns=child_columns, 96 | ) 97 | -------------------------------------------------------------------------------- /src/pandas_to_pydantic/to_pydantic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, TypeVar, Union 2 | 3 | import pandas as pd 4 | from pydantic import BaseModel, RootModel 5 | from pydantic._internal._model_construction import ModelMetaclass 6 | 7 | 
from pandas_to_pydantic.annotation_utils import ModelColumns, get_model_columns 8 | 9 | 10 | def serialize_dataframe(data: pd.DataFrame, model_columns: ModelColumns) -> list[dict]: 11 | """ 12 | Converts a Pandas Dataframe into a json-like structure 13 | 14 | Args: 15 | data (pd.DataFrame): Dataframe with columns matching ModelColumns 16 | model_columns (ModelColumns): ModelColumns object for mapping model fields with columns 17 | 18 | Raises: 19 | ValueError: If the id column contains NA values 20 | 21 | Returns: 22 | list[dict]: Data in json-like structure 23 | """ 24 | # TODO maybe only return list if needed 25 | new_list = [] 26 | 27 | if not model_columns.id_column: 28 | # TODO consider returning child models with base columns 29 | return data[model_columns.base_columns].to_dict(orient="records") 30 | 31 | if data[model_columns.id_column].isna().any(): 32 | error_message = f"{model_columns.id_column} contains NA" 33 | raise ValueError(error_message) 34 | 35 | for value in data[model_columns.id_column].unique(): 36 | base_dict = {} 37 | 38 | slice_data = data[data[model_columns.id_column] == value] 39 | 40 | # Using first row for base data 41 | base_dict = {**slice_data[model_columns.base_columns].iloc[0].to_dict()} 42 | 43 | for list_model in model_columns.list_columns: 44 | base_dict[list_model.name] = serialize_dataframe(slice_data, list_model) 45 | 46 | for child_model in model_columns.child_columns: 47 | # TODO using zero index to work around returning a list 48 | base_dict[child_model.name] = serialize_dataframe(slice_data, child_model)[0] 49 | 50 | new_list.append(base_dict) 51 | 52 | return new_list 53 | 54 | 55 | T = TypeVar("T", bound=BaseModel) 56 | 57 | 58 | def get_root_list(serialize_data: Union[list[dict], list[ModelMetaclass]], model: type[T]) -> RootModel[list[T]]: 59 | """ 60 | Converts json-like data into a pydantic list RootModel 61 | 62 | Args: 63 | serialize_data (Union[list[dict], list[ModelMetaclass]]): data in json-like structure or 
list of pydantic objects 64 | model (ModelMetaclass): pydantic model 65 | 66 | Returns: 67 | RootModel: list of pydantic model set to the input data 68 | """ 69 | root_list_model = RootModel[list[model]] 70 | root_list = root_list_model(serialize_data) # type: ignore 71 | 72 | return root_list 73 | 74 | 75 | def dataframe_to_pydantic( 76 | data: pd.DataFrame, model: type[T], id_column_map: Optional[dict[str, str]] = None 77 | ) -> RootModel[list[T]]: 78 | """ 79 | Converts a dataframe to a pydantic model 80 | 81 | Args: 82 | data (pd.DataFrame): Dataframe with columns matching Pydantic Model 83 | model (ModelMetaclass): Target Pydantic Model 84 | id_column_map (Optional[dict[str, str]], optional): Map of field names and unique ID. Necessary for identifying 85 | and structuring nested objects. 86 | 87 | Returns: 88 | RootModel: pydantic root model wrapping a list of `model` instances built from the dataframe 89 | """ 90 | target_model_columns = get_model_columns(model, id_column_map) 91 | serialize_data = serialize_dataframe(data, target_model_columns) 92 | model_list = get_root_list(serialize_data, model) 93 | 94 | return model_list 95 | -------------------------------------------------------------------------------- /taplo.toml: -------------------------------------------------------------------------------- 1 | [formatting] 2 | align_entries = true 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magicalpuffin/pandas-to-pydantic/fa26dab5ed2f4aa59f4acf648c6d287d251fbb66/tests/__init__.py -------------------------------------------------------------------------------- /tests/config.py: -------------------------------------------------------------------------------- 1 | TEST_DATA_DIR = "tests/data/" 2 | LIBRARY_DATA_DIR = TEST_DATA_DIR + "library_data/" 3 | 4 | LIBRARY_CSV = LIBRARY_DATA_DIR + "library_data.csv" 5 | LIBRARY_JSON = LIBRARY_DATA_DIR + 
"library_data.json" 6 | 7 | 8 | # TODO move to separate folder 9 | BOOK_CSV = TEST_DATA_DIR + "bookData.csv" 10 | -------------------------------------------------------------------------------- /tests/data/bookData.csv: -------------------------------------------------------------------------------- 1 | BookID,Title,AuthorName,Genre,PublishedYear 2 | 1,Harry Potter and the Philosopher's Stone,J.K. Rowling,Fantasy,1997 3 | 2,Harry Potter and the Chamber of Secrets,J.K. Rowling,Fantasy,1998 4 | 3,1984,George Orwell,Dystopian Fiction,1949 5 | 4,Animal Farm,George Orwell,Political Satire,1945 6 | 5,Pride and Prejudice,Jane Austen,Romance,1813 7 | 7,Murder on the Orient Express,Agatha Christie,Mystery,1934 8 | 9,Adventures of Huckleberry Finn,Mark Twain,Adventure,1884 9 | 10,The Adventures of Tom Sawyer,Mark Twain,Adventure,1876 10 | 11,The Hobbit,J.R.R. Tolkien,Fantasy,1937 11 | 12,The Lord of the Rings,J.R.R. Tolkien,Fantasy,1954 12 | -------------------------------------------------------------------------------- /tests/data/library_data/library_data.csv: -------------------------------------------------------------------------------- 1 | LibraryID,LibraryName,Location,EstablishedYear,BookCollectionSize,AuthorID,AuthorName,AuthorBirthdate,BookID,Title,Genre,PublishedYear,AvailableCopies 2 | 1,City Central Library,Cityville,1950,50000,1,J.K. Rowling,1965-07-31,1,Harry Potter and the Philosopher's Stone,Fantasy,1997,5 3 | 1,City Central Library,Cityville,1950,50000,1,J.K. Rowling,1965-07-31,2,Harry Potter and the Chamber of Secrets,Fantasy,1998,3 4 | 1,City Central Library,Cityville,1950,50000,5,Mark Twain,1835-11-30,10,The Adventures of Tom Sawyer,Adventure,1876,2 5 | 2,Greenwood Public Library,Greenwood,1975,35000,2,George Orwell,1903-06-25,3,1984,Dystopian Fiction,1949,7 6 | 2,Greenwood Public Library,Greenwood,1975,35000,6,J.R.R. 
Tolkien,1892-01-03,11,The Hobbit,Fantasy,1937,4 7 | 3,Lakeside Community Library,Lakeside,1990,25000,3,Jane Austen,1775-12-16,5,Pride and Prejudice,Romance,1813,6 8 | 4,Mountain View Library,Mountain View,1982,40000,4,Agatha Christie,1890-09-15,7,Murder on the Orient Express,Mystery,1934,1 9 | 4,Mountain View Library,Mountain View,1982,40000,5,Mark Twain,1835-11-30,9,Adventures of Huckleberry Finn,Adventure,1884,4 10 | 4,Mountain View Library,Mountain View,1982,40000,5,Mark Twain,1835-11-30,10,The Adventures of Tom Sawyer,Adventure,1876,6 11 | 4,Mountain View Library,Mountain View,1982,40000,2,George Orwell,1903-06-25,4,Animal Farm,Political Satire,1945,1 12 | 4,Mountain View Library,Mountain View,1982,40000,6,J.R.R. Tolkien,1892-01-03,12,The Lord of the Rings,Fantasy,1954,4 13 | 5,Sunset District Public Library,Sunset District,1965,30000,6,J.R.R. Tolkien,1892-01-03,11,The Hobbit,Fantasy,1937,6 14 | 5,Sunset District Public Library,Sunset District,1965,30000,6,J.R.R. Tolkien,1892-01-03,12,The Lord of the Rings,Fantasy,1954,3 15 | 5,Sunset District Public Library,Sunset District,1965,30000,1,J.K. Rowling,1965-07-31,2,Harry Potter and the Chamber of Secrets,Fantasy,1998,3 16 | 5,Sunset District Public Library,Sunset District,1965,30000,5,Mark Twain,1835-11-30,9,Adventures of Huckleberry Finn,Adventure,1884,4 17 | -------------------------------------------------------------------------------- /tests/data/library_data/library_data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "LibraryID": 1, 4 | "LibraryName": "City Central Library", 5 | "Location": "Cityville", 6 | "EstablishedYear": 1950, 7 | "BookCollectionSize": 50000, 8 | "AuthorList": [ 9 | { 10 | "AuthorID": 1, 11 | "AuthorName": "J.K. 
Rowling", 12 | "AuthorBirthdate": "1965-07-31", 13 | "BookList": [ 14 | { 15 | "BookID": 1, 16 | "Title": "Harry Potter and the Philosopher's Stone", 17 | "Genre": "Fantasy", 18 | "PublishedYear": 1997, 19 | "AvailableCopies": 5 20 | }, 21 | { 22 | "BookID": 2, 23 | "Title": "Harry Potter and the Chamber of Secrets", 24 | "Genre": "Fantasy", 25 | "PublishedYear": 1998, 26 | "AvailableCopies": 3 27 | } 28 | ] 29 | }, 30 | { 31 | "AuthorID": 5, 32 | "AuthorName": "Mark Twain", 33 | "AuthorBirthdate": "1835-11-30", 34 | "BookList": [ 35 | { 36 | "BookID": 10, 37 | "Title": "The Adventures of Tom Sawyer", 38 | "Genre": "Adventure", 39 | "PublishedYear": 1876, 40 | "AvailableCopies": 2 41 | } 42 | ] 43 | } 44 | ] 45 | }, 46 | { 47 | "LibraryID": 2, 48 | "LibraryName": "Greenwood Public Library", 49 | "Location": "Greenwood", 50 | "EstablishedYear": 1975, 51 | "BookCollectionSize": 35000, 52 | "AuthorList": [ 53 | { 54 | "AuthorID": 2, 55 | "AuthorName": "George Orwell", 56 | "AuthorBirthdate": "1903-06-25", 57 | "BookList": [ 58 | { 59 | "BookID": 3, 60 | "Title": "1984", 61 | "Genre": "Dystopian Fiction", 62 | "PublishedYear": 1949, 63 | "AvailableCopies": 7 64 | } 65 | ] 66 | }, 67 | { 68 | "AuthorID": 6, 69 | "AuthorName": "J.R.R. 
Tolkien", 70 | "AuthorBirthdate": "1892-01-03", 71 | "BookList": [ 72 | { 73 | "BookID": 11, 74 | "Title": "The Hobbit", 75 | "Genre": "Fantasy", 76 | "PublishedYear": 1937, 77 | "AvailableCopies": 4 78 | } 79 | ] 80 | } 81 | ] 82 | }, 83 | { 84 | "LibraryID": 3, 85 | "LibraryName": "Lakeside Community Library", 86 | "Location": "Lakeside", 87 | "EstablishedYear": 1990, 88 | "BookCollectionSize": 25000, 89 | "AuthorList": [ 90 | { 91 | "AuthorID": 3, 92 | "AuthorName": "Jane Austen", 93 | "AuthorBirthdate": "1775-12-16", 94 | "BookList": [ 95 | { 96 | "BookID": 5, 97 | "Title": "Pride and Prejudice", 98 | "Genre": "Romance", 99 | "PublishedYear": 1813, 100 | "AvailableCopies": 6 101 | } 102 | ] 103 | } 104 | ] 105 | }, 106 | { 107 | "LibraryID": 4, 108 | "LibraryName": "Mountain View Library", 109 | "Location": "Mountain View", 110 | "EstablishedYear": 1982, 111 | "BookCollectionSize": 40000, 112 | "AuthorList": [ 113 | { 114 | "AuthorID": 4, 115 | "AuthorName": "Agatha Christie", 116 | "AuthorBirthdate": "1890-09-15", 117 | "BookList": [ 118 | { 119 | "BookID": 7, 120 | "Title": "Murder on the Orient Express", 121 | "Genre": "Mystery", 122 | "PublishedYear": 1934, 123 | "AvailableCopies": 1 124 | } 125 | ] 126 | }, 127 | { 128 | "AuthorID": 5, 129 | "AuthorName": "Mark Twain", 130 | "AuthorBirthdate": "1835-11-30", 131 | "BookList": [ 132 | { 133 | "BookID": 9, 134 | "Title": "Adventures of Huckleberry Finn", 135 | "Genre": "Adventure", 136 | "PublishedYear": 1884, 137 | "AvailableCopies": 4 138 | }, 139 | { 140 | "BookID": 10, 141 | "Title": "The Adventures of Tom Sawyer", 142 | "Genre": "Adventure", 143 | "PublishedYear": 1876, 144 | "AvailableCopies": 6 145 | } 146 | ] 147 | }, 148 | { 149 | "AuthorID": 2, 150 | "AuthorName": "George Orwell", 151 | "AuthorBirthdate": "1903-06-25", 152 | "BookList": [ 153 | { 154 | "BookID": 4, 155 | "Title": "Animal Farm", 156 | "Genre": "Political Satire", 157 | "PublishedYear": 1945, 158 | "AvailableCopies": 1 159 | } 160 | ] 
161 | }, 162 | { 163 | "AuthorID": 6, 164 | "AuthorName": "J.R.R. Tolkien", 165 | "AuthorBirthdate": "1892-01-03", 166 | "BookList": [ 167 | { 168 | "BookID": 12, 169 | "Title": "The Lord of the Rings", 170 | "Genre": "Fantasy", 171 | "PublishedYear": 1954, 172 | "AvailableCopies": 4 173 | } 174 | ] 175 | } 176 | ] 177 | }, 178 | { 179 | "LibraryID": 5, 180 | "LibraryName": "Sunset District Public Library", 181 | "Location": "Sunset District", 182 | "EstablishedYear": 1965, 183 | "BookCollectionSize": 30000, 184 | "AuthorList": [ 185 | { 186 | "AuthorID": 6, 187 | "AuthorName": "J.R.R. Tolkien", 188 | "AuthorBirthdate": "1892-01-03", 189 | "BookList": [ 190 | { 191 | "BookID": 11, 192 | "Title": "The Hobbit", 193 | "Genre": "Fantasy", 194 | "PublishedYear": 1937, 195 | "AvailableCopies": 6 196 | }, 197 | { 198 | "BookID": 12, 199 | "Title": "The Lord of the Rings", 200 | "Genre": "Fantasy", 201 | "PublishedYear": 1954, 202 | "AvailableCopies": 3 203 | } 204 | ] 205 | }, 206 | { 207 | "AuthorID": 1, 208 | "AuthorName": "J.K. 
Rowling", 209 | "AuthorBirthdate": "1965-07-31", 210 | "BookList": [ 211 | { 212 | "BookID": 2, 213 | "Title": "Harry Potter and the Chamber of Secrets", 214 | "Genre": "Fantasy", 215 | "PublishedYear": 1998, 216 | "AvailableCopies": 3 217 | } 218 | ] 219 | }, 220 | { 221 | "AuthorID": 5, 222 | "AuthorName": "Mark Twain", 223 | "AuthorBirthdate": "1835-11-30", 224 | "BookList": [ 225 | { 226 | "BookID": 9, 227 | "Title": "Adventures of Huckleberry Finn", 228 | "Genre": "Adventure", 229 | "PublishedYear": 1884, 230 | "AvailableCopies": 4 231 | } 232 | ] 233 | } 234 | ] 235 | } 236 | ] -------------------------------------------------------------------------------- /tests/data/library_data/library_types.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class Book(BaseModel): 5 | BookID: int 6 | Title: str 7 | Genre: str 8 | PublishedYear: int 9 | AvailableCopies: int 10 | 11 | 12 | class Author(BaseModel): 13 | AuthorID: int 14 | AuthorName: str 15 | AuthorBirthdate: str 16 | BookList: list[Book] 17 | 18 | 19 | class Library(BaseModel): 20 | LibraryID: int 21 | LibraryName: str 22 | Location: str 23 | EstablishedYear: int 24 | BookCollectionSize: int 25 | AuthorList: list[Author] 26 | 27 | 28 | class BaseLibrary(BaseModel): 29 | LibraryID: int 30 | LibraryName: str 31 | 32 | 33 | class InheritedLibrary(BaseLibrary): 34 | Location: str 35 | EstablishedYear: int 36 | BookCollectionSize: int 37 | AuthorList: list[Author] 38 | 39 | 40 | class NestedAuthor(BaseModel): 41 | AuthorID: int 42 | AuthorName: str 43 | AuthorBirthdate: str 44 | 45 | 46 | class NestedBook(BaseModel): 47 | BookID: int 48 | Title: str 49 | Genre: str 50 | PublishedYear: int 51 | Author: NestedAuthor 52 | 53 | 54 | class NestedLibrary(BaseModel): 55 | LibraryID: int 56 | LibraryName: str 57 | Book: NestedBook 58 | 59 | 60 | class MultiListLibrary(BaseModel): 61 | class Author(BaseModel): 62 | AuthorID: int 63 | 
AuthorName: str 64 | AuthorBirthdate: str 65 | 66 | class Book(BaseModel): 67 | BookID: int 68 | Title: str 69 | Genre: str 70 | PublishedYear: int 71 | 72 | LibraryID: int 73 | LibraryName: str 74 | AuthorList: list[Author] 75 | BookList: list[Book] 76 | 77 | 78 | class MultiListDetailLibrary(BaseModel): 79 | class LibaryDetail(BaseModel): 80 | LibraryName: str 81 | Location: str 82 | EstablishedYear: int 83 | BookCollectionSize: int 84 | 85 | class Author(BaseModel): 86 | AuthorID: int 87 | AuthorName: str 88 | AuthorBirthdate: str 89 | 90 | class Book(BaseModel): 91 | BookID: int 92 | Title: str 93 | Genre: str 94 | PublishedYear: int 95 | 96 | LibraryID: int 97 | Detail: LibaryDetail 98 | AuthorList: list[Author] 99 | BookList: list[Book] 100 | -------------------------------------------------------------------------------- /tests/data/library_data/multilist_detail_library.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "LibraryID": 1, 4 | "AuthorList": [ 5 | { 6 | "AuthorID": 1, 7 | "AuthorName": "J.K. 
Rowling", 8 | "AuthorBirthdate": "1965-07-31" 9 | }, 10 | { 11 | "AuthorID": 5, 12 | "AuthorName": "Mark Twain", 13 | "AuthorBirthdate": "1835-11-30" 14 | } 15 | ], 16 | "BookList": [ 17 | { 18 | "BookID": 1, 19 | "Title": "Harry Potter and the Philosopher's Stone", 20 | "Genre": "Fantasy", 21 | "PublishedYear": 1997 22 | }, 23 | { 24 | "BookID": 2, 25 | "Title": "Harry Potter and the Chamber of Secrets", 26 | "Genre": "Fantasy", 27 | "PublishedYear": 1998 28 | }, 29 | { 30 | "BookID": 10, 31 | "Title": "The Adventures of Tom Sawyer", 32 | "Genre": "Adventure", 33 | "PublishedYear": 1876 34 | } 35 | ], 36 | "Detail": { 37 | "LibraryName": "City Central Library", 38 | "Location": "Cityville", 39 | "EstablishedYear": 1950, 40 | "BookCollectionSize": 50000 41 | } 42 | }, 43 | { 44 | "LibraryID": 2, 45 | "AuthorList": [ 46 | { 47 | "AuthorID": 2, 48 | "AuthorName": "George Orwell", 49 | "AuthorBirthdate": "1903-06-25" 50 | }, 51 | { 52 | "AuthorID": 6, 53 | "AuthorName": "J.R.R. Tolkien", 54 | "AuthorBirthdate": "1892-01-03" 55 | } 56 | ], 57 | "BookList": [ 58 | { 59 | "BookID": 3, 60 | "Title": "1984", 61 | "Genre": "Dystopian Fiction", 62 | "PublishedYear": 1949 63 | }, 64 | { 65 | "BookID": 11, 66 | "Title": "The Hobbit", 67 | "Genre": "Fantasy", 68 | "PublishedYear": 1937 69 | } 70 | ], 71 | "Detail": { 72 | "LibraryName": "Greenwood Public Library", 73 | "Location": "Greenwood", 74 | "EstablishedYear": 1975, 75 | "BookCollectionSize": 35000 76 | } 77 | }, 78 | { 79 | "LibraryID": 3, 80 | "AuthorList": [ 81 | { 82 | "AuthorID": 3, 83 | "AuthorName": "Jane Austen", 84 | "AuthorBirthdate": "1775-12-16" 85 | } 86 | ], 87 | "BookList": [ 88 | { 89 | "BookID": 5, 90 | "Title": "Pride and Prejudice", 91 | "Genre": "Romance", 92 | "PublishedYear": 1813 93 | } 94 | ], 95 | "Detail": { 96 | "LibraryName": "Lakeside Community Library", 97 | "Location": "Lakeside", 98 | "EstablishedYear": 1990, 99 | "BookCollectionSize": 25000 100 | } 101 | }, 102 | { 103 | "LibraryID": 4, 
104 | "AuthorList": [ 105 | { 106 | "AuthorID": 4, 107 | "AuthorName": "Agatha Christie", 108 | "AuthorBirthdate": "1890-09-15" 109 | }, 110 | { 111 | "AuthorID": 5, 112 | "AuthorName": "Mark Twain", 113 | "AuthorBirthdate": "1835-11-30" 114 | }, 115 | { 116 | "AuthorID": 2, 117 | "AuthorName": "George Orwell", 118 | "AuthorBirthdate": "1903-06-25" 119 | }, 120 | { 121 | "AuthorID": 6, 122 | "AuthorName": "J.R.R. Tolkien", 123 | "AuthorBirthdate": "1892-01-03" 124 | } 125 | ], 126 | "BookList": [ 127 | { 128 | "BookID": 7, 129 | "Title": "Murder on the Orient Express", 130 | "Genre": "Mystery", 131 | "PublishedYear": 1934 132 | }, 133 | { 134 | "BookID": 9, 135 | "Title": "Adventures of Huckleberry Finn", 136 | "Genre": "Adventure", 137 | "PublishedYear": 1884 138 | }, 139 | { 140 | "BookID": 10, 141 | "Title": "The Adventures of Tom Sawyer", 142 | "Genre": "Adventure", 143 | "PublishedYear": 1876 144 | }, 145 | { 146 | "BookID": 4, 147 | "Title": "Animal Farm", 148 | "Genre": "Political Satire", 149 | "PublishedYear": 1945 150 | }, 151 | { 152 | "BookID": 12, 153 | "Title": "The Lord of the Rings", 154 | "Genre": "Fantasy", 155 | "PublishedYear": 1954 156 | } 157 | ], 158 | "Detail": { 159 | "LibraryName": "Mountain View Library", 160 | "Location": "Mountain View", 161 | "EstablishedYear": 1982, 162 | "BookCollectionSize": 40000 163 | } 164 | }, 165 | { 166 | "LibraryID": 5, 167 | "AuthorList": [ 168 | { 169 | "AuthorID": 6, 170 | "AuthorName": "J.R.R. Tolkien", 171 | "AuthorBirthdate": "1892-01-03" 172 | }, 173 | { 174 | "AuthorID": 1, 175 | "AuthorName": "J.K. 
Rowling", 176 | "AuthorBirthdate": "1965-07-31" 177 | }, 178 | { 179 | "AuthorID": 5, 180 | "AuthorName": "Mark Twain", 181 | "AuthorBirthdate": "1835-11-30" 182 | } 183 | ], 184 | "BookList": [ 185 | { 186 | "BookID": 11, 187 | "Title": "The Hobbit", 188 | "Genre": "Fantasy", 189 | "PublishedYear": 1937 190 | }, 191 | { 192 | "BookID": 12, 193 | "Title": "The Lord of the Rings", 194 | "Genre": "Fantasy", 195 | "PublishedYear": 1954 196 | }, 197 | { 198 | "BookID": 2, 199 | "Title": "Harry Potter and the Chamber of Secrets", 200 | "Genre": "Fantasy", 201 | "PublishedYear": 1998 202 | }, 203 | { 204 | "BookID": 9, 205 | "Title": "Adventures of Huckleberry Finn", 206 | "Genre": "Adventure", 207 | "PublishedYear": 1884 208 | } 209 | ], 210 | "Detail": { 211 | "LibraryName": "Sunset District Public Library", 212 | "Location": "Sunset District", 213 | "EstablishedYear": 1965, 214 | "BookCollectionSize": 30000 215 | } 216 | } 217 | ] -------------------------------------------------------------------------------- /tests/data/library_data/multilist_library.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "LibraryID": 1, 4 | "LibraryName": "City Central Library", 5 | "AuthorList": [ 6 | { 7 | "AuthorID": 1, 8 | "AuthorName": "J.K. 
Rowling", 9 | "AuthorBirthdate": "1965-07-31" 10 | }, 11 | { 12 | "AuthorID": 5, 13 | "AuthorName": "Mark Twain", 14 | "AuthorBirthdate": "1835-11-30" 15 | } 16 | ], 17 | "BookList": [ 18 | { 19 | "BookID": 1, 20 | "Title": "Harry Potter and the Philosopher's Stone", 21 | "Genre": "Fantasy", 22 | "PublishedYear": 1997 23 | }, 24 | { 25 | "BookID": 2, 26 | "Title": "Harry Potter and the Chamber of Secrets", 27 | "Genre": "Fantasy", 28 | "PublishedYear": 1998 29 | }, 30 | { 31 | "BookID": 10, 32 | "Title": "The Adventures of Tom Sawyer", 33 | "Genre": "Adventure", 34 | "PublishedYear": 1876 35 | } 36 | ] 37 | }, 38 | { 39 | "LibraryID": 2, 40 | "LibraryName": "Greenwood Public Library", 41 | "AuthorList": [ 42 | { 43 | "AuthorID": 2, 44 | "AuthorName": "George Orwell", 45 | "AuthorBirthdate": "1903-06-25" 46 | }, 47 | { 48 | "AuthorID": 6, 49 | "AuthorName": "J.R.R. Tolkien", 50 | "AuthorBirthdate": "1892-01-03" 51 | } 52 | ], 53 | "BookList": [ 54 | { 55 | "BookID": 3, 56 | "Title": "1984", 57 | "Genre": "Dystopian Fiction", 58 | "PublishedYear": 1949 59 | }, 60 | { 61 | "BookID": 11, 62 | "Title": "The Hobbit", 63 | "Genre": "Fantasy", 64 | "PublishedYear": 1937 65 | } 66 | ] 67 | }, 68 | { 69 | "LibraryID": 3, 70 | "LibraryName": "Lakeside Community Library", 71 | "AuthorList": [ 72 | { 73 | "AuthorID": 3, 74 | "AuthorName": "Jane Austen", 75 | "AuthorBirthdate": "1775-12-16" 76 | } 77 | ], 78 | "BookList": [ 79 | { 80 | "BookID": 5, 81 | "Title": "Pride and Prejudice", 82 | "Genre": "Romance", 83 | "PublishedYear": 1813 84 | } 85 | ] 86 | }, 87 | { 88 | "LibraryID": 4, 89 | "LibraryName": "Mountain View Library", 90 | "AuthorList": [ 91 | { 92 | "AuthorID": 4, 93 | "AuthorName": "Agatha Christie", 94 | "AuthorBirthdate": "1890-09-15" 95 | }, 96 | { 97 | "AuthorID": 5, 98 | "AuthorName": "Mark Twain", 99 | "AuthorBirthdate": "1835-11-30" 100 | }, 101 | { 102 | "AuthorID": 2, 103 | "AuthorName": "George Orwell", 104 | "AuthorBirthdate": "1903-06-25" 105 | }, 106 | 
{ 107 | "AuthorID": 6, 108 | "AuthorName": "J.R.R. Tolkien", 109 | "AuthorBirthdate": "1892-01-03" 110 | } 111 | ], 112 | "BookList": [ 113 | { 114 | "BookID": 7, 115 | "Title": "Murder on the Orient Express", 116 | "Genre": "Mystery", 117 | "PublishedYear": 1934 118 | }, 119 | { 120 | "BookID": 9, 121 | "Title": "Adventures of Huckleberry Finn", 122 | "Genre": "Adventure", 123 | "PublishedYear": 1884 124 | }, 125 | { 126 | "BookID": 10, 127 | "Title": "The Adventures of Tom Sawyer", 128 | "Genre": "Adventure", 129 | "PublishedYear": 1876 130 | }, 131 | { 132 | "BookID": 4, 133 | "Title": "Animal Farm", 134 | "Genre": "Political Satire", 135 | "PublishedYear": 1945 136 | }, 137 | { 138 | "BookID": 12, 139 | "Title": "The Lord of the Rings", 140 | "Genre": "Fantasy", 141 | "PublishedYear": 1954 142 | } 143 | ] 144 | }, 145 | { 146 | "LibraryID": 5, 147 | "LibraryName": "Sunset District Public Library", 148 | "AuthorList": [ 149 | { 150 | "AuthorID": 6, 151 | "AuthorName": "J.R.R. Tolkien", 152 | "AuthorBirthdate": "1892-01-03" 153 | }, 154 | { 155 | "AuthorID": 1, 156 | "AuthorName": "J.K. 
Rowling", 157 | "AuthorBirthdate": "1965-07-31" 158 | }, 159 | { 160 | "AuthorID": 5, 161 | "AuthorName": "Mark Twain", 162 | "AuthorBirthdate": "1835-11-30" 163 | } 164 | ], 165 | "BookList": [ 166 | { 167 | "BookID": 11, 168 | "Title": "The Hobbit", 169 | "Genre": "Fantasy", 170 | "PublishedYear": 1937 171 | }, 172 | { 173 | "BookID": 12, 174 | "Title": "The Lord of the Rings", 175 | "Genre": "Fantasy", 176 | "PublishedYear": 1954 177 | }, 178 | { 179 | "BookID": 2, 180 | "Title": "Harry Potter and the Chamber of Secrets", 181 | "Genre": "Fantasy", 182 | "PublishedYear": 1998 183 | }, 184 | { 185 | "BookID": 9, 186 | "Title": "Adventures of Huckleberry Finn", 187 | "Genre": "Adventure", 188 | "PublishedYear": 1884 189 | } 190 | ] 191 | } 192 | ] -------------------------------------------------------------------------------- /tests/data/library_data/nested_library.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "LibraryID": 1, 4 | "LibraryName": "City Central Library", 5 | "Book": { 6 | "BookID": 1, 7 | "Title": "Harry Potter and the Philosopher's Stone", 8 | "Genre": "Fantasy", 9 | "PublishedYear": 1997, 10 | "Author": { 11 | "AuthorID": 1, 12 | "AuthorName": "J.K. 
Rowling", 13 | "AuthorBirthdate": "1965-07-31" 14 | } 15 | } 16 | }, 17 | { 18 | "LibraryID": 2, 19 | "LibraryName": "Greenwood Public Library", 20 | "Book": { 21 | "BookID": 3, 22 | "Title": "1984", 23 | "Genre": "Dystopian Fiction", 24 | "PublishedYear": 1949, 25 | "Author": { 26 | "AuthorID": 2, 27 | "AuthorName": "George Orwell", 28 | "AuthorBirthdate": "1903-06-25" 29 | } 30 | } 31 | }, 32 | { 33 | "LibraryID": 3, 34 | "LibraryName": "Lakeside Community Library", 35 | "Book": { 36 | "BookID": 5, 37 | "Title": "Pride and Prejudice", 38 | "Genre": "Romance", 39 | "PublishedYear": 1813, 40 | "Author": { 41 | "AuthorID": 3, 42 | "AuthorName": "Jane Austen", 43 | "AuthorBirthdate": "1775-12-16" 44 | } 45 | } 46 | }, 47 | { 48 | "LibraryID": 4, 49 | "LibraryName": "Mountain View Library", 50 | "Book": { 51 | "BookID": 7, 52 | "Title": "Murder on the Orient Express", 53 | "Genre": "Mystery", 54 | "PublishedYear": 1934, 55 | "Author": { 56 | "AuthorID": 4, 57 | "AuthorName": "Agatha Christie", 58 | "AuthorBirthdate": "1890-09-15" 59 | } 60 | } 61 | }, 62 | { 63 | "LibraryID": 5, 64 | "LibraryName": "Sunset District Public Library", 65 | "Book": { 66 | "BookID": 11, 67 | "Title": "The Hobbit", 68 | "Genre": "Fantasy", 69 | "PublishedYear": 1937, 70 | "Author": { 71 | "AuthorID": 6, 72 | "AuthorName": "J.R.R. 
Tolkien", 73 | "AuthorBirthdate": "1892-01-03" 74 | } 75 | } 76 | } 77 | ] -------------------------------------------------------------------------------- /tests/test_annotation_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pydantic import BaseModel 3 | 4 | from pandas_to_pydantic import ModelColumns, get_annotations, get_model_columns 5 | 6 | 7 | class TestGetModelColumns: 8 | def test_basic_model_columns(self): 9 | class Book(BaseModel): 10 | BookID: int 11 | Title: str 12 | Genre: str 13 | PublishedYear: int 14 | AvailableCopies: int 15 | 16 | assert get_model_columns(Book) == ModelColumns( 17 | name="Book", 18 | id_column=None, 19 | base_columns=["BookID", "Title", "Genre", "PublishedYear", "AvailableCopies"], 20 | list_columns=[], 21 | child_columns=[], 22 | ) 23 | 24 | def test_child_model_columns(self): 25 | class Author(BaseModel): 26 | AuthorID: int 27 | AuthorName: str 28 | AuthorBirthdate: str 29 | 30 | class Book(BaseModel): 31 | BookID: int 32 | BookAuthor: Author 33 | Title: str 34 | Genre: str 35 | PublishedYear: int 36 | AvailableCopies: int 37 | 38 | assert get_model_columns(Book) == ModelColumns( 39 | name="Book", 40 | id_column=None, 41 | base_columns=["BookID", "Title", "Genre", "PublishedYear", "AvailableCopies"], 42 | list_columns=[], 43 | child_columns=[ 44 | ModelColumns( 45 | name="BookAuthor", 46 | id_column=None, 47 | base_columns=["AuthorID", "AuthorName", "AuthorBirthdate"], 48 | list_columns=[], 49 | child_columns=[], 50 | ) 51 | ], 52 | ) 53 | 54 | def test_list_model_columns(self): 55 | class Book(BaseModel): 56 | BookID: int 57 | Title: str 58 | Genre: str 59 | PublishedYear: int 60 | AvailableCopies: int 61 | 62 | class Author(BaseModel): 63 | AuthorID: int 64 | AuthorName: str 65 | AuthorBirthdate: str 66 | BookList: list[Book] 67 | 68 | assert get_model_columns(Author, {"Author": "AuthorID", "BookList": "BookID"}) == ModelColumns( 69 | name="Author", 70 | 
id_column="AuthorID", 71 | base_columns=["AuthorID", "AuthorName", "AuthorBirthdate"], 72 | list_columns=[ 73 | ModelColumns( 74 | name="BookList", 75 | id_column="BookID", 76 | base_columns=["BookID", "Title", "Genre", "PublishedYear", "AvailableCopies"], 77 | list_columns=[], 78 | child_columns=[], 79 | ) 80 | ], 81 | child_columns=[], 82 | ) 83 | 84 | def test_inherited_model_columns(self): 85 | class BaseLibrary(BaseModel): 86 | LibraryID: int 87 | LibraryName: str 88 | 89 | class InheritedLibrary(BaseLibrary): 90 | Location: str 91 | EstablishedYear: int 92 | BookCollectionSize: int 93 | 94 | assert get_model_columns(InheritedLibrary) == ModelColumns( 95 | name="InheritedLibrary", 96 | id_column=None, 97 | base_columns=["Location", "EstablishedYear", "BookCollectionSize", "LibraryID", "LibraryName"], 98 | list_columns=[], 99 | child_columns=[], 100 | ) 101 | 102 | def test_empty_model_columns(self): 103 | class EmptyModel(BaseModel): 104 | pass 105 | 106 | assert get_model_columns(EmptyModel) == ModelColumns( 107 | name="EmptyModel", 108 | id_column=None, 109 | base_columns=[], 110 | list_columns=[], 111 | child_columns=[], 112 | ) 113 | 114 | def test_base_model_exception(self): 115 | class BaseModelErrorModel(BaseModel): 116 | error_list: list[str] 117 | 118 | with pytest.raises(TypeError): 119 | get_model_columns(BaseModelErrorModel) 120 | 121 | 122 | class TestGetAnnotations: 123 | def test_basic_annotations(self): 124 | class Book(BaseModel): 125 | BookID: int 126 | Title: str 127 | Genre: str 128 | PublishedYear: int 129 | AvailableCopies: int 130 | 131 | assert get_annotations(Book) == { 132 | "BookID": int, 133 | "Title": str, 134 | "Genre": str, 135 | "PublishedYear": int, 136 | "AvailableCopies": int, 137 | } 138 | 139 | def test_child_annotations(self): 140 | class Author(BaseModel): 141 | AuthorID: int 142 | AuthorName: str 143 | AuthorBirthdate: str 144 | 145 | class Book(BaseModel): 146 | BookID: int 147 | BookAuthor: Author 148 | Title: str 149 
| Genre: str 150 | PublishedYear: int 151 | AvailableCopies: int 152 | 153 | assert get_annotations(Book) == { 154 | "BookID": int, 155 | "BookAuthor": Author, 156 | "Title": str, 157 | "Genre": str, 158 | "PublishedYear": int, 159 | "AvailableCopies": int, 160 | } 161 | 162 | def test_list_annotations(self): 163 | class Book(BaseModel): 164 | BookID: int 165 | Title: str 166 | Genre: str 167 | PublishedYear: int 168 | AvailableCopies: int 169 | 170 | class Author(BaseModel): 171 | AuthorID: int 172 | AuthorName: str 173 | AuthorBirthdate: str 174 | BookList: list[Book] 175 | 176 | assert get_annotations(Author) == { 177 | "AuthorID": int, 178 | "AuthorName": str, 179 | "AuthorBirthdate": str, 180 | "BookList": list[Book], 181 | } 182 | 183 | def test_inherited_annotations(self): 184 | class BaseLibrary(BaseModel): 185 | LibraryID: int 186 | LibraryName: str 187 | 188 | class InheritedLibrary(BaseLibrary): 189 | Location: str 190 | EstablishedYear: int 191 | BookCollectionSize: int 192 | 193 | assert get_annotations(InheritedLibrary) == { 194 | "LibraryID": int, 195 | "LibraryName": str, 196 | "Location": str, 197 | "EstablishedYear": int, 198 | "BookCollectionSize": int, 199 | } 200 | -------------------------------------------------------------------------------- /tests/test_to_pydantic.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | import pytest 5 | 6 | from pandas_to_pydantic import dataframe_to_pydantic, get_model_columns, get_root_list, serialize_dataframe 7 | 8 | from .config import LIBRARY_CSV, LIBRARY_DATA_DIR, LIBRARY_JSON 9 | from .data.library_data.library_types import ( 10 | InheritedLibrary, 11 | Library, 12 | MultiListDetailLibrary, 13 | MultiListLibrary, 14 | NestedLibrary, 15 | ) 16 | 17 | library_df = pd.read_csv(LIBRARY_CSV) 18 | 19 | with open(LIBRARY_JSON) as file: 20 | library_dict = json.load(file) 21 | 22 | # TODO consider parameterizing even further, 
parameterize data source 23 | json_model_columns_data = [ 24 | ("library_data.json", Library, {"Library": "LibraryID", "AuthorList": "AuthorID"}), 25 | ("library_data.json", InheritedLibrary, {"InheritedLibrary": "LibraryID", "AuthorList": "AuthorID"}), 26 | ("nested_library.json", NestedLibrary, {"NestedLibrary": "LibraryID", "Book": "BookID"}), 27 | ( 28 | "multilist_library.json", 29 | MultiListLibrary, 30 | {"MultiListLibrary": "LibraryID", "BookList": "BookID", "AuthorList": "AuthorID"}, 31 | ), 32 | ( 33 | "multilist_library.json", 34 | MultiListLibrary, 35 | {"MultiListLibrary": "LibraryID", "Book": "BookID", "Author": "AuthorID"}, 36 | ), 37 | ( 38 | "multilist_detail_library.json", 39 | MultiListDetailLibrary, 40 | {"MultiListDetailLibrary": "LibraryID", "BookList": "BookID", "AuthorList": "AuthorID"}, 41 | ), 42 | ] 43 | 44 | 45 | # TODO parameterize this 46 | class TestSerialzeDataframe: 47 | @pytest.mark.parametrize("output_json, input_model, input_id_columns", json_model_columns_data) 48 | def test_serialize_dataframe(self, output_json, input_model, input_id_columns): 49 | with open(LIBRARY_DATA_DIR + output_json) as file: 50 | json_dict = json.load(file) 51 | 52 | serialized_data = serialize_dataframe(library_df, get_model_columns(input_model, input_id_columns)) 53 | 54 | assert serialized_data == json_dict 55 | 56 | def test_parent_id_missing(self): 57 | data_copy = library_df.copy() 58 | data_copy["LibraryID"] = data_copy["LibraryID"].replace({1: None}) 59 | 60 | with pytest.raises(ValueError): 61 | serialize_dataframe( 62 | data_copy, get_model_columns(Library, {"Library": "LibraryID", "AuthorList": "AuthorID"}) 63 | ) 64 | 65 | def test_child_id_missing(self): 66 | data_copy = library_df.copy() 67 | data_copy["AuthorID"] = data_copy["AuthorID"].replace({1: None}) 68 | 69 | with pytest.raises(ValueError): 70 | serialize_dataframe( 71 | data_copy, get_model_columns(Library, {"Library": "LibraryID", "AuthorList": "AuthorID"}) 72 | ) 73 | 74 | 75 | 
class TestGetRootList: 76 | def test_library_root(self): 77 | library_root_list_from_df = dataframe_to_pydantic( 78 | library_df, Library, {"Library": "LibraryID", "AuthorList": "AuthorID"} 79 | ) 80 | library_root_list_from_dict = get_root_list(library_dict, Library) 81 | assert library_root_list_from_df == library_root_list_from_dict 82 | 83 | 84 | class TestDataframeToPydantic: 85 | def test_library(self): 86 | library_root_list = dataframe_to_pydantic( 87 | library_df, Library, {"Library": "LibraryID", "AuthorList": "AuthorID"} 88 | ) 89 | 90 | assert library_root_list.model_dump() == library_dict 91 | --------------------------------------------------------------------------------