├── .devcontainer └── devcontainer.json ├── .github └── workflows │ ├── docs.yaml │ ├── pypi.yml │ └── tests.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── docs ├── build ├── conf.py └── index.md ├── pandas_dataclasses ├── __init__.py ├── core │ ├── __init__.py │ ├── api.py │ ├── specs.py │ ├── tagging.py │ └── typing.py ├── extras │ ├── __init__.py │ ├── hints.py │ └── new.py └── py.typed ├── pyproject.toml ├── tests ├── __init__.py ├── data.py ├── test_core_api.py ├── test_core_specs.py ├── test_core_tagging.py ├── test_core_typing.py └── test_extras_new.py └── uv.lock /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pandas-dataclasses", 3 | "image": "ghcr.io/astral-sh/uv:python3.12-bookworm", 4 | "runArgs": [ 5 | "--name=pandas-dataclasses" 6 | ], 7 | "containerEnv": { 8 | "UV_PROJECT_ENVIRONMENT": "/usr/local" 9 | }, 10 | "postCreateCommand": "uv sync --frozen", 11 | "customizations": { 12 | "vscode": { 13 | "extensions": [ 14 | "ms-python.black-formatter", 15 | "streetsidesoftware.code-spell-checker", 16 | "tamasfe.even-better-toml" 17 | ], 18 | "settings": { 19 | "python.languageServer": "Pylance", 20 | "[python]": { 21 | "editor.defaultFormatter": "ms-python.black-formatter", 22 | "editor.formatOnSave": true 23 | } 24 | } 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | jobs: 9 | job: 10 | name: Docs 11 | runs-on: ubuntu-latest 12 | container: ghcr.io/astral-sh/uv:python3.12-bookworm 13 | env: 14 | UV_PROJECT_ENVIRONMENT: /usr/local 15 | steps: 16 | - uses: actions/checkout@v4 17 | - run: echo "::set-output name=tag::${GITHUB_REF##*/}" 18 | id: tag 19 | - run: uv sync --frozen 20 | - run: docs/build 21 | - uses: 
peaceiris/actions-gh-pages@v4 22 | with: 23 | destination_dir: ${{ steps.tag.outputs.tag }} 24 | github_token: ${{ secrets.GITHUB_TOKEN }} 25 | publish_dir: ./docs/_build 26 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: PyPI 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | jobs: 9 | job: 10 | name: PyPI 11 | runs-on: ubuntu-latest 12 | container: ghcr.io/astral-sh/uv:python3.12-bookworm 13 | env: 14 | UV_PROJECT_ENVIRONMENT: /usr/local 15 | UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v4 18 | - run: uv build && uv publish 19 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | job: 13 | name: Test (${{ matrix.env }}) 14 | runs-on: ubuntu-latest 15 | container: ghcr.io/astral-sh/uv:${{ matrix.env }} 16 | env: 17 | PYTHON_DIRS: docs tests pandas_dataclasses 18 | UV_PROJECT_ENVIRONMENT: /usr/local 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | env: 23 | - python3.9-bookworm 24 | - python3.10-bookworm 25 | - python3.11-bookworm 26 | - python3.12-bookworm 27 | - python3.13-bookworm 28 | steps: 29 | - uses: actions/checkout@v4 30 | - run: uv sync --frozen 31 | - run: black --check ${PYTHON_DIRS} 32 | - run: pyright ${PYTHON_DIRS} 33 | - run: pytest -v 34 | - run: docs/build 35 | if: ${{ matrix.env != 'python3.9-bookworm' }} 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at 
https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_apidoc/ 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/#use-with-ide 115 | .pdm.toml 116 | 117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .env 129 | .venv 130 | env/ 131 | venv/ 132 | ENV/ 133 | env.bak/ 134 | venv.bak/ 135 | 136 | # Spyder project settings 137 | .spyderproject 138 | .spyproject 139 | 140 | # Rope project settings 141 | .ropeproject 142 | 143 | # mkdocs documentation 144 | /site 145 | 146 | # mypy 147 | .mypy_cache/ 148 | .dmypy.json 149 | dmypy.json 150 | 151 | # Pyre type checker 152 | .pyre/ 153 | 154 | # pytype static type analyzer 155 | .pytype/ 156 | 157 | # Cython debug symbols 158 | cython_debug/ 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged 
into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 165 | #.idea/ 166 | 167 | # End of https://www.toptal.com/developers/gitignore/api/python 168 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: pandas-dataclasses 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 9 | type: software 10 | authors: 11 | - given-names: Akio 12 | family-names: Taniguchi 13 | email: taniguchi.akio@gmail.com 14 | affiliation: Kitami Institute of Technology 15 | orcid: 'https://orcid.org/0000-0002-9695-6183' 16 | identifiers: 17 | - type: doi 18 | value: 10.5281/zenodo.10652375 19 | repository-code: 'https://github.com/astropenguin/pandas-dataclasses' 20 | url: 'https://astropenguin.github.io/pandas-dataclasses/v1.0.0' 21 | abstract: pandas data creation by data classes 22 | keywords: 23 | - python 24 | - dataclasses 25 | - pandas 26 | - specifications 27 | - typing 28 | license: MIT 29 | version: 1.0.0 30 | date-released: '2025-01-01' 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2025 Akio Taniguchi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to 
do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pandas-dataclasses 2 | 3 | [![Release](https://img.shields.io/pypi/v/pandas-dataclasses?label=Release&color=cornflowerblue&style=flat-square)](https://pypi.org/project/pandas-dataclasses/) 4 | [![Python](https://img.shields.io/pypi/pyversions/pandas-dataclasses?label=Python&color=cornflowerblue&style=flat-square)](https://pypi.org/project/pandas-dataclasses/) 5 | [![Downloads](https://img.shields.io/pypi/dm/pandas-dataclasses?label=Downloads&color=cornflowerblue&style=flat-square)](https://pepy.tech/project/pandas-dataclasses) 6 | [![DOI](https://img.shields.io/badge/DOI-10.5281/zenodo.6127352-cornflowerblue?style=flat-square)](https://doi.org/10.5281/zenodo.6127352) 7 | [![Tests](https://img.shields.io/github/actions/workflow/status/astropenguin/pandas-dataclasses/tests.yml?label=Tests&style=flat-square)](https://github.com/astropenguin/pandas-dataclasses/actions) 8 | 9 | pandas data creation by data classes 10 | 11 | ## Overview 12 | 13 | pandas-dataclass makes it easy to create [pandas] data (DataFrame and Series) by specifying their data types, attributes, and names using the Python's dataclass: 14 | 15 |
16 | Click to see all imports 17 | 18 | ```python 19 | from dataclasses import dataclass 20 | from pandas_dataclasses import AsFrame, Data, Index 21 | ``` 22 |
23 | 24 | ```python 25 | @dataclass 26 | class Weather(AsFrame): 27 | """Weather information.""" 28 | 29 | year: Index[int] 30 | month: Index[int] 31 | temp: Data[float] 32 | wind: Data[float] 33 | 34 | 35 | df = Weather.new( 36 | [2020, 2020, 2021, 2021, 2022], 37 | [1, 7, 1, 7, 1], 38 | [7.1, 24.3, 5.4, 25.9, 4.9], 39 | [2.4, 3.1, 2.3, 2.4, 2.6], 40 | ) 41 | ``` 42 | 43 | where `df` will become a DataFrame object like: 44 | 45 | ``` 46 | temp wind 47 | year month 48 | 2020 1 7.1 2.4 49 | 7 24.3 3.1 50 | 2021 1 5.4 2.3 51 | 7 25.9 2.4 52 | 2022 1 4.9 2.6 53 | ``` 54 | 55 | ### Features 56 | 57 | - Specifying data types and names of each element in pandas data 58 | - Specifying metadata stored in pandas data attributes (attrs) 59 | - Support for hierarchical index and columns 60 | - Support for custom factory for data creation 61 | - Support for full [dataclass] features 62 | - Support for static type check by [mypy] and [Pyright] ([Pylance]) 63 | 64 | ### Installation 65 | 66 | ```bash 67 | pip install pandas-dataclasses 68 | ``` 69 | 70 | ## How it works 71 | 72 | pandas-dataclasses provides you the following features: 73 | 74 | - Type hints for dataclass fields (`Attr`, `Data`, `Index`) to specify the data type and name of each element in pandas data 75 | - Mix-in classes for dataclasses (`As`, `AsFrame`, `AsSeries`) to create pandas data by a classmethod (`new`) that takes the same arguments as dataclass initialization 76 | 77 | When you call `new`, it will first create a dataclass object and then create a Series or DataFrame object from the dataclass object according the type hints and values in it. 78 | In the example above, `df = Weather.new(...)` is thus equivalent to: 79 | 80 |
81 | Click to see all imports 82 | 83 | ```python 84 | from pandas_dataclasses import asframe 85 | ``` 86 |
87 | 88 | ```python 89 | obj = Weather([2020, ...], [1, ...], [7.1, ...], [2.4, ...]) 90 | df = asframe(obj) 91 | ``` 92 | 93 | where `asframe` is a conversion function. 94 | pandas-dataclasses does not touch the dataclass object creation itself; this allows you to fully customize your dataclass before conversion by the dataclass features (`field`, `__post_init__`, ...). 95 | 96 | ## Basic usage 97 | 98 | ### DataFrame creation 99 | 100 | As shown in the example above, a dataclass that has the `AsFrame` (or `AsDataFrame` as an alias) mix-in will create DataFrame objects: 101 | 102 |
103 | Click to see all imports 104 | 105 | ```python 106 | from dataclasses import dataclass 107 | from pandas_dataclasses import AsFrame, Data, Index 108 | ``` 109 |
110 | 111 | ```python 112 | @dataclass 113 | class Weather(AsFrame): 114 | """Weather information.""" 115 | 116 | year: Index[int] 117 | month: Index[int] 118 | temp: Data[float] 119 | wind: Data[float] 120 | 121 | 122 | df = Weather.new(...) 123 | ``` 124 | 125 | where fields typed by `Index` are *index fields*, each value of which will become an index or a part of a hierarchical index of a DataFrame object. 126 | Fields typed by `Data` are *data fields*, each value of which will become a data column of a DataFrame object. 127 | Fields typed by other types are just ignored in the DataFrame creation. 128 | 129 | Each data or index will be cast to the data type specified in a type hint like `Index[int]`. 130 | Use `Any` or `None` (like `Index[Any]`) if you do not want type casting. 131 | See also [data typing rules](#data-typing-rules) for more examples. 132 | 133 | By default, a field name (i.e. an argument name) is used for the name of corresponding data or index. 134 | See also [custom naming](#custom-naming) and [naming rules](#naming-rules) if you want customization. 135 | 136 | ### Series creation 137 | 138 | A dataclass that has the `AsSeries` mix-in will create Series objects: 139 | 140 |
141 | Click to see all imports 142 | 143 | ```python 144 | from dataclasses import dataclass 145 | from pandas_dataclasses import AsSeries, Data, Index 146 | ``` 147 |
148 | 149 | ```python 150 | @dataclass 151 | class Weather(AsSeries): 152 | """Weather information.""" 153 | 154 | year: Index[int] 155 | month: Index[int] 156 | temp: Data[float] 157 | 158 | 159 | ser = Weather.new(...) 160 | ``` 161 | 162 | Unlike `AsFrame`, the second and subsequent data fields are ignored in the Series creation even if they exist. 163 | Other rules are the same as for the DataFrame creation. 164 | 165 | ## Advanced usage 166 | 167 | ### Metadata storing 168 | 169 | Fields typed by `Attr` are *attribute fields*, each value of which will become an item of attributes of a DataFrame or a Series object: 170 | 171 |
172 | Click to see all imports 173 | 174 | ```python 175 | from dataclasses import dataclass 176 | from pandas_dataclasses import AsFrame, Attr, Data, Index 177 | ``` 178 |
179 | 180 | ```python 181 | @dataclass 182 | class Weather(AsFrame): 183 | """Weather information.""" 184 | 185 | year: Index[int] 186 | month: Index[int] 187 | temp: Data[float] 188 | wind: Data[float] 189 | loc: Attr[str] = "Tokyo" 190 | lon: Attr[float] = 139.69167 191 | lat: Attr[float] = 35.68944 192 | 193 | 194 | df = Weather.new(...) 195 | ``` 196 | 197 | where `df.attrs` will become like: 198 | 199 | ```python 200 | {"loc": "Tokyo", "lon": 139.69167, "lat": 35.68944} 201 | ``` 202 | 203 | ### Custom naming 204 | 205 | The name of attribute, data, or index can be explicitly specified by adding a hashable annotation to the corresponding type: 206 | 207 |
208 | Click to see all imports 209 | 210 | ```python 211 | from dataclasses import dataclass 212 | from typing import Annotated as Ann 213 | from pandas_dataclasses import AsFrame, Attr, Data, Index 214 | ``` 215 |
216 | 217 | ```python 218 | @dataclass 219 | class Weather(AsFrame): 220 | """Weather information.""" 221 | 222 | year: Ann[Index[int], "Year"] 223 | month: Ann[Index[int], "Month"] 224 | temp: Ann[Data[float], "Temperature (deg C)"] 225 | wind: Ann[Data[float], "Wind speed (m/s)"] 226 | loc: Ann[Attr[str], "Location"] = "Tokyo" 227 | lon: Ann[Attr[float], "Longitude (deg)"] = 139.69167 228 | lat: Ann[Attr[float], "Latitude (deg)"] = 35.68944 229 | 230 | 231 | df = Weather.new(...) 232 | ``` 233 | 234 | where `df` and `df.attrs` will become like: 235 | 236 | ``` 237 | Temperature (deg C) Wind speed (m/s) 238 | Year Month 239 | 2020 1 7.1 2.4 240 | 7 24.3 3.1 241 | 2021 1 5.4 2.3 242 | 7 25.9 2.4 243 | 2022 1 4.9 2.6 244 | ``` 245 | 246 | ```python 247 | {"Location": "Tokyo", "Longitude (deg)": 139.69167, "Latitude (deg)": 35.68944} 248 | ``` 249 | 250 | If an annotation is a [format string], it will be formatted by a dataclass object before the data creation: 251 | 252 |
253 | Click to see all imports 254 | 255 | ```python 256 | from dataclasses import dataclass 257 | from typing import Annotated as Ann 258 | from pandas_dataclasses import AsFrame, Data, Index 259 | ``` 260 |
261 | 262 | ```python 263 | @dataclass 264 | class Weather(AsFrame): 265 | """Weather information.""" 266 | 267 | year: Ann[Index[int], "Year"] 268 | month: Ann[Index[int], "Month"] 269 | temp: Ann[Data[float], "Temperature ({.temp_unit})"] 270 | wind: Ann[Data[float], "Wind speed ({.wind_unit})"] 271 | temp_unit: str = "deg C" 272 | wind_unit: str = "m/s" 273 | 274 | 275 | df = Weather.new(..., temp_unit="deg F", wind_unit="km/h") 276 | ``` 277 | 278 | where units of the temperature and the wind speed will be dynamically updated (see also [naming rules](#naming-rules)). 279 | 280 | ### Hierarchical columns 281 | 282 | Adding tuple annotations to data fields will create DataFrame objects with hierarchical columns: 283 | 284 |
285 | Click to see all imports 286 | 287 | ```python 288 | from dataclasses import dataclass 289 | from typing import Annotated as Ann 290 | from pandas_dataclasses import AsFrame, Data, Index 291 | ``` 292 |
293 | 294 | ```python 295 | @dataclass 296 | class Weather(AsFrame): 297 | """Weather information.""" 298 | 299 | year: Ann[Index[int], "Year"] 300 | month: Ann[Index[int], "Month"] 301 | temp_avg: Ann[Data[float], ("Temperature (deg C)", "Average")] 302 | temp_max: Ann[Data[float], ("Temperature (deg C)", "Maximum")] 303 | wind_avg: Ann[Data[float], ("Wind speed (m/s)", "Average")] 304 | wind_max: Ann[Data[float], ("Wind speed (m/s)", "Maximum")] 305 | 306 | 307 | df = Weather.new(...) 308 | ``` 309 | 310 | where `df` will become like: 311 | 312 | ``` 313 | Temperature (deg C) Wind speed (m/s) 314 | Average Maximum Average Maximum 315 | Year Month 316 | 2020 1 7.1 11.1 2.4 8.8 317 | 7 24.3 27.7 3.1 10.2 318 | 2021 1 5.4 10.3 2.3 10.7 319 | 7 25.9 30.3 2.4 9.0 320 | 2022 1 4.9 9.4 2.6 8.8 321 | ``` 322 | 323 | Column names can be (explicitly) specified by dictionary annotations: 324 | 325 |
326 | Click to see all imports 327 | 328 | ```python 329 | from dataclasses import dataclass 330 | from typing import Annotated as Ann 331 | from pandas_dataclasses import AsFrame, Data, Index 332 | ``` 333 |
334 | 335 | ```python 336 | def name(meas: str, stat: str) -> dict[str, str]: 337 | """Create a dictionary annotation for a column name.""" 338 | return {"Measurement": meas, "Statistic": stat} 339 | 340 | 341 | @dataclass 342 | class Weather(AsFrame): 343 | """Weather information.""" 344 | 345 | year: Ann[Index[int], "Year"] 346 | month: Ann[Index[int], "Month"] 347 | temp_avg: Ann[Data[float], name("Temperature (deg C)", "Average")] 348 | temp_max: Ann[Data[float], name("Temperature (deg C)", "Maximum")] 349 | wind_avg: Ann[Data[float], name("Wind speed (m/s)", "Average")] 350 | wind_max: Ann[Data[float], name("Wind speed (m/s)", "Maximum")] 351 | 352 | 353 | df = Weather.new(...) 354 | ``` 355 | 356 | where `df` will become like: 357 | 358 | ``` 359 | Measurement Temperature (deg C) Wind speed (m/s) 360 | Statistic Average Maximum Average Maximum 361 | Year Month 362 | 2020 1 7.1 11.1 2.4 8.8 363 | 7 24.3 27.7 3.1 10.2 364 | 2021 1 5.4 10.3 2.3 10.7 365 | 7 25.9 30.3 2.4 9.0 366 | 2022 1 4.9 9.4 2.6 8.8 367 | ``` 368 | 369 | If a tuple or dictionary annotation has [format string]s, they will also be formatted by a dataclass object (see also [naming rules](#naming-rules)). 370 | 371 | ### Multiple-item fields 372 | 373 | Multiple (and possibly extra) attributes, data, or indices can be added by fields with corresponding type hints wrapped by `Multiple`: 374 | 375 |
376 | Click to see all imports 377 | 378 | ```python 379 | from dataclasses import dataclass 380 | from pandas_dataclasses import AsFrame, Data, Index, Multiple 381 | ``` 382 |
383 | 384 | 385 | ```python 386 | @dataclass 387 | class Weather(AsFrame): 388 | """Weather information.""" 389 | 390 | year: Index[int] 391 | month: Index[int] 392 | temp: Data[float] 393 | wind: Data[float] 394 | extra_index: Multiple[Index[int]] 395 | extra_data: Multiple[Data[float]] 396 | 397 | 398 | df = Weather.new( 399 | [2020, 2020, 2021, 2021, 2022], 400 | [1, 7, 1, 7, 1], 401 | [7.1, 24.3, 5.4, 25.9, 4.9], 402 | [2.4, 3.1, 2.3, 2.4, 2.6], 403 | extra_index={ 404 | "day": [1, 1, 1, 1, 1], 405 | "week": [2, 2, 4, 3, 5], 406 | }, 407 | extra_data={ 408 | "humid": [65, 89, 57, 83, 52], 409 | "press": [1013.8, 1006.2, 1014.1, 1007.7, 1012.7], 410 | }, 411 | ) 412 | ``` 413 | 414 | where `df` will become like: 415 | 416 | ``` 417 | temp wind humid press 418 | year month day week 419 | 2020 1 1 2 7.1 2.4 65.0 1013.8 420 | 7 1 2 24.3 3.1 89.0 1006.2 421 | 2021 1 1 4 5.4 2.3 57.0 1014.1 422 | 7 1 3 25.9 2.4 83.0 1007.7 423 | 2022 1 1 5 4.9 2.6 52.0 1012.7 424 | ``` 425 | 426 | If multiple items of the same name exist, the last-defined one will be finally used. 427 | For example, if the `extra_index` field contains `"month": [2, 8, 2, 8, 2]`, the values given by the `month` field will be overwritten. 428 | 429 | ### Custom pandas factory 430 | 431 | A custom class can be specified as a factory for the Series or DataFrame creation by `As`, the generic version of `AsFrame` and `AsSeries`. 432 | Note that the custom class must be a subclass of either `pandas.Series` or `pandas.DataFrame`: 433 | 434 |
435 | Click to see all imports 436 | 437 | ```python 438 | import pandas as pd 439 | from dataclasses import dataclass 440 | from pandas_dataclasses import As, Data, Index 441 | ``` 442 |
443 | 444 | ```python 445 | class CustomSeries(pd.Series): 446 | """Custom pandas Series.""" 447 | 448 | pass 449 | 450 | 451 | @dataclass 452 | class Temperature(As[CustomSeries]): 453 | """Temperature information.""" 454 | 455 | year: Index[int] 456 | month: Index[int] 457 | temp: Data[float] 458 | 459 | 460 | ser = Temperature.new(...) 461 | ``` 462 | 463 | where `ser` is statically regarded as `CustomSeries` and will become a `CustomSeries` object. 464 | 465 | Generic Series type (`Series[T]`) is also supported, however, it is only for static the type check in the current pandas versions. 466 | In such cases, you can additionally give a factory that must work in runtime as a class argument: 467 | 468 |
469 | Click to see all imports 470 | 471 | ```python 472 | import pandas as pd 473 | from dataclasses import dataclass 474 | from pandas_dataclasses import As, Data, Index 475 | ``` 476 |
477 | 478 | ```python 479 | @dataclass 480 | class Temperature(As["pd.Series[float]"], factory=pd.Series): 481 | """Temperature information.""" 482 | 483 | year: Index[int] 484 | month: Index[int] 485 | temp: Data[float] 486 | 487 | 488 | ser = Temperature.new(...) 489 | ``` 490 | 491 | where `ser` is statically regarded as `Series[float]` but will become a `Series` object in runtime. 492 | 493 | ## Appendix 494 | 495 | ### Data typing rules 496 | 497 | The data type (dtype) of data or index is determined from the first `Data` or `Index` type of the corresponding field, respectively. 498 | The following table shows how the data type is inferred: 499 | 500 |
501 | Click to see all imports 502 | 503 | ```python 504 | from typing import Any, Annotated as Ann, Literal as L 505 | from pandas_dataclasses import Data 506 | ``` 507 |
508 | 509 | Type hint | Inferred data type 510 | --- | --- 511 | `Data[Any]` | `None` (no type casting) 512 | `Data[None]` | `None` (no type casting) 513 | `Data[int]` | `numpy.int64` 514 | `Data[int \| str]` | `numpy.int64` 515 | `Data[numpy.int32]` | `numpy.int32` 516 | `Data[L["datetime64[ns]"]]` | `numpy.dtype(" 531 | Click to see all imports 532 | 533 | ```python 534 | from typing import Any, Annotated as Ann 535 | from pandas_dataclasses import Data 536 | ``` 537 | 538 | 539 | Type hint | Inferred name 540 | --- | --- 541 | `Data[Any]` | (field name) 542 | `Ann[Data[Any], ..., "spam"]` | (field name) 543 | `Ann[Data[Any], "spam"]` | `"spam"` 544 | `Ann[Data[Any], "spam", "ham"]` | `"spam"` 545 | `Ann[Data[Any], "spam"] \| Ann[str, "ham"]` | `"spam"` 546 | `Ann[Data[Any], "spam"] \| Ann[Data[float], "ham"]` | `"spam"` 547 | `Ann[Data[Any], "{.name}"` | `"{.name}".format(obj)` 548 | `Ann[Data[Any], ("spam", "ham")]` | `("spam", "ham")` 549 | `Ann[Data[Any], ("{.name}", "ham")]` | `("{.name}".format(obj), "ham")` 550 | 551 | where `obj` is a dataclass object that is expected to have `obj.name`. 
552 | 553 | ### Development roadmap 554 | 555 | Release version | Features 556 | --- | --- 557 | v0.5 | Support for dynamic naming 558 | v0.6 | Support for extension array and dtype 559 | v0.7 | Support for hierarchical columns 560 | v0.8 | Support for mypy and callable pandas factory 561 | v0.9 | Support for Ellipsis (`...`) as an alias of field name 562 | v0.10 | Support for union type in type hints 563 | v0.11 | Support for Python 3.11 and drop support for Python 3.7 564 | v0.12 | Support for multiple items received in a single field 565 | v1.0 | Initial major release (freezing public features until v2.0) 566 | 567 | 568 | [dataclass]: https://docs.python.org/3/library/dataclasses.html 569 | [format string]: https://docs.python.org/3/library/string.html#format-string-syntax 570 | [mypy]: http://www.mypy-lang.org 571 | [NumPy]: https://numpy.org 572 | [pandas]: https://pandas.pydata.org 573 | [Pylance]: https://github.com/microsoft/pylance-release 574 | [Pyright]: https://github.com/microsoft/pyright 575 | -------------------------------------------------------------------------------- /docs/build: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eu 2 | 3 | sphinx-apidoc -efMT -d 2 -o docs/_apidoc pandas_dataclasses 4 | sphinx-build -a docs docs/_build 5 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # project information 2 | author = "Akio Taniguchi" 3 | copyright = "2021-2025 Akio Taniguchi" 4 | 5 | 6 | # general configuration 7 | add_module_names = False 8 | autodoc_member_order = "bysource" 9 | autodoc_typehints = "both" 10 | autodoc_typehints_format = "short" 11 | exclude_patterns = [ 12 | "_build", 13 | "Thumbs.db", 14 | ".DS_Store", 15 | ] 16 | extensions = [ 17 | "myst_parser", 18 | "sphinx.ext.autodoc", 19 | "sphinx.ext.autosummary", 20 | "sphinx.ext.napoleon", 21 | 
"sphinx.ext.viewcode", 22 | ] 23 | myst_heading_anchors = 3 24 | templates_path = ["_templates"] 25 | 26 | 27 | # options for HTML output 28 | html_theme = "pydata_sphinx_theme" 29 | html_theme_options = { 30 | "github_url": "https://github.com/astropenguin/pandas-dataclasses", 31 | "logo": {"text": "pandas-dataclasses"}, 32 | } 33 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | ``` 3 | 4 | ```{toctree} 5 | --- 6 | hidden: 7 | --- 8 | 9 | Home 10 | Package guide <_apidoc/pandas_dataclasses> 11 | ``` 12 | -------------------------------------------------------------------------------- /pandas_dataclasses/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "As", 3 | "AsDataFrame", 4 | "AsFrame", 5 | "AsSeries", 6 | "Attr", 7 | "Data", 8 | "Index", 9 | "Multiple", 10 | "Spec", 11 | "Tag", 12 | "asdataframe", 13 | "asframe", 14 | "aspandas", 15 | "asseries", 16 | "core", 17 | "extras", 18 | ] 19 | __version__ = "1.0.0" 20 | 21 | 22 | # submodules 23 | from . import core 24 | from . import extras 25 | from .core.api import * 26 | from .core.specs import * 27 | from .core.tagging import * 28 | from .core.typing import * 29 | from .extras.hints import * 30 | from .extras.new import * 31 | 32 | 33 | # aliases 34 | AsDataFrame = AsFrame 35 | """Alias of ``core.mixins.AsFrame``.""" 36 | 37 | 38 | asdataframe = asframe 39 | """Alias of ``core.aspandas.asframe``.""" 40 | -------------------------------------------------------------------------------- /pandas_dataclasses/core/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["api", "specs", "tagging", "typing"] 2 | 3 | 4 | from . import api 5 | from . import specs 6 | from . import tagging 7 | from . 
@overload
def aspandas(obj: DataClassOf[TPandas, PAny], *, factory: None = None) -> TPandas: ...


@overload
def aspandas(obj: DataClass[PAny], *, factory: Callable[..., TPandas]) -> TPandas: ...


def aspandas(obj: Any, *, factory: Any = None) -> Any:
    """Create a DataFrame or Series object from a dataclass object.

    Which data structure is created will be determined by a factory
    defined as the ``__pandas_factory__`` attribute in the original
    dataclass of ``obj`` or the ``factory`` argument. If a factory is
    a function, it must have an annotation of the return type.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the data creation.

    Keyword Args:
        factory: Class or function for the DataFrame or Series creation.
            It must take the same parameters as ``pandas.DataFrame``
            or ``pandas.Series``, and return an object of it or its
            subclass. If it is a function, it must have an annotation
            of the return type. If passed, it will be preferentially
            used even if the original dataclass of ``obj`` has the
            ``__pandas_factory__`` attribute.

    Returns:
        DataFrame or Series object that complies with the original dataclass.

    Raises:
        ValueError: Raised if no factory is found or the return type
            cannot be inferred from a factory when it is a function.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    if factory is None:
        factory = spec.factory

    if factory is None:
        raise ValueError("Could not find any factory.")

    if isinstance(factory, FunctionType):
        # Fix: a function factory without a return annotation used to
        # raise KeyError here; the documented contract is ValueError.
        return_ = factory.__annotations__.get("return")

        if return_ is None:
            raise ValueError("Could not infer an object type.")
    else:
        return_ = factory

    origin = get_origin(return_) or return_

    # Fix: a non-class annotation (e.g. an unevaluated string forward
    # reference) used to raise TypeError from issubclass; report it as
    # the documented ValueError instead.
    if not isinstance(origin, type):
        raise ValueError("Could not infer an object type.")

    if issubclass(origin, pd.DataFrame):
        return asframe(obj, factory=factory)
    elif issubclass(origin, pd.Series):
        return asseries(obj, factory=factory)
    else:
        raise ValueError("Could not infer an object type.")
@overload
def asframe(obj: DataClassOf[TFrame, PAny], *, factory: None = None) -> TFrame: ...


@overload
def asframe(obj: DataClass[PAny], *, factory: Callable[..., TFrame]) -> TFrame: ...


@overload
def asframe(obj: DataClass[PAny], *, factory: None = None) -> pd.DataFrame: ...


def asframe(obj: Any, *, factory: Any = None) -> Any:
    """Create a DataFrame object from a dataclass object.

    The return type will be determined by a factory defined as the
    ``__pandas_factory__`` attribute in the original dataclass of
    ``obj`` or the ``factory`` argument. If neither is specified,
    it defaults to ``pandas.DataFrame``.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the DataFrame creation.

    Keyword Args:
        factory: Class or function for the DataFrame creation.
            It must take the same parameters as ``pandas.DataFrame``,
            and return an object of it or its subclass. If passed, it
            will be preferentially used even if the original dataclass
            of ``obj`` has the ``__pandas_factory__`` attribute.

    Returns:
        DataFrame object that complies with the original dataclass.

    """
    # Fill the field specifications with the values of the object.
    spec = Spec.from_dataclass(type(obj)) @ obj

    # Explicit factory wins; otherwise fall back to the dataclass
    # factory, then to plain pandas.DataFrame.
    if factory is None:
        factory = spec.factory or pd.DataFrame

    dataframe = factory(
        data=get_data(spec),
        index=get_index(spec),
        columns=get_columns(spec),
    )

    dataframe.attrs.update(get_attrs(spec))
    return squeeze(dataframe)


@overload
def asseries(obj: DataClassOf[TSeries, PAny], *, factory: None = None) -> TSeries: ...


@overload
def asseries(obj: DataClass[PAny], *, factory: Callable[..., TSeries]) -> TSeries: ...


@overload
def asseries(obj: DataClass[PAny], *, factory: None = None) -> "pd.Series[Any]": ...


def asseries(obj: Any, *, factory: Any = None) -> Any:
    """Create a Series object from a dataclass object.

    The return type will be determined by a factory defined as the
    ``__pandas_factory__`` attribute in the original dataclass of
    ``obj`` or the ``factory`` argument. If neither is specified,
    it defaults to ``pandas.Series``.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the Series creation.

    Keyword Args:
        factory: Class or function for the Series creation.
            It must take the same parameters as ``pandas.Series``,
            and return an object of it or its subclass. If passed, it
            will be preferentially used even if the original dataclass
            of ``obj`` has the ``__pandas_factory__`` attribute.

    Returns:
        Series object that complies with the original dataclass.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    if factory is None:
        factory = spec.factory or pd.Series

    data = get_data(spec)
    index = get_index(spec)

    # A Series holds a single data field: use the first (and only)
    # entry if any data exists, otherwise create an empty Series.
    if data:
        label, values = next(iter(data.items()))
        series = factory(data=values, index=index, name=label)
    else:
        series = factory(index=index)

    series.attrs.update(get_attrs(spec))
    return squeeze(series)
def get_attrs(spec: Spec) -> dict[Hashable, Any]:
    """Derive attributes from a specification."""
    attrs: dict[Hashable, Any] = {}

    for field in spec.fields.of(Tag.ATTR):
        for key, val in items(field):
            attrs[key] = val

    return attrs


def get_columns(spec: Spec) -> Optional[pd.MultiIndex]:
    """Derive columns from a specification."""
    fields = spec.fields.of(Tag.DATA)

    # Columns only exist when at least one data field carries a
    # dictionary (multi-level) name; otherwise pandas' defaults apply.
    if not fields or (names := name(fields)) is None:
        return None

    return pd.MultiIndex.from_tuples(map(name, fields), names=names)


def get_data(spec: Spec) -> dict[Hashable, Any]:
    """Derive data from a specification."""
    return {
        key: ensure(val, field.dtype)
        for field in spec.fields.of(Tag.DATA)
        for key, val in items(field)
    }


def get_index(spec: Spec) -> Optional[pd.MultiIndex]:
    """Derive index from a specification."""
    fields = spec.fields.of(Tag.INDEX)

    if not fields:
        return None

    data = {
        key: ensure(val, field.dtype)
        for field in fields
        for key, val in items(field)
    }

    # Broadcasting lets scalar index values repeat to the data length.
    return pd.MultiIndex.from_arrays(
        np.broadcast_arrays(*data.values()),
        names=data.keys(),
    )


def ensure(data: Any, dtype: Optional[str]) -> Any:
    """Ensure data to be 1D and have given data type."""
    values = data if is_list_like(data) else [data]

    if isinstance(values, (pd.Index, pd.Series)):
        return type(values)(values, dtype=dtype, copy=False)  # type: ignore

    return pd.array(values, dtype=dtype, copy=False)


def items(field: Field) -> Iterable[tuple[Hashable, Any]]:
    """Generate default(s) of a field specification."""
    if field.has(Tag.MULTIPLE):
        # A multiple-item field stores a mapping of names to values.
        yield from field.default.items()
    else:
        yield name(field), field.default


@overload
def name(fields: Field) -> Hashable: ...


@overload
def name(fields: Fields) -> Optional[Hashable]: ...


def name(fields: Any) -> Any:
    """Derive name of a field(s) specification."""
    if isinstance(fields, Field):
        label = fields.name
        return tuple(label.values()) if isinstance(label, dict) else label

    if isinstance(fields, Fields):
        # Level names come from the first field with a dictionary name.
        for field in fields:
            if isinstance(field.name, dict):
                return tuple(field.name.keys())

    return None


def squeeze(data: TPandas) -> TPandas:
    """Drop levels of an index and columns if possible."""
    if data.index.nlevels == 1:
        data.index = data.index.get_level_values(0)

    if isinstance(data, pd.Series):
        return data  # type: ignore

    if data.columns.nlevels == 1:
        data.columns = data.columns.get_level_values(0)

    return data
@dataclass(frozen=True)
class Field:
    """Specification of a field."""

    id: str
    """Identifier of the field."""

    name: Union[Hashable, HashDict]
    """Name of the field data."""

    tags: tuple[Tag, ...] = ()
    """Tags of the field."""

    type: Optional[Any] = None
    """Type or type hint of the field data."""

    dtype: Optional[str] = None
    """Data type of the field data."""

    default: Any = None
    """Default value of the field data."""

    def has(self, tag: Tag) -> bool:
        """Check if the specification has a tag."""
        # Nonzero intersection with any own tag means the tag applies.
        return any(bool(tag & own) for own in self.tags)

    def update(self, obj: Any) -> Self:
        """Update the specification by an object."""
        return replace(
            self,
            name=format(self.name, obj),
            default=getattr(obj, self.id, self.default),
        )


class Fields(tuple[Field, ...]):
    """List of field specifications with selectors."""

    def of(self, tag: Tag) -> Self:
        """Select only fields that have a tag."""
        return type(self)(field for field in self if field.has(tag))

    def update(self, obj: Any) -> Self:
        """Update the specifications by an object."""
        return type(self)(field.update(obj) for field in self)


@dataclass(frozen=True)
class Spec:
    """Specification of pandas data creation."""

    name: Optional[str] = None
    """Name of the specification."""

    origin: Optional[type] = None
    """Original dataclass of the specification."""

    factory: Optional[Callable[..., Pandas]] = None
    """Factory for pandas data creation."""

    fields: Fields = Fields()
    """List of field specifications."""

    @classmethod
    def from_dataclass(cls, dataclass: type) -> Self:
        """Create a specification from a data class."""
        # Resolve (possibly string) field type hints in place first.
        eval_field_types(dataclass)

        return cls(
            name=dataclass.__name__,
            origin=dataclass,
            factory=getattr(dataclass, "__pandas_factory__", None),
            fields=Fields(map(convert_field, fields_(dataclass))),
        )

    def update(self, obj: Any) -> Self:
        """Update the specification by an object."""
        # A non-instance argument is passed to the original dataclass
        # constructor as a single positional argument.
        if self.origin is not None and not isinstance(obj, self.origin):
            obj = self.origin(obj)

        return replace(self, fields=self.fields.update(obj))

    def __matmul__(self, obj: Any) -> Self:
        """Alias of the update method."""
        return self.update(obj)
@lru_cache(maxsize=None)
def convert_field(field_: Field_[Any]) -> Field:
    """Convert a dataclass field to a field specification."""
    tp = field_.type

    return Field(
        id=field_.name,
        # The first non-tag annotation (if any) names the data;
        # otherwise the field identifier itself is used.
        name=get_first(tp, field_.name),
        tags=get_tags(tp, Tag.FIELD),
        type=tp,
        dtype=get_dtype(tp),
        default=field_.default,
    )


@lru_cache(maxsize=None)
def eval_field_types(dataclass: type) -> None:
    """Evaluate field types of a dataclass (updates fields in place)."""
    hints = get_type_hints(dataclass, include_extras=True)

    for field_ in fields_(dataclass):
        field_.type = hints[field_.name]


def format(obj: TAny, by: Any) -> TAny:
    """Format a string or nested strings in an object."""
    if isinstance(obj, str):
        return type(obj)(obj.format(by))  # type: ignore

    if isinstance(obj, (list, tuple)):
        return type(obj)(format(item, by) for item in obj)  # type: ignore

    if isinstance(obj, dict):
        # Both keys and values may contain format strings.
        return type(obj)(
            (format(key, by), format(val, by)) for key, val in obj.items()
        )  # type: ignore

    return obj
def get_dtype(tp: Any) -> Optional[str]:
    """Extract a data type of NumPy or pandas from a type hint."""
    data_tp = get_tagged(tp, Tag.DATA | Tag.INDEX, True)

    if data_tp is None:
        return None

    dtype = get_tagged(data_tp, Tag.DTYPE)

    # Any and NoneType both mean "let pandas infer the dtype".
    if dtype is None or dtype is Any or dtype is type(None):
        return None

    # For unions (e.g. Optional[...]) the first member is the dtype.
    if is_union(dtype):
        dtype = get_args(dtype)[0]

    # Literal["int64"] and similar carry the dtype as their first value.
    if get_origin(dtype) is Literal:
        dtype = get_args(dtype)[0]

    return pandas_dtype(dtype).name


def get_first(tp: Any, default: Any = None) -> Optional[Any]:
    """Extract the first nontag annotation from a type hint."""
    nontags = get_nontags(tp, Tag.FIELD)

    if not nontags:
        return default

    first = nontags[0]

    # Ellipsis is an explicit alias of the field name.
    return default if first is Ellipsis else first
class Tag(Flag):
    """Collection of tags for annotating types."""

    ATTR = auto()
    """Tag for a type specifying an attribute field."""

    DATA = auto()
    """Tag for a type specifying a data field."""

    INDEX = auto()
    """Tag for a type specifying an index field."""

    DTYPE = auto()
    """Tag for a type specifying a data type."""

    MULTIPLE = auto()
    """Tag for a type specifying a multiple-item field."""

    FIELD = ATTR | DATA | INDEX
    """Union of field-related tags."""

    ANY = FIELD | DTYPE | MULTIPLE
    """Union of all tags."""

    def annotates(self, tp: Any) -> bool:
        """Check if the tag annotates a type hint."""
        found = (arg for arg in get_args(tp) if type(self).creates(arg))
        return bool(self & type(self).union(found))

    @classmethod
    def creates(cls, obj: Any) -> TypeGuard[Self]:
        """Check if Tag is the type of an object."""
        return isinstance(obj, cls)

    @classmethod
    def union(cls, tags: Iterable[Self]) -> Self:
        """Create a tag as an union of tags."""
        return reduce(or_, tags, cls(0))

    def __repr__(self) -> str:
        """Return the bracket-style string of the tag."""
        return str(self)

    def __str__(self) -> str:
        """Return the bracket-style string of the tag."""
        return f"<{str(self.name).lower()}>"


def gen_annotated(tp: Any) -> Iterable[Any]:
    """Generate all annotated types in a type hint (depth-first)."""
    args = get_args(tp)

    if get_origin(tp) is Annotated:
        yield tp
        # The first argument is the annotated type itself; it may
        # contain further annotated types.
        yield from gen_annotated(args[0])
    else:
        for arg in args:
            yield from gen_annotated(arg)


def get_tagged(
    tp: Any,
    bound: Tag = Tag.ANY,
    keep_annotations: bool = False,
) -> Optional[Any]:
    """Extract the first tagged type from a type hint."""
    for tagged in gen_annotated(tp):
        if bound.annotates(tagged):
            return tagged if keep_annotations else get_args(tagged)[0]

    return None


def get_tags(tp: Any, bound: Tag = Tag.ANY) -> tuple[Tag, ...]:
    """Extract all tags from the first tagged type."""
    tagged = get_tagged(tp, bound, True)
    return tuple(arg for arg in get_args(tagged)[1:] if Tag.creates(arg))


def get_nontags(tp: Any, bound: Tag = Tag.ANY) -> tuple[Any, ...]:
    """Extract all except tags from the first tagged type."""
    tagged = get_tagged(tp, bound, True)
    return tuple(arg for arg in get_args(tagged)[1:] if not Tag.creates(arg))
__all__ = [
    "DataClass",
    "DataClassOf",
    "HashDict",
    "Pandas",
    "PAny",
    "TAny",
    "TFrame",
    "TPandas",
    "TSeries",
    "is_union",
]


# standard library
import types
from dataclasses import Field
from typing import Any, Callable, ClassVar, Hashable, Protocol, TypeVar, Union


# dependencies
from pandas import DataFrame, Series
from typing_extensions import ParamSpec, get_origin


HashDict = dict[Hashable, Hashable]
"""Type hint for dictionary of hashable keys and values."""

Pandas = Union[DataFrame, "Series[Any]"]
"""Type hint for any pandas object."""

PAny = ParamSpec("PAny")
"""Parameter specification variable for any function."""

TAny = TypeVar("TAny")
"""Type variable for any class."""

TFrame = TypeVar("TFrame", bound=DataFrame)
"""Type variable for pandas DataFrame."""

TPandas = TypeVar("TPandas", bound=Pandas)
"""Type variable for any class of pandas object."""

TSeries = TypeVar("TSeries", bound="Series[Any]")
"""Type variable for pandas Series (of any dtype)."""


class DataClass(Protocol[PAny]):
    """Protocol for any dataclass object.

    The ``__init__`` parameters are captured by the ``PAny`` parameter
    specification so that factories can be typed against them.
    """

    __dataclass_fields__: ClassVar[dict[str, Field[Any]]]

    def __init__(self, *args: PAny.args, **kwargs: PAny.kwargs) -> None: ...


class DataClassOf(Protocol[TPandas, PAny]):
    """Protocol for any dataclass object with a factory.

    Same as ``DataClass`` but additionally requires the
    ``__pandas_factory__`` attribute that returns ``TPandas``.
    """

    __dataclass_fields__: ClassVar[dict[str, Field[Any]]]
    __pandas_factory__: Callable[..., TPandas]

    def __init__(self, *args: PAny.args, **kwargs: PAny.kwargs) -> None: ...
def is_union(tp: Any) -> bool:
    """Check if a type hint is a union of types."""
    if get_origin(tp) is Union:
        return True

    # Python 3.10+ only: X | Y creates a types.UnionType instance,
    # which is not reported as typing.Union by get_origin everywhere.
    UnionType = getattr(types, "UnionType", None)
    return UnionType is not None and isinstance(tp, UnionType)
class classproperty:
    """Class property decorator dedicated to ``As.new``.

    Unlike a regular property, the getter receives the owning class
    (not the instance), so ``SomeClass.new`` works on the class itself.
    """

    def __init__(self, fget: Callable[..., Any]) -> None:
        self.fget = fget

    def __get__(
        self,
        obj: Any,
        cls: type[DataClassOf[TPandas, PAny]],
    ) -> Callable[PAny, TPandas]:
        # The instance (obj) is intentionally ignored: access always
        # goes through the class so ``new`` behaves like a classmethod.
        return self.fget(cls)  # type: ignore


class As(Generic[TPandas]):
    """Pandas data creation by a classmethod (``new``)."""

    __pandas_factory__: Callable[..., TPandas]
    """Factory for pandas data creation."""

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Add a pandas factory to an inheriting class."""
        # Pop the optional ``factory`` class keyword before delegating,
        # so Generic/Protocol machinery does not receive it.
        factory = kwargs.pop("factory", None)
        cls.__pandas_factory__ = factory or get_factory(cls)
        super().__init_subclass__(**kwargs)

    @classproperty
    def new(cls) -> MethodType:
        """Return a classmethod for pandas data creation."""

        # Mirror the dataclass __init__ signature, but with the return
        # annotation replaced by the pandas type of this class, so that
        # IDEs show the correct signature for ``new``.
        sig = signature(cls.__init__)  # type: ignore
        sig = sig.replace(return_annotation=get_return(cls))

        def new(cls: Any, *args: Any, **kwargs: Any) -> Any:
            """Create a pandas data from dataclass arguments."""
            return aspandas(cls(*args, **kwargs))

        setattr(new, "__signature__", sig)
        return MethodType(new, cls)


AsFrame = As[pd.DataFrame]
"""Alias of ``As[pandas.DataFrame]``."""


AsSeries = As["pd.Series[Any]"]
"""Alias of ``As[pandas.Series[Any]]``."""


def get_factory(cls: Any) -> Callable[..., Any]:
    """Extract a pandas factory from a class."""
    factory = get_return(cls)

    if callable(factory):
        return factory

    # special handling for AsSeries: the type parameter is a string
    # forward reference ("pd.Series[Any]"), not a callable class
    if factory == "pd.Series[Any]":
        return pd.Series

    raise TypeError("Factory must be callable.")


def get_return(cls: Any) -> Union[type[Any], str]:
    """Extract a return type from a class.

    Scans the original (unsubstituted) base classes for ``As[...]``
    and returns its type parameter, or the forward-reference string
    if the parameter was written as a string.
    """
    for base in getattr(cls, "__orig_bases__", ()):
        if get_origin(base) is not As:
            continue

        tp = get_args(base)[0]

        if isinstance(tp, ForwardRef):
            return tp.__forward_arg__
        else:
            return tp  # type: ignore

    raise TypeError("Could not find any return type.")
# test dataclass and object
def name(meas: str, stat: str) -> dict[str, str]:
    """Return a two-level column name: (Measurement, Statistic)."""
    return {"Measurement": meas, "Statistic": stat}


@dataclass
class Weather:
    """Weather information.

    Names written as format strings (e.g. ``{.temp_unit}``) are filled
    with the instance attribute values when a spec is updated by an
    object (see ``pandas_dataclasses.core.specs``).
    """

    year: Ann[Index[int], "Year"]
    """Year of the measured time."""

    month: Ann[Index[int], "Month"]
    """Month of the measured time."""

    temp_avg: Ann[Data[float], name("Temperature ({.temp_unit})", "Average")]
    """Monthly average temperature with given units."""

    temp_max: Ann[Data[float], name("Temperature ({.temp_unit})", "Maximum")]
    """Monthly maximum temperature with given units."""

    wind_avg: Ann[Data[float], name("Wind speed ({.wind_unit})", "Average")]
    """Monthly average wind speed with given units."""

    wind_max: Ann[Data[float], name("Wind speed ({.wind_unit})", "Maximum")]
    """Monthly maximum wind speed with given units."""

    loc: Ann[Attr[str], "Location"] = "Tokyo"
    """Name of the measured location."""

    lon: Ann[Attr[float], "Longitude ({.lon_unit})"] = 139.69167
    """Longitude at the measured location."""

    lat: Ann[Attr[float], "Latitude ({.lat_unit})"] = 35.68944
    """Latitude at the measured location."""

    temp_unit: str = "deg C"
    """Units of the temperature."""

    wind_unit: str = "m/s"
    """Units of the wind speed."""

    lon_unit: str = "deg"
    """Units of the longitude."""

    lat_unit: str = "deg"
    """Units of the latitude."""

    attrs: Multiple[Attr[Any]] = field(default_factory=dict)
    """Other attributes."""


# Positional arguments map to the index fields (year, month) and the
# four data fields in declaration order; attribute fields keep defaults.
weather = Weather(
    [2020, 2020, 2021, 2021, 2022],
    [1, 7, 1, 7, 1],
    [7.1, 24.3, 5.4, 25.9, 4.9],
    [11.1, 27.7, 10.3, 30.3, 9.4],
    [2.4, 3.1, 2.3, 2.4, 2.6],
    [8.8, 10.2, 10.7, 9.0, 8.8],
)
# test data: Weather field specs filled with the values of the
# ``weather`` instance (``@`` is an alias of ``Spec.update``)
spec = Spec.from_dataclass(Weather) @ weather


# test functions
def test_asframe() -> None:
    """asframe should reproduce the expected DataFrame exactly."""
    assert_frame_equal(asframe(weather), df_weather_true)


def test_asseries() -> None:
    """asseries should reproduce the expected Series exactly."""
    assert_series_equal(asseries(weather), ser_weather_true)


def test_get_attrs() -> None:
    """Derived attributes should match the attr field specs in order."""
    attrs = get_attrs(spec)

    for i, (key, val) in enumerate(attrs.items()):
        assert key == spec.fields.of(Tag.ATTR)[i].name
        assert val == spec.fields.of(Tag.ATTR)[i].default


def test_get_columns() -> None:
    """Derived columns should match the data field names and levels."""
    columns = cast(pd.MultiIndex, get_columns(spec))

    for i in range(len(columns)):
        assert columns[i] == name(spec.fields.of(Tag.DATA)[i])

    assert columns.names == name(spec.fields.of(Tag.DATA))  # type: ignore


def test_get_data() -> None:
    """Derived data should match names, dtypes, and values of data fields."""
    data = get_data(spec)

    for i, (key, val) in enumerate(data.items()):
        assert key == name(spec.fields.of(Tag.DATA)[i])
        assert val.dtype.name == spec.fields.of(Tag.DATA)[i].dtype
        assert (val == spec.fields.of(Tag.DATA)[i].default).all()


def test_get_index() -> None:
    """Derived index levels should match the index field specs."""
    index = cast(pd.MultiIndex, get_index(spec))

    for i in range(index.nlevels):
        level = index.get_level_values(i)
        assert level.name == spec.fields.of(Tag.INDEX)[i].name
        assert level.dtype.name == spec.fields.of(Tag.INDEX)[i].dtype
        assert (level == spec.fields.of(Tag.INDEX)[i].default).all()
"int64" 43 | assert field.default is MISSING 44 | 45 | 46 | def test_month_updated() -> None: 47 | field = spec_updated.fields.of(Tag.INDEX)[1] 48 | 49 | assert field.id == "month" 50 | assert field.tags == (Tag.INDEX,) 51 | assert field.name == "Month" 52 | assert field.dtype == "int64" 53 | assert field.default == weather.month 54 | 55 | 56 | def test_temp_avg() -> None: 57 | field = spec.fields.of(Tag.DATA)[0] 58 | 59 | assert field.id == "temp_avg" 60 | assert field.tags == (Tag.DATA,) 61 | assert field.name == name("Temperature ({.temp_unit})", "Average") 62 | assert field.dtype == "float64" 63 | assert field.default is MISSING 64 | 65 | 66 | def test_temp_avg_updated() -> None: 67 | field = spec_updated.fields.of(Tag.DATA)[0] 68 | 69 | assert field.id == "temp_avg" 70 | assert field.tags == (Tag.DATA,) 71 | assert field.name == name("Temperature (deg C)", "Average") 72 | assert field.dtype == "float64" 73 | assert field.default == weather.temp_avg 74 | 75 | 76 | def test_temp_max() -> None: 77 | field = spec.fields.of(Tag.DATA)[1] 78 | 79 | assert field.id == "temp_max" 80 | assert field.tags == (Tag.DATA,) 81 | assert field.name == name("Temperature ({.temp_unit})", "Maximum") 82 | assert field.dtype == "float64" 83 | assert field.default is MISSING 84 | 85 | 86 | def test_temp_max_updated() -> None: 87 | field = spec_updated.fields.of(Tag.DATA)[1] 88 | 89 | assert field.id == "temp_max" 90 | assert field.tags == (Tag.DATA,) 91 | assert field.name == name("Temperature (deg C)", "Maximum") 92 | assert field.dtype == "float64" 93 | assert field.default == weather.temp_max 94 | 95 | 96 | def test_wind_avg() -> None: 97 | field = spec.fields.of(Tag.DATA)[2] 98 | 99 | assert field.id == "wind_avg" 100 | assert field.tags == (Tag.DATA,) 101 | assert field.name == name("Wind speed ({.wind_unit})", "Average") 102 | assert field.dtype == "float64" 103 | assert field.default is MISSING 104 | 105 | 106 | def test_wind_avg_updated() -> None: 107 | field = 
spec_updated.fields.of(Tag.DATA)[2] 108 | 109 | assert field.id == "wind_avg" 110 | assert field.tags == (Tag.DATA,) 111 | assert field.name == name("Wind speed (m/s)", "Average") 112 | assert field.dtype == "float64" 113 | assert field.default == weather.wind_avg 114 | 115 | 116 | def test_wind_max() -> None: 117 | field = spec.fields.of(Tag.DATA)[3] 118 | 119 | assert field.id == "wind_max" 120 | assert field.tags == (Tag.DATA,) 121 | assert field.name == name("Wind speed ({.wind_unit})", "Maximum") 122 | assert field.dtype == "float64" 123 | assert field.default is MISSING 124 | 125 | 126 | def test_wind_max_updated() -> None: 127 | field = spec_updated.fields.of(Tag.DATA)[3] 128 | 129 | assert field.id == "wind_max" 130 | assert field.tags == (Tag.DATA,) 131 | assert field.name == name("Wind speed (m/s)", "Maximum") 132 | assert field.dtype == "float64" 133 | assert field.default == weather.wind_max 134 | 135 | 136 | def test_loc() -> None: 137 | field = spec.fields.of(Tag.ATTR)[0] 138 | 139 | assert field.id == "loc" 140 | assert field.tags == (Tag.ATTR,) 141 | assert field.name == "Location" 142 | assert field.default == Weather.loc 143 | 144 | 145 | def test_loc_updated() -> None: 146 | field = spec_updated.fields.of(Tag.ATTR)[0] 147 | 148 | assert field.id == "loc" 149 | assert field.tags == (Tag.ATTR,) 150 | assert field.name == "Location" 151 | assert field.default == weather.loc 152 | 153 | 154 | def test_lon() -> None: 155 | field = spec.fields.of(Tag.ATTR)[1] 156 | 157 | assert field.id == "lon" 158 | assert field.tags == (Tag.ATTR,) 159 | assert field.name == "Longitude ({.lon_unit})" 160 | assert field.default == Weather.lon 161 | 162 | 163 | def test_lon_updated() -> None: 164 | field = spec_updated.fields.of(Tag.ATTR)[1] 165 | 166 | assert field.id == "lon" 167 | assert field.tags == (Tag.ATTR,) 168 | assert field.name == "Longitude (deg)" 169 | assert field.default == weather.lon 170 | 171 | 172 | def test_lat() -> None: 173 | field = 
spec.fields.of(Tag.ATTR)[2] 174 | 175 | assert field.id == "lat" 176 | assert field.tags == (Tag.ATTR,) 177 | assert field.name == "Latitude ({.lat_unit})" 178 | assert field.default == Weather.lat 179 | 180 | 181 | def test_lat_updated() -> None: 182 | field = spec_updated.fields.of(Tag.ATTR)[2] 183 | 184 | assert field.id == "lat" 185 | assert field.tags == (Tag.ATTR,) 186 | assert field.name == "Latitude (deg)" 187 | assert field.default == weather.lat 188 | 189 | 190 | def test_attrs() -> None: 191 | field = spec.fields.of(Tag.ATTR)[3] 192 | 193 | assert field.id == "attrs" 194 | assert field.tags == (Tag.ATTR, Tag.MULTIPLE) 195 | assert field.name == "attrs" 196 | assert field.default is MISSING 197 | 198 | 199 | def test_attrs_updated() -> None: 200 | field = spec_updated.fields.of(Tag.ATTR)[3] 201 | 202 | assert field.id == "attrs" 203 | assert field.tags == (Tag.ATTR, Tag.MULTIPLE) 204 | assert field.name == "attrs" 205 | assert field.default == weather.attrs 206 | 207 | 208 | def test_factory() -> None: 209 | assert spec.factory is None 210 | 211 | 212 | def test_name() -> None: 213 | assert spec.name == Weather.__name__ 214 | 215 | 216 | def test_origin() -> None: 217 | assert spec.origin is Weather 218 | -------------------------------------------------------------------------------- /tests/test_core_tagging.py: -------------------------------------------------------------------------------- 1 | # standard library 2 | from typing import Any, Union 3 | 4 | 5 | # dependencies 6 | from pandas_dataclasses import Attr, Data, Index, Tag 7 | from pandas_dataclasses.core.tagging import get_tags 8 | from pytest import mark 9 | from typing_extensions import Annotated as Ann 10 | 11 | 12 | # test data 13 | testdata: list[tuple[Any, tuple[Tag, ...]]] = [ 14 | (Attr[Any], (Tag.ATTR,)), # type: ignore 15 | (Data[Any], (Tag.DATA,)), 16 | (Index[Any], (Tag.INDEX,)), 17 | (Any, ()), 18 | (Ann[Attr[Any], "attr"], (Tag.ATTR,)), # type: ignore 19 | (Ann[Data[Any], "data"], 
(Tag.DATA,)),
    (Ann[Index[Any], "index"], (Tag.INDEX,)),
    (Ann[Any, "other"], ()),
    (Union[Ann[Attr[Any], "attr"], Ann[Any, "any"]], (Tag.ATTR,)),  # type: ignore
    (Union[Ann[Data[Any], "data"], Ann[Any, "any"]], (Tag.DATA,)),
    (Union[Ann[Index[Any], "index"], Ann[Any, "any"]], (Tag.INDEX,)),
    (Union[Ann[Any, "other"], Ann[Any, "any"]], ()),
]


# test functions
@mark.parametrize("tp, tags", testdata)
def test_get_tags(tp: Any, tags: tuple[Tag, ...]) -> None:
    """get_tags extracts the expected tags from each hint."""
    assert get_tags(tp) == tags
--------------------------------------------------------------------------------
/tests/test_core_typing.py:
--------------------------------------------------------------------------------
"""Tests for dtype and first-annotation extraction from type hints."""

# standard library
from typing import Annotated as Ann, Any, Hashable, Literal as L, Optional, Union


# dependencies
import numpy as np
import pandas as pd
from pandas_dataclasses import Attr, Data, Index

# NOTE(review): the helpers under test live in core.specs although this
# module is named test_core_typing -- confirm the intended home module
from pandas_dataclasses.core.specs import get_dtype, get_first
from pytest import mark


# test data
# pairs of (type hint, expected dtype): Any/None mean "no dtype"; NumPy
# literals ("i8"), pandas extension names ("boolean", "category"), and
# Annotated/Union wrappers around the hint are all covered
testdata_dtype: list[tuple[Any, Any]] = [
    (Data[Any], None),
    (Data[None], None),
    (Data[int], np.dtype("i8")),
    (Data[Union[int, None]], np.dtype("i8")),
    (Data[L["i8"]], np.dtype("i8")),
    (Data[L["boolean"]], pd.BooleanDtype()),
    (Data[L["category"]], pd.CategoricalDtype()),
    (Index[Any], None),
    (Index[None], None),
    (Index[int], np.dtype("i8")),
    (Index[Union[int, None]], np.dtype("i8")),
    (Index[L["i8"]], np.dtype("i8")),
    (Index[L["boolean"]], pd.BooleanDtype()),
    (Index[L["category"]], pd.CategoricalDtype()),
    (Ann[Data[float], "data"], np.dtype("f8")),
    (Ann[Index[float], "index"], np.dtype("f8")),
    (Union[Ann[Data[float], "data"], Ann[Any, "any"]], np.dtype("f8")),
    (Union[Ann[Index[float], "index"], Ann[Any, "any"]], np.dtype("f8")),
]

# pairs of (type hint, expected first annotation): the first Annotated
# metadata is returned only for tagged hints; hints whose first metadata
# is Ellipsis fall back to the supplied default (None)
testdata_first: list[tuple[Any, Optional[Hashable]]] = [
    (Attr[Any], None),  # type: ignore
    (Data[Any], None),
    (Index[Any], None),
    (Any, None),
    (Ann[Attr[Any], "attr"], "attr"),  # type: ignore
    (Ann[Data[Any], "data"], "data"),
    (Ann[Index[Any], "index"], "index"),
    (Ann[Any, "other"], None),
    (Ann[Attr[Any], ..., "attr"], None),  # type: ignore
    (Ann[Data[Any], ..., "data"], None),
    (Ann[Index[Any], ..., "index"], None),
    (Ann[Any, ..., "other"], None),
    (Union[Ann[Attr[Any], "attr"], Ann[Any, "any"]], "attr"),  # type: ignore
    (Union[Ann[Data[Any], "data"], Ann[Any, "any"]], "data"),
    (Union[Ann[Index[Any], "index"], Ann[Any, "any"]], "index"),
    (Union[Ann[Any, "other"], Ann[Any, "any"]], None),
]


# test functions
@mark.parametrize("tp, dtype", testdata_dtype)
def test_get_dtype(tp: Any, dtype: Optional[str]) -> None:
    """get_dtype resolves each hint to the expected NumPy/pandas dtype."""
    assert get_dtype(tp) == dtype


@mark.parametrize("tp, first", testdata_first)
def test_get_first(tp: Any, first: Optional[Any]) -> None:
    """get_first returns the first Annotated metadata (or the default)."""
    assert get_first(tp, None) == first
--------------------------------------------------------------------------------
/tests/test_extras_new.py:
--------------------------------------------------------------------------------
"""Tests for the new() constructor added by AsFrame/AsSeries/As[...]."""

# standard library
from dataclasses import dataclass
from typing import Any


# dependencies
import pandas as pd
from pandas.testing import assert_frame_equal, assert_series_equal
from pandas_dataclasses import As, AsFrame, AsSeries
from .data import Weather, weather, df_weather_true, ser_weather_true


# test data
def factory(*args: Any, **kwargs: Any) -> pd.Series:  # type: ignore
    """Plain function factory (used by FactorySeries)."""
    return pd.Series(*args, **kwargs)  # type: ignore


class UserFrame(pd.DataFrame):
    # user-defined DataFrame subclass for the As[UserFrame] test
    pass


class UserSeries(pd.Series):  # type: ignore
    # user-defined Series subclass for the As[UserSeries] test
    pass


@dataclass
class Frame(Weather, AsFrame):
    pass

@dataclass 32 | class CustomFrame(Weather, As[UserFrame]): 33 | pass 34 | 35 | 36 | @dataclass 37 | class Series(Weather, AsSeries): 38 | pass 39 | 40 | 41 | @dataclass 42 | class CustomSeries(Weather, As[UserSeries]): 43 | pass 44 | 45 | 46 | @dataclass 47 | class FactorySeries(Weather, AsSeries, factory=factory): 48 | pass 49 | 50 | 51 | @dataclass 52 | class FloatSeries(Weather, As["pd.Series[float]"], factory=pd.Series): 53 | pass 54 | 55 | 56 | # test functions 57 | def test_frame() -> None: 58 | df_weather = Frame.new( 59 | year=weather.year, 60 | month=weather.month, 61 | temp_avg=weather.temp_avg, 62 | temp_max=weather.temp_max, 63 | wind_avg=weather.wind_avg, 64 | wind_max=weather.wind_max, 65 | ) 66 | 67 | assert isinstance(df_weather, pd.DataFrame) 68 | assert_frame_equal(df_weather, df_weather_true) 69 | 70 | 71 | def test_custom_frame() -> None: 72 | df_weather = CustomFrame.new( 73 | year=weather.year, 74 | month=weather.month, 75 | temp_avg=weather.temp_avg, 76 | temp_max=weather.temp_max, 77 | wind_avg=weather.wind_avg, 78 | wind_max=weather.wind_max, 79 | ) 80 | 81 | assert isinstance(df_weather, UserFrame) 82 | assert_frame_equal(df_weather, df_weather_true, check_frame_type=False) 83 | 84 | 85 | def test_series() -> None: 86 | ser_weather = Series.new( 87 | year=weather.year, 88 | month=weather.month, 89 | temp_avg=weather.temp_avg, 90 | temp_max=weather.temp_max, 91 | wind_avg=weather.wind_avg, 92 | wind_max=weather.wind_max, 93 | ) 94 | 95 | assert isinstance(ser_weather, pd.Series) 96 | assert_series_equal(ser_weather, ser_weather_true) 97 | 98 | 99 | def test_custom_series() -> None: 100 | ser_weather = CustomSeries.new( 101 | year=weather.year, 102 | month=weather.month, 103 | temp_avg=weather.temp_avg, 104 | temp_max=weather.temp_max, 105 | wind_avg=weather.wind_avg, 106 | wind_max=weather.wind_max, 107 | ) 108 | 109 | assert isinstance(ser_weather, UserSeries) 110 | assert_series_equal(ser_weather, ser_weather_true, 
check_series_type=False) 111 | 112 | 113 | def test_factory_series() -> None: 114 | ser_weather = FactorySeries.new( 115 | year=weather.year, 116 | month=weather.month, 117 | temp_avg=weather.temp_avg, 118 | temp_max=weather.temp_max, 119 | wind_avg=weather.wind_avg, 120 | wind_max=weather.wind_max, 121 | ) 122 | 123 | assert isinstance(ser_weather, pd.Series) 124 | assert_series_equal(ser_weather, ser_weather_true) 125 | 126 | 127 | def test_float_series() -> None: 128 | ser_weather = FloatSeries.new( 129 | year=weather.year, 130 | month=weather.month, 131 | temp_avg=weather.temp_avg, 132 | temp_max=weather.temp_max, 133 | wind_avg=weather.wind_avg, 134 | wind_max=weather.wind_max, 135 | ) 136 | 137 | assert isinstance(ser_weather, pd.Series) 138 | assert_series_equal(ser_weather, ser_weather_true, check_series_type=False) 139 | --------------------------------------------------------------------------------