├── .devcontainer └── devcontainer.json ├── .github └── workflows │ ├── docs.yaml │ ├── pypi.yml │ └── tests.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── docs ├── build ├── conf.py └── index.md ├── pandas_dataclasses ├── __init__.py ├── core │ ├── __init__.py │ ├── api.py │ ├── specs.py │ ├── tagging.py │ └── typing.py ├── extras │ ├── __init__.py │ ├── hints.py │ └── new.py └── py.typed ├── pyproject.toml ├── tests ├── __init__.py ├── data.py ├── test_core_api.py ├── test_core_specs.py ├── test_core_tagging.py ├── test_core_typing.py └── test_extras_new.py └── uv.lock /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pandas-dataclasses", 3 | "image": "ghcr.io/astral-sh/uv:python3.12-bookworm", 4 | "runArgs": [ 5 | "--name=pandas-dataclasses" 6 | ], 7 | "containerEnv": { 8 | "UV_PROJECT_ENVIRONMENT": "/usr/local" 9 | }, 10 | "postCreateCommand": "uv sync --frozen", 11 | "customizations": { 12 | "vscode": { 13 | "extensions": [ 14 | "ms-python.black-formatter", 15 | "streetsidesoftware.code-spell-checker", 16 | "tamasfe.even-better-toml" 17 | ], 18 | "settings": { 19 | "python.languageServer": "Pylance", 20 | "[python]": { 21 | "editor.defaultFormatter": "ms-python.black-formatter", 22 | "editor.formatOnSave": true 23 | } 24 | } 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | jobs: 9 | job: 10 | name: Docs 11 | runs-on: ubuntu-latest 12 | container: ghcr.io/astral-sh/uv:python3.12-bookworm 13 | env: 14 | UV_PROJECT_ENVIRONMENT: /usr/local 15 | steps: 16 | - uses: actions/checkout@v4 17 | - run: echo "::set-output name=tag::${GITHUB_REF##*/}" 18 | id: tag 19 | - run: uv sync --frozen 20 | - run: docs/build 21 | - uses: 
peaceiris/actions-gh-pages@v4 22 | with: 23 | destination_dir: ${{ steps.tag.outputs.tag }} 24 | github_token: ${{ secrets.GITHUB_TOKEN }} 25 | publish_dir: ./docs/_build 26 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: PyPI 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | 8 | jobs: 9 | job: 10 | name: PyPI 11 | runs-on: ubuntu-latest 12 | container: ghcr.io/astral-sh/uv:python3.12-bookworm 13 | env: 14 | UV_PROJECT_ENVIRONMENT: /usr/local 15 | UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v4 18 | - run: uv build && uv publish 19 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | job: 13 | name: Test (${{ matrix.env }}) 14 | runs-on: ubuntu-latest 15 | container: ghcr.io/astral-sh/uv:${{ matrix.env }} 16 | env: 17 | PYTHON_DIRS: docs tests pandas_dataclasses 18 | UV_PROJECT_ENVIRONMENT: /usr/local 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | env: 23 | - python3.9-bookworm 24 | - python3.10-bookworm 25 | - python3.11-bookworm 26 | - python3.12-bookworm 27 | - python3.13-bookworm 28 | steps: 29 | - uses: actions/checkout@v4 30 | - run: uv sync --frozen 31 | - run: black --check ${PYTHON_DIRS} 32 | - run: pyright ${PYTHON_DIRS} 33 | - run: pytest -v 34 | - run: docs/build 35 | if: ${{ matrix.env != 'python3.9-bookworm' }} 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at 
https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_apidoc/ 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/#use-with-ide 115 | .pdm.toml 116 | 117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .env 129 | .venv 130 | env/ 131 | venv/ 132 | ENV/ 133 | env.bak/ 134 | venv.bak/ 135 | 136 | # Spyder project settings 137 | .spyderproject 138 | .spyproject 139 | 140 | # Rope project settings 141 | .ropeproject 142 | 143 | # mkdocs documentation 144 | /site 145 | 146 | # mypy 147 | .mypy_cache/ 148 | .dmypy.json 149 | dmypy.json 150 | 151 | # Pyre type checker 152 | .pyre/ 153 | 154 | # pytype static type analyzer 155 | .pytype/ 156 | 157 | # Cython debug symbols 158 | cython_debug/ 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged 
into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 165 | #.idea/ 166 | 167 | # End of https://www.toptal.com/developers/gitignore/api/python 168 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: pandas-dataclasses 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 9 | type: software 10 | authors: 11 | - given-names: Akio 12 | family-names: Taniguchi 13 | email: taniguchi.akio@gmail.com 14 | affiliation: Kitami Institute of Technology 15 | orcid: 'https://orcid.org/0000-0002-9695-6183' 16 | identifiers: 17 | - type: doi 18 | value: 10.5281/zenodo.10652375 19 | repository-code: 'https://github.com/astropenguin/pandas-dataclasses' 20 | url: 'https://astropenguin.github.io/pandas-dataclasses/v1.0.0' 21 | abstract: pandas data creation by data classes 22 | keywords: 23 | - python 24 | - dataclasses 25 | - pandas 26 | - specifications 27 | - typing 28 | license: MIT 29 | version: 1.0.0 30 | date-released: '2025-01-01' 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2025 Akio Taniguchi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to 
do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pandas-dataclasses 2 | 3 | [![Release](https://img.shields.io/pypi/v/pandas-dataclasses?label=Release&color=cornflowerblue&style=flat-square)](https://pypi.org/project/pandas-dataclasses/) 4 | [![Python](https://img.shields.io/pypi/pyversions/pandas-dataclasses?label=Python&color=cornflowerblue&style=flat-square)](https://pypi.org/project/pandas-dataclasses/) 5 | [![Downloads](https://img.shields.io/pypi/dm/pandas-dataclasses?label=Downloads&color=cornflowerblue&style=flat-square)](https://pepy.tech/project/pandas-dataclasses) 6 | [![DOI](https://img.shields.io/badge/DOI-10.5281/zenodo.6127352-cornflowerblue?style=flat-square)](https://doi.org/10.5281/zenodo.6127352) 7 | [![Tests](https://img.shields.io/github/actions/workflow/status/astropenguin/pandas-dataclasses/tests.yml?label=Tests&style=flat-square)](https://github.com/astropenguin/pandas-dataclasses/actions) 8 | 9 | pandas data creation by data classes 10 | 11 | ## Overview 12 | 13 | pandas-dataclass makes it easy to create [pandas] data (DataFrame and Series) by specifying their data types, attributes, and names using the Python's dataclass: 14 | 15 |
16 | Click to see all imports 17 | 18 | ```python 19 | from dataclasses import dataclass 20 | from pandas_dataclasses import AsFrame, Data, Index 21 | ``` 22 |
23 | 24 | ```python 25 | @dataclass 26 | class Weather(AsFrame): 27 | """Weather information.""" 28 | 29 | year: Index[int] 30 | month: Index[int] 31 | temp: Data[float] 32 | wind: Data[float] 33 | 34 | 35 | df = Weather.new( 36 | [2020, 2020, 2021, 2021, 2022], 37 | [1, 7, 1, 7, 1], 38 | [7.1, 24.3, 5.4, 25.9, 4.9], 39 | [2.4, 3.1, 2.3, 2.4, 2.6], 40 | ) 41 | ``` 42 | 43 | where `df` will become a DataFrame object like: 44 | 45 | ``` 46 | temp wind 47 | year month 48 | 2020 1 7.1 2.4 49 | 7 24.3 3.1 50 | 2021 1 5.4 2.3 51 | 7 25.9 2.4 52 | 2022 1 4.9 2.6 53 | ``` 54 | 55 | ### Features 56 | 57 | - Specifying data types and names of each element in pandas data 58 | - Specifying metadata stored in pandas data attributes (attrs) 59 | - Support for hierarchical index and columns 60 | - Support for custom factory for data creation 61 | - Support for full [dataclass] features 62 | - Support for static type check by [mypy] and [Pyright] ([Pylance]) 63 | 64 | ### Installation 65 | 66 | ```bash 67 | pip install pandas-dataclasses 68 | ``` 69 | 70 | ## How it works 71 | 72 | pandas-dataclasses provides you the following features: 73 | 74 | - Type hints for dataclass fields (`Attr`, `Data`, `Index`) to specify the data type and name of each element in pandas data 75 | - Mix-in classes for dataclasses (`As`, `AsFrame`, `AsSeries`) to create pandas data by a classmethod (`new`) that takes the same arguments as dataclass initialization 76 | 77 | When you call `new`, it will first create a dataclass object and then create a Series or DataFrame object from the dataclass object according the type hints and values in it. 78 | In the example above, `df = Weather.new(...)` is thus equivalent to: 79 | 80 |
81 | Click to see all imports 82 | 83 | ```python 84 | from pandas_dataclasses import asframe 85 | ``` 86 |
87 | 88 | ```python 89 | obj = Weather([2020, ...], [1, ...], [7.1, ...], [2.4, ...]) 90 | df = asframe(obj) 91 | ``` 92 | 93 | where `asframe` is a conversion function. 94 | pandas-dataclasses does not touch the dataclass object creation itself; this allows you to fully customize your dataclass before conversion by the dataclass features (`field`, `__post_init__`, ...). 95 | 96 | ## Basic usage 97 | 98 | ### DataFrame creation 99 | 100 | As shown in the example above, a dataclass that has the `AsFrame` (or `AsDataFrame` as an alias) mix-in will create DataFrame objects: 101 | 102 |
103 | Click to see all imports 104 | 105 | ```python 106 | from dataclasses import dataclass 107 | from pandas_dataclasses import AsFrame, Data, Index 108 | ``` 109 |
110 | 111 | ```python 112 | @dataclass 113 | class Weather(AsFrame): 114 | """Weather information.""" 115 | 116 | year: Index[int] 117 | month: Index[int] 118 | temp: Data[float] 119 | wind: Data[float] 120 | 121 | 122 | df = Weather.new(...) 123 | ``` 124 | 125 | where fields typed by `Index` are *index fields*, each value of which will become an index or a part of a hierarchical index of a DataFrame object. 126 | Fields typed by `Data` are *data fields*, each value of which will become a data column of a DataFrame object. 127 | Fields typed by other types are just ignored in the DataFrame creation. 128 | 129 | Each data or index will be cast to the data type specified in a type hint like `Index[int]`. 130 | Use `Any` or `None` (like `Index[Any]`) if you do not want type casting. 131 | See also [data typing rules](#data-typing-rules) for more examples. 132 | 133 | By default, a field name (i.e. an argument name) is used for the name of corresponding data or index. 134 | See also [custom naming](#custom-naming) and [naming rules](#naming-rules) if you want customization. 135 | 136 | ### Series creation 137 | 138 | A dataclass that has the `AsSeries` mix-in will create Series objects: 139 | 140 |
141 | Click to see all imports 142 | 143 | ```python 144 | from dataclasses import dataclass 145 | from pandas_dataclasses import AsSeries, Data, Index 146 | ``` 147 |
148 | 149 | ```python 150 | @dataclass 151 | class Weather(AsSeries): 152 | """Weather information.""" 153 | 154 | year: Index[int] 155 | month: Index[int] 156 | temp: Data[float] 157 | 158 | 159 | ser = Weather.new(...) 160 | ``` 161 | 162 | Unlike `AsFrame`, the second and subsequent data fields are ignored in the Series creation even if they exist. 163 | Other rules are the same as for the DataFrame creation. 164 | 165 | ## Advanced usage 166 | 167 | ### Metadata storing 168 | 169 | Fields typed by `Attr` are *attribute fields*, each value of which will become an item of attributes of a DataFrame or a Series object: 170 | 171 |
172 | Click to see all imports 173 | 174 | ```python 175 | from dataclasses import dataclass 176 | from pandas_dataclasses import AsFrame, Attr, Data, Index 177 | ``` 178 |
179 | 180 | ```python 181 | @dataclass 182 | class Weather(AsFrame): 183 | """Weather information.""" 184 | 185 | year: Index[int] 186 | month: Index[int] 187 | temp: Data[float] 188 | wind: Data[float] 189 | loc: Attr[str] = "Tokyo" 190 | lon: Attr[float] = 139.69167 191 | lat: Attr[float] = 35.68944 192 | 193 | 194 | df = Weather.new(...) 195 | ``` 196 | 197 | where `df.attrs` will become like: 198 | 199 | ```python 200 | {"loc": "Tokyo", "lon": 139.69167, "lat": 35.68944} 201 | ``` 202 | 203 | ### Custom naming 204 | 205 | The name of attribute, data, or index can be explicitly specified by adding a hashable annotation to the corresponding type: 206 | 207 |
208 | Click to see all imports 209 | 210 | ```python 211 | from dataclasses import dataclass 212 | from typing import Annotated as Ann 213 | from pandas_dataclasses import AsFrame, Attr, Data, Index 214 | ``` 215 |
216 | 217 | ```python 218 | @dataclass 219 | class Weather(AsFrame): 220 | """Weather information.""" 221 | 222 | year: Ann[Index[int], "Year"] 223 | month: Ann[Index[int], "Month"] 224 | temp: Ann[Data[float], "Temperature (deg C)"] 225 | wind: Ann[Data[float], "Wind speed (m/s)"] 226 | loc: Ann[Attr[str], "Location"] = "Tokyo" 227 | lon: Ann[Attr[float], "Longitude (deg)"] = 139.69167 228 | lat: Ann[Attr[float], "Latitude (deg)"] = 35.68944 229 | 230 | 231 | df = Weather.new(...) 232 | ``` 233 | 234 | where `df` and `df.attrs` will become like: 235 | 236 | ``` 237 | Temperature (deg C) Wind speed (m/s) 238 | Year Month 239 | 2020 1 7.1 2.4 240 | 7 24.3 3.1 241 | 2021 1 5.4 2.3 242 | 7 25.9 2.4 243 | 2022 1 4.9 2.6 244 | ``` 245 | 246 | ```python 247 | {"Location": "Tokyo", "Longitude (deg)": 139.69167, "Latitude (deg)": 35.68944} 248 | ``` 249 | 250 | If an annotation is a [format string], it will be formatted by a dataclass object before the data creation: 251 | 252 |
253 | Click to see all imports 254 | 255 | ```python 256 | from dataclasses import dataclass 257 | from typing import Annotated as Ann 258 | from pandas_dataclasses import AsFrame, Data, Index 259 | ``` 260 |
261 | 262 | ```python 263 | @dataclass 264 | class Weather(AsFrame): 265 | """Weather information.""" 266 | 267 | year: Ann[Index[int], "Year"] 268 | month: Ann[Index[int], "Month"] 269 | temp: Ann[Data[float], "Temperature ({.temp_unit})"] 270 | wind: Ann[Data[float], "Wind speed ({.wind_unit})"] 271 | temp_unit: str = "deg C" 272 | wind_unit: str = "m/s" 273 | 274 | 275 | df = Weather.new(..., temp_unit="deg F", wind_unit="km/h") 276 | ``` 277 | 278 | where units of the temperature and the wind speed will be dynamically updated (see also [naming rules](#naming-rules)). 279 | 280 | ### Hierarchical columns 281 | 282 | Adding tuple annotations to data fields will create DataFrame objects with hierarchical columns: 283 | 284 |
285 | Click to see all imports 286 | 287 | ```python 288 | from dataclasses import dataclass 289 | from typing import Annotated as Ann 290 | from pandas_dataclasses import AsFrame, Data, Index 291 | ``` 292 |
293 | 294 | ```python 295 | @dataclass 296 | class Weather(AsFrame): 297 | """Weather information.""" 298 | 299 | year: Ann[Index[int], "Year"] 300 | month: Ann[Index[int], "Month"] 301 | temp_avg: Ann[Data[float], ("Temperature (deg C)", "Average")] 302 | temp_max: Ann[Data[float], ("Temperature (deg C)", "Maximum")] 303 | wind_avg: Ann[Data[float], ("Wind speed (m/s)", "Average")] 304 | wind_max: Ann[Data[float], ("Wind speed (m/s)", "Maximum")] 305 | 306 | 307 | df = Weather.new(...) 308 | ``` 309 | 310 | where `df` will become like: 311 | 312 | ``` 313 | Temperature (deg C) Wind speed (m/s) 314 | Average Maximum Average Maximum 315 | Year Month 316 | 2020 1 7.1 11.1 2.4 8.8 317 | 7 24.3 27.7 3.1 10.2 318 | 2021 1 5.4 10.3 2.3 10.7 319 | 7 25.9 30.3 2.4 9.0 320 | 2022 1 4.9 9.4 2.6 8.8 321 | ``` 322 | 323 | Column names can be (explicitly) specified by dictionary annotations: 324 | 325 |
326 | Click to see all imports 327 | 328 | ```python 329 | from dataclasses import dataclass 330 | from typing import Annotated as Ann 331 | from pandas_dataclasses import AsFrame, Data, Index 332 | ``` 333 |
334 | 335 | ```python 336 | def name(meas: str, stat: str) -> dict[str, str]: 337 | """Create a dictionary annotation for a column name.""" 338 | return {"Measurement": meas, "Statistic": stat} 339 | 340 | 341 | @dataclass 342 | class Weather(AsFrame): 343 | """Weather information.""" 344 | 345 | year: Ann[Index[int], "Year"] 346 | month: Ann[Index[int], "Month"] 347 | temp_avg: Ann[Data[float], name("Temperature (deg C)", "Average")] 348 | temp_max: Ann[Data[float], name("Temperature (deg C)", "Maximum")] 349 | wind_avg: Ann[Data[float], name("Wind speed (m/s)", "Average")] 350 | wind_max: Ann[Data[float], name("Wind speed (m/s)", "Maximum")] 351 | 352 | 353 | df = Weather.new(...) 354 | ``` 355 | 356 | where `df` will become like: 357 | 358 | ``` 359 | Measurement Temperature (deg C) Wind speed (m/s) 360 | Statistic Average Maximum Average Maximum 361 | Year Month 362 | 2020 1 7.1 11.1 2.4 8.8 363 | 7 24.3 27.7 3.1 10.2 364 | 2021 1 5.4 10.3 2.3 10.7 365 | 7 25.9 30.3 2.4 9.0 366 | 2022 1 4.9 9.4 2.6 8.8 367 | ``` 368 | 369 | If a tuple or dictionary annotation has [format string]s, they will also be formatted by a dataclass object (see also [naming rules](#naming-rules)). 370 | 371 | ### Multiple-item fields 372 | 373 | Multiple (and possibly extra) attributes, data, or indices can be added by fields with corresponding type hints wrapped by `Multiple`: 374 | 375 |
376 | Click to see all imports 377 | 378 | ```python 379 | from dataclasses import dataclass 380 | from pandas_dataclasses import AsFrame, Data, Index, Multiple 381 | ``` 382 |
383 | 384 | 385 | ```python 386 | @dataclass 387 | class Weather(AsFrame): 388 | """Weather information.""" 389 | 390 | year: Index[int] 391 | month: Index[int] 392 | temp: Data[float] 393 | wind: Data[float] 394 | extra_index: Multiple[Index[int]] 395 | extra_data: Multiple[Data[float]] 396 | 397 | 398 | df = Weather.new( 399 | [2020, 2020, 2021, 2021, 2022], 400 | [1, 7, 1, 7, 1], 401 | [7.1, 24.3, 5.4, 25.9, 4.9], 402 | [2.4, 3.1, 2.3, 2.4, 2.6], 403 | extra_index={ 404 | "day": [1, 1, 1, 1, 1], 405 | "week": [2, 2, 4, 3, 5], 406 | }, 407 | extra_data={ 408 | "humid": [65, 89, 57, 83, 52], 409 | "press": [1013.8, 1006.2, 1014.1, 1007.7, 1012.7], 410 | }, 411 | ) 412 | ``` 413 | 414 | where `df` will become like: 415 | 416 | ``` 417 | temp wind humid press 418 | year month day week 419 | 2020 1 1 2 7.1 2.4 65.0 1013.8 420 | 7 1 2 24.3 3.1 89.0 1006.2 421 | 2021 1 1 4 5.4 2.3 57.0 1014.1 422 | 7 1 3 25.9 2.4 83.0 1007.7 423 | 2022 1 1 5 4.9 2.6 52.0 1012.7 424 | ``` 425 | 426 | If multiple items of the same name exist, the last-defined one will be finally used. 427 | For example, if the `extra_index` field contains `"month": [2, 8, 2, 8, 2]`, the values given by the `month` field will be overwritten. 428 | 429 | ### Custom pandas factory 430 | 431 | A custom class can be specified as a factory for the Series or DataFrame creation by `As`, the generic version of `AsFrame` and `AsSeries`. 432 | Note that the custom class must be a subclass of either `pandas.Series` or `pandas.DataFrame`: 433 | 434 |
435 | Click to see all imports 436 | 437 | ```python 438 | import pandas as pd 439 | from dataclasses import dataclass 440 | from pandas_dataclasses import As, Data, Index 441 | ``` 442 |
443 | 444 | ```python 445 | class CustomSeries(pd.Series): 446 | """Custom pandas Series.""" 447 | 448 | pass 449 | 450 | 451 | @dataclass 452 | class Temperature(As[CustomSeries]): 453 | """Temperature information.""" 454 | 455 | year: Index[int] 456 | month: Index[int] 457 | temp: Data[float] 458 | 459 | 460 | ser = Temperature.new(...) 461 | ``` 462 | 463 | where `ser` is statically regarded as `CustomSeries` and will become a `CustomSeries` object. 464 | 465 | Generic Series type (`Series[T]`) is also supported, however, it is only for static the type check in the current pandas versions. 466 | In such cases, you can additionally give a factory that must work in runtime as a class argument: 467 | 468 |
469 | Click to see all imports 470 | 471 | ```python 472 | import pandas as pd 473 | from dataclasses import dataclass 474 | from pandas_dataclasses import As, Data, Index 475 | ``` 476 |
477 | 478 | ```python 479 | @dataclass 480 | class Temperature(As["pd.Series[float]"], factory=pd.Series): 481 | """Temperature information.""" 482 | 483 | year: Index[int] 484 | month: Index[int] 485 | temp: Data[float] 486 | 487 | 488 | ser = Temperature.new(...) 489 | ``` 490 | 491 | where `ser` is statically regarded as `Series[float]` but will become a `Series` object in runtime. 492 | 493 | ## Appendix 494 | 495 | ### Data typing rules 496 | 497 | The data type (dtype) of data or index is determined from the first `Data` or `Index` type of the corresponding field, respectively. 498 | The following table shows how the data type is inferred: 499 | 500 |
501 | Click to see all imports 502 | 503 | ```python 504 | from typing import Any, Annotated as Ann, Literal as L 505 | from pandas_dataclasses import Data 506 | ``` 507 |
508 | 509 | Type hint | Inferred data type 510 | --- | --- 511 | `Data[Any]` | `None` (no type casting) 512 | `Data[None]` | `None` (no type casting) 513 | `Data[int]` | `numpy.int64` 514 | `Data[int \| str]` | `numpy.int64` 515 | `Data[numpy.int32]` | `numpy.int32` 516 | `Data[L["datetime64[ns]"]]` | `numpy.dtype(" 531 | Click to see all imports 532 | 533 | ```python 534 | from typing import Any, Annotated as Ann 535 | from pandas_dataclasses import Data 536 | ``` 537 | 538 | 539 | Type hint | Inferred name 540 | --- | --- 541 | `Data[Any]` | (field name) 542 | `Ann[Data[Any], ..., "spam"]` | (field name) 543 | `Ann[Data[Any], "spam"]` | `"spam"` 544 | `Ann[Data[Any], "spam", "ham"]` | `"spam"` 545 | `Ann[Data[Any], "spam"] \| Ann[str, "ham"]` | `"spam"` 546 | `Ann[Data[Any], "spam"] \| Ann[Data[float], "ham"]` | `"spam"` 547 | `Ann[Data[Any], "{.name}"` | `"{.name}".format(obj)` 548 | `Ann[Data[Any], ("spam", "ham")]` | `("spam", "ham")` 549 | `Ann[Data[Any], ("{.name}", "ham")]` | `("{.name}".format(obj), "ham")` 550 | 551 | where `obj` is a dataclass object that is expected to have `obj.name`. 
552 | 553 | ### Development roadmap 554 | 555 | Release version | Features 556 | --- | --- 557 | v0.5 | Support for dynamic naming 558 | v0.6 | Support for extension array and dtype 559 | v0.7 | Support for hierarchical columns 560 | v0.8 | Support for mypy and callable pandas factory 561 | v0.9 | Support for Ellipsis (`...`) as an alias of field name 562 | v0.10 | Support for union type in type hints 563 | v0.11 | Support for Python 3.11 and drop support for Python 3.7 564 | v0.12 | Support for multiple items received in a single field 565 | v1.0 | Initial major release (freezing public features until v2.0) 566 | 567 | 568 | [dataclass]: https://docs.python.org/3/library/dataclasses.html 569 | [format string]: https://docs.python.org/3/library/string.html#format-string-syntax 570 | [mypy]: http://www.mypy-lang.org 571 | [NumPy]: https://numpy.org 572 | [pandas]: https://pandas.pydata.org 573 | [Pylance]: https://github.com/microsoft/pylance-release 574 | [Pyright]: https://github.com/microsoft/pyright 575 | -------------------------------------------------------------------------------- /docs/build: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eu 2 | 3 | sphinx-apidoc -efMT -d 2 -o docs/_apidoc pandas_dataclasses 4 | sphinx-build -a docs docs/_build 5 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # project information 2 | author = "Akio Taniguchi" 3 | copyright = "2021-2025 Akio Taniguchi" 4 | 5 | 6 | # general configuration 7 | add_module_names = False 8 | autodoc_member_order = "bysource" 9 | autodoc_typehints = "both" 10 | autodoc_typehints_format = "short" 11 | exclude_patterns = [ 12 | "_build", 13 | "Thumbs.db", 14 | ".DS_Store", 15 | ] 16 | extensions = [ 17 | "myst_parser", 18 | "sphinx.ext.autodoc", 19 | "sphinx.ext.autosummary", 20 | "sphinx.ext.napoleon", 21 | 
"sphinx.ext.viewcode", 22 | ] 23 | myst_heading_anchors = 3 24 | templates_path = ["_templates"] 25 | 26 | 27 | # options for HTML output 28 | html_theme = "pydata_sphinx_theme" 29 | html_theme_options = { 30 | "github_url": "https://github.com/astropenguin/pandas-dataclasses", 31 | "logo": {"text": "pandas-dataclasses"}, 32 | } 33 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | ``` 3 | 4 | ```{toctree} 5 | --- 6 | hidden: 7 | --- 8 | 9 | Home 10 | Package guide <_apidoc/pandas_dataclasses> 11 | ``` 12 | -------------------------------------------------------------------------------- /pandas_dataclasses/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "As", 3 | "AsDataFrame", 4 | "AsFrame", 5 | "AsSeries", 6 | "Attr", 7 | "Data", 8 | "Index", 9 | "Multiple", 10 | "Spec", 11 | "Tag", 12 | "asdataframe", 13 | "asframe", 14 | "aspandas", 15 | "asseries", 16 | "core", 17 | "extras", 18 | ] 19 | __version__ = "1.0.0" 20 | 21 | 22 | # submodules 23 | from . import core 24 | from . import extras 25 | from .core.api import * 26 | from .core.specs import * 27 | from .core.tagging import * 28 | from .core.typing import * 29 | from .extras.hints import * 30 | from .extras.new import * 31 | 32 | 33 | # aliases 34 | AsDataFrame = AsFrame 35 | """Alias of ``core.mixins.AsFrame``.""" 36 | 37 | 38 | asdataframe = asframe 39 | """Alias of ``core.aspandas.asframe``.""" 40 | -------------------------------------------------------------------------------- /pandas_dataclasses/core/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["api", "specs", "tagging", "typing"] 2 | 3 | 4 | from . import api 5 | from . import specs 6 | from . import tagging 7 | from . 
@overload
def aspandas(obj: DataClassOf[TPandas, PAny], *, factory: None = None) -> TPandas: ...


@overload
def aspandas(obj: DataClass[PAny], *, factory: Callable[..., TPandas]) -> TPandas: ...


def aspandas(obj: Any, *, factory: Any = None) -> Any:
    """Create a DataFrame or Series object from a dataclass object.

    Which data structure is created will be determined by a factory
    defined as the ``__pandas_factory__`` attribute in the original
    dataclass of ``obj`` or the ``factory`` argument. If a factory is
    a function, it must have an annotation of the return type.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the data creation.

    Keyword Args:
        factory: Class or function for the DataFrame or Series creation.
            It must take the same parameters as ``pandas.DataFrame``
            or ``pandas.Series``, and return an object of it or its
            subclass. If it is a function, it must have an annotation
            of the return type. If passed, it will be preferentially
            used even if the original dataclass of ``obj`` has the
            ``__pandas_factory__`` attribute.

    Returns:
        DataFrame or Series object that complies with the original dataclass.

    Raises:
        ValueError: Raised if no factory is found or the return type
            cannot be inferred from a factory when it is a function.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    if factory is None:
        factory = spec.factory

    if factory is None:
        raise ValueError("Could not find any factory.")

    if isinstance(factory, FunctionType):
        # Fix: a function factory without a return annotation used to
        # raise KeyError here; the documented contract is ValueError.
        return_ = factory.__annotations__.get("return")

        if return_ is None:
            raise ValueError("Could not infer an object type.")
    else:
        return_ = factory

    origin = get_origin(return_) or return_

    # Fix: a non-class annotation (e.g. an unevaluated string forward
    # reference) used to raise TypeError from issubclass; report it as
    # the documented ValueError instead.
    if not isinstance(origin, type):
        raise ValueError("Could not infer an object type.")

    if issubclass(origin, pd.DataFrame):
        return asframe(obj, factory=factory)
    elif issubclass(origin, pd.Series):
        return asseries(obj, factory=factory)
    else:
        raise ValueError("Could not infer an object type.")
@overload
def asframe(obj: DataClassOf[TFrame, PAny], *, factory: None = None) -> TFrame: ...


@overload
def asframe(obj: DataClass[PAny], *, factory: Callable[..., TFrame]) -> TFrame: ...


@overload
def asframe(obj: DataClass[PAny], *, factory: None = None) -> pd.DataFrame: ...


def asframe(obj: Any, *, factory: Any = None) -> Any:
    """Create a DataFrame object from a dataclass object.

    The return type will be determined by a factory defined as the
    ``__pandas_factory__`` attribute in the original dataclass of
    ``obj`` or the ``factory`` argument. If neither is specified,
    it defaults to ``pandas.DataFrame``.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the DataFrame creation.

    Keyword Args:
        factory: Class or function for the DataFrame creation.
            It must take the same parameters as ``pandas.DataFrame``,
            and return an object of it or its subclass. If passed, it
            will be preferentially used even if the original dataclass
            of ``obj`` has the ``__pandas_factory__`` attribute.

    Returns:
        DataFrame object that complies with the original dataclass.

    """
    # Fill the field specifications with the values of the object.
    spec = Spec.from_dataclass(type(obj)) @ obj

    # Explicit factory wins; otherwise fall back to the dataclass
    # factory, then to plain pandas.DataFrame.
    if factory is None:
        factory = spec.factory or pd.DataFrame

    dataframe = factory(
        data=get_data(spec),
        index=get_index(spec),
        columns=get_columns(spec),
    )

    dataframe.attrs.update(get_attrs(spec))
    return squeeze(dataframe)


@overload
def asseries(obj: DataClassOf[TSeries, PAny], *, factory: None = None) -> TSeries: ...


@overload
def asseries(obj: DataClass[PAny], *, factory: Callable[..., TSeries]) -> TSeries: ...


@overload
def asseries(obj: DataClass[PAny], *, factory: None = None) -> "pd.Series[Any]": ...


def asseries(obj: Any, *, factory: Any = None) -> Any:
    """Create a Series object from a dataclass object.

    The return type will be determined by a factory defined as the
    ``__pandas_factory__`` attribute in the original dataclass of
    ``obj`` or the ``factory`` argument. If neither is specified,
    it defaults to ``pandas.Series``.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the Series creation.

    Keyword Args:
        factory: Class or function for the Series creation.
            It must take the same parameters as ``pandas.Series``,
            and return an object of it or its subclass. If passed, it
            will be preferentially used even if the original dataclass
            of ``obj`` has the ``__pandas_factory__`` attribute.

    Returns:
        Series object that complies with the original dataclass.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    if factory is None:
        factory = spec.factory or pd.Series

    data = get_data(spec)
    index = get_index(spec)

    # A Series holds a single data field: use the first (and only)
    # entry if any data exists, otherwise create an empty Series.
    if data:
        label, values = next(iter(data.items()))
        series = factory(data=values, index=index, name=label)
    else:
        series = factory(index=index)

    series.attrs.update(get_attrs(spec))
    return squeeze(series)
def get_attrs(spec: Spec) -> dict[Hashable, Any]:
    """Derive attributes from a specification."""
    attrs: dict[Hashable, Any] = {}

    for field in spec.fields.of(Tag.ATTR):
        for key, val in items(field):
            attrs[key] = val

    return attrs


def get_columns(spec: Spec) -> Optional[pd.MultiIndex]:
    """Derive columns from a specification."""
    fields = spec.fields.of(Tag.DATA)

    # Columns only exist when at least one data field carries a
    # dictionary (multi-level) name; otherwise pandas' defaults apply.
    if not fields or (names := name(fields)) is None:
        return None

    return pd.MultiIndex.from_tuples(map(name, fields), names=names)


def get_data(spec: Spec) -> dict[Hashable, Any]:
    """Derive data from a specification."""
    return {
        key: ensure(val, field.dtype)
        for field in spec.fields.of(Tag.DATA)
        for key, val in items(field)
    }


def get_index(spec: Spec) -> Optional[pd.MultiIndex]:
    """Derive index from a specification."""
    fields = spec.fields.of(Tag.INDEX)

    if not fields:
        return None

    data = {
        key: ensure(val, field.dtype)
        for field in fields
        for key, val in items(field)
    }

    # Broadcasting lets scalar index values repeat to the data length.
    return pd.MultiIndex.from_arrays(
        np.broadcast_arrays(*data.values()),
        names=data.keys(),
    )


def ensure(data: Any, dtype: Optional[str]) -> Any:
    """Ensure data to be 1D and have given data type."""
    values = data if is_list_like(data) else [data]

    if isinstance(values, (pd.Index, pd.Series)):
        return type(values)(values, dtype=dtype, copy=False)  # type: ignore

    return pd.array(values, dtype=dtype, copy=False)


def items(field: Field) -> Iterable[tuple[Hashable, Any]]:
    """Generate default(s) of a field specification."""
    if field.has(Tag.MULTIPLE):
        # A multiple-item field stores a mapping of names to values.
        yield from field.default.items()
    else:
        yield name(field), field.default


@overload
def name(fields: Field) -> Hashable: ...


@overload
def name(fields: Fields) -> Optional[Hashable]: ...


def name(fields: Any) -> Any:
    """Derive name of a field(s) specification."""
    if isinstance(fields, Field):
        label = fields.name
        return tuple(label.values()) if isinstance(label, dict) else label

    if isinstance(fields, Fields):
        # Level names come from the first field with a dictionary name.
        for field in fields:
            if isinstance(field.name, dict):
                return tuple(field.name.keys())

    return None


def squeeze(data: TPandas) -> TPandas:
    """Drop levels of an index and columns if possible."""
    if data.index.nlevels == 1:
        data.index = data.index.get_level_values(0)

    if isinstance(data, pd.Series):
        return data  # type: ignore

    if data.columns.nlevels == 1:
        data.columns = data.columns.get_level_values(0)

    return data
@dataclass(frozen=True)
class Field:
    """Specification of a field."""

    id: str
    """Identifier of the field."""

    name: Union[Hashable, HashDict]
    """Name of the field data."""

    tags: tuple[Tag, ...] = ()
    """Tags of the field."""

    type: Optional[Any] = None
    """Type or type hint of the field data."""

    dtype: Optional[str] = None
    """Data type of the field data."""

    default: Any = None
    """Default value of the field data."""

    def has(self, tag: Tag) -> bool:
        """Check if the specification has a tag."""
        # Nonzero intersection with any own tag means the tag applies.
        return any(bool(tag & own) for own in self.tags)

    def update(self, obj: Any) -> Self:
        """Update the specification by an object."""
        return replace(
            self,
            name=format(self.name, obj),
            default=getattr(obj, self.id, self.default),
        )


class Fields(tuple[Field, ...]):
    """List of field specifications with selectors."""

    def of(self, tag: Tag) -> Self:
        """Select only fields that have a tag."""
        return type(self)(field for field in self if field.has(tag))

    def update(self, obj: Any) -> Self:
        """Update the specifications by an object."""
        return type(self)(field.update(obj) for field in self)


@dataclass(frozen=True)
class Spec:
    """Specification of pandas data creation."""

    name: Optional[str] = None
    """Name of the specification."""

    origin: Optional[type] = None
    """Original dataclass of the specification."""

    factory: Optional[Callable[..., Pandas]] = None
    """Factory for pandas data creation."""

    fields: Fields = Fields()
    """List of field specifications."""

    @classmethod
    def from_dataclass(cls, dataclass: type) -> Self:
        """Create a specification from a data class."""
        # Resolve (possibly string) field type hints in place first.
        eval_field_types(dataclass)

        return cls(
            name=dataclass.__name__,
            origin=dataclass,
            factory=getattr(dataclass, "__pandas_factory__", None),
            fields=Fields(map(convert_field, fields_(dataclass))),
        )

    def update(self, obj: Any) -> Self:
        """Update the specification by an object."""
        # A non-instance argument is passed to the original dataclass
        # constructor as a single positional argument.
        if self.origin is not None and not isinstance(obj, self.origin):
            obj = self.origin(obj)

        return replace(self, fields=self.fields.update(obj))

    def __matmul__(self, obj: Any) -> Self:
        """Alias of the update method."""
        return self.update(obj)
@lru_cache(maxsize=None)
def convert_field(field_: Field_[Any]) -> Field:
    """Convert a dataclass field to a field specification."""
    tp = field_.type

    return Field(
        id=field_.name,
        # The first non-tag annotation (if any) names the data;
        # otherwise the field identifier itself is used.
        name=get_first(tp, field_.name),
        tags=get_tags(tp, Tag.FIELD),
        type=tp,
        dtype=get_dtype(tp),
        default=field_.default,
    )


@lru_cache(maxsize=None)
def eval_field_types(dataclass: type) -> None:
    """Evaluate field types of a dataclass (updates fields in place)."""
    hints = get_type_hints(dataclass, include_extras=True)

    for field_ in fields_(dataclass):
        field_.type = hints[field_.name]


def format(obj: TAny, by: Any) -> TAny:
    """Format a string or nested strings in an object."""
    if isinstance(obj, str):
        return type(obj)(obj.format(by))  # type: ignore

    if isinstance(obj, (list, tuple)):
        return type(obj)(format(item, by) for item in obj)  # type: ignore

    if isinstance(obj, dict):
        # Both keys and values may contain format strings.
        return type(obj)(
            (format(key, by), format(val, by)) for key, val in obj.items()
        )  # type: ignore

    return obj
def get_dtype(tp: Any) -> Optional[str]:
    """Extract a data type of NumPy or pandas from a type hint."""
    data_tp = get_tagged(tp, Tag.DATA | Tag.INDEX, True)

    if data_tp is None:
        return None

    dtype = get_tagged(data_tp, Tag.DTYPE)

    # Any and NoneType both mean "let pandas infer the dtype".
    if dtype is None or dtype is Any or dtype is type(None):
        return None

    # For unions (e.g. Optional[...]) the first member is the dtype.
    if is_union(dtype):
        dtype = get_args(dtype)[0]

    # Literal["int64"] and similar carry the dtype as their first value.
    if get_origin(dtype) is Literal:
        dtype = get_args(dtype)[0]

    return pandas_dtype(dtype).name


def get_first(tp: Any, default: Any = None) -> Optional[Any]:
    """Extract the first nontag annotation from a type hint."""
    nontags = get_nontags(tp, Tag.FIELD)

    if not nontags:
        return default

    first = nontags[0]

    # Ellipsis is an explicit alias of the field name.
    return default if first is Ellipsis else first
class Tag(Flag):
    """Collection of tags for annotating types."""

    ATTR = auto()
    """Tag for a type specifying an attribute field."""

    DATA = auto()
    """Tag for a type specifying a data field."""

    INDEX = auto()
    """Tag for a type specifying an index field."""

    DTYPE = auto()
    """Tag for a type specifying a data type."""

    MULTIPLE = auto()
    """Tag for a type specifying a multiple-item field."""

    FIELD = ATTR | DATA | INDEX
    """Union of field-related tags."""

    ANY = FIELD | DTYPE | MULTIPLE
    """Union of all tags."""

    def annotates(self, tp: Any) -> bool:
        """Check if the tag annotates a type hint."""
        found = (arg for arg in get_args(tp) if type(self).creates(arg))
        return bool(self & type(self).union(found))

    @classmethod
    def creates(cls, obj: Any) -> TypeGuard[Self]:
        """Check if Tag is the type of an object."""
        return isinstance(obj, cls)

    @classmethod
    def union(cls, tags: Iterable[Self]) -> Self:
        """Create a tag as an union of tags."""
        return reduce(or_, tags, cls(0))

    def __repr__(self) -> str:
        """Return the bracket-style string of the tag."""
        return str(self)

    def __str__(self) -> str:
        """Return the bracket-style string of the tag."""
        return f"<{str(self.name).lower()}>"


def gen_annotated(tp: Any) -> Iterable[Any]:
    """Generate all annotated types in a type hint (depth-first)."""
    args = get_args(tp)

    if get_origin(tp) is Annotated:
        yield tp
        # The first argument is the annotated type itself; it may
        # contain further annotated types.
        yield from gen_annotated(args[0])
    else:
        for arg in args:
            yield from gen_annotated(arg)


def get_tagged(
    tp: Any,
    bound: Tag = Tag.ANY,
    keep_annotations: bool = False,
) -> Optional[Any]:
    """Extract the first tagged type from a type hint."""
    for tagged in gen_annotated(tp):
        if bound.annotates(tagged):
            return tagged if keep_annotations else get_args(tagged)[0]

    return None


def get_tags(tp: Any, bound: Tag = Tag.ANY) -> tuple[Tag, ...]:
    """Extract all tags from the first tagged type."""
    tagged = get_tagged(tp, bound, True)
    return tuple(arg for arg in get_args(tagged)[1:] if Tag.creates(arg))


def get_nontags(tp: Any, bound: Tag = Tag.ANY) -> tuple[Any, ...]:
    """Extract all except tags from the first tagged type."""
    tagged = get_tagged(tp, bound, True)
    return tuple(arg for arg in get_args(tagged)[1:] if not Tag.creates(arg))
__all__ = [
    "DataClass",
    "DataClassOf",
    "HashDict",
    "Pandas",
    "PAny",
    "TAny",
    "TFrame",
    "TPandas",
    "TSeries",
    "is_union",
]


# standard library
import types
from dataclasses import Field
from typing import Any, Callable, ClassVar, Hashable, Protocol, TypeVar, Union


# dependencies
from pandas import DataFrame, Series
from typing_extensions import ParamSpec, get_origin


HashDict = dict[Hashable, Hashable]
"""Type hint for dictionary of hashable keys and values."""

Pandas = Union[DataFrame, "Series[Any]"]
"""Type hint for any pandas object."""

PAny = ParamSpec("PAny")
"""Parameter specification variable for any function."""

TAny = TypeVar("TAny")
"""Type variable for any class."""

TFrame = TypeVar("TFrame", bound=DataFrame)
"""Type variable for pandas DataFrame."""

TPandas = TypeVar("TPandas", bound=Pandas)
"""Type variable for any class of pandas object."""

TSeries = TypeVar("TSeries", bound="Series[Any]")
"""Type variable for pandas Series (of any dtype)."""


class DataClass(Protocol[PAny]):
    """Protocol for any dataclass object.

    The ``__init__`` parameters are captured by the ``PAny`` parameter
    specification so that factories can be typed against them.
    """

    __dataclass_fields__: ClassVar[dict[str, Field[Any]]]

    def __init__(self, *args: PAny.args, **kwargs: PAny.kwargs) -> None: ...


class DataClassOf(Protocol[TPandas, PAny]):
    """Protocol for any dataclass object with a factory.

    Same as ``DataClass`` but additionally requires the
    ``__pandas_factory__`` attribute that returns ``TPandas``.
    """

    __dataclass_fields__: ClassVar[dict[str, Field[Any]]]
    __pandas_factory__: Callable[..., TPandas]

    def __init__(self, *args: PAny.args, **kwargs: PAny.kwargs) -> None: ...
def is_union(tp: Any) -> bool:
    """Check if a type hint is a union of types."""
    if get_origin(tp) is Union:
        return True

    # Python 3.10+ only: X | Y creates a types.UnionType instance,
    # which is not reported as typing.Union by get_origin everywhere.
    UnionType = getattr(types, "UnionType", None)
    return UnionType is not None and isinstance(tp, UnionType)
class classproperty:
    """Class property decorator dedicated to ``As.new``.

    Unlike a regular property, the getter receives the owning class
    (not the instance), so ``SomeClass.new`` works on the class itself.
    """

    def __init__(self, fget: Callable[..., Any]) -> None:
        self.fget = fget

    def __get__(
        self,
        obj: Any,
        cls: type[DataClassOf[TPandas, PAny]],
    ) -> Callable[PAny, TPandas]:
        # The instance (obj) is intentionally ignored: access always
        # goes through the class so ``new`` behaves like a classmethod.
        return self.fget(cls)  # type: ignore


class As(Generic[TPandas]):
    """Pandas data creation by a classmethod (``new``)."""

    __pandas_factory__: Callable[..., TPandas]
    """Factory for pandas data creation."""

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Add a pandas factory to an inheriting class."""
        # Pop the optional ``factory`` class keyword before delegating,
        # so Generic/Protocol machinery does not receive it.
        factory = kwargs.pop("factory", None)
        cls.__pandas_factory__ = factory or get_factory(cls)
        super().__init_subclass__(**kwargs)

    @classproperty
    def new(cls) -> MethodType:
        """Return a classmethod for pandas data creation."""

        # Mirror the dataclass __init__ signature, but with the return
        # annotation replaced by the pandas type of this class, so that
        # IDEs show the correct signature for ``new``.
        sig = signature(cls.__init__)  # type: ignore
        sig = sig.replace(return_annotation=get_return(cls))

        def new(cls: Any, *args: Any, **kwargs: Any) -> Any:
            """Create a pandas data from dataclass arguments."""
            return aspandas(cls(*args, **kwargs))

        setattr(new, "__signature__", sig)
        return MethodType(new, cls)


AsFrame = As[pd.DataFrame]
"""Alias of ``As[pandas.DataFrame]``."""


AsSeries = As["pd.Series[Any]"]
"""Alias of ``As[pandas.Series[Any]]``."""


def get_factory(cls: Any) -> Callable[..., Any]:
    """Extract a pandas factory from a class."""
    factory = get_return(cls)

    if callable(factory):
        return factory

    # special handling for AsSeries: the type parameter is a string
    # forward reference ("pd.Series[Any]"), not a callable class
    if factory == "pd.Series[Any]":
        return pd.Series

    raise TypeError("Factory must be callable.")


def get_return(cls: Any) -> Union[type[Any], str]:
    """Extract a return type from a class.

    Scans the original (unsubstituted) base classes for ``As[...]``
    and returns its type parameter, or the forward-reference string
    if the parameter was written as a string.
    """
    for base in getattr(cls, "__orig_bases__", ()):
        if get_origin(base) is not As:
            continue

        tp = get_args(base)[0]

        if isinstance(tp, ForwardRef):
            return tp.__forward_arg__
        else:
            return tp  # type: ignore

    raise TypeError("Could not find any return type.")
# test dataclass and object
def name(meas: str, stat: str) -> dict[str, str]:
    """Return a two-level column name: (Measurement, Statistic)."""
    return {"Measurement": meas, "Statistic": stat}


@dataclass
class Weather:
    """Weather information.

    Names written as format strings (e.g. ``{.temp_unit}``) are filled
    with the instance attribute values when a spec is updated by an
    object (see ``pandas_dataclasses.core.specs``).
    """

    year: Ann[Index[int], "Year"]
    """Year of the measured time."""

    month: Ann[Index[int], "Month"]
    """Month of the measured time."""

    temp_avg: Ann[Data[float], name("Temperature ({.temp_unit})", "Average")]
    """Monthly average temperature with given units."""

    temp_max: Ann[Data[float], name("Temperature ({.temp_unit})", "Maximum")]
    """Monthly maximum temperature with given units."""

    wind_avg: Ann[Data[float], name("Wind speed ({.wind_unit})", "Average")]
    """Monthly average wind speed with given units."""

    wind_max: Ann[Data[float], name("Wind speed ({.wind_unit})", "Maximum")]
    """Monthly maximum wind speed with given units."""

    loc: Ann[Attr[str], "Location"] = "Tokyo"
    """Name of the measured location."""

    lon: Ann[Attr[float], "Longitude ({.lon_unit})"] = 139.69167
    """Longitude at the measured location."""

    lat: Ann[Attr[float], "Latitude ({.lat_unit})"] = 35.68944
    """Latitude at the measured location."""

    temp_unit: str = "deg C"
    """Units of the temperature."""

    wind_unit: str = "m/s"
    """Units of the wind speed."""

    lon_unit: str = "deg"
    """Units of the longitude."""

    lat_unit: str = "deg"
    """Units of the latitude."""

    attrs: Multiple[Attr[Any]] = field(default_factory=dict)
    """Other attributes."""


# Positional arguments map to the index fields (year, month) and the
# four data fields in declaration order; attribute fields keep defaults.
weather = Weather(
    [2020, 2020, 2021, 2021, 2022],
    [1, 7, 1, 7, 1],
    [7.1, 24.3, 5.4, 25.9, 4.9],
    [11.1, 27.7, 10.3, 30.3, 9.4],
    [2.4, 3.1, 2.3, 2.4, 2.6],
    [8.8, 10.2, 10.7, 9.0, 8.8],
)
# test data: Weather field specs filled with the values of the
# ``weather`` instance (``@`` is an alias of ``Spec.update``)
spec = Spec.from_dataclass(Weather) @ weather


# test functions
def test_asframe() -> None:
    """asframe should reproduce the expected DataFrame exactly."""
    assert_frame_equal(asframe(weather), df_weather_true)


def test_asseries() -> None:
    """asseries should reproduce the expected Series exactly."""
    assert_series_equal(asseries(weather), ser_weather_true)


def test_get_attrs() -> None:
    """Derived attributes should match the attr field specs in order."""
    attrs = get_attrs(spec)

    for i, (key, val) in enumerate(attrs.items()):
        assert key == spec.fields.of(Tag.ATTR)[i].name
        assert val == spec.fields.of(Tag.ATTR)[i].default


def test_get_columns() -> None:
    """Derived columns should match the data field names and levels."""
    columns = cast(pd.MultiIndex, get_columns(spec))

    for i in range(len(columns)):
        assert columns[i] == name(spec.fields.of(Tag.DATA)[i])

    assert columns.names == name(spec.fields.of(Tag.DATA))  # type: ignore


def test_get_data() -> None:
    """Derived data should match names, dtypes, and values of data fields."""
    data = get_data(spec)

    for i, (key, val) in enumerate(data.items()):
        assert key == name(spec.fields.of(Tag.DATA)[i])
        assert val.dtype.name == spec.fields.of(Tag.DATA)[i].dtype
        assert (val == spec.fields.of(Tag.DATA)[i].default).all()


def test_get_index() -> None:
    """Derived index levels should match the index field specs."""
    index = cast(pd.MultiIndex, get_index(spec))

    for i in range(index.nlevels):
        level = index.get_level_values(i)
        assert level.name == spec.fields.of(Tag.INDEX)[i].name
        assert level.dtype.name == spec.fields.of(Tag.INDEX)[i].dtype
        assert (level == spec.fields.of(Tag.INDEX)[i].default).all()
"int64" 43 | assert field.default is MISSING 44 | 45 | 46 | def test_month_updated() -> None: 47 | field = spec_updated.fields.of(Tag.INDEX)[1] 48 | 49 | assert field.id == "month" 50 | assert field.tags == (Tag.INDEX,) 51 | assert field.name == "Month" 52 | assert field.dtype == "int64" 53 | assert field.default == weather.month 54 | 55 | 56 | def test_temp_avg() -> None: 57 | field = spec.fields.of(Tag.DATA)[0] 58 | 59 | assert field.id == "temp_avg" 60 | assert field.tags == (Tag.DATA,) 61 | assert field.name == name("Temperature ({.temp_unit})", "Average") 62 | assert field.dtype == "float64" 63 | assert field.default is MISSING 64 | 65 | 66 | def test_temp_avg_updated() -> None: 67 | field = spec_updated.fields.of(Tag.DATA)[0] 68 | 69 | assert field.id == "temp_avg" 70 | assert field.tags == (Tag.DATA,) 71 | assert field.name == name("Temperature (deg C)", "Average") 72 | assert field.dtype == "float64" 73 | assert field.default == weather.temp_avg 74 | 75 | 76 | def test_temp_max() -> None: 77 | field = spec.fields.of(Tag.DATA)[1] 78 | 79 | assert field.id == "temp_max" 80 | assert field.tags == (Tag.DATA,) 81 | assert field.name == name("Temperature ({.temp_unit})", "Maximum") 82 | assert field.dtype == "float64" 83 | assert field.default is MISSING 84 | 85 | 86 | def test_temp_max_updated() -> None: 87 | field = spec_updated.fields.of(Tag.DATA)[1] 88 | 89 | assert field.id == "temp_max" 90 | assert field.tags == (Tag.DATA,) 91 | assert field.name == name("Temperature (deg C)", "Maximum") 92 | assert field.dtype == "float64" 93 | assert field.default == weather.temp_max 94 | 95 | 96 | def test_wind_avg() -> None: 97 | field = spec.fields.of(Tag.DATA)[2] 98 | 99 | assert field.id == "wind_avg" 100 | assert field.tags == (Tag.DATA,) 101 | assert field.name == name("Wind speed ({.wind_unit})", "Average") 102 | assert field.dtype == "float64" 103 | assert field.default is MISSING 104 | 105 | 106 | def test_wind_avg_updated() -> None: 107 | field = 
spec_updated.fields.of(Tag.DATA)[2] 108 | 109 | assert field.id == "wind_avg" 110 | assert field.tags == (Tag.DATA,) 111 | assert field.name == name("Wind speed (m/s)", "Average") 112 | assert field.dtype == "float64" 113 | assert field.default == weather.wind_avg 114 | 115 | 116 | def test_wind_max() -> None: 117 | field = spec.fields.of(Tag.DATA)[3] 118 | 119 | assert field.id == "wind_max" 120 | assert field.tags == (Tag.DATA,) 121 | assert field.name == name("Wind speed ({.wind_unit})", "Maximum") 122 | assert field.dtype == "float64" 123 | assert field.default is MISSING 124 | 125 | 126 | def test_wind_max_updated() -> None: 127 | field = spec_updated.fields.of(Tag.DATA)[3] 128 | 129 | assert field.id == "wind_max" 130 | assert field.tags == (Tag.DATA,) 131 | assert field.name == name("Wind speed (m/s)", "Maximum") 132 | assert field.dtype == "float64" 133 | assert field.default == weather.wind_max 134 | 135 | 136 | def test_loc() -> None: 137 | field = spec.fields.of(Tag.ATTR)[0] 138 | 139 | assert field.id == "loc" 140 | assert field.tags == (Tag.ATTR,) 141 | assert field.name == "Location" 142 | assert field.default == Weather.loc 143 | 144 | 145 | def test_loc_updated() -> None: 146 | field = spec_updated.fields.of(Tag.ATTR)[0] 147 | 148 | assert field.id == "loc" 149 | assert field.tags == (Tag.ATTR,) 150 | assert field.name == "Location" 151 | assert field.default == weather.loc 152 | 153 | 154 | def test_lon() -> None: 155 | field = spec.fields.of(Tag.ATTR)[1] 156 | 157 | assert field.id == "lon" 158 | assert field.tags == (Tag.ATTR,) 159 | assert field.name == "Longitude ({.lon_unit})" 160 | assert field.default == Weather.lon 161 | 162 | 163 | def test_lon_updated() -> None: 164 | field = spec_updated.fields.of(Tag.ATTR)[1] 165 | 166 | assert field.id == "lon" 167 | assert field.tags == (Tag.ATTR,) 168 | assert field.name == "Longitude (deg)" 169 | assert field.default == weather.lon 170 | 171 | 172 | def test_lat() -> None: 173 | field = 
spec.fields.of(Tag.ATTR)[2] 174 | 175 | assert field.id == "lat" 176 | assert field.tags == (Tag.ATTR,) 177 | assert field.name == "Latitude ({.lat_unit})" 178 | assert field.default == Weather.lat 179 | 180 | 181 | def test_lat_updated() -> None: 182 | field = spec_updated.fields.of(Tag.ATTR)[2] 183 | 184 | assert field.id == "lat" 185 | assert field.tags == (Tag.ATTR,) 186 | assert field.name == "Latitude (deg)" 187 | assert field.default == weather.lat 188 | 189 | 190 | def test_attrs() -> None: 191 | field = spec.fields.of(Tag.ATTR)[3] 192 | 193 | assert field.id == "attrs" 194 | assert field.tags == (Tag.ATTR, Tag.MULTIPLE) 195 | assert field.name == "attrs" 196 | assert field.default is MISSING 197 | 198 | 199 | def test_attrs_updated() -> None: 200 | field = spec_updated.fields.of(Tag.ATTR)[3] 201 | 202 | assert field.id == "attrs" 203 | assert field.tags == (Tag.ATTR, Tag.MULTIPLE) 204 | assert field.name == "attrs" 205 | assert field.default == weather.attrs 206 | 207 | 208 | def test_factory() -> None: 209 | assert spec.factory is None 210 | 211 | 212 | def test_name() -> None: 213 | assert spec.name == Weather.__name__ 214 | 215 | 216 | def test_origin() -> None: 217 | assert spec.origin is Weather 218 | -------------------------------------------------------------------------------- /tests/test_core_tagging.py: -------------------------------------------------------------------------------- 1 | # standard library 2 | from typing import Any, Union 3 | 4 | 5 | # dependencies 6 | from pandas_dataclasses import Attr, Data, Index, Tag 7 | from pandas_dataclasses.core.tagging import get_tags 8 | from pytest import mark 9 | from typing_extensions import Annotated as Ann 10 | 11 | 12 | # test data 13 | testdata: list[tuple[Any, tuple[Tag, ...]]] = [ 14 | (Attr[Any], (Tag.ATTR,)), # type: ignore 15 | (Data[Any], (Tag.DATA,)), 16 | (Index[Any], (Tag.INDEX,)), 17 | (Any, ()), 18 | (Ann[Attr[Any], "attr"], (Tag.ATTR,)), # type: ignore 19 | (Ann[Data[Any], "data"], 
(Tag.DATA,)),
    (Ann[Index[Any], "index"], (Tag.INDEX,)),
    (Ann[Any, "other"], ()),
    (Union[Ann[Attr[Any], "attr"], Ann[Any, "any"]], (Tag.ATTR,)),  # type: ignore
    (Union[Ann[Data[Any], "data"], Ann[Any, "any"]], (Tag.DATA,)),
    (Union[Ann[Index[Any], "index"], Ann[Any, "any"]], (Tag.INDEX,)),
    (Union[Ann[Any, "other"], Ann[Any, "any"]], ()),
]


# test functions
@mark.parametrize("tp, tags", testdata)
def test_get_tags(tp: Any, tags: tuple[Tag, ...]) -> None:
    """get_tags extracts the expected tags from each hint."""
    assert get_tags(tp) == tags
--------------------------------------------------------------------------------
/tests/test_core_typing.py:
--------------------------------------------------------------------------------
"""Tests for dtype and first-annotation extraction from type hints."""

# standard library
from typing import Annotated as Ann, Any, Hashable, Literal as L, Optional, Union


# dependencies
import numpy as np
import pandas as pd
from pandas_dataclasses import Attr, Data, Index

# NOTE(review): the helpers under test live in core.specs although this
# module is named test_core_typing -- confirm the intended home module
from pandas_dataclasses.core.specs import get_dtype, get_first
from pytest import mark


# test data
# pairs of (type hint, expected dtype): Any/None mean "no dtype"; NumPy
# literals ("i8"), pandas extension names ("boolean", "category"), and
# Annotated/Union wrappers around the hint are all covered
testdata_dtype: list[tuple[Any, Any]] = [
    (Data[Any], None),
    (Data[None], None),
    (Data[int], np.dtype("i8")),
    (Data[Union[int, None]], np.dtype("i8")),
    (Data[L["i8"]], np.dtype("i8")),
    (Data[L["boolean"]], pd.BooleanDtype()),
    (Data[L["category"]], pd.CategoricalDtype()),
    (Index[Any], None),
    (Index[None], None),
    (Index[int], np.dtype("i8")),
    (Index[Union[int, None]], np.dtype("i8")),
    (Index[L["i8"]], np.dtype("i8")),
    (Index[L["boolean"]], pd.BooleanDtype()),
    (Index[L["category"]], pd.CategoricalDtype()),
    (Ann[Data[float], "data"], np.dtype("f8")),
    (Ann[Index[float], "index"], np.dtype("f8")),
    (Union[Ann[Data[float], "data"], Ann[Any, "any"]], np.dtype("f8")),
    (Union[Ann[Index[float], "index"], Ann[Any, "any"]], np.dtype("f8")),
]

# pairs of (type hint, expected first annotation): the first Annotated
# metadata is returned only for tagged hints; hints whose first metadata
# is Ellipsis fall back to the supplied default (None)
testdata_first: list[tuple[Any, Optional[Hashable]]] = [
    (Attr[Any], None),  # type: ignore
    (Data[Any], None),
    (Index[Any], None),
    (Any, None),
    (Ann[Attr[Any], "attr"], "attr"),  # type: ignore
    (Ann[Data[Any], "data"], "data"),
    (Ann[Index[Any], "index"], "index"),
    (Ann[Any, "other"], None),
    (Ann[Attr[Any], ..., "attr"], None),  # type: ignore
    (Ann[Data[Any], ..., "data"], None),
    (Ann[Index[Any], ..., "index"], None),
    (Ann[Any, ..., "other"], None),
    (Union[Ann[Attr[Any], "attr"], Ann[Any, "any"]], "attr"),  # type: ignore
    (Union[Ann[Data[Any], "data"], Ann[Any, "any"]], "data"),
    (Union[Ann[Index[Any], "index"], Ann[Any, "any"]], "index"),
    (Union[Ann[Any, "other"], Ann[Any, "any"]], None),
]


# test functions
@mark.parametrize("tp, dtype", testdata_dtype)
def test_get_dtype(tp: Any, dtype: Optional[str]) -> None:
    """get_dtype resolves each hint to the expected NumPy/pandas dtype."""
    assert get_dtype(tp) == dtype


@mark.parametrize("tp, first", testdata_first)
def test_get_first(tp: Any, first: Optional[Any]) -> None:
    """get_first returns the first Annotated metadata (or the default)."""
    assert get_first(tp, None) == first
--------------------------------------------------------------------------------
/tests/test_extras_new.py:
--------------------------------------------------------------------------------
"""Tests for the new() constructor added by AsFrame/AsSeries/As[...]."""

# standard library
from dataclasses import dataclass
from typing import Any


# dependencies
import pandas as pd
from pandas.testing import assert_frame_equal, assert_series_equal
from pandas_dataclasses import As, AsFrame, AsSeries
from .data import Weather, weather, df_weather_true, ser_weather_true


# test data
def factory(*args: Any, **kwargs: Any) -> pd.Series:  # type: ignore
    """Plain function factory (used by FactorySeries)."""
    return pd.Series(*args, **kwargs)  # type: ignore


class UserFrame(pd.DataFrame):
    # user-defined DataFrame subclass for the As[UserFrame] test
    pass


class UserSeries(pd.Series):  # type: ignore
    # user-defined Series subclass for the As[UserSeries] test
    pass


@dataclass
class Frame(Weather, AsFrame):
    pass

@dataclass 32 | class CustomFrame(Weather, As[UserFrame]): 33 | pass 34 | 35 | 36 | @dataclass 37 | class Series(Weather, AsSeries): 38 | pass 39 | 40 | 41 | @dataclass 42 | class CustomSeries(Weather, As[UserSeries]): 43 | pass 44 | 45 | 46 | @dataclass 47 | class FactorySeries(Weather, AsSeries, factory=factory): 48 | pass 49 | 50 | 51 | @dataclass 52 | class FloatSeries(Weather, As["pd.Series[float]"], factory=pd.Series): 53 | pass 54 | 55 | 56 | # test functions 57 | def test_frame() -> None: 58 | df_weather = Frame.new( 59 | year=weather.year, 60 | month=weather.month, 61 | temp_avg=weather.temp_avg, 62 | temp_max=weather.temp_max, 63 | wind_avg=weather.wind_avg, 64 | wind_max=weather.wind_max, 65 | ) 66 | 67 | assert isinstance(df_weather, pd.DataFrame) 68 | assert_frame_equal(df_weather, df_weather_true) 69 | 70 | 71 | def test_custom_frame() -> None: 72 | df_weather = CustomFrame.new( 73 | year=weather.year, 74 | month=weather.month, 75 | temp_avg=weather.temp_avg, 76 | temp_max=weather.temp_max, 77 | wind_avg=weather.wind_avg, 78 | wind_max=weather.wind_max, 79 | ) 80 | 81 | assert isinstance(df_weather, UserFrame) 82 | assert_frame_equal(df_weather, df_weather_true, check_frame_type=False) 83 | 84 | 85 | def test_series() -> None: 86 | ser_weather = Series.new( 87 | year=weather.year, 88 | month=weather.month, 89 | temp_avg=weather.temp_avg, 90 | temp_max=weather.temp_max, 91 | wind_avg=weather.wind_avg, 92 | wind_max=weather.wind_max, 93 | ) 94 | 95 | assert isinstance(ser_weather, pd.Series) 96 | assert_series_equal(ser_weather, ser_weather_true) 97 | 98 | 99 | def test_custom_series() -> None: 100 | ser_weather = CustomSeries.new( 101 | year=weather.year, 102 | month=weather.month, 103 | temp_avg=weather.temp_avg, 104 | temp_max=weather.temp_max, 105 | wind_avg=weather.wind_avg, 106 | wind_max=weather.wind_max, 107 | ) 108 | 109 | assert isinstance(ser_weather, UserSeries) 110 | assert_series_equal(ser_weather, ser_weather_true, 
check_series_type=False) 111 | 112 | 113 | def test_factory_series() -> None: 114 | ser_weather = FactorySeries.new( 115 | year=weather.year, 116 | month=weather.month, 117 | temp_avg=weather.temp_avg, 118 | temp_max=weather.temp_max, 119 | wind_avg=weather.wind_avg, 120 | wind_max=weather.wind_max, 121 | ) 122 | 123 | assert isinstance(ser_weather, pd.Series) 124 | assert_series_equal(ser_weather, ser_weather_true) 125 | 126 | 127 | def test_float_series() -> None: 128 | ser_weather = FloatSeries.new( 129 | year=weather.year, 130 | month=weather.month, 131 | temp_avg=weather.temp_avg, 132 | temp_max=weather.temp_max, 133 | wind_avg=weather.wind_avg, 134 | wind_max=weather.wind_max, 135 | ) 136 | 137 | assert isinstance(ser_weather, pd.Series) 138 | assert_series_equal(ser_weather, ser_weather_true, check_series_type=False) 139 | --------------------------------------------------------------------------------