├── .devcontainer
└── devcontainer.json
├── .github
└── workflows
│ ├── docs.yaml
│ ├── pypi.yml
│ └── tests.yml
├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── docs
├── build
├── conf.py
└── index.md
├── pandas_dataclasses
├── __init__.py
├── core
│ ├── __init__.py
│ ├── api.py
│ ├── specs.py
│ ├── tagging.py
│ └── typing.py
├── extras
│ ├── __init__.py
│ ├── hints.py
│ └── new.py
└── py.typed
├── pyproject.toml
├── tests
├── __init__.py
├── data.py
├── test_core_api.py
├── test_core_specs.py
├── test_core_tagging.py
├── test_core_typing.py
└── test_extras_new.py
└── uv.lock
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pandas-dataclasses",
3 | "image": "ghcr.io/astral-sh/uv:python3.12-bookworm",
4 | "runArgs": [
5 | "--name=pandas-dataclasses"
6 | ],
7 | "containerEnv": {
8 | "UV_PROJECT_ENVIRONMENT": "/usr/local"
9 | },
10 | "postCreateCommand": "uv sync --frozen",
11 | "customizations": {
12 | "vscode": {
13 | "extensions": [
14 | "ms-python.black-formatter",
15 | "streetsidesoftware.code-spell-checker",
16 | "tamasfe.even-better-toml"
17 | ],
18 | "settings": {
19 | "python.languageServer": "Pylance",
20 | "[python]": {
21 | "editor.defaultFormatter": "ms-python.black-formatter",
22 | "editor.formatOnSave": true
23 | }
24 | }
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yaml:
--------------------------------------------------------------------------------
1 | name: Docs
2 |
3 | on:
4 | release:
5 | types:
6 | - created
7 |
8 | jobs:
9 | job:
10 | name: Docs
11 | runs-on: ubuntu-latest
12 | container: ghcr.io/astral-sh/uv:python3.12-bookworm
13 | env:
14 | UV_PROJECT_ENVIRONMENT: /usr/local
15 | steps:
16 | - uses: actions/checkout@v4
17 | - run: echo "tag=${GITHUB_REF##*/}" >> "${GITHUB_OUTPUT}"
18 | id: tag
19 | - run: uv sync --frozen
20 | - run: docs/build
21 | - uses: peaceiris/actions-gh-pages@v4
22 | with:
23 | destination_dir: ${{ steps.tag.outputs.tag }}
24 | github_token: ${{ secrets.GITHUB_TOKEN }}
25 | publish_dir: ./docs/_build
26 |
--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
1 | name: PyPI
2 |
3 | on:
4 | release:
5 | types:
6 | - created
7 |
8 | jobs:
9 | job:
10 | name: PyPI
11 | runs-on: ubuntu-latest
12 | container: ghcr.io/astral-sh/uv:python3.12-bookworm
13 | env:
14 | UV_PROJECT_ENVIRONMENT: /usr/local
15 | UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
16 | steps:
17 | - uses: actions/checkout@v4
18 | - run: uv build && uv publish
19 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | job:
13 | name: Test (${{ matrix.env }})
14 | runs-on: ubuntu-latest
15 | container: ghcr.io/astral-sh/uv:${{ matrix.env }}
16 | env:
17 | PYTHON_DIRS: docs tests pandas_dataclasses
18 | UV_PROJECT_ENVIRONMENT: /usr/local
19 | strategy:
20 | fail-fast: false
21 | matrix:
22 | env:
23 | - python3.9-bookworm
24 | - python3.10-bookworm
25 | - python3.11-bookworm
26 | - python3.12-bookworm
27 | - python3.13-bookworm
28 | steps:
29 | - uses: actions/checkout@v4
30 | - run: uv sync --frozen
31 | - run: black --check ${PYTHON_DIRS}
32 | - run: pyright ${PYTHON_DIRS}
33 | - run: pytest -v
34 | - run: docs/build
35 | if: ${{ matrix.env != 'python3.9-bookworm' }}
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
3 |
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_apidoc/
77 | docs/_build/
78 |
79 | # PyBuilder
80 | .pybuilder/
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | # For a library or package, you might want to ignore these files since the code is
92 | # intended to run in multiple environments; otherwise, check them in:
93 | # .python-version
94 |
95 | # pipenv
96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
99 | # install all needed dependencies.
100 | #Pipfile.lock
101 |
102 | # poetry
103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104 | # This is especially recommended for binary packages to ensure reproducibility, and is more
105 | # commonly ignored for libraries.
106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107 | #poetry.lock
108 |
109 | # pdm
110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
111 | #pdm.lock
112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
113 | # in version control.
114 | # https://pdm.fming.dev/#use-with-ide
115 | .pdm.toml
116 |
117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118 | __pypackages__/
119 |
120 | # Celery stuff
121 | celerybeat-schedule
122 | celerybeat.pid
123 |
124 | # SageMath parsed files
125 | *.sage.py
126 |
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 |
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 |
140 | # Rope project settings
141 | .ropeproject
142 |
143 | # mkdocs documentation
144 | /site
145 |
146 | # mypy
147 | .mypy_cache/
148 | .dmypy.json
149 | dmypy.json
150 |
151 | # Pyre type checker
152 | .pyre/
153 |
154 | # pytype static type analyzer
155 | .pytype/
156 |
157 | # Cython debug symbols
158 | cython_debug/
159 |
160 | # PyCharm
161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163 | # and can be added to the global gitignore or merged into this file. For a more nuclear
164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165 | #.idea/
166 |
167 | # End of https://www.toptal.com/developers/gitignore/api/python
168 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | # This CITATION.cff file was generated with cffinit.
2 | # Visit https://bit.ly/cffinit to generate yours today!
3 |
4 | cff-version: 1.2.0
5 | title: pandas-dataclasses
6 | message: >-
7 | If you use this software, please cite it using the
8 | metadata from this file.
9 | type: software
10 | authors:
11 | - given-names: Akio
12 | family-names: Taniguchi
13 | email: taniguchi.akio@gmail.com
14 | affiliation: Kitami Institute of Technology
15 | orcid: 'https://orcid.org/0000-0002-9695-6183'
16 | identifiers:
17 | - type: doi
18 | value: 10.5281/zenodo.10652375
19 | repository-code: 'https://github.com/astropenguin/pandas-dataclasses'
20 | url: 'https://astropenguin.github.io/pandas-dataclasses/v1.0.0'
21 | abstract: pandas data creation by data classes
22 | keywords:
23 | - python
24 | - dataclasses
25 | - pandas
26 | - specifications
27 | - typing
28 | license: MIT
29 | version: 1.0.0
30 | date-released: '2025-01-01'
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021-2025 Akio Taniguchi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pandas-dataclasses
2 |
3 | [](https://pypi.org/project/pandas-dataclasses/)
4 | [](https://pypi.org/project/pandas-dataclasses/)
5 | [](https://pepy.tech/project/pandas-dataclasses)
6 | [](https://doi.org/10.5281/zenodo.6127352)
7 | [](https://github.com/astropenguin/pandas-dataclasses/actions)
8 |
9 | pandas data creation by data classes
10 |
11 | ## Overview
12 |
13 | pandas-dataclasses makes it easy to create [pandas] data (DataFrame and Series) by specifying their data types, attributes, and names using Python's dataclass:
14 |
15 |
16 | Click to see all imports
17 |
18 | ```python
19 | from dataclasses import dataclass
20 | from pandas_dataclasses import AsFrame, Data, Index
21 | ```
22 |
23 |
24 | ```python
25 | @dataclass
26 | class Weather(AsFrame):
27 | """Weather information."""
28 |
29 | year: Index[int]
30 | month: Index[int]
31 | temp: Data[float]
32 | wind: Data[float]
33 |
34 |
35 | df = Weather.new(
36 | [2020, 2020, 2021, 2021, 2022],
37 | [1, 7, 1, 7, 1],
38 | [7.1, 24.3, 5.4, 25.9, 4.9],
39 | [2.4, 3.1, 2.3, 2.4, 2.6],
40 | )
41 | ```
42 |
43 | where `df` will become a DataFrame object like:
44 |
45 | ```
46 | temp wind
47 | year month
48 | 2020 1 7.1 2.4
49 | 7 24.3 3.1
50 | 2021 1 5.4 2.3
51 | 7 25.9 2.4
52 | 2022 1 4.9 2.6
53 | ```
54 |
55 | ### Features
56 |
57 | - Specifying data types and names of each element in pandas data
58 | - Specifying metadata stored in pandas data attributes (attrs)
59 | - Support for hierarchical index and columns
60 | - Support for custom factory for data creation
61 | - Support for full [dataclass] features
62 | - Support for static type check by [mypy] and [Pyright] ([Pylance])
63 |
64 | ### Installation
65 |
66 | ```bash
67 | pip install pandas-dataclasses
68 | ```
69 |
70 | ## How it works
71 |
72 | pandas-dataclasses provides you the following features:
73 |
74 | - Type hints for dataclass fields (`Attr`, `Data`, `Index`) to specify the data type and name of each element in pandas data
75 | - Mix-in classes for dataclasses (`As`, `AsFrame`, `AsSeries`) to create pandas data by a classmethod (`new`) that takes the same arguments as dataclass initialization
76 |
77 | When you call `new`, it will first create a dataclass object and then create a Series or DataFrame object from the dataclass object according to the type hints and values in it.
78 | In the example above, `df = Weather.new(...)` is thus equivalent to:
79 |
80 |
81 | Click to see all imports
82 |
83 | ```python
84 | from pandas_dataclasses import asframe
85 | ```
86 |
87 |
88 | ```python
89 | obj = Weather([2020, ...], [1, ...], [7.1, ...], [2.4, ...])
90 | df = asframe(obj)
91 | ```
92 |
93 | where `asframe` is a conversion function.
94 | pandas-dataclasses does not touch the dataclass object creation itself; this allows you to fully customize your dataclass before conversion by the dataclass features (`field`, `__post_init__`, ...).
95 |
96 | ## Basic usage
97 |
98 | ### DataFrame creation
99 |
100 | As shown in the example above, a dataclass that has the `AsFrame` (or `AsDataFrame` as an alias) mix-in will create DataFrame objects:
101 |
102 |
103 | Click to see all imports
104 |
105 | ```python
106 | from dataclasses import dataclass
107 | from pandas_dataclasses import AsFrame, Data, Index
108 | ```
109 |
110 |
111 | ```python
112 | @dataclass
113 | class Weather(AsFrame):
114 | """Weather information."""
115 |
116 | year: Index[int]
117 | month: Index[int]
118 | temp: Data[float]
119 | wind: Data[float]
120 |
121 |
122 | df = Weather.new(...)
123 | ```
124 |
125 | where fields typed by `Index` are *index fields*, each value of which will become an index or a part of a hierarchical index of a DataFrame object.
126 | Fields typed by `Data` are *data fields*, each value of which will become a data column of a DataFrame object.
127 | Fields typed by other types are just ignored in the DataFrame creation.
128 |
129 | Each data or index will be cast to the data type specified in a type hint like `Index[int]`.
130 | Use `Any` or `None` (like `Index[Any]`) if you do not want type casting.
131 | See also [data typing rules](#data-typing-rules) for more examples.
132 |
133 | By default, a field name (i.e. an argument name) is used for the name of corresponding data or index.
134 | See also [custom naming](#custom-naming) and [naming rules](#naming-rules) if you want customization.
135 |
136 | ### Series creation
137 |
138 | A dataclass that has the `AsSeries` mix-in will create Series objects:
139 |
140 |
141 | Click to see all imports
142 |
143 | ```python
144 | from dataclasses import dataclass
145 | from pandas_dataclasses import AsSeries, Data, Index
146 | ```
147 |
148 |
149 | ```python
150 | @dataclass
151 | class Weather(AsSeries):
152 | """Weather information."""
153 |
154 | year: Index[int]
155 | month: Index[int]
156 | temp: Data[float]
157 |
158 |
159 | ser = Weather.new(...)
160 | ```
161 |
162 | Unlike `AsFrame`, the second and subsequent data fields are ignored in the Series creation even if they exist.
163 | Other rules are the same as for the DataFrame creation.
164 |
165 | ## Advanced usage
166 |
167 | ### Metadata storing
168 |
169 | Fields typed by `Attr` are *attribute fields*, each value of which will become an item of attributes of a DataFrame or a Series object:
170 |
171 |
172 | Click to see all imports
173 |
174 | ```python
175 | from dataclasses import dataclass
176 | from pandas_dataclasses import AsFrame, Attr, Data, Index
177 | ```
178 |
179 |
180 | ```python
181 | @dataclass
182 | class Weather(AsFrame):
183 | """Weather information."""
184 |
185 | year: Index[int]
186 | month: Index[int]
187 | temp: Data[float]
188 | wind: Data[float]
189 | loc: Attr[str] = "Tokyo"
190 | lon: Attr[float] = 139.69167
191 | lat: Attr[float] = 35.68944
192 |
193 |
194 | df = Weather.new(...)
195 | ```
196 |
197 | where `df.attrs` will become like:
198 |
199 | ```python
200 | {"loc": "Tokyo", "lon": 139.69167, "lat": 35.68944}
201 | ```
202 |
203 | ### Custom naming
204 |
205 | The name of attribute, data, or index can be explicitly specified by adding a hashable annotation to the corresponding type:
206 |
207 |
208 | Click to see all imports
209 |
210 | ```python
211 | from dataclasses import dataclass
212 | from typing import Annotated as Ann
213 | from pandas_dataclasses import AsFrame, Attr, Data, Index
214 | ```
215 |
216 |
217 | ```python
218 | @dataclass
219 | class Weather(AsFrame):
220 | """Weather information."""
221 |
222 | year: Ann[Index[int], "Year"]
223 | month: Ann[Index[int], "Month"]
224 | temp: Ann[Data[float], "Temperature (deg C)"]
225 | wind: Ann[Data[float], "Wind speed (m/s)"]
226 | loc: Ann[Attr[str], "Location"] = "Tokyo"
227 | lon: Ann[Attr[float], "Longitude (deg)"] = 139.69167
228 | lat: Ann[Attr[float], "Latitude (deg)"] = 35.68944
229 |
230 |
231 | df = Weather.new(...)
232 | ```
233 |
234 | where `df` and `df.attrs` will become like:
235 |
236 | ```
237 | Temperature (deg C) Wind speed (m/s)
238 | Year Month
239 | 2020 1 7.1 2.4
240 | 7 24.3 3.1
241 | 2021 1 5.4 2.3
242 | 7 25.9 2.4
243 | 2022 1 4.9 2.6
244 | ```
245 |
246 | ```python
247 | {"Location": "Tokyo", "Longitude (deg)": 139.69167, "Latitude (deg)": 35.68944}
248 | ```
249 |
250 | If an annotation is a [format string], it will be formatted by a dataclass object before the data creation:
251 |
252 |
253 | Click to see all imports
254 |
255 | ```python
256 | from dataclasses import dataclass
257 | from typing import Annotated as Ann
258 | from pandas_dataclasses import AsFrame, Data, Index
259 | ```
260 |
261 |
262 | ```python
263 | @dataclass
264 | class Weather(AsFrame):
265 | """Weather information."""
266 |
267 | year: Ann[Index[int], "Year"]
268 | month: Ann[Index[int], "Month"]
269 | temp: Ann[Data[float], "Temperature ({.temp_unit})"]
270 | wind: Ann[Data[float], "Wind speed ({.wind_unit})"]
271 | temp_unit: str = "deg C"
272 | wind_unit: str = "m/s"
273 |
274 |
275 | df = Weather.new(..., temp_unit="deg F", wind_unit="km/h")
276 | ```
277 |
278 | where units of the temperature and the wind speed will be dynamically updated (see also [naming rules](#naming-rules)).
279 |
280 | ### Hierarchical columns
281 |
282 | Adding tuple annotations to data fields will create DataFrame objects with hierarchical columns:
283 |
284 |
285 | Click to see all imports
286 |
287 | ```python
288 | from dataclasses import dataclass
289 | from typing import Annotated as Ann
290 | from pandas_dataclasses import AsFrame, Data, Index
291 | ```
292 |
293 |
294 | ```python
295 | @dataclass
296 | class Weather(AsFrame):
297 | """Weather information."""
298 |
299 | year: Ann[Index[int], "Year"]
300 | month: Ann[Index[int], "Month"]
301 | temp_avg: Ann[Data[float], ("Temperature (deg C)", "Average")]
302 | temp_max: Ann[Data[float], ("Temperature (deg C)", "Maximum")]
303 | wind_avg: Ann[Data[float], ("Wind speed (m/s)", "Average")]
304 | wind_max: Ann[Data[float], ("Wind speed (m/s)", "Maximum")]
305 |
306 |
307 | df = Weather.new(...)
308 | ```
309 |
310 | where `df` will become like:
311 |
312 | ```
313 | Temperature (deg C) Wind speed (m/s)
314 | Average Maximum Average Maximum
315 | Year Month
316 | 2020 1 7.1 11.1 2.4 8.8
317 | 7 24.3 27.7 3.1 10.2
318 | 2021 1 5.4 10.3 2.3 10.7
319 | 7 25.9 30.3 2.4 9.0
320 | 2022 1 4.9 9.4 2.6 8.8
321 | ```
322 |
323 | Column names can be (explicitly) specified by dictionary annotations:
324 |
325 |
326 | Click to see all imports
327 |
328 | ```python
329 | from dataclasses import dataclass
330 | from typing import Annotated as Ann
331 | from pandas_dataclasses import AsFrame, Data, Index
332 | ```
333 |
334 |
335 | ```python
336 | def name(meas: str, stat: str) -> dict[str, str]:
337 | """Create a dictionary annotation for a column name."""
338 | return {"Measurement": meas, "Statistic": stat}
339 |
340 |
341 | @dataclass
342 | class Weather(AsFrame):
343 | """Weather information."""
344 |
345 | year: Ann[Index[int], "Year"]
346 | month: Ann[Index[int], "Month"]
347 | temp_avg: Ann[Data[float], name("Temperature (deg C)", "Average")]
348 | temp_max: Ann[Data[float], name("Temperature (deg C)", "Maximum")]
349 | wind_avg: Ann[Data[float], name("Wind speed (m/s)", "Average")]
350 | wind_max: Ann[Data[float], name("Wind speed (m/s)", "Maximum")]
351 |
352 |
353 | df = Weather.new(...)
354 | ```
355 |
356 | where `df` will become like:
357 |
358 | ```
359 | Measurement Temperature (deg C) Wind speed (m/s)
360 | Statistic Average Maximum Average Maximum
361 | Year Month
362 | 2020 1 7.1 11.1 2.4 8.8
363 | 7 24.3 27.7 3.1 10.2
364 | 2021 1 5.4 10.3 2.3 10.7
365 | 7 25.9 30.3 2.4 9.0
366 | 2022 1 4.9 9.4 2.6 8.8
367 | ```
368 |
369 | If a tuple or dictionary annotation has [format string]s, they will also be formatted by a dataclass object (see also [naming rules](#naming-rules)).
370 |
371 | ### Multiple-item fields
372 |
373 | Multiple (and possibly extra) attributes, data, or indices can be added by fields with corresponding type hints wrapped by `Multiple`:
374 |
375 |
376 | Click to see all imports
377 |
378 | ```python
379 | from dataclasses import dataclass
380 | from pandas_dataclasses import AsFrame, Data, Index, Multiple
381 | ```
382 |
383 |
384 |
385 | ```python
386 | @dataclass
387 | class Weather(AsFrame):
388 | """Weather information."""
389 |
390 | year: Index[int]
391 | month: Index[int]
392 | temp: Data[float]
393 | wind: Data[float]
394 | extra_index: Multiple[Index[int]]
395 | extra_data: Multiple[Data[float]]
396 |
397 |
398 | df = Weather.new(
399 | [2020, 2020, 2021, 2021, 2022],
400 | [1, 7, 1, 7, 1],
401 | [7.1, 24.3, 5.4, 25.9, 4.9],
402 | [2.4, 3.1, 2.3, 2.4, 2.6],
403 | extra_index={
404 | "day": [1, 1, 1, 1, 1],
405 | "week": [2, 2, 4, 3, 5],
406 | },
407 | extra_data={
408 | "humid": [65, 89, 57, 83, 52],
409 | "press": [1013.8, 1006.2, 1014.1, 1007.7, 1012.7],
410 | },
411 | )
412 | ```
413 |
414 | where `df` will become like:
415 |
416 | ```
417 | temp wind humid press
418 | year month day week
419 | 2020 1 1 2 7.1 2.4 65.0 1013.8
420 | 7 1 2 24.3 3.1 89.0 1006.2
421 | 2021 1 1 4 5.4 2.3 57.0 1014.1
422 | 7 1 3 25.9 2.4 83.0 1007.7
423 | 2022 1 1 5 4.9 2.6 52.0 1012.7
424 | ```
425 |
426 | If multiple items of the same name exist, the last-defined one will be finally used.
427 | For example, if the `extra_index` field contains `"month": [2, 8, 2, 8, 2]`, the values given by the `month` field will be overwritten.
428 |
429 | ### Custom pandas factory
430 |
431 | A custom class can be specified as a factory for the Series or DataFrame creation by `As`, the generic version of `AsFrame` and `AsSeries`.
432 | Note that the custom class must be a subclass of either `pandas.Series` or `pandas.DataFrame`:
433 |
434 |
435 | Click to see all imports
436 |
437 | ```python
438 | import pandas as pd
439 | from dataclasses import dataclass
440 | from pandas_dataclasses import As, Data, Index
441 | ```
442 |
443 |
444 | ```python
445 | class CustomSeries(pd.Series):
446 | """Custom pandas Series."""
447 |
448 | pass
449 |
450 |
451 | @dataclass
452 | class Temperature(As[CustomSeries]):
453 | """Temperature information."""
454 |
455 | year: Index[int]
456 | month: Index[int]
457 | temp: Data[float]
458 |
459 |
460 | ser = Temperature.new(...)
461 | ```
462 |
463 | where `ser` is statically regarded as `CustomSeries` and will become a `CustomSeries` object.
464 |
465 | Generic Series type (`Series[T]`) is also supported; however, it is only for the static type check in the current pandas versions.
466 | In such cases, you can additionally give a factory that must work in runtime as a class argument:
467 |
468 |
469 | Click to see all imports
470 |
471 | ```python
472 | import pandas as pd
473 | from dataclasses import dataclass
474 | from pandas_dataclasses import As, Data, Index
475 | ```
476 |
477 |
478 | ```python
479 | @dataclass
480 | class Temperature(As["pd.Series[float]"], factory=pd.Series):
481 | """Temperature information."""
482 |
483 | year: Index[int]
484 | month: Index[int]
485 | temp: Data[float]
486 |
487 |
488 | ser = Temperature.new(...)
489 | ```
490 |
491 | where `ser` is statically regarded as `Series[float]` but will become a `Series` object in runtime.
492 |
493 | ## Appendix
494 |
495 | ### Data typing rules
496 |
497 | The data type (dtype) of data or index is determined from the first `Data` or `Index` type of the corresponding field, respectively.
498 | The following table shows how the data type is inferred:
499 |
500 |
501 | Click to see all imports
502 |
503 | ```python
504 | from typing import Any, Annotated as Ann, Literal as L
505 | from pandas_dataclasses import Data
506 | ```
507 |
508 |
509 | Type hint | Inferred data type
510 | --- | ---
511 | `Data[Any]` | `None` (no type casting)
512 | `Data[None]` | `None` (no type casting)
513 | `Data[int]` | `numpy.int64`
514 | `Data[int \| str]` | `numpy.int64`
515 | `Data[numpy.int32]` | `numpy.int32`
516 | `Data[L["datetime64[ns]"]]` | `numpy.dtype("<M8[ns]")`

### Naming rules

The name of data or index is inferred from a type hint according to the following table:
531 | Click to see all imports
532 |
533 | ```python
534 | from typing import Any, Annotated as Ann
535 | from pandas_dataclasses import Data
536 | ```
537 |
538 |
539 | Type hint | Inferred name
540 | --- | ---
541 | `Data[Any]` | (field name)
542 | `Ann[Data[Any], ..., "spam"]` | (field name)
543 | `Ann[Data[Any], "spam"]` | `"spam"`
544 | `Ann[Data[Any], "spam", "ham"]` | `"spam"`
545 | `Ann[Data[Any], "spam"] \| Ann[str, "ham"]` | `"spam"`
546 | `Ann[Data[Any], "spam"] \| Ann[Data[float], "ham"]` | `"spam"`
547 | `Ann[Data[Any], "{.name}"]` | `"{.name}".format(obj)`
548 | `Ann[Data[Any], ("spam", "ham")]` | `("spam", "ham")`
549 | `Ann[Data[Any], ("{.name}", "ham")]` | `("{.name}".format(obj), "ham")`
550 |
551 | where `obj` is a dataclass object that is expected to have `obj.name`.
552 |
553 | ### Development roadmap
554 |
555 | Release version | Features
556 | --- | ---
557 | v0.5 | Support for dynamic naming
558 | v0.6 | Support for extension array and dtype
559 | v0.7 | Support for hierarchical columns
560 | v0.8 | Support for mypy and callable pandas factory
561 | v0.9 | Support for Ellipsis (`...`) as an alias of field name
562 | v0.10 | Support for union type in type hints
563 | v0.11 | Support for Python 3.11 and drop support for Python 3.7
564 | v0.12 | Support for multiple items received in a single field
565 | v1.0 | Initial major release (freezing public features until v2.0)
566 |
567 |
568 | [dataclass]: https://docs.python.org/3/library/dataclasses.html
569 | [format string]: https://docs.python.org/3/library/string.html#format-string-syntax
570 | [mypy]: http://www.mypy-lang.org
571 | [NumPy]: https://numpy.org
572 | [pandas]: https://pandas.pydata.org
573 | [Pylance]: https://github.com/microsoft/pylance-release
574 | [Pyright]: https://github.com/microsoft/pyright
575 |
--------------------------------------------------------------------------------
/docs/build:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eu
2 |
3 | sphinx-apidoc -efMT -d 2 -o docs/_apidoc pandas_dataclasses
4 | sphinx-build -a docs docs/_build
5 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # project information
2 | author = "Akio Taniguchi"
3 | copyright = "2021-2025 Akio Taniguchi"
4 |
5 |
6 | # general configuration
7 | add_module_names = False
8 | autodoc_member_order = "bysource"
9 | autodoc_typehints = "both"
10 | autodoc_typehints_format = "short"
11 | exclude_patterns = [
12 | "_build",
13 | "Thumbs.db",
14 | ".DS_Store",
15 | ]
16 | extensions = [
17 | "myst_parser",
18 | "sphinx.ext.autodoc",
19 | "sphinx.ext.autosummary",
20 | "sphinx.ext.napoleon",
21 | "sphinx.ext.viewcode",
22 | ]
23 | myst_heading_anchors = 3
24 | templates_path = ["_templates"]
25 |
26 |
27 | # options for HTML output
28 | html_theme = "pydata_sphinx_theme"
29 | html_theme_options = {
30 | "github_url": "https://github.com/astropenguin/pandas-dataclasses",
31 | "logo": {"text": "pandas-dataclasses"},
32 | }
33 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ```{include} ../README.md
2 | ```
3 |
4 | ```{toctree}
5 | ---
6 | hidden:
7 | ---
8 |
9 | Home
10 | Package guide <_apidoc/pandas_dataclasses>
11 | ```
12 |
--------------------------------------------------------------------------------
/pandas_dataclasses/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | "As",
3 | "AsDataFrame",
4 | "AsFrame",
5 | "AsSeries",
6 | "Attr",
7 | "Data",
8 | "Index",
9 | "Multiple",
10 | "Spec",
11 | "Tag",
12 | "asdataframe",
13 | "asframe",
14 | "aspandas",
15 | "asseries",
16 | "core",
17 | "extras",
18 | ]
19 | __version__ = "1.0.0"
20 |
21 |
22 | # submodules
23 | from . import core
24 | from . import extras
25 | from .core.api import *
26 | from .core.specs import *
27 | from .core.tagging import *
28 | from .core.typing import *
29 | from .extras.hints import *
30 | from .extras.new import *
31 |
32 |
33 | # aliases
34 | AsDataFrame = AsFrame
35 | """Alias of ``core.mixins.AsFrame``."""
36 |
37 |
38 | asdataframe = asframe
39 | """Alias of ``core.aspandas.asframe``."""
40 |
--------------------------------------------------------------------------------
/pandas_dataclasses/core/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["api", "specs", "tagging", "typing"]
2 |
3 |
4 | from . import api
5 | from . import specs
6 | from . import tagging
7 | from . import typing
8 |
--------------------------------------------------------------------------------
/pandas_dataclasses/core/api.py:
--------------------------------------------------------------------------------
1 | __all__ = ["asframe", "aspandas", "asseries"]
2 |
3 |
4 | # standard library
5 | from types import FunctionType
6 | from typing import Any, Callable, Hashable, Iterable, Optional, overload
7 |
8 |
9 | # dependencies
10 | import numpy as np
11 | import pandas as pd
12 | from pandas.api.types import is_list_like
13 | from typing_extensions import get_origin
14 | from .specs import Field, Fields, Spec
15 | from .tagging import Tag
16 | from .typing import DataClass, DataClassOf, PAny, TFrame, TPandas, TSeries
17 |
18 |
@overload
def aspandas(obj: DataClassOf[TPandas, PAny], *, factory: None = None) -> TPandas: ...


@overload
def aspandas(obj: DataClass[PAny], *, factory: Callable[..., TPandas]) -> TPandas: ...


def aspandas(obj: Any, *, factory: Any = None) -> Any:
    """Create a DataFrame or Series object from a dataclass object.

    Which data structure is created will be determined by a factory
    defined as the ``__pandas_factory__`` attribute in the original
    dataclass of ``obj`` or the ``factory`` argument. If a factory is
    a function, it must have an annotation of the return type.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the data creation.

    Keyword Args:
        factory: Class or function for the DataFrame or Series creation.
            It must take the same parameters as ``pandas.DataFrame``
            or ``pandas.Series``, and return an object of it or its
            subclass. If it is a function, it must have an annotation
            of the return type. If passed, it will be preferentially
            used even if the original dataclass of ``obj`` has the
            ``__pandas_factory__`` attribute.

    Returns:
        DataFrame or Series object that complies with the original dataclass.

    Raises:
        ValueError: Raised if no factory is found or the return type
            cannot be inferred from a factory when it is a function.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    if factory is None:
        factory = spec.factory

    if factory is None:
        raise ValueError("Could not find any factory.")

    if isinstance(factory, FunctionType):
        # A function factory must carry a return annotation; report a
        # missing one as ValueError (not KeyError) as documented above.
        return_ = factory.__annotations__.get("return")

        if return_ is None:
            raise ValueError("Could not infer an object type.")
    else:
        return_ = factory

    origin = get_origin(return_) or return_

    # Guard against non-class annotations (e.g. string forward references),
    # which would otherwise make issubclass raise an undocumented TypeError.
    if isinstance(origin, type) and issubclass(origin, pd.DataFrame):
        return asframe(obj, factory=factory)
    elif isinstance(origin, type) and issubclass(origin, pd.Series):
        return asseries(obj, factory=factory)
    else:
        raise ValueError("Could not infer an object type.")
79 |
80 |
@overload
def asframe(obj: DataClassOf[TFrame, PAny], *, factory: None = None) -> TFrame: ...


@overload
def asframe(obj: DataClass[PAny], *, factory: Callable[..., TFrame]) -> TFrame: ...


@overload
def asframe(obj: DataClass[PAny], *, factory: None = None) -> pd.DataFrame: ...


def asframe(obj: Any, *, factory: Any = None) -> Any:
    """Create a DataFrame object from a dataclass object.

    The return type will be determined by a factory defined as the
    ``__pandas_factory__`` attribute in the original dataclass of
    ``obj`` or the ``factory`` argument. If neither is specified,
    it defaults to ``pandas.DataFrame``.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the DataFrame creation.

    Keyword Args:
        factory: Class or function for the DataFrame creation.
            It must take the same parameters as ``pandas.DataFrame``,
            and return an object of it or its subclass. If passed, it
            will be preferentially used even if the original dataclass
            of ``obj`` has the ``__pandas_factory__`` attribute.

    Returns:
        DataFrame object that complies with the original dataclass.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    # Priority: explicit argument > class-level factory > pandas default.
    make = factory if factory is not None else (spec.factory or pd.DataFrame)

    dataframe = make(
        data=get_data(spec),
        index=get_index(spec),
        columns=get_columns(spec),
    )
    dataframe.attrs.update(get_attrs(spec))

    return squeeze(dataframe)
131 |
132 |
@overload
def asseries(obj: DataClassOf[TSeries, PAny], *, factory: None = None) -> TSeries: ...


@overload
def asseries(obj: DataClass[PAny], *, factory: Callable[..., TSeries]) -> TSeries: ...


@overload
def asseries(obj: DataClass[PAny], *, factory: None = None) -> "pd.Series[Any]": ...


def asseries(obj: Any, *, factory: Any = None) -> Any:
    """Create a Series object from a dataclass object.

    The return type will be determined by a factory defined as the
    ``__pandas_factory__`` attribute in the original dataclass of
    ``obj`` or the ``factory`` argument. If neither is specified,
    it defaults to ``pandas.Series``.

    Args:
        obj: Dataclass object that should have attribute, column, data,
            and/or index fields. If the original dataclass has the
            ``__pandas_factory__`` attribute, it will be used as a
            factory for the Series creation.

    Keyword Args:
        factory: Class or function for the Series creation.
            It must take the same parameters as ``pandas.Series``,
            and return an object of it or its subclass. If passed, it
            will be preferentially used even if the original dataclass
            of ``obj`` has the ``__pandas_factory__`` attribute.

    Returns:
        Series object that complies with the original dataclass.

    """
    spec = Spec.from_dataclass(type(obj)) @ obj

    # Priority: explicit argument > class-level factory > pandas default.
    make = factory if factory is not None else (spec.factory or pd.Series)

    data = get_data(spec)
    index = get_index(spec)

    if data:
        # Only the first data field becomes the Series (name + values).
        series_name, values = next(iter(data.items()))
        series = make(data=values, index=index, name=series_name)
    else:
        series = make(index=index)

    series.attrs.update(get_attrs(spec))
    return squeeze(series)
186 |
187 |
def get_attrs(spec: Spec) -> dict[Hashable, Any]:
    """Derive attributes from a specification."""
    return {
        key: val
        for field in spec.fields.of(Tag.ATTR)
        for key, val in items(field)
    }
196 |
197 |
def get_columns(spec: Spec) -> Optional[pd.MultiIndex]:
    """Derive columns from a specification."""
    fields = spec.fields.of(Tag.DATA)

    if not fields:
        return None

    # Columns only make sense when the data fields carry level names.
    names = name(fields)

    if names is None:
        return None

    return pd.MultiIndex.from_tuples(map(name, fields), names=names)
210 |
211 |
def get_data(spec: Spec) -> dict[Hashable, Any]:
    """Derive data from a specification."""
    return {
        key: ensure(val, field.dtype)
        for field in spec.fields.of(Tag.DATA)
        for key, val in items(field)
    }
221 |
222 |
def get_index(spec: Spec) -> Optional[pd.MultiIndex]:
    """Derive index from a specification."""
    fields = spec.fields.of(Tag.INDEX)

    if not fields:
        return None

    data = {
        key: ensure(val, field.dtype)
        for field in fields
        for key, val in items(field)
    }

    # Broadcast so scalar index values align with array-valued ones.
    return pd.MultiIndex.from_arrays(
        np.broadcast_arrays(*data.values()),
        names=data.keys(),
    )
238 |
239 |
def ensure(data: Any, dtype: Optional[str]) -> Any:
    """Ensure data to be 1D and have given data type."""
    # Wrap scalars so the result is always one-dimensional.
    wrapped = data if is_list_like(data) else [data]

    if isinstance(wrapped, (pd.Index, pd.Series)):
        # Preserve the concrete Index/Series subclass of the input.
        return type(wrapped)(wrapped, dtype=dtype, copy=False)  # type: ignore

    return pd.array(wrapped, dtype=dtype, copy=False)
249 |
250 |
def items(field: Field) -> Iterable[tuple[Hashable, Any]]:
    """Generate default(s) of a field specification."""
    if not field.has(Tag.MULTIPLE):
        yield (name(field), field.default)
    else:
        # A multiple-item field stores a mapping of names to defaults.
        yield from field.default.items()
257 |
258 |
@overload
def name(fields: Field) -> Hashable: ...


@overload
def name(fields: Fields) -> Optional[Hashable]: ...


def name(fields: Any) -> Any:
    """Derive name of a field(s) specification."""
    if isinstance(fields, Field):
        # A dict-valued name becomes a tuple of its values (per-field name).
        field_name = fields.name

        if isinstance(field_name, dict):
            return tuple(field_name.values())

        return field_name

    if isinstance(fields, Fields):
        # The first dict-valued name provides the level names (its keys).
        for field in fields:
            if isinstance(field.name, dict):
                return tuple(field.name.keys())
280 |
def squeeze(data: TPandas) -> TPandas:
    """Drop levels of an index and columns if possible."""
    index = data.index

    if index.nlevels == 1:
        data.index = index.get_level_values(0)

    # Series have no columns; only flatten columns for DataFrames.
    if not isinstance(data, pd.Series):
        columns = data.columns

        if columns.nlevels == 1:
            data.columns = columns.get_level_values(0)

    return data
293 |
--------------------------------------------------------------------------------
/pandas_dataclasses/core/specs.py:
--------------------------------------------------------------------------------
1 | __all__ = ["Spec"]
2 |
3 |
4 | # standard library
5 | from dataclasses import Field as Field_, dataclass, fields as fields_, replace
6 | from functools import lru_cache
7 | from itertools import repeat
8 | from typing import Any, Callable, Hashable, Literal, Optional, Union
9 |
10 |
11 | # dependencies
12 | from pandas.api.types import pandas_dtype
13 | from typing_extensions import Self, get_args, get_origin, get_type_hints
14 | from .tagging import Tag, get_nontags, get_tagged, get_tags
15 | from .typing import HashDict, Pandas, TAny, is_union
16 |
17 |
@dataclass(frozen=True)
class Field:
    """Specification of a field."""

    id: str
    """Identifier of the field."""

    name: Union[Hashable, HashDict]
    """Name of the field data."""

    tags: tuple[Tag, ...] = ()
    """Tags of the field."""

    type: Optional[Any] = None
    """Type or type hint of the field data."""

    dtype: Optional[str] = None
    """Data type of the field data."""

    default: Any = None
    """Default value of the field data."""

    def has(self, tag: Tag) -> bool:
        """Check if the specification has a tag."""
        # Intersect with the union of this field's tags so that a
        # composite tag (e.g. Tag.FIELD) matches any of its members.
        return bool(tag & Tag.union(self.tags))

    def update(self, obj: Any) -> Self:
        """Update the specification by an object."""
        # name may contain "{.attr}"-style format strings filled in
        # from obj's attributes (see format below); default is replaced
        # by obj's current value for this field when present.
        return replace(
            self,
            name=format(self.name, obj),
            default=getattr(obj, self.id, self.default),
        )
51 |
52 |
class Fields(tuple[Field, ...]):
    """List of field specifications with selectors."""

    def of(self, tag: Tag) -> Self:
        """Select only fields that have a tag."""
        return type(self)(field for field in self if field.has(tag))

    def update(self, obj: Any) -> Self:
        """Update the specifications by an object."""
        return type(self)(spec.update(obj) for spec in self)
63 |
64 |
@dataclass(frozen=True)
class Spec:
    """Specification of pandas data creation."""

    name: Optional[str] = None
    """Name of the specification."""

    origin: Optional[type] = None
    """Original dataclass of the specification."""

    factory: Optional[Callable[..., Pandas]] = None
    """Factory for pandas data creation."""

    fields: Fields = Fields()
    """List of field specifications."""

    @classmethod
    def from_dataclass(cls, dataclass: type) -> Self:
        """Create a specification from a data class."""
        # Resolve (possibly stringified) field types in place first;
        # convert_field below relies on the evaluated hints.
        eval_field_types(dataclass)

        return cls(
            name=dataclass.__name__,
            origin=dataclass,
            factory=getattr(dataclass, "__pandas_factory__", None),
            fields=Fields(map(convert_field, fields_(dataclass))),
        )

    def update(self, obj: Any) -> Self:
        """Update the specification by an object."""
        if self.origin is not None:
            if not isinstance(obj, self.origin):
                # NOTE(review): assumes the origin dataclass accepts a
                # single positional argument here — confirm with callers.
                obj = self.origin(obj)

        return replace(self, fields=self.fields.update(obj))

    def __matmul__(self, obj: Any) -> Self:
        """Alias of the update method (``spec @ obj``)."""
        return self.update(obj)
104 |
105 |
@lru_cache(maxsize=None)
def convert_field(field_: Field_[Any]) -> Field:
    """Convert a dataclass field to a field specification.

    Cached per dataclass ``Field`` object; ``eval_field_types`` must
    have resolved ``field_.type`` to an actual type hint beforehand
    (``Spec.from_dataclass`` guarantees this ordering).
    """
    return Field(
        id=field_.name,
        name=get_first(field_.type, field_.name),
        tags=get_tags(field_.type, Tag.FIELD),
        type=field_.type,
        dtype=get_dtype(field_.type),
        default=field_.default,
    )
117 |
118 |
@lru_cache(maxsize=None)
def eval_field_types(dataclass: type) -> None:
    """Evaluate field types of a dataclass.

    Replaces each field's ``type`` attribute in place with the hint
    evaluated by ``get_type_hints`` (keeping ``Annotated`` extras);
    cached so the mutation happens at most once per dataclass.
    """
    types = get_type_hints(dataclass, include_extras=True)

    for field_ in fields_(dataclass):
        field_.type = types[field_.name]
126 |
127 |
def format(obj: TAny, by: Any) -> TAny:
    """Format a string or nested strings in an object."""
    if isinstance(obj, str):
        # Preserve str subclasses by rebuilding with the original type.
        return type(obj)(obj.format(by))  # type: ignore

    if isinstance(obj, (list, tuple)):
        return type(obj)(format(item, by) for item in obj)  # type: ignore

    if isinstance(obj, dict):
        # Each (key, value) pair is formatted via the tuple branch above.
        return type(obj)(format(item, by) for item in obj.items())  # type: ignore

    return obj
140 |
141 |
def get_dtype(tp: Any) -> Optional[str]:
    """Extract a data type of NumPy or pandas from a type hint."""
    tp = get_tagged(tp, Tag.DATA | Tag.INDEX, True)

    if tp is None:
        return None

    dtype = get_tagged(tp, Tag.DTYPE)

    # Any and NoneType both mean "no explicit dtype".
    if dtype is None or dtype is Any or dtype is type(None):
        return None

    if is_union(dtype):
        dtype = get_args(dtype)[0]

    if get_origin(dtype) is Literal:
        dtype = get_args(dtype)[0]

    return pandas_dtype(dtype).name
160 |
161 |
def get_first(tp: Any, default: Any = None) -> Optional[Any]:
    """Extract the first nontag annotation from a type hint."""
    nontags = get_nontags(tp, Tag.FIELD)

    # Ellipsis is treated as "no explicit name given".
    if not nontags or nontags[0] is Ellipsis:
        return default

    return nontags[0]
171 |
--------------------------------------------------------------------------------
/pandas_dataclasses/core/tagging.py:
--------------------------------------------------------------------------------
1 | __all__ = ["Tag"]
2 |
3 |
4 | # standard library
5 | from enum import Flag, auto
6 | from functools import reduce
7 | from itertools import chain, filterfalse
8 | from operator import or_
9 | from typing import Annotated, Any, Iterable, Optional
10 |
11 |
12 | # dependencies
13 | from typing_extensions import Self, TypeGuard, get_args, get_origin
14 |
15 |
16 | class Tag(Flag):
17 | """Collection of tags for annotating types."""
18 |
19 | ATTR = auto()
20 | """Tag for a type specifying an attribute field."""
21 |
22 | DATA = auto()
23 | """Tag for a type specifying a data field."""
24 |
25 | INDEX = auto()
26 | """Tag for a type specifying an index field."""
27 |
28 | DTYPE = auto()
29 | """Tag for a type specifying a data type."""
30 |
31 | MULTIPLE = auto()
32 | """Tag for a type specifying a multiple-item field."""
33 |
34 | FIELD = ATTR | DATA | INDEX
35 | """Union of field-related tags."""
36 |
37 | ANY = FIELD | DTYPE | MULTIPLE
38 | """Union of all tags."""
39 |
40 | def annotates(self, tp: Any) -> bool:
41 | """Check if the tag annotates a type hint."""
42 | tags = filter(type(self).creates, get_args(tp))
43 | return bool(self & type(self).union(tags))
44 |
45 | @classmethod
46 | def creates(cls, obj: Any) -> TypeGuard[Self]:
47 | """Check if Tag is the type of an object."""
48 | return isinstance(obj, cls)
49 |
50 | @classmethod
51 | def union(cls, tags: Iterable[Self]) -> Self:
52 | """Create a tag as an union of tags."""
53 | return reduce(or_, tags, cls(0))
54 |
55 | def __repr__(self) -> str:
56 | """Return the bracket-style string of the tag."""
57 | return str(self)
58 |
59 | def __str__(self) -> str:
60 | """Return the bracket-style string of the tag."""
61 | return f"<{str(self.name).lower()}>"
62 |
63 |
def gen_annotated(tp: Any) -> Iterable[Any]:
    """Generate all annotated types in a type hint."""
    if get_origin(tp) is not Annotated:
        # Not annotated itself: recurse into all type arguments.
        for arg in get_args(tp):
            yield from gen_annotated(arg)
    else:
        yield tp
        yield from gen_annotated(get_args(tp)[0])
71 |
72 |
def get_tagged(
    tp: Any,
    bound: Tag = Tag.ANY,
    keep_annotations: bool = False,
) -> Optional[Any]:
    """Extract the first tagged type from a type hint."""
    for tagged in gen_annotated(tp):
        if not bound.annotates(tagged):
            continue

        # First match wins; strip the annotations unless asked to keep them.
        return tagged if keep_annotations else get_args(tagged)[0]
81 |
82 |
def get_tags(tp: Any, bound: Tag = Tag.ANY) -> tuple[Tag, ...]:
    """Extract all tags from the first tagged type."""
    tagged = get_tagged(tp, bound, True)
    return tuple(arg for arg in get_args(tagged)[1:] if Tag.creates(arg))
87 |
88 |
def get_nontags(tp: Any, bound: Tag = Tag.ANY) -> tuple[Any, ...]:
    """Extract all except tags from the first tagged type."""
    tagged = get_tagged(tp, bound, True)
    return tuple(arg for arg in get_args(tagged)[1:] if not Tag.creates(arg))
93 |
--------------------------------------------------------------------------------
/pandas_dataclasses/core/typing.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | "DataClass",
3 | "DataClassOf",
4 | "HashDict",
5 | "Pandas",
6 | "PAny",
7 | "TAny",
8 | "TFrame",
9 | "TPandas",
10 | "TSeries",
11 | "is_union",
12 | ]
13 |
14 |
15 | # standard library
16 | import types
17 | from dataclasses import Field
18 | from typing import Any, Callable, ClassVar, Hashable, Protocol, TypeVar, Union
19 |
20 |
21 | # dependencies
22 | from pandas import DataFrame, Series
23 | from typing_extensions import ParamSpec, get_origin
24 |
25 |
# type hints and type variables shared across the package
HashDict = dict[Hashable, Hashable]
"""Type hint for dictionary of hashable keys and values."""

Pandas = Union[DataFrame, "Series[Any]"]
"""Type hint for any pandas object."""

PAny = ParamSpec("PAny")
"""Parameter specification variable for any function."""

TAny = TypeVar("TAny")
"""Type variable for any class."""

TFrame = TypeVar("TFrame", bound=DataFrame)
"""Type variable for pandas DataFrame."""

TPandas = TypeVar("TPandas", bound=Pandas)
"""Type variable for any class of pandas object."""

TSeries = TypeVar("TSeries", bound="Series[Any]")
"""Type variable for pandas Series (of any dtype)."""
46 |
47 |
class DataClass(Protocol[PAny]):
    """Protocol for any dataclass object."""

    # Presence of __dataclass_fields__ is what structurally marks a dataclass.
    __dataclass_fields__: ClassVar[dict[str, Field[Any]]]

    def __init__(self, *args: PAny.args, **kwargs: PAny.kwargs) -> None: ...
54 |
55 |
class DataClassOf(Protocol[TPandas, PAny]):
    """Protocol for any dataclass object with a factory."""

    __dataclass_fields__: ClassVar[dict[str, Field[Any]]]
    # Callable that builds the pandas object this dataclass maps to.
    __pandas_factory__: Callable[..., TPandas]

    def __init__(self, *args: PAny.args, **kwargs: PAny.kwargs) -> None: ...
63 |
64 |
def is_union(tp: Any) -> bool:
    """Check if a type hint is a union of types."""
    # types.UnionType (the X | Y form) only exists on Python 3.10+.
    UnionType = getattr(types, "UnionType", None)

    if UnionType is not None and isinstance(tp, UnionType):
        return True

    return get_origin(tp) is Union
71 |
--------------------------------------------------------------------------------
/pandas_dataclasses/extras/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["hints", "new"]
2 |
3 |
4 | from . import hints
5 | from . import new
6 |
--------------------------------------------------------------------------------
/pandas_dataclasses/extras/hints.py:
--------------------------------------------------------------------------------
1 | __all__ = ["Attr", "Data", "Index", "Multiple"]
2 |
3 |
4 | # standard library
5 | from typing import Annotated, Collection
6 |
7 |
8 | # dependencies
9 | from ..core.tagging import Tag
10 | from ..core.typing import TAny
11 |
12 |
# type hints
Attr = Annotated[TAny, Tag.ATTR]
"""Type hint for attribute fields (``Attr[TAny]``)."""

# For Data/Index, the inner Annotated marks the element type as
# the dtype of the field (consumed by core.specs.get_dtype).
Data = Annotated[Collection[Annotated[TAny, Tag.DTYPE]], Tag.DATA]
"""Type hint for data fields (``Data[TAny]``)."""

Index = Annotated[Collection[Annotated[TAny, Tag.DTYPE]], Tag.INDEX]
"""Type hint for index fields (``Index[TAny]``)."""

Multiple = dict[str, Annotated[TAny, Tag.MULTIPLE]]
"""Type hint for multiple-item fields (``Multiple[TAny]``)."""
25 |
--------------------------------------------------------------------------------
/pandas_dataclasses/extras/new.py:
--------------------------------------------------------------------------------
1 | __all__ = ["As", "AsFrame", "AsSeries"]
2 |
3 |
4 | # standard library
5 | from inspect import signature
6 | from types import MethodType
7 | from typing import Any, Callable, ForwardRef, Generic, Union
8 |
9 |
10 | # dependencies
11 | import pandas as pd
12 | from typing_extensions import get_args, get_origin
13 | from ..core.api import aspandas
14 | from ..core.typing import DataClassOf, PAny, TPandas
15 |
16 |
class classproperty:
    """Class property decorator dedicated to ``As.new``.

    Unlike ``property``, the getter is invoked with the owner *class*
    on class-level attribute access, so ``As.new`` can build a
    classmethod-like callable for each inheriting class.
    """

    def __init__(self, fget: Callable[..., Any]) -> None:
        self.fget = fget

    def __get__(
        self,
        obj: Any,
        cls: type[DataClassOf[TPandas, PAny]],
    ) -> Callable[PAny, TPandas]:
        # Always call with the owner class; the instance (obj) is ignored.
        return self.fget(cls)  # type: ignore
29 |
30 |
class As(Generic[TPandas]):
    """Pandas data creation by a classmethod (``new``)."""

    __pandas_factory__: Callable[..., TPandas]
    """Factory for pandas data creation."""

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Add a pandas factory to an inheriting class."""
        # The factory may be given as a class keyword argument
        # (e.g. ``class C(As[T], factory=...)``); otherwise it is
        # inferred from the type parameter of ``As`` via get_factory.
        factory = kwargs.pop("factory", None)
        cls.__pandas_factory__ = factory or get_factory(cls)
        super().__init_subclass__(**kwargs)

    @classproperty
    def new(cls) -> MethodType:
        """Return a classmethod for pandas data creation."""

        # Mirror the dataclass __init__ signature, but swap the return
        # annotation for the pandas type, so help()/IDEs show the
        # actual creation signature of ``new``.
        sig = signature(cls.__init__)  # type: ignore
        sig = sig.replace(return_annotation=get_return(cls))

        def new(cls: Any, *args: Any, **kwargs: Any) -> Any:
            """Create a pandas data from dataclass arguments."""
            return aspandas(cls(*args, **kwargs))

        setattr(new, "__signature__", sig)
        return MethodType(new, cls)
56 |
57 |
AsFrame = As[pd.DataFrame]
"""Alias of ``As[pandas.DataFrame]``."""


# NOTE(review): the subscript below is a string forward reference
# (presumably because ``pd.Series[Any]`` cannot be evaluated at
# runtime); get_factory special-cases this exact string.
AsSeries = As["pd.Series[Any]"]
"""Alias of ``As[pandas.Series[Any]]``."""
64 |
65 |
def get_factory(cls: Any) -> Callable[..., Any]:
    """Extract a pandas factory from a class."""
    factory = get_return(cls)

    if not callable(factory):
        # special handling for AsSeries (string forward reference)
        if factory == "pd.Series[Any]":
            return pd.Series

        raise TypeError("Factory must be callable.")

    return factory
78 |
79 |
def get_return(cls: Any) -> Union[type[Any], str]:
    """Extract a return type from a class."""
    for base in getattr(cls, "__orig_bases__", ()):
        if get_origin(base) is As:
            tp = get_args(base)[0]

            # Forward references are returned as their literal string.
            if isinstance(tp, ForwardRef):
                return tp.__forward_arg__

            return tp  # type: ignore

    raise TypeError("Could not find any return type.")
94 |
--------------------------------------------------------------------------------
/pandas_dataclasses/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/astropenguin/pandas-dataclasses/d4a7b311d29df1566d61a2cd703047f5f6b16c48/pandas_dataclasses/py.typed
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "pandas-dataclasses"
3 | version = "1.0.0"
4 | description = "pandas data creation by data classes"
5 | readme = "README.md"
6 | keywords = ["dataclasses", "specifications", "typing", "pandas"]
7 | requires-python = ">=3.9,<3.14"
8 | dependencies = [
9 | "numpy>=1.22,<3.0",
10 | "pandas>=1.5,<3.0",
11 | "pandas-stubs>=1.5,<3.0",
12 | "typing-extensions>=4.1,<5.0",
13 | ]
14 | classifiers = [
15 | "License :: OSI Approved :: MIT License",
16 | "Programming Language :: Python :: 3",
17 | "Programming Language :: Python :: 3.9",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | "Programming Language :: Python :: 3.12",
21 | "Programming Language :: Python :: 3.13",
22 | ]
23 |
24 | [[project.authors]]
25 | name = "Akio Taniguchi"
26 | email = "taniguchi.akio@gmail.com"
27 |
28 | [project.license]
29 | file = "LICENSE"
30 |
31 | [project.urls]
32 | homepage = "https://astropenguin.github.io/pandas-dataclasses/v1.0.0"
33 | repository = "https://github.com/astropenguin/pandas-dataclasses"
34 |
35 | [dependency-groups]
36 | dev = [
37 | "black>=24.8",
38 | "ipython>=8.18",
39 | "myst-parser>=3.0",
40 | "pydata-sphinx-theme>=0.16",
41 | "pyright>=1.1",
42 | "pytest>=8.3",
43 | "sphinx>=7.4",
44 | ]
45 |
46 | [build-system]
47 | requires = ["hatchling"]
48 | build-backend = "hatchling.build"
49 |
50 | [tool.pyright]
51 | reportUnknownArgumentType = "warning"
52 | reportUnknownMemberType = "warning"
53 | reportUnknownVariableType = "warning"
54 | reportUnnecessaryIsInstance = "warning"
55 | typeCheckingMode = "strict"
56 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/astropenguin/pandas-dataclasses/d4a7b311d29df1566d61a2cd703047f5f6b16c48/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data.py:
--------------------------------------------------------------------------------
1 | __all__ = ["Weather", "weather", "df_weather_true", "ser_weather_true"]
2 |
3 |
4 | # standard library
5 | from dataclasses import dataclass, field
6 | from typing import Annotated as Ann, Any
7 |
8 |
9 | # dependencies
10 | import pandas as pd
11 | from pandas_dataclasses import Attr, Data, Index, Multiple
12 |
13 |
14 | # test dataclass and object
def name(meas: str, stat: str) -> dict[str, str]:
    """Return a two-level column name for a measurement statistic."""
    return dict(Measurement=meas, Statistic=stat)
17 |
18 |
@dataclass
class Weather:
    """Weather information.

    Name annotations such as ``"Temperature ({.temp_unit})"`` are
    format strings: the ``{.attr}`` placeholders are filled in from
    this instance's attributes when a spec is updated against it
    (see ``pandas_dataclasses.core.specs.format``).
    """

    year: Ann[Index[int], "Year"]
    """Year of the measured time."""

    month: Ann[Index[int], "Month"]
    """Month of the measured time."""

    temp_avg: Ann[Data[float], name("Temperature ({.temp_unit})", "Average")]
    """Monthly average temperature with given units."""

    temp_max: Ann[Data[float], name("Temperature ({.temp_unit})", "Maximum")]
    """Monthly maximum temperature with given units."""

    wind_avg: Ann[Data[float], name("Wind speed ({.wind_unit})", "Average")]
    """Monthly average wind speed with given units."""

    wind_max: Ann[Data[float], name("Wind speed ({.wind_unit})", "Maximum")]
    """Monthly maximum wind speed with given units."""

    loc: Ann[Attr[str], "Location"] = "Tokyo"
    """Name of the measured location."""

    lon: Ann[Attr[float], "Longitude ({.lon_unit})"] = 139.69167
    """Longitude at the measured location."""

    lat: Ann[Attr[float], "Latitude ({.lat_unit})"] = 35.68944
    """Latitude at the measured location."""

    temp_unit: str = "deg C"
    """Units of the temperature."""

    wind_unit: str = "m/s"
    """Units of the wind speed."""

    lon_unit: str = "deg"
    """Units of the longitude."""

    lat_unit: str = "deg"
    """Units of the latitude."""

    attrs: Multiple[Attr[Any]] = field(default_factory=dict)
    """Other attributes."""
64 |
65 |
weather = Weather(
    [2020, 2020, 2021, 2021, 2022],  # year
    [1, 7, 1, 7, 1],  # month
    [7.1, 24.3, 5.4, 25.9, 4.9],  # temp_avg
    [11.1, 27.7, 10.3, 30.3, 9.4],  # temp_max
    [2.4, 3.1, 2.3, 2.4, 2.6],  # wind_avg
    [8.8, 10.2, 10.7, 9.0, 8.8],  # wind_max
)
74 |
75 |
# expected pandas data
# NOTE: ``data`` and ``columns`` carry the same two-level tuples; the
# explicit ``columns`` argument also fixes the column level names
# ("Measurement", "Statistic").
df_weather_true = pd.DataFrame(
    data={
        ("Temperature (deg C)", "Average"): [7.1, 24.3, 5.4, 25.9, 4.9],
        ("Temperature (deg C)", "Maximum"): [11.1, 27.7, 10.3, 30.3, 9.4],
        ("Wind speed (m/s)", "Average"): [2.4, 3.1, 2.3, 2.4, 2.6],
        ("Wind speed (m/s)", "Maximum"): [8.8, 10.2, 10.7, 9.0, 8.8],
    },
    index=pd.MultiIndex.from_arrays(
        [
            [2020, 2020, 2021, 2021, 2022],
            [1, 7, 1, 7, 1],
        ],
        names=("Year", "Month"),
    ),
    columns=pd.MultiIndex.from_tuples(
        [
            ("Temperature (deg C)", "Average"),
            ("Temperature (deg C)", "Maximum"),
            ("Wind speed (m/s)", "Average"),
            ("Wind speed (m/s)", "Maximum"),
        ],
        names=("Measurement", "Statistic"),
    ),
)
# Attr fields end up in DataFrame.attrs with their formatted names.
df_weather_true.attrs = {
    "Location": "Tokyo",
    "Longitude (deg)": 139.69167,
    "Latitude (deg)": 35.68944,
}
106 |
107 |
# Series mirrors the first data field of the frame (average temperature),
# keeping the same MultiIndex and attrs.
ser_weather_true: "pd.Series[Any]" = pd.Series(
    data=[7.1, 24.3, 5.4, 25.9, 4.9],
    index=pd.MultiIndex.from_arrays(
        [
            [2020, 2020, 2021, 2021, 2022],
            [1, 7, 1, 7, 1],
        ],
        names=("Year", "Month"),
    ),
    name=("Temperature (deg C)", "Average"),
)
ser_weather_true.attrs = {
    "Location": "Tokyo",
    "Longitude (deg)": 139.69167,
    "Latitude (deg)": 35.68944,
}
124 |
--------------------------------------------------------------------------------
/tests/test_core_api.py:
--------------------------------------------------------------------------------
1 | # standard library
2 | from typing import cast
3 |
4 |
5 | # dependencies
6 | import pandas as pd
7 | from pandas.testing import assert_frame_equal, assert_series_equal
8 | from pandas_dataclasses import Spec, Tag, asframe, asseries
9 | from pandas_dataclasses.core.api import (
10 | get_attrs,
11 | get_columns,
12 | get_data,
13 | get_index,
14 | name,
15 | )
16 | from .data import Weather, weather, df_weather_true, ser_weather_true
17 |
18 |
# test data
# Shared spec fixture: built from the Weather dataclass and updated (@)
# with the weather instance so field names and defaults are filled in.
spec = Spec.from_dataclass(Weather) @ weather
21 |
22 |
23 | # test functions
def test_asframe() -> None:
    """asframe(weather) should equal the hand-built expected frame."""
    assert_frame_equal(asframe(weather), df_weather_true)
26 |
27 |
def test_asseries() -> None:
    """asseries(weather) should equal the hand-built expected series."""
    assert_series_equal(asseries(weather), ser_weather_true)
30 |
31 |
def test_get_attrs() -> None:
    """Each attr field's name should map to its default value."""
    fields = spec.fields.of(Tag.ATTR)

    for field, (key, val) in zip(fields, get_attrs(spec).items()):
        assert key == field.name
        assert val == field.default
38 |
39 |
def test_get_columns() -> None:
    """Columns should carry the data fields' names and level names."""
    columns = cast(pd.MultiIndex, get_columns(spec))
    fields = spec.fields.of(Tag.DATA)

    for column, field in zip(columns, fields):
        assert column == name(field)

    assert columns.names == name(fields)  # type: ignore
47 |
48 |
def test_get_data() -> None:
    """Data should carry each data field's name, dtype, and values."""
    fields = spec.fields.of(Tag.DATA)

    for field, (key, val) in zip(fields, get_data(spec).items()):
        assert key == name(field)
        assert val.dtype.name == field.dtype
        assert (val == field.default).all()
56 |
57 |
def test_get_index() -> None:
    """Index levels should carry each index field's name, dtype, values."""
    index = cast(pd.MultiIndex, get_index(spec))
    fields = spec.fields.of(Tag.INDEX)

    for i, field in enumerate(fields):
        level = index.get_level_values(i)
        assert level.name == field.name
        assert level.dtype.name == field.dtype
        assert (level == field.default).all()
66 |
--------------------------------------------------------------------------------
/tests/test_core_specs.py:
--------------------------------------------------------------------------------
1 | # standard library
2 | from dataclasses import MISSING
3 |
4 |
5 | # dependencies
6 | from pandas_dataclasses import Spec, Tag
7 | from .data import Weather, name, weather
8 |
9 |
# test data
spec = Spec.from_dataclass(Weather)  # defaults are still MISSING
spec_updated = spec @ weather  # names formatted and defaults filled in
13 |
14 |
15 | # test functions
def test_year() -> None:
    """First index field before update: static parts, default missing."""
    field = spec.fields.of(Tag.INDEX)[0]

    assert field.default is MISSING
    assert (field.id, field.name, field.dtype) == ("year", "Year", "int64")
    assert field.tags == (Tag.INDEX,)
24 |
25 |
def test_year_updated() -> None:
    """First index field after update: default filled from the instance."""
    field = spec_updated.fields.of(Tag.INDEX)[0]

    assert field.default == weather.year
    assert (field.id, field.name, field.dtype) == ("year", "Year", "int64")
    assert field.tags == (Tag.INDEX,)
34 |
35 |
def test_month() -> None:
    """Second index field before update: static parts, default missing."""
    field = spec.fields.of(Tag.INDEX)[1]

    assert field.default is MISSING
    assert (field.id, field.name, field.dtype) == ("month", "Month", "int64")
    assert field.tags == (Tag.INDEX,)
44 |
45 |
def test_month_updated() -> None:
    """Second index field ('month') after updating the spec with an instance."""
    field = spec_updated.fields.of(Tag.INDEX)[1]

    assert (field.id, field.tags) == ("month", (Tag.INDEX,))
    assert (field.name, field.dtype) == ("Month", "int64")
    assert field.default == weather.month
54 |
55 |
def test_temp_avg() -> None:
    """First data field ('temp_avg') of the static spec (unformatted name)."""
    field = spec.fields.of(Tag.DATA)[0]

    assert (field.id, field.tags) == ("temp_avg", (Tag.DATA,))
    assert field.name == name("Temperature ({.temp_unit})", "Average")
    assert field.dtype == "float64"
    assert field.default is MISSING
64 |
65 |
def test_temp_avg_updated() -> None:
    """First data field ('temp_avg') with the name format resolved."""
    field = spec_updated.fields.of(Tag.DATA)[0]

    assert (field.id, field.tags) == ("temp_avg", (Tag.DATA,))
    assert field.name == name("Temperature (deg C)", "Average")
    assert field.dtype == "float64"
    assert field.default == weather.temp_avg
74 |
75 |
def test_temp_max() -> None:
    """Second data field ('temp_max') of the static spec (unformatted name)."""
    field = spec.fields.of(Tag.DATA)[1]

    assert (field.id, field.tags) == ("temp_max", (Tag.DATA,))
    assert field.name == name("Temperature ({.temp_unit})", "Maximum")
    assert field.dtype == "float64"
    assert field.default is MISSING
84 |
85 |
def test_temp_max_updated() -> None:
    """Second data field ('temp_max') with the name format resolved."""
    field = spec_updated.fields.of(Tag.DATA)[1]

    assert (field.id, field.tags) == ("temp_max", (Tag.DATA,))
    assert field.name == name("Temperature (deg C)", "Maximum")
    assert field.dtype == "float64"
    assert field.default == weather.temp_max
94 |
95 |
def test_wind_avg() -> None:
    """Third data field ('wind_avg') of the static spec (unformatted name)."""
    field = spec.fields.of(Tag.DATA)[2]

    assert (field.id, field.tags) == ("wind_avg", (Tag.DATA,))
    assert field.name == name("Wind speed ({.wind_unit})", "Average")
    assert field.dtype == "float64"
    assert field.default is MISSING
104 |
105 |
def test_wind_avg_updated() -> None:
    """Third data field ('wind_avg') with the name format resolved."""
    field = spec_updated.fields.of(Tag.DATA)[2]

    assert (field.id, field.tags) == ("wind_avg", (Tag.DATA,))
    assert field.name == name("Wind speed (m/s)", "Average")
    assert field.dtype == "float64"
    assert field.default == weather.wind_avg
114 |
115 |
def test_wind_max() -> None:
    """Fourth data field ('wind_max') of the static spec (unformatted name)."""
    field = spec.fields.of(Tag.DATA)[3]

    assert (field.id, field.tags) == ("wind_max", (Tag.DATA,))
    assert field.name == name("Wind speed ({.wind_unit})", "Maximum")
    assert field.dtype == "float64"
    assert field.default is MISSING
124 |
125 |
def test_wind_max_updated() -> None:
    """Fourth data field ('wind_max') with the name format resolved."""
    field = spec_updated.fields.of(Tag.DATA)[3]

    assert (field.id, field.tags) == ("wind_max", (Tag.DATA,))
    assert field.name == name("Wind speed (m/s)", "Maximum")
    assert field.dtype == "float64"
    assert field.default == weather.wind_max
134 |
135 |
def test_loc() -> None:
    """First attribute field ('loc'); default comes from the class attribute."""
    field = spec.fields.of(Tag.ATTR)[0]

    assert (field.id, field.tags) == ("loc", (Tag.ATTR,))
    assert field.name == "Location"
    assert field.default == Weather.loc
143 |
144 |
def test_loc_updated() -> None:
    """First attribute field ('loc'); default comes from the instance."""
    field = spec_updated.fields.of(Tag.ATTR)[0]

    assert (field.id, field.tags) == ("loc", (Tag.ATTR,))
    assert field.name == "Location"
    assert field.default == weather.loc
152 |
153 |
def test_lon() -> None:
    """Second attribute field ('lon'); name format left unresolved."""
    field = spec.fields.of(Tag.ATTR)[1]

    assert (field.id, field.tags) == ("lon", (Tag.ATTR,))
    assert field.name == "Longitude ({.lon_unit})"
    assert field.default == Weather.lon
161 |
162 |
def test_lon_updated() -> None:
    """Second attribute field ('lon'); name format resolved from the instance."""
    field = spec_updated.fields.of(Tag.ATTR)[1]

    assert (field.id, field.tags) == ("lon", (Tag.ATTR,))
    assert field.name == "Longitude (deg)"
    assert field.default == weather.lon
170 |
171 |
def test_lat() -> None:
    """Third attribute field ('lat'); name format left unresolved."""
    field = spec.fields.of(Tag.ATTR)[2]

    assert (field.id, field.tags) == ("lat", (Tag.ATTR,))
    assert field.name == "Latitude ({.lat_unit})"
    assert field.default == Weather.lat
179 |
180 |
def test_lat_updated() -> None:
    """Third attribute field ('lat'); name format resolved from the instance."""
    field = spec_updated.fields.of(Tag.ATTR)[2]

    assert (field.id, field.tags) == ("lat", (Tag.ATTR,))
    assert field.name == "Latitude (deg)"
    assert field.default == weather.lat
188 |
189 |
def test_attrs() -> None:
    """Fourth attribute field ('attrs'); tagged both ATTR and MULTIPLE."""
    field = spec.fields.of(Tag.ATTR)[3]

    assert (field.id, field.tags) == ("attrs", (Tag.ATTR, Tag.MULTIPLE))
    assert field.name == "attrs"
    assert field.default is MISSING
197 |
198 |
def test_attrs_updated() -> None:
    """Fourth attribute field ('attrs') with the instance's default."""
    field = spec_updated.fields.of(Tag.ATTR)[3]

    assert (field.id, field.tags) == ("attrs", (Tag.ATTR, Tag.MULTIPLE))
    assert field.name == "attrs"
    assert field.default == weather.attrs
206 |
207 |
def test_factory() -> None:
    """A spec built without an explicit factory has factory set to None."""
    factory = spec.factory
    assert factory is None
210 |
211 |
def test_name() -> None:
    """The spec's name equals the dataclass's name."""
    expected = Weather.__name__
    assert spec.name == expected
214 |
215 |
def test_origin() -> None:
    """The spec's origin is the dataclass object itself (identity check)."""
    origin = spec.origin
    assert origin is Weather
218 |
--------------------------------------------------------------------------------
/tests/test_core_tagging.py:
--------------------------------------------------------------------------------
1 | # standard library
2 | from typing import Any, Union
3 |
4 |
5 | # dependencies
6 | from pandas_dataclasses import Attr, Data, Index, Tag
7 | from pandas_dataclasses.core.tagging import get_tags
8 | from pytest import mark
9 | from typing_extensions import Annotated as Ann
10 |
11 |
# test data
# Each entry pairs a type hint with the tags expected from get_tags():
# bare tagged hints, Annotated-wrapped hints, an untagged hint, and
# unions where only the first member carries a tag.
testdata: list[tuple[Any, tuple[Tag, ...]]] = [
    (Attr[Any], (Tag.ATTR,)),  # type: ignore
    (Data[Any], (Tag.DATA,)),
    (Index[Any], (Tag.INDEX,)),
    (Any, ()),
    (Ann[Attr[Any], "attr"], (Tag.ATTR,)),  # type: ignore
    (Ann[Data[Any], "data"], (Tag.DATA,)),
    (Ann[Index[Any], "index"], (Tag.INDEX,)),
    (Ann[Any, "other"], ()),
    (Union[Ann[Attr[Any], "attr"], Ann[Any, "any"]], (Tag.ATTR,)),  # type: ignore
    (Union[Ann[Data[Any], "data"], Ann[Any, "any"]], (Tag.DATA,)),
    (Union[Ann[Index[Any], "index"], Ann[Any, "any"]], (Tag.INDEX,)),
    (Union[Ann[Any, "other"], Ann[Any, "any"]], ()),
]
27 |
28 |
29 | # test functions
@mark.parametrize("tp, tags", testdata)
def test_get_tags(tp: Any, tags: tuple[Tag, ...]) -> None:
    """get_tags should return the expected tags for each type hint."""
    result = get_tags(tp)
    assert result == tags
33 |
--------------------------------------------------------------------------------
/tests/test_core_typing.py:
--------------------------------------------------------------------------------
1 | # standard library
2 | from typing import Annotated as Ann, Any, Hashable, Literal as L, Optional, Union
3 |
4 |
5 | # dependencies
6 | import numpy as np
7 | import pandas as pd
8 | from pandas_dataclasses import Attr, Data, Index
9 | from pandas_dataclasses.core.specs import get_dtype, get_first
10 | from pytest import mark
11 |
12 |
# test data
# Each entry pairs a Data/Index hint with the dtype expected from get_dtype():
# Any/None yield no dtype; Python types map to NumPy dtypes; string literals
# map to NumPy or pandas extension dtypes; Annotated wrappers and unions
# resolve to the dtype of the first (tagged) member.
testdata_dtype: list[tuple[Any, Any]] = [
    (Data[Any], None),
    (Data[None], None),
    (Data[int], np.dtype("i8")),
    (Data[Union[int, None]], np.dtype("i8")),
    (Data[L["i8"]], np.dtype("i8")),
    (Data[L["boolean"]], pd.BooleanDtype()),
    (Data[L["category"]], pd.CategoricalDtype()),
    (Index[Any], None),
    (Index[None], None),
    (Index[int], np.dtype("i8")),
    (Index[Union[int, None]], np.dtype("i8")),
    (Index[L["i8"]], np.dtype("i8")),
    (Index[L["boolean"]], pd.BooleanDtype()),
    (Index[L["category"]], pd.CategoricalDtype()),
    (Ann[Data[float], "data"], np.dtype("f8")),
    (Ann[Index[float], "index"], np.dtype("f8")),
    (Union[Ann[Data[float], "data"], Ann[Any, "any"]], np.dtype("f8")),
    (Union[Ann[Index[float], "index"], Ann[Any, "any"]], np.dtype("f8")),
]
34 |
# Each entry pairs a hint with the value expected from get_first(tp, None):
# the first Annotated metadata of a tagged hint is returned; untagged hints,
# unannotated hints, and Ellipsis-first annotations fall back to None.
testdata_first: list[tuple[Any, Optional[Hashable]]] = [
    (Attr[Any], None),  # type: ignore
    (Data[Any], None),
    (Index[Any], None),
    (Any, None),
    (Ann[Attr[Any], "attr"], "attr"),  # type: ignore
    (Ann[Data[Any], "data"], "data"),
    (Ann[Index[Any], "index"], "index"),
    (Ann[Any, "other"], None),
    (Ann[Attr[Any], ..., "attr"], None),  # type: ignore
    (Ann[Data[Any], ..., "data"], None),
    (Ann[Index[Any], ..., "index"], None),
    (Ann[Any, ..., "other"], None),
    (Union[Ann[Attr[Any], "attr"], Ann[Any, "any"]], "attr"),  # type: ignore
    (Union[Ann[Data[Any], "data"], Ann[Any, "any"]], "data"),
    (Union[Ann[Index[Any], "index"], Ann[Any, "any"]], "index"),
    (Union[Ann[Any, "other"], Ann[Any, "any"]], None),
]
53 |
54 |
55 | # test functions
@mark.parametrize("tp, dtype", testdata_dtype)
def test_get_dtype(tp: Any, dtype: Optional[str]) -> None:
    """get_dtype should return the expected dtype for each type hint."""
    result = get_dtype(tp)
    assert result == dtype
59 |
60 |
@mark.parametrize("tp, first", testdata_first)
def test_get_first(tp: Any, first: Optional[Any]) -> None:
    """get_first should return the expected value (or None) for each hint."""
    result = get_first(tp, None)
    assert result == first
64 |
--------------------------------------------------------------------------------
/tests/test_extras_new.py:
--------------------------------------------------------------------------------
1 | # standard library
2 | from dataclasses import dataclass
3 | from typing import Any
4 |
5 |
6 | # dependencies
7 | import pandas as pd
8 | from pandas.testing import assert_frame_equal, assert_series_equal
9 | from pandas_dataclasses import As, AsFrame, AsSeries
10 | from .data import Weather, weather, df_weather_true, ser_weather_true
11 |
12 |
13 | # test data
def factory(*args: Any, **kwargs: Any) -> pd.Series:  # type: ignore
    """Pass-through factory that forwards all arguments to pd.Series."""
    series: pd.Series = pd.Series(*args, **kwargs)  # type: ignore
    return series
16 |
17 |
class UserFrame(pd.DataFrame):
    """DataFrame subclass used to test As[...] with a custom pandas class."""
20 |
21 |
class UserSeries(pd.Series):  # type: ignore
    """Series subclass used to test As[...] with a custom pandas class."""
24 |
25 |
@dataclass
class Frame(Weather, AsFrame):
    """Weather spec rendered as a plain pandas DataFrame."""
29 |
30 |
@dataclass
class CustomFrame(Weather, As[UserFrame]):
    """Weather spec rendered as the UserFrame subclass."""
34 |
35 |
@dataclass
class Series(Weather, AsSeries):
    """Weather spec rendered as a plain pandas Series."""
39 |
40 |
@dataclass
class CustomSeries(Weather, As[UserSeries]):
    """Weather spec rendered as the UserSeries subclass."""
44 |
45 |
@dataclass
class FactorySeries(Weather, AsSeries, factory=factory):
    """Weather spec rendered via a user-supplied factory function."""
49 |
50 |
@dataclass
class FloatSeries(Weather, As["pd.Series[float]"], factory=pd.Series):
    """Weather spec typed via a string forward reference with pd.Series factory."""
54 |
55 |
56 | # test functions
def test_frame() -> None:
    """Frame.new should build a plain DataFrame equal to the reference."""
    inputs = dict(
        year=weather.year,
        month=weather.month,
        temp_avg=weather.temp_avg,
        temp_max=weather.temp_max,
        wind_avg=weather.wind_avg,
        wind_max=weather.wind_max,
    )
    df_weather = Frame.new(**inputs)

    assert isinstance(df_weather, pd.DataFrame)
    assert_frame_equal(df_weather, df_weather_true)
69 |
70 |
def test_custom_frame() -> None:
    """CustomFrame.new should build a UserFrame equal to the reference."""
    inputs = dict(
        year=weather.year,
        month=weather.month,
        temp_avg=weather.temp_avg,
        temp_max=weather.temp_max,
        wind_avg=weather.wind_avg,
        wind_max=weather.wind_max,
    )
    df_weather = CustomFrame.new(**inputs)

    assert isinstance(df_weather, UserFrame)
    assert_frame_equal(df_weather, df_weather_true, check_frame_type=False)
83 |
84 |
def test_series() -> None:
    """Series.new should build a plain Series equal to the reference."""
    inputs = dict(
        year=weather.year,
        month=weather.month,
        temp_avg=weather.temp_avg,
        temp_max=weather.temp_max,
        wind_avg=weather.wind_avg,
        wind_max=weather.wind_max,
    )
    ser_weather = Series.new(**inputs)

    assert isinstance(ser_weather, pd.Series)
    assert_series_equal(ser_weather, ser_weather_true)
97 |
98 |
def test_custom_series() -> None:
    """CustomSeries.new should build a UserSeries equal to the reference."""
    inputs = dict(
        year=weather.year,
        month=weather.month,
        temp_avg=weather.temp_avg,
        temp_max=weather.temp_max,
        wind_avg=weather.wind_avg,
        wind_max=weather.wind_max,
    )
    ser_weather = CustomSeries.new(**inputs)

    assert isinstance(ser_weather, UserSeries)
    assert_series_equal(ser_weather, ser_weather_true, check_series_type=False)
111 |
112 |
def test_factory_series() -> None:
    """FactorySeries.new should build a Series via the custom factory."""
    inputs = dict(
        year=weather.year,
        month=weather.month,
        temp_avg=weather.temp_avg,
        temp_max=weather.temp_max,
        wind_avg=weather.wind_avg,
        wind_max=weather.wind_max,
    )
    ser_weather = FactorySeries.new(**inputs)

    assert isinstance(ser_weather, pd.Series)
    assert_series_equal(ser_weather, ser_weather_true)
125 |
126 |
def test_float_series() -> None:
    """FloatSeries.new should build a Series from the forward-referenced type."""
    inputs = dict(
        year=weather.year,
        month=weather.month,
        temp_avg=weather.temp_avg,
        temp_max=weather.temp_max,
        wind_avg=weather.wind_avg,
        wind_max=weather.wind_max,
    )
    ser_weather = FloatSeries.new(**inputs)

    assert isinstance(ser_weather, pd.Series)
    assert_series_equal(ser_weather, ser_weather_true, check_series_type=False)
139 |
--------------------------------------------------------------------------------