├── .coveragerc ├── .github └── workflows │ ├── gh-pages.yml │ ├── python-package.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── battdat ├── __init__.py ├── consistency │ ├── __init__.py │ ├── base.py │ └── current.py ├── data.py ├── io │ ├── __init__.py │ ├── arbin.py │ ├── ba.py │ ├── base.py │ ├── batterydata.py │ ├── hdf.py │ ├── maccor.py │ └── parquet.py ├── postprocess │ ├── __init__.py │ ├── base.py │ ├── integral.py │ ├── tagging.py │ └── timing.py ├── schemas │ ├── __init__.py │ ├── battery.py │ ├── column.py │ ├── cycling.py │ ├── eis.py │ ├── modeling.py │ └── ontology.py ├── streaming │ ├── __init__.py │ └── hdf5.py ├── utils.py └── version.py ├── dev ├── README.md └── environment.yml ├── docs ├── Makefile ├── README.md ├── _static │ └── logo.png ├── conf.py ├── getting-started.rst ├── index.rst ├── make.bat ├── pptx-files │ └── logo.pptx ├── source │ ├── consistency.rst │ ├── data.rst │ ├── io.rst │ ├── modules.rst │ ├── postprocess.rst │ ├── schemas.rst │ └── streaming.rst └── user-guide │ ├── consistency │ ├── check-sign-convention.ipynb │ └── index.rst │ ├── dataset.rst │ ├── formats.rst │ ├── index.rst │ ├── io.rst │ ├── post-processing │ ├── cell-capacity.ipynb │ ├── cycle-times.ipynb │ ├── figures │ │ └── explain-capacities.png │ └── index.rst │ ├── schemas │ ├── column-schema.rst │ ├── export-schemas.py │ ├── index.rst │ └── source-metadata.rst │ └── streaming.rst ├── notebooks ├── README.md └── extract-from-batterydata.ipynb ├── pyproject.toml ├── setup.cfg └── tests ├── conftest.py ├── consistency └── test_sign.py ├── exporters └── test_ba.py ├── files ├── arbin_example.csv ├── batteryarchive │ ├── CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_cycle_data.csv │ └── CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_timeseries.csv ├── batterydata │ ├── .gitattributes │ ├── p492-13-raw.csv │ └── p492-13-summary.csv ├── example-data │ ├── README.md │ ├── resistor-only_complex-cycling.ipynb │ ├── resistor-only_simple-cycling.ipynb │ ├── single-resistor-complex-charge_from-discharged.hdf │ ├── single-resistor-complex-charge_from-discharged │ │ └── raw_data.parquet │ ├── single-resistor-constant-charge_from-charged.hdf │ └── single-resistor-constant-charge_from-discharged.hdf ├── maccor_example.001 └── maccor_example.002 ├── io ├── test_arbin.py ├── test_batterydata.py ├── test_cell_consistency.py ├── test_hdf.py └── test_maccor.py ├── postprocess ├── test_integral.py ├── test_stats.py └── test_tagging.py ├── schemas ├── test_cycling.py ├── test_eis.py └── test_ontology.py ├── test_data.py └── test_stream.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = */tests/* 3 | -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docs 2 | on: 3 | push: 4 | branches: [ "main" ] 5 | pull_request: 6 | branches: [ "main" ] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.10' 17 | - name: Install Pandoc 18 | run: | 19 | sudo apt update 20 | sudo apt install -y pandoc 21 | - name: Install dependencies 22 | run: | 23 | pip install -e .[docs] 24 | - name: Sphinx build 25 | run: | 26 | cd docs 27 | make html 28 | - name: Upload artifact 29 | uses: actions/upload-pages-artifact@v3 30 | with: 31 | path: 
docs/_build/html 32 | 33 | deploy: 34 | if: github.ref == 'refs/heads/main' 35 | needs: build 36 | permissions: 37 | pages: write # to deploy to Pages 38 | id-token: write # to verify the deployment originates from an appropriate source 39 | 40 | environment: 41 | name: github-pages 42 | url: ${{ steps.deployment.outputs.page_url }} 43 | 44 | runs-on: ubuntu-latest 45 | steps: 46 | - name: Deploy to GitHub Pages 47 | id: deployment 48 | uses: actions/deploy-pages@v4 49 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python Package 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | os: [ ubuntu-latest, macos-latest, windows-latest ] 10 | max-parallel: 5 11 | runs-on: ${{ matrix.os }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.10 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.10' 18 | - name: Install package 19 | run: | 20 | pip install -e .[test] 21 | - name: Lint with flake8 22 | run: | 23 | flake8 battdat/ tests 24 | - name: Test with pytest 25 | run: | 26 | pytest --cov=battdat tests 27 | - name: Coveralls 28 | run: | 29 | pip install coveralls 30 | coveralls --service=github-actions 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} 34 | - name: Test example notebooks 35 | if: ${{ runner.os == 'Linux' }} 36 | run: | 37 | pip install jupyter matplotlib 38 | home_dir=`pwd` 39 | for notebook in `find . -name "*.ipynb"`; do 40 | cd `dirname $notebook` 41 | jupyter execute `basename $notebook` 42 | cd $home_dir 43 | done 44 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: Install pypa/build 14 | run: >- 15 | python -m 16 | pip install 17 | build 18 | --user 19 | 20 | - name: Build a binary wheel and a source tarball 21 | run: >- 22 | python -m 23 | build 24 | --sdist 25 | --wheel 26 | --outdir dist/ 27 | . 28 | 29 | - name: pypi-publish 30 | if: startsWith(github.ref, 'refs/tags') 31 | uses: pypa/gh-action-pypi-publish@release/v1 32 | with: 33 | password: ${{ secrets.PYPI_TOKEN }} 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # VSCode settings 114 | .vscode/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Battery Data Toolkit 2 | 3 | [![Python Package](https://github.com/rovi-org/battery-data-toolkit/actions/workflows/python-package.yml/badge.svg)](https://github.com/rovi-org/battery-data-toolkit/actions/workflows/python-package.yml) 4 | [![Deploy Docs](https://github.com/ROVI-org/battery-data-toolkit/actions/workflows/gh-pages.yml/badge.svg?branch=main)](https://rovi-org.github.io/battery-data-toolkit/) 5 | [![Coverage Status](https://coveralls.io/repos/github/ROVI-org/battery-data-toolkit/badge.svg?branch=main)](https://coveralls.io/github/ROVI-org/battery-data-toolkit?branch=main) 6 | [![PyPI version](https://badge.fury.io/py/battery-data-toolkit.svg)](https://badge.fury.io/py/battery-data-toolkit) 7 | 8 | The battery-data-toolkit, `battdat`, creates consistently-formatted collections of battery data. 9 | The library has three main purposes: 10 | 11 | 1. *Storing battery data in standardized formats.* ``battdat`` stores data in 12 | [HDF5 or Parquet files](https://rovi-org.github.io/battery-data-toolkit/user-guide/formats.html) which include 13 | [extensive metadata](https://rovi-org.github.io/battery-data-toolkit/user-guide/schemas/index.html). 14 | 2. *Interfacing battery data with the PyData ecosystem*. The core data model, 15 | [``BatteryDataset``](https://rovi-org.github.io/battery-data-toolkit/user-guide/dataset.html), 16 | is built atop Pandas DataFrames. 17 | 3. *Providing standard implementations of common analysis techniques*. ``battdat`` implements functions which 18 | [ensure quality](https://rovi-org.github.io/battery-data-toolkit/user-guide/consistency/index.html) 19 | or [perform common analyses](https://rovi-org.github.io/battery-data-toolkit/user-guide/post-processing/index.html). 20 | 21 | ## Installation 22 | 23 | Install ``battdat`` with pip: `pip install battery-data-toolkit` 24 | 25 | ## Documentation 26 | 27 | Find the documentation at: https://rovi-org.github.io/battery-data-toolkit/ 28 | 29 | ## Support 30 | 31 | The motivation and funding for this project came from the Rapid Operational Validation Initiative (ROVI) sponsored by the Office of Electricity. 32 | The focus of ROVI is "to greatly reduce time required for emerging energy storage technologies to go from lab to market by developing new tools that will accelerate the testing and validation process needed to ensure commercial success." 33 | If interested, you can read more about ROVI here. 
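A minimal usage sketch assembled from the reader and writer classes defined later in this repository (`ArbinReader`, `ParquetWriter`); the file names here are hypothetical:

```python
from battdat.io.arbin import ArbinReader
from battdat.io.parquet import ParquetWriter

# Parse an Arbin-format CSV into a BatteryDataset
dataset = ArbinReader().read_dataset(['cell-test.csv'])

# Time-series measurements are held as ordinary Pandas DataFrames
print(dataset.tables['raw_data'].head())

# Write to battdat's Parquet format, one file per table
ParquetWriter(overwrite=True).export(dataset, 'cell-parquet')
```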
34 | -------------------------------------------------------------------------------- /battdat/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ # noqa: 401 2 | -------------------------------------------------------------------------------- /battdat/consistency/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for checking whether data contained in a dataset is self-consistent""" 2 | -------------------------------------------------------------------------------- /battdat/consistency/base.py: -------------------------------------------------------------------------------- 1 | """Base class for consistency checkers""" 2 | from typing import List 3 | 4 | from battdat.data import BatteryDataset 5 | 6 | 7 | # TODO (wardlt): Consider standardizing the error messages: which table, how bad, possible remedy 8 | # TODO (wardlt): Make attributes defining which subsets to explore part of the base class 9 | class ConsistencyChecker: 10 | """Interface for classes which assess whether data in a :class:`~battdat.data.BatteryDataset` are self-consistent""" 11 | 12 | def check(self, dataset: BatteryDataset) -> List[str]: 13 | """Report possible inconsistencies within a dataset 14 | 15 | Args: 16 | dataset: Dataset to be evaluated 17 | Returns: 18 | List of observed inconsistencies 19 | """ 20 | raise NotImplementedError() 21 | -------------------------------------------------------------------------------- /battdat/consistency/current.py: -------------------------------------------------------------------------------- 1 | """Checks related to the current in time series data""" 2 | from dataclasses import dataclass 3 | from typing import List, Collection, Optional 4 | 5 | from datetime import datetime, timedelta 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from battdat.data import BatteryDataset 11 | from battdat.consistency.base import ConsistencyChecker 12 | 13 | 14 | # TODO (wardlt): Check over different cycles 15 | @dataclass 16 | class SignConventionChecker(ConsistencyChecker): 17 | """Estimate whether the sign convention of a dataset is likely to be correct 18 | 19 | The concept underpinning this class is that the voltage of a cell should increase as it is charged. 20 | The algorithm looks for a period where the current is most consistent, then measures whether 21 | the voltage changes in the same direction as the current during that period. 22 | """ 23 | 24 | subsets_to_check: Collection[str] = ('raw_data',) 25 | """Which subsets within a dataset to evaluate""" 26 | window_length: float = 360.
27 | """Length of time period over which to assess voltage change (units: s)""" 28 | minimum_current: float = 1e-6 29 | """Minimum current used when determining periods of charge or discharge""" 30 | 31 | def check(self, dataset: BatteryDataset) -> List[str]: 32 | warnings = [] 33 | for subset in self.subsets_to_check: 34 | if (warning := self.check_subset(dataset.tables[subset])) is not None: 35 | warnings.append(warning) 36 | return warnings 37 | 38 | def check_subset(self, time_series: pd.DataFrame) -> Optional[str]: 39 | # Convert the test time (seconds) to a time object so that Panda's rolling window can use a time 40 | time_series['timestamp'] = time_series['test_time'].apply(datetime.fromtimestamp) 41 | nonzero_current = time_series.query(f'current > {self.minimum_current} or current < {-self.minimum_current}') # Only get nonzero currents 42 | windowed = nonzero_current[['timestamp', 'test_time', 'current', 'voltage']].rolling( 43 | window=timedelta(seconds=self.window_length), on='timestamp', min_periods=4, 44 | ) 45 | if len(nonzero_current) < 4: 46 | raise ValueError(f'Insufficient data to judge the sign convention (only {len(nonzero_current)}). Consider raising the minimum current threshold.') 47 | 48 | # Find the region with the lowest standard deviation 49 | most_stable_point = windowed['current'].std().idxmin() 50 | most_stable_time = nonzero_current['test_time'].loc[most_stable_point] 51 | stable_window = nonzero_current.query(f'test_time < {most_stable_time} and test_time > {most_stable_time - self.window_length}') 52 | curr_volt_cov = np.cov(stable_window['voltage'], stable_window['test_time'])[0, 1] 53 | if np.sign(curr_volt_cov) != np.sign(stable_window['current'].mean()): 54 | return (f'Potential sign error in current. Average current between test_time={most_stable_time - self.window_length:.1f}s and ' 55 | f'test_time={most_stable_time:.1f} is {stable_window["current"].mean():.1e} A and the covariance between the voltage and current ' 56 | f'is {curr_volt_cov:.1e} V-s. 
The current and this covariance should have the same sign.') 57 | -------------------------------------------------------------------------------- /battdat/io/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for reading external formats into :class:`~battdat.data.BatteryDataset` objects 2 | and exporting data to disk.""" 3 | -------------------------------------------------------------------------------- /battdat/io/arbin.py: -------------------------------------------------------------------------------- 1 | """Extractor for Arbin-format files""" 2 | from typing import Union, List, Iterator, Tuple 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from battdat.io.base import CycleTestReader 8 | from battdat.schemas.column import ChargingState 9 | from battdat.utils import drop_cycles 10 | from battdat.postprocess.tagging import AddMethod, AddSteps, AddSubSteps 11 | 12 | 13 | class ArbinReader(CycleTestReader): 14 | """Parser for reading from Arbin-format files 15 | 16 | Expects the files to be in CSV format""" 17 | 18 | def group(self, files: Union[str, List[str]], directories: List[str] = None, 19 | context: dict = None) -> Iterator[Tuple[str, ...]]: 20 | for file in files: 21 | if file.lower().endswith('.csv'): 22 | yield file 23 | 24 | def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0, 25 | start_time: float = 0) -> pd.DataFrame: 26 | 27 | # Read the file and rename the time column 28 | df = pd.read_csv(file) 29 | df = df.rename(columns={'DateTime': 'test_time'}) 30 | 31 | # create fresh dataframe 32 | df_out = pd.DataFrame() 33 | 34 | # Convert the column names 35 | df_out['cycle_number'] = df['Cycle_Index'] + start_cycle - df['Cycle_Index'].min() 36 | df_out['cycle_number'] = df_out['cycle_number'].astype('int64') 37 | df_out['file_number'] = file_number # df_out['cycle_number']*0 38 | df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0] + start_time, dtype=float) 39 | df_out['current'] = df['Current'] # TODO (wardlt): Check this!?
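        # Note (assumption): battdat's convention, checked by SignConventionChecker in battdat.consistency.current,
        # is that positive current indicates charging. This mapping assumes the Arbin export already follows
        # that convention, which is what the TODO above asks to verify.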
40 | df_out['temperature'] = df['Temperature'] 41 | df_out['internal_resistance'] = df['Internal_Resistance'] 42 | df_out['voltage'] = df['Voltage'] 43 | 44 | # Drop the duplicate rows 45 | df_out = drop_cycles(df_out) 46 | 47 | # Determine whether the battery is charging or discharging: 48 | # 0 is rest, 1 is charge, -1 is discharge 49 | # TODO (wardlt): This function should move to post-processing 50 | def compute_state(x): 51 | if abs(x) < 1e-6: 52 | return ChargingState.rest 53 | return ChargingState.charging if x > 0 else ChargingState.discharging 54 | 55 | df_out['state'] = df_out['current'].apply(compute_state) 56 | 57 | # Determine the method used to control charging/discharging 58 | AddSteps().enhance(df_out) 59 | AddMethod().enhance(df_out) 60 | AddSubSteps().enhance(df_out) 61 | return df_out 62 | -------------------------------------------------------------------------------- /battdat/io/ba.py: -------------------------------------------------------------------------------- 1 | """Tools for streamlining upload to `Battery Archive <https://batteryarchive.org/>`_""" 2 | 3 | from typing import Callable, Any, Optional 4 | from dataclasses import dataclass 5 | from datetime import datetime 6 | from pathlib import Path 7 | from uuid import uuid4 8 | import logging 9 | import json 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from battdat.io.base import DatasetWriter 15 | from battdat.data import BatteryDataset 16 | from battdat.schemas import BatteryMetadata 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | # Mappings between our column names and theirs, with an optional function to perform conversion 21 | # TODO (wardlt): Standardize fields for the cumulative charge and discharge for each cycle separately (#75) 22 | # TODO (wardlt): Differentiate the cell temperature from the environment temperature (#76) 23 | # TODO (wardlt): Compute more derived fields from BatteryArchive (#77) 24 | _timeseries_reference: dict[str, tuple[str, Optional[Callable[[Any], Any]]]] = { 25 | 'current': ('i', None), # TODO (wardlt): Which sign convention does battery archive use? 26 | 'voltage': ('v', None), 27 | 'temperature': ('env_temperature', None), # TODO (wardlt): @ypreger, would you prefer unknown temps as env or cell? 28 | 'time': ('date_time', lambda x: datetime.fromtimestamp(x).strftime('%m/%d/%Y %H:%M:%S.%f')), 29 | 'cycle_number': ('cycle_index', lambda x: x + 1), # BA starts indices from 1 30 | 'test_time': ('test_time', None), 31 | } 32 | 33 | _battery_metadata_reference: dict[str, str] = { 34 | 'nominal_capacity': 'ah', # TODO (wardlt): Why is ah an integer? 35 | 'form_factor': 'form_factor', 36 | 'mass': 'weight', # TODO (wardlt): What units does batteryarchive use? 37 | 'dimensions': 'dimensions', # TODO (wardlt): How do you express shapes for different form factors 38 | } 39 | 40 | _cycle_stats_reference: dict[str, tuple[str, Callable[[Any], Any]]] = { 41 | 'V_maximum': ('v_max', None), 42 | 'V_minimum': ('v_min', None), 43 | 'capacity_discharge': ('ah_d', None), 44 | 'capacity_charge': ('ah_c', None), 45 | 'energy_discharge': ('e_d', None), 46 | 'energy_charge': ('e_c', None), 47 | 'discharge_V_average': ('v_d_mean', None), 48 | 'charge_V_average': ('v_c_mean', None), 49 | 'coulomb_efficiency': ('ah_eff', None), # TODO (wardlt): Is this correct? 50 | 'energy_efficiency': ('e_eff', None), 51 | 'cycle_start': ('test_time', None), # TODO (wardlt): Is test-time the beginning, duration, something else?
52 | 'cycle_number': ('cycle_index', lambda x: x + 1), # BA starts indices from 1 53 | } 54 | 55 | _metadata_reference: dict[str, str] = { 56 | 'source': 'source', 57 | } 58 | 59 | 60 | # TODO (wardlt): Reconsider saving in CSV. Parquet would preserve data types 61 | 62 | @dataclass 63 | class BatteryArchiveWriter(DatasetWriter): 64 | """Export data into CSV files that follow the format definitions used in BatteryArchive 65 | 66 | The exporter writes files for each table in the 67 | `Battery Archive SQL schema `_ 68 | with column names matched to their definitions. 69 | """ 70 | 71 | chunk_size: int = 100000 72 | """Maximum number of rows to write to disk in a single CSV file""" 73 | 74 | def write_timeseries(self, cell_id: str, data: pd.DataFrame, path: Path): 75 | """Write the time series dataset 76 | 77 | Args: 78 | cell_id: Name for the cell, used as a foreign key to map between tables 79 | data: Time series data to write to disk 80 | path: Root path for writing cycling data 81 | """ 82 | 83 | num_chunks = len(data) // self.chunk_size + 1 84 | logger.info(f'Writing time series data to disk in {num_chunks} chunks') 85 | for i, chunk in enumerate(np.array_split(data, num_chunks)): 86 | # Convert all of our columns 87 | out_chunk = pd.DataFrame() 88 | for my_col, (out_col, out_fun) in _timeseries_reference.items(): 89 | if my_col in chunk: 90 | out_chunk[out_col] = chunk[my_col] 91 | if out_fun is not None: 92 | out_chunk[out_col] = out_chunk[out_col].apply(out_fun) 93 | 94 | # Add a cell id to the frame 95 | out_chunk['cell_id'] = cell_id 96 | 97 | # Save to disk 98 | chunk_path = path / f'cycle-timeseries-{i}.csv' 99 | out_chunk.to_csv(chunk_path, index=False, encoding='utf-8') 100 | logger.debug(f'Wrote {len(out_chunk)} rows to {chunk_path}') 101 | 102 | def write_cycle_stats(self, cell_id: str, data: pd.DataFrame, path: Path): 103 | """Write the cycle stats to disk 104 | 105 | Args: 106 | cell_id: Name of the cell 107 | data: Cycle stats dataframe 108 | path: Path to the output directory 109 | """ 110 | 111 | # Convert the dataframe 112 | out_data = pd.DataFrame() 113 | for my_col, (out_col, out_fun) in _cycle_stats_reference.items(): 114 | if my_col in data: 115 | out_data[out_col] = data[my_col] 116 | if out_fun is not None: 117 | out_data[out_col] = out_data[out_col].apply(out_fun) 118 | 119 | # Write the cell ID in the output 120 | out_data['cell_id'] = cell_id 121 | 122 | out_data.to_csv(path / 'cycle-stats.csv', index=False) 123 | 124 | def write_metadata(self, cell_id: str, metadata: BatteryMetadata, path: Path): 125 | """Write the metadata into a JSON file 126 | 127 | Args: 128 | cell_id: ID for the cell 129 | metadata: Metadata to be written 130 | path: Path in which to write the data 131 | """ 132 | 133 | output = {'cell_id': cell_id} 134 | 135 | # Write the materials for the anode and cathode as dictionaries 136 | for terminal in ['anode', 'cathode']: 137 | attr = getattr(metadata.battery, terminal, None) 138 | if attr is not None: 139 | output[terminal] = attr.model_dump_json(exclude_unset=True) 140 | 141 | # Write the simple fields about the batteries and tester 142 | for my_field, ba_field in _battery_metadata_reference.items(): 143 | attr = getattr(metadata.battery, my_field, None) 144 | if attr is not None: 145 | output[ba_field] = attr 146 | 147 | for my_field, ba_field in _metadata_reference.items(): 148 | attr = getattr(metadata, my_field, None) 149 | if attr is not None: 150 | output[ba_field] = attr 151 | 152 | with open(path / 'metadata.json', 'w') as fp: 153 |
json.dump(output, fp) 154 | 155 | def export(self, dataset: BatteryDataset, path: Path): 156 | cell_name = dataset.metadata.name or str(uuid4()) # Default to UUID if none provided 157 | 158 | if (table := dataset.tables.get('raw_data')) is not None: 159 | self.write_timeseries(cell_name, table, path) 160 | 161 | if dataset.metadata is not None: 162 | self.write_metadata(cell_name, dataset.metadata, path) 163 | 164 | if (table := dataset.tables.get('cycle_stats')) is not None: 165 | self.write_cycle_stats(cell_name, table, path) 166 | -------------------------------------------------------------------------------- /battdat/io/base.py: -------------------------------------------------------------------------------- 1 | """Base classes for battery data import and export tools""" 2 | from typing import List, Optional, Union, Iterator, Sequence 3 | from pathlib import Path 4 | import os 5 | 6 | import pandas as pd 7 | 8 | from battdat.data import BatteryDataset 9 | from battdat.schemas import BatteryMetadata 10 | 11 | PathLike = Union[str, Path] 12 | 13 | 14 | class DatasetReader: 15 | """Base class for tools which read battery data as a :class:`~battdat.data.BatteryDataset` 16 | 17 | All readers must implement a function which receives battery metadata as input and produces 18 | a completed :class:`battdat.data.BatteryDataset` as an output. 19 | 20 | Subclasses provide additional suggested operations useful when working with data from 21 | common sources (e.g., file systems, web APIs) 22 | """ 23 | 24 | def read_dataset(self, metadata: Optional[Union[BatteryMetadata, dict]] = None, **kwargs) -> BatteryDataset: 25 | """Parse a set of files into a :class:`~battdat.data.BatteryDataset` 26 | 27 | Args: 28 | metadata: Metadata for the battery 29 | Returns: 30 | Dataset holding all available information about the battery 31 | """ 32 | raise NotImplementedError() 33 | 34 | 35 | class DatasetFileReader(DatasetReader): 36 | """Tool which reads datasets written to files 37 | 38 | Provide an :meth:`identify_files` to filter out files likely to be in this format, 39 | or :meth:`group` function to find related files if data are often split into multiple files. 40 | """ 41 | 42 | def identify_files(self, path: PathLike, context: dict = None) -> Iterator[tuple[PathLike]]: 43 | """Identify all groups of files likely to be compatible with this reader 44 | 45 | Uses the :meth:`group` function to determine groups of files that should be parsed together. 46 | 47 | Args: 48 | path: Root of directory to group together 49 | context: Context about the files 50 | Yields: 51 | Groups of eligible files 52 | """ 53 | 54 | # Walk through the directories 55 | for root, dirs, files in os.walk(path): 56 | # Generate the full paths 57 | dirs = [os.path.join(root, d) for d in dirs] 58 | files = [os.path.join(root, f) for f in files] 59 | 60 | # Get any groups from this directory 61 | for group in self.group(files, dirs, context): 62 | yield group 63 | 64 | def group(self, 65 | files: Union[PathLike, List[PathLike]], 66 | directories: List[PathLike] = None, 67 | context: dict = None) -> Iterator[tuple[PathLike, ...]]: 68 | """Identify groups of files and directories that should be parsed together 69 | 70 | Will create groups using only the files and directories included as input. 71 | 72 | The list of files contains *all* files that could be read by this extractor, 73 | which may include many false positives.
74 | 75 | Args: 76 | files: List of files to consider grouping 77 | directories: Any directories to consider grouping as well 78 | context: Context about the files 79 | Yields: 80 | Groups of files 81 | """ 82 | 83 | # Make sure file paths are strings or Path-like objects 84 | if isinstance(files, str): 85 | files = [files] 86 | files = [Path(p) for p in files] 87 | 88 | # Default: Every file is in its own group 89 | for f in files: 90 | yield f, 91 | 92 | 93 | class CycleTestReader(DatasetFileReader): 94 | """Template class for reading the files output by battery cell cyclers 95 | 96 | Adds logic for reading cycling time series from a list of files. 97 | """ 98 | 99 | def read_file(self, 100 | file: str, 101 | file_number: int = 0, 102 | start_cycle: int = 0, 103 | start_time: int = 0) -> pd.DataFrame: 104 | """Generate a DataFrame containing the data in this file 105 | 106 | The dataframe will be in our standard format 107 | 108 | Args: 109 | file: Path to the file 110 | file_number: Number of file, in case the test is spread across multiple files 111 | start_cycle: Index to use for the first cycle, in case test is spread across multiple files 112 | start_time: Test time to use for the start of the test, in case test is spread across multiple files 113 | 114 | Returns: 115 | Dataframe containing the battery data in a standard format 116 | """ 117 | raise NotImplementedError() 118 | 119 | def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[BatteryMetadata] = None) -> BatteryDataset: 120 | """Parse a set of files into a :class:`~battdat.data.BatteryDataset` 121 | 122 | Args: 123 | group: List of files to parse as part of the same test. Ordered sequentially 124 | metadata: Metadata for the battery, should adhere to the BatteryMetadata schema 125 | 126 | Returns: 127 | Dataset containing the information from all files 128 | """ 129 | 130 | # Initialize counters for the cycle numbers, etc. Used to determine offsets for the files read 131 | start_cycle = 0 132 | start_time = 0 133 | 134 | # Read the data for each file 135 | # Keep track of the ending index and ending time 136 | output_dfs = [] 137 | for file_number, file in enumerate(group): 138 | # Read the file 139 | df_out = self.read_file(file, file_number, start_cycle, start_time) 140 | output_dfs.append(df_out) 141 | 142 | # Increment the start cycle and time to determine starting point of next file 143 | start_cycle += df_out['cycle_number'].max() - df_out['cycle_number'].min() + 1 144 | start_time = df_out['test_time'].max() 145 | 146 | # Combine the data from all files 147 | df_out = pd.concat(output_dfs, ignore_index=True) 148 | 149 | # Attach the metadata and return the data 150 | return BatteryDataset.make_cell_dataset(raw_data=df_out, metadata=metadata) 151 | 152 | 153 | class DatasetWriter: 154 | """Tool which exports data from a :class:`~battdat.data.BatteryDataset` to disk in a specific format""" 155 | 156 | def export(self, dataset: BatteryDataset, path: PathLike): 157 | """Write the dataset to disk in a specific path 158 | 159 | All files from the dataset must be placed in the provided directory 160 | 161 | Args: 162 | dataset: Dataset to be exported 163 | path: Output path 164 | """ 165 | raise NotImplementedError() 166 | -------------------------------------------------------------------------------- /battdat/io/batterydata.py: -------------------------------------------------------------------------------- 1 | """Parse from the CSV formats of batterydata.energy.gov""" 2 | import re 3 | import logging 4 | from pathlib
import Path 5 | from dataclasses import dataclass 6 | from collections import defaultdict 7 | from datetime import datetime, timedelta 8 | from typing import Union, List, Iterator, Tuple, Optional, Iterable 9 | 10 | import pandas as pd 11 | 12 | from battdat.data import BatteryDataset 13 | from battdat.io.base import DatasetFileReader 14 | from battdat.schemas import BatteryMetadata, BatteryDescription 15 | 16 | _fname_match = re.compile(r'(?P<name>[-\w]+)[- ](?P<type>summary|raw)\.csv') 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def generate_metadata(desc: dict, associated_ids: Iterable[str] = ()) -> BatteryMetadata: 22 | """Assemble the battery metadata for a dataset 23 | 24 | The metadata for a single dataset are all the same and available by querying 25 | the ``https://batterydata.energy.gov/api/3/action/package_show?id={dataset_id}`` 26 | endpoint of `Battery Data Hub <https://batterydata.energy.gov/>`_. 27 | 28 | Args: 29 | desc: Data from the CKAN metadata response 30 | associated_ids: List of other resources associated with this dataset, such as the DOIs of papers. 31 | Returns: 32 | Metadata for the cell provenance and construction 33 | """ 34 | 35 | # Get the "result" pane if users didn't provide it 36 | if 'result' in desc: 37 | desc = desc['result'] 38 | 39 | # Describe the battery 40 | battery = BatteryDescription( 41 | manufacturer=desc['manufacturer_supplier'], 42 | design=", ".join(desc['cell_type']), 43 | anode={'name': ", ".join(desc['negative_electrode'])}, 44 | cathode={'name': ", ".join(desc['positive_electrode'])}, 45 | electrolyte={'name': ", ".join(desc['electrolyte_class_dataset'])}, 46 | nominal_capacity=desc['nominal_cell_capacity'], 47 | ) 48 | 49 | # Describe the context of when it was tested 50 | return BatteryMetadata( 51 | source=desc['organization']['title'], 52 | dataset_name=desc['title'], 53 | associated_ids=associated_ids, 54 | battery=battery, 55 | ) 56 | 57 | 58 | # TODO (wardlt): Columns that do not yet have a home in the schema: 59 | # - Cell2 60 | _name_map_raw = { 61 | 'Cycle_Index': 'cycle_number', 62 | 'Step': 'step_index', 63 | 'Time_s': 'test_time', 64 | 'Current_A': 'current', 65 | 'Voltage_V': 'voltage', 66 | 'Cell_Temperature_C': 'temperature', 67 | 'Datenum_d': 'time' 68 | } 69 | 70 | 71 | def convert_raw_signal(input_df: pd.DataFrame, store_all: bool) -> pd.DataFrame: 72 | """Convert a raw signal dataframe to one using battdat names and conventions 73 | 74 | Args: 75 | input_df: Initial NREL-format dataframe 76 | store_all: Whether to store columns even if we have not defined their names 77 | Returns: 78 | DataFrame in the battdat format 79 | """ 80 | output = pd.DataFrame() 81 | 82 | # Rename columns that are otherwise the same 83 | for orig, new in _name_map_raw.items(): 84 | output[new] = input_df[orig] 85 | 86 | # Decrement the indices from 1-indexed to 0-indexed 87 | output[['cycle_number', 'step_index']] -= 1 88 | 89 | # Convert the date to POSIX timestamp (ease of use in Python) from days since 1/1/0000 90 | begin_time = datetime(year=1, month=1, day=1) 91 | output['time'] = output['time'].apply(lambda x: (timedelta(days=x - 366) + begin_time).timestamp()) 92 | 93 | # Add all other columns as-is 94 | if store_all: 95 | for col in input_df.columns: 96 | if col not in _name_map_raw: 97 | output[col] = input_df[col] 98 | 99 | return output 100 | 101 | 102 | _name_map_summary = { 103 | 'Cycle_Index': 'cycle_number', 104 | 'Q_chg': 'capacity_charge', 105 | 'E_chg': 'energy_charge', 106 | 'Q_dis': 'capacity_discharge', 107 | 'E_dis': 'energy_discharge', 108 |
'CE': 'coulomb_efficiency', 109 | 'EE': 'energy_efficiency', 110 | 'tsecs_start': 'cycle_start', 111 | 'tsecs_cycle': 'cycle_duration', 112 | 'T_min': 'temperature_minimum', 113 | 'T_max': 'temperature_maximum', 114 | 'T_avg': 'temperature_average', 115 | } 116 | 117 | 118 | def convert_summary(input_df: pd.DataFrame, store_all: bool) -> pd.DataFrame: 119 | """Convert the summary dataframe to a format using battdat names and conventions 120 | 121 | Args: 122 | input_df: Initial NREL-format dataframe 123 | store_all: Whether to store columns even if we have not defined their names 124 | Returns: 125 | DataFrame in the battdat format 126 | """ 127 | 128 | output = pd.DataFrame() 129 | 130 | # Rename columns that are otherwise the same 131 | for orig, new in _name_map_summary.items(): 132 | output[new] = input_df[orig] 133 | 134 | # Add all other columns as-is 135 | if store_all: 136 | for col in input_df.columns: 137 | if col not in _name_map_summary: 138 | output[col] = input_df[col] 139 | 140 | return output 141 | 142 | 143 | def convert_eis_data(input_df: pd.DataFrame) -> pd.DataFrame: 144 | """Rename the columns from an NREL-standard set of EIS data to our names and conventions 145 | 146 | Args: 147 | input_df: NREL-format raw data 148 | Returns: 149 | EIS data in battdat format 150 | """ 151 | 152 | # Filter out the non-EIS data 153 | input_df = input_df[~input_df['Frequency_Hz'].isnull()] 154 | 155 | # Use the cycle index as a test index 156 | output = pd.DataFrame() 157 | output['test_id'] = input_df['Cycle_Index'] 158 | 159 | # Drop units off and make lower case 160 | cols = ['Frequency_Hz', 'Z_Imag_Ohm', 'Z_Real_Ohm', 'Z_Mag_Ohm', 'Z_Phase_Degree'] 161 | for col in cols: 162 | my_name = "_".join(col.lower().split("_")[:-1]) 163 | output[my_name] = input_df[col] 164 | return output 165 | 166 | 167 | @dataclass 168 | class BDReader(DatasetFileReader): 169 | """Read data from the batterydata.energy.gov CSV format 170 | 171 | Every cell in batterydata.energy.gov is stored as two separate CSV files for each battery, 172 | "<name>-summary.csv" for the cycle-level summaries 173 | and "<name>-raw.csv" for the time series measurements. 174 | Metadata is held in an Excel file, "metadata.xlsx," in the same directory.""" 175 | 176 | store_all: bool = False 177 | """Store all data from the original data, even if we have not defined it""" 178 | 179 | def group(self, files: Union[str, List[str]], directories: List[str] = None, 180 | context: dict = None) -> Iterator[Tuple[str, ...]]: 181 | 182 | # Find files that match the CSV naming convention 183 | groups = defaultdict(list) # Map of cell name to the output 184 | for file in files: 185 | if (match := _fname_match.match(Path(file).name)) is not None: 186 | groups[match.group('name')].append(file) 187 | 188 | yield from groups.values() 189 | 190 | def read_dataset(self, 191 | group: List[str], 192 | metadata: Optional[Union[BatteryMetadata, dict]] = None) -> BatteryDataset: 193 | # Make an empty metadata if none available 194 | if metadata is None: 195 | metadata = BatteryMetadata() 196 | 197 | # Process each file 198 | raw_data = cycle_stats = eis_data = None 199 | for path in group: 200 | match = _fname_match.match(Path(path).name) 201 | if match is None: 202 | raise ValueError(f'Filename convention broken for {path}. 
Should be <name>-<type>.csv') 203 | 204 | # Update the name in the metadata 205 | if metadata.name is None: 206 | metadata.name = match.group('name') 207 | 208 | # Different parsing logic by type 209 | data_type = match.group('type') 210 | if data_type == 'summary': 211 | cycle_stats = convert_summary(pd.read_csv(path), self.store_all) 212 | elif data_type == 'raw': 213 | nrel_data = pd.read_csv(path) 214 | raw_data = convert_raw_signal(nrel_data, self.store_all) 215 | 216 | # Get EIS data, if available 217 | if 'Z_Imag_Ohm' in nrel_data.columns and not (nrel_data['Z_Imag_Ohm'].isna()).all(): 218 | eis_data = convert_eis_data(nrel_data) 219 | else: 220 | raise ValueError(f'Data type unrecognized: {data_type}') 221 | 222 | # Assemble everything into a single dataset 223 | return BatteryDataset.make_cell_dataset(raw_data=raw_data, cycle_stats=cycle_stats, eis_data=eis_data, metadata=metadata) 224 | -------------------------------------------------------------------------------- /battdat/io/maccor.py: -------------------------------------------------------------------------------- 1 | """Extractor for MACCOR""" 2 | import re 3 | import itertools 4 | from dataclasses import dataclass 5 | from datetime import datetime 6 | from typing import Union, List, Iterator, Tuple, Sequence, Optional 7 | 8 | import pandas as pd 9 | import numpy as np 10 | 11 | from battdat.data import BatteryDataset 12 | from battdat.io.base import DatasetFileReader, CycleTestReader, PathLike 13 | from battdat.schemas import BatteryMetadata 14 | from battdat.schemas.column import ChargingState 15 | from battdat.postprocess.tagging import AddMethod, AddSteps, AddSubSteps 16 | from battdat.utils import drop_cycles 17 | 18 | _test_date_re = re.compile(r'Date of Test:\s+(\d{2}/\d{2}/\d{4})') 19 | 20 | 21 | @dataclass 22 | class MACCORReader(CycleTestReader, DatasetFileReader): 23 | """Parser for reading from MACCOR-format files 24 | 25 | Expects the files to be ASCII files with a .### extension. 26 | The :meth:`group` operation will consolidate files such that all with 27 | the same prefix (i.e., everything except the numerals in the extension) 28 | are treated as part of the same experiment.
29 | """ 30 | 31 | ignore_time: bool = False 32 | """Ignore the the time column, which can be problematic.""" 33 | 34 | def group(self, files: Union[str, List[str]], directories: List[str] = None, 35 | context: dict = None) -> Iterator[Tuple[str, ...]]: 36 | if isinstance(files, str): 37 | files = [files] 38 | 39 | # Get only the MACCOR-style names 40 | valid_names = filter(lambda x: x[-3:].isdigit(), files) 41 | 42 | # Split then sort based on the prefix 43 | split_filenames = sorted(name.rsplit(".", maxsplit=1) for name in valid_names) 44 | 45 | # Return groups 46 | for prefix, group in itertools.groupby(split_filenames, key=lambda x: x[0]): 47 | yield tuple('.'.join(x) for x in group) 48 | 49 | def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[BatteryMetadata] = None) -> BatteryDataset: 50 | # Verify the cells are ordered by test date 51 | start_dates = [] 52 | for file in group: 53 | with open(file, 'r') as fp: 54 | header = fp.readline() 55 | test_date = _test_date_re.findall(header)[0] 56 | start_dates.append(datetime.strptime(test_date, '%m/%d/%Y')) 57 | 58 | # Make sure they are in the correct order 59 | if not all(x >= y for x, y in zip(start_dates[1:], start_dates)): 60 | msg = "\n ".join(f'- {x} {y.strftime("%m/%d/%Y")}' for x, y in zip(group, start_dates)) 61 | raise ValueError(f'Files are not in the correct order by test date: {msg}\n') 62 | 63 | return super().read_dataset(group, metadata) 64 | 65 | def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0, 66 | start_time: int = 0) -> pd.DataFrame: 67 | 68 | # Pull the test date from the first line of the file 69 | with open(file, 'r') as fp: 70 | header = fp.readline() 71 | test_date = _test_date_re.findall(header)[0] 72 | 73 | # Read in the ASCII file (I found this notation works) 74 | df = pd.read_csv(file, skiprows=1, engine='python', sep='\t', index_col=False, encoding="ISO-8859-1") 75 | df = df.rename(columns={'DateTime': 'test_time'}) 76 | 77 | # create fresh dataframe 78 | df_out = pd.DataFrame() 79 | 80 | # fill in new dataframe 81 | df_out['cycle_number'] = df['Cyc#'] + start_cycle - df['Cyc#'].min() 82 | df_out['cycle_number'] = df_out['cycle_number'].astype('int64') 83 | df_out['file_number'] = file_number # df_out['cycle_number']*0 84 | df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60 + start_time 85 | df_out['state'] = df['State'] 86 | df_out['current'] = df['Amps'] 87 | df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current']) 88 | 89 | if not self.ignore_time: 90 | def _parse_time(time: str) -> float: 91 | if '/' in time: 92 | return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp() 93 | else: 94 | return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp() 95 | 96 | df_out['time'] = df['DPt Time'].apply(_parse_time) 97 | 98 | # 0 is rest, 1 is charge, -1 is discharge 99 | df_out.loc[df_out['state'] == 'R', 'state'] = ChargingState.rest 100 | df_out.loc[df_out['state'] == 'C', 'state'] = ChargingState.charging 101 | df_out.loc[df_out['state'] == 'D', 'state'] = ChargingState.discharging 102 | df_out.loc[df_out['state'].apply(lambda x: x not in {'R', 'C', 'D'}), 'state'] = ChargingState.unknown 103 | 104 | df_out['voltage'] = df['Volts'] 105 | df_out = drop_cycles(df_out) 106 | AddSteps().enhance(df_out) 107 | AddMethod().enhance(df_out) 108 | AddSubSteps().enhance(df_out) 109 | return df_out 110 | -------------------------------------------------------------------------------- 
/battdat/io/parquet.py: -------------------------------------------------------------------------------- 1 | """Read and write from `battery-data-toolkit's parquet format `_""" 2 | from dataclasses import dataclass, field 3 | from datetime import datetime 4 | from typing import Dict, Any, Optional, Union, Collection 5 | from pathlib import Path 6 | import warnings 7 | import logging 8 | import shutil 9 | 10 | from pyarrow import parquet as pq 11 | from pyarrow import Table 12 | 13 | from .base import DatasetWriter, DatasetFileReader, PathLike 14 | from ..data import BatteryDataset 15 | from ..schemas import BatteryMetadata 16 | from ..schemas.column import ColumnSchema 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def inspect_parquet_files(path: PathLike) -> BatteryMetadata: 22 | """Read the metadata from a collection of Parquet files 23 | 24 | Args: 25 | path: Path to a directory of parquet files 26 | 27 | Returns: 28 | Metadata from one of the files 29 | """ 30 | # Get a parquet file 31 | path = Path(path) 32 | if path.is_file(): 33 | pq_path = path 34 | else: 35 | pq_path = next(path.glob('*.parquet'), None) 36 | if pq_path is None: 37 | raise ValueError(f'No parquet files in {path}') 38 | 39 | # Read the metadata from the schema 40 | schema = pq.read_schema(pq_path) 41 | if b'battery_metadata' not in schema.metadata: 42 | raise ValueError(f'No metadata in {pq_path}') 43 | return BatteryMetadata.model_validate_json(schema.metadata[b'battery_metadata']) 44 | 45 | 46 | @dataclass 47 | class ParquetWriter(DatasetWriter): 48 | """Write to parquet files in the format specification of battery-data-toolkit 49 | 50 | Writes all data to the same directory with a separate parquet file for each table. 51 | The battery metadata, column schemas, and write date are all saved in the file-level metadata for each file. 52 | """ 53 | 54 | overwrite: bool = False 55 | """Whether to overwrite existing data""" 56 | write_options: Dict[str, Any] = field(default_factory=dict) 57 | """Options passed to :func:`~pyarrow.parquet.write_table`.""" 58 | 59 | def export(self, dataset: BatteryDataset, path: Path): 60 | # Handle existing paths 61 | path = Path(path) 62 | if path.exists(): 63 | if not self.overwrite: 64 | raise ValueError(f'Path already exists and overwrite is disabled: {path}') 65 | logger.info(f'Deleting existing directory at {path}') 66 | shutil.rmtree(path) 67 | 68 | # Make the output directory, then write each Parquet file 69 | path.mkdir(parents=True, exist_ok=False) 70 | my_metadata = { 71 | 'battery_metadata': dataset.metadata.model_dump_json(exclude_none=True), 72 | 'write_date': datetime.now().isoformat() 73 | } 74 | written = {} 75 | for key, schema in dataset.schemas.items(): 76 | if (data := dataset.tables.get(key)) is None: 77 | continue 78 | 79 | # Put the metadata for the battery and this specific table into the table's schema in the FileMetaData 80 | data_path = path / f'{key}.parquet' 81 | my_metadata['table_metadata'] = schema.model_dump_json() 82 | table = Table.from_pandas(data, preserve_index=False) 83 | new_schema = table.schema.with_metadata({**my_metadata, **table.schema.metadata}) 84 | table = table.cast(new_schema) 85 | pq.write_table(table, where=data_path, **self.write_options) 86 | 87 | written[key] = data_path 88 | return written 89 | 90 | 91 | class ParquetReader(DatasetFileReader): 92 | """Read parquet files formatted according to battery-data-toolkit standards 93 | 94 | Mirrors :class:`ParquetWriter`. 
Expects each constituent table to be in a separate parquet 95 | file and to have the metadata stored in the file-level metadata of the parquet file. 96 | """ 97 | 98 | def read_dataset(self, paths: Union[PathLike, Collection[PathLike]], metadata: Optional[Union[BatteryMetadata, dict]] = None) -> BatteryDataset: 99 | """Read a set of parquet files into a BatteryDataset 100 | 101 | Args: 102 | paths: Either the path to a single directory of files, or a list of files to parse 103 | metadata: Metadata which will overwrite what is available in the files 104 | 105 | Returns: 106 | Dataset including all subsets 107 | """ 108 | # Find the parquet files, if no specification is listed 109 | if isinstance(paths, (str, Path)): 110 | paths = [paths] 111 | paths = [Path(p) for p in paths] 112 | if len(paths) == 1 and paths[0].is_dir(): 113 | paths = list(paths[0].glob('*.parquet')) 114 | elif not all(is_file := [p.is_file() for p in paths]): 115 | not_files = [p for i, p in zip(is_file, paths) if not i] 116 | raise ValueError(f'Expected either a list of files or a single directory. The following are not files: {not_files}') 117 | 118 | if len(paths) == 0 and metadata is None: 119 | raise ValueError('No data available.') 120 | 121 | # Load each subset, tracking the battery metadata found in the files 122 | file_metadata = None 123 | data = {} 124 | schemas = {} 125 | for data_path in paths: 126 | subset = data_path.with_suffix('').name 127 | table = pq.read_table(data_path) 128 | 129 | # Load or check the metadata 130 | if b'battery_metadata' not in table.schema.metadata: 131 | warnings.warn(f'Battery metadata not found in {data_path}') 132 | else: 133 | # Load the metadata for the whole cell, warning if the files disagree 134 | my_metadata = table.schema.metadata[b'battery_metadata'] 135 | if file_metadata is None: 136 | file_metadata = my_metadata 137 | elif my_metadata != file_metadata: 138 | warnings.warn(f'Battery metadata in {data_path} differs from that in other files') 139 | 140 | # Load the battdat column schema for the table 141 | if b'table_metadata' not in table.schema.metadata: 142 | warnings.warn(f'Column schema not found in {data_path}') 143 | else: 144 | schemas[subset] = ColumnSchema.from_json(table.schema.metadata[b'table_metadata']) 145 | 146 | # Read it to a dataframe 147 | data[subset] = table.to_pandas() 148 | 149 | # Prefer user-provided metadata over what was read from the files 150 | if metadata is None: 151 | metadata = BatteryMetadata.model_validate_json(file_metadata) 152 | elif isinstance(metadata, dict): 153 | metadata = BatteryMetadata(**metadata) 154 | 155 | return BatteryDataset.make_cell_dataset( 156 | metadata=metadata, 157 | schemas=schemas, 158 | tables=data 159 | ) 160 | -------------------------------------------------------------------------------- /battdat/postprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/battdat/postprocess/__init__.py -------------------------------------------------------------------------------- /battdat/postprocess/base.py: -------------------------------------------------------------------------------- 1 | """Base class and utilities related to post-processing on battery data""" 2 | from typing import List 3 | 4 | import pandas as pd 5 | 6 | from battdat.data import BatteryDataset 7 | from battdat.schemas.column import ColumnSchema 8 | 9 | 10 | class BaseFeatureComputer: 11 | """Base class for methods that produce new features given battery data 12 | 13 | Features can be anything but are often statistics collected about a certain cycle.
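For example, a minimal subclass (an illustrative sketch, not one of the built-in computers) could count the measurements in each cycle::

    class CountPoints(BaseFeatureComputer):
        def compute_features(self, data: BatteryDataset) -> pd.DataFrame:
            raw = data.tables['raw_data']
            return raw.groupby('cycle_number').size().rename('num_points').reset_index()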
14 | """ 15 | 16 | def compute_features(self, data: BatteryDataset) -> pd.DataFrame: 17 | """Compute 18 | 19 | Args: 20 | data: Battery data object 21 | 22 | Returns: 23 | A dataframe of features where rows are different cycles or steps, columns are different features 24 | """ 25 | raise NotImplementedError() 26 | 27 | 28 | class RawDataEnhancer(BaseFeatureComputer): 29 | """Base class for methods derives new data from the existing columns in raw data""" 30 | 31 | column_names: List[str] = ... 32 | 33 | def compute_features(self, data: BatteryDataset) -> pd.DataFrame: 34 | self.enhance(data.tables['raw_data']) 35 | return data.tables['raw_data'][self.column_names] 36 | 37 | def enhance(self, data: pd.DataFrame): 38 | """Add additional columns to the raw data 39 | 40 | Args: 41 | data: Raw data to be modified 42 | """ 43 | raise NotImplementedError() 44 | 45 | 46 | class CycleSummarizer(BaseFeatureComputer): 47 | """Classes which produce a summary of certain cycles given the raw data from a cycle""" 48 | 49 | column_names: List[str] = ... 50 | 51 | def compute_features(self, data: BatteryDataset) -> pd.DataFrame: 52 | self.add_summaries(data) 53 | return data.tables['cycle_stats'][['cycle_number'] + self.column_names] 54 | 55 | def add_summaries(self, data: BatteryDataset): 56 | """Add cycle-level summaries to a battery dataset 57 | 58 | Args: 59 | data: Dataset to be modified 60 | """ 61 | 62 | # Add a cycle summary if not already available 63 | if 'cycle_stats' not in data.tables: 64 | data.tables['cycle_stats'] = pd.DataFrame({ 65 | 'cycle_number': sorted(set(data.tables['raw_data']['cycle_number'])) 66 | }) 67 | data.schemas['cycle_stats'] = ColumnSchema() 68 | 69 | # Perform the update 70 | self._summarize(data.tables['raw_data'], data.tables['cycle_stats']) 71 | 72 | def _summarize(self, raw_data: pd.DataFrame, cycle_data: pd.DataFrame): 73 | """Add additional data to a cycle summary dataframe 74 | 75 | Args: 76 | raw_data: Raw data describing the initial cycles. Is not modified 77 | cycle_data: Cycle data frame to be updated 78 | """ 79 | raise NotImplementedError() 80 | -------------------------------------------------------------------------------- /battdat/postprocess/integral.py: -------------------------------------------------------------------------------- 1 | """Features related to integral quantities (e.g., energy, capacity)""" 2 | import warnings 3 | from itertools import zip_longest 4 | from typing import List 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from scipy.integrate import cumulative_trapezoid 9 | 10 | from battdat.postprocess.base import RawDataEnhancer, CycleSummarizer 11 | 12 | 13 | class CapacityPerCycle(CycleSummarizer): 14 | """Compute the observed capacity and energy during both charge and discharge of each cycle 15 | 16 | Determines capacities based on the integral of current over each cycle: 17 | 18 | 1. Compute the change in state of charge from the start of the cycle 19 | by computing the integral of the capacity over time. 20 | We refer to this integral as the dSOC. 21 | 2. Determine whether the battery started from a charged state 22 | by determining if the largest capacity change is positive 23 | (i.e., if the point most different state of charge from the 24 | start is *more discharged* than the starting point). 25 | The code will raise a warning if the quantities are similar. 26 | 3. If starting from a charged state, the discharge capacity 27 | is the maximum change in state of charge (``dSOC.max()``). 
28 | The charge capacity is the amount of charge transferred to the 29 | battery between this maximally-discharged state and the end 30 | of the cycle (``dSOC.max() - dSOC[-1]``) 31 | 4. If starting from a discharged state, the charge capacity 32 | is the maximum change in state of charge and the discharge capacity 33 | is the amount transferred from the battery between this maximally-charged state and the end of the cycle. 34 | 35 | 36 | The energy is computed using a similar procedure, but by integrating 37 | the product of current and voltage instead of only current. 38 | 39 | .. note:: 40 | 41 | Measurements of capacity and energy assume a cycle returns 42 | the battery to the same state as it started the cycle. 43 | 44 | Output dataframe has 5 new columns. 45 | 46 | - ``capacity_discharge``: Discharge capacity per cycle in A-hr 47 | - ``capacity_charge``: Charge capacity per cycle in A-hr 48 | - ``energy_charge``: Charge energy per cycle in W-hr 49 | - ``energy_discharge``: Discharge energy per cycle in W-hr 50 | - ``max_cycled_capacity``: Maximum amount of charge cycled during the cycle, in A-hr 51 | 52 | The full definitions are provided in the :class:`~battdat.schemas.cycling.CycleLevelData` schema 53 | """ 54 | 55 | def __init__(self, reuse_integrals: bool = True): 56 | """ 57 | 58 | Args: 59 | reuse_integrals: Whether to reuse the ``cycled_charge`` and ``cycled_energy`` if they are available 60 | """ 61 | self.reuse_integrals = reuse_integrals 62 | 63 | @property 64 | def column_names(self) -> List[str]: 65 | output = [] 66 | for name in ['charge', 'discharge']: 67 | output.extend([f'energy_{name}', f'capacity_{name}']) 68 | output.extend(['max_cycled_capacity']) 69 | return output 70 | 71 | def _summarize(self, raw_data: pd.DataFrame, cycle_data: pd.DataFrame): 72 | # Initialize the output arrays 73 | # Rows of ``cycle_data`` are assumed to follow the sorted cycle order used below 74 | for name in self.column_names: 75 | cycle_data[name] = np.nan 76 | 77 | # Get the indices of the beginning of each cycle 78 | raw_data = raw_data.reset_index() # Ensure a sequential ordering from 0 79 | start_inds = raw_data.drop_duplicates('cycle_number', keep='first').index 80 | 81 | # Loop over each cycle,
using the starting point of this cycle and the first point of the next as end caps 82 | for cyc, (start_ind, stop_ind) in enumerate(zip_longest(start_inds, start_inds[1:] + 1, fillvalue=len(raw_data))): 83 | cycle_subset = raw_data.iloc[start_ind:stop_ind] 84 | 85 | # Skip cycles that are too short to have a capacity measurement 86 | if len(cycle_subset) < 3: 87 | continue 88 | 89 | # Perform the integration 90 | if self.reuse_integrals and 'cycled_energy' in cycle_subset.columns and 'cycled_charge' in cycle_subset.columns: 91 | capacity_change = cycle_subset['cycled_charge'].values * 3600 # To A-s 92 | energy_change = cycle_subset['cycled_energy'].values * 3600 # To J 93 | else: 94 | capacity_change = cumulative_trapezoid(cycle_subset['current'], x=cycle_subset['test_time']) 95 | energy_change = cumulative_trapezoid(cycle_subset['current'] * cycle_subset['voltage'], x=cycle_subset['test_time']) 96 | 97 | # Estimate if the battery starts as charged or discharged 98 | max_charge = capacity_change.max() 99 | max_discharge = -capacity_change.min() 100 | cycle_data.loc[cyc, 'max_cycled_capacity'] = (max_charge + max_discharge) / 3600 # To Amp-hour 101 | 102 | starts_charged = max_discharge > max_charge 103 | if np.isclose(max_discharge, max_charge, rtol=0.01): 104 | warnings.warn(f'Unable to clearly detect if battery started charged or discharged in cycle {cyc}. ' 105 | f'Amount discharged is {max_discharge:.2e} A-s, charged is {max_charge:.2e} A-s') 106 | 107 | # Assign the charge and discharge capacity 108 | # One capacity is beginning to maximum change, the other is maximum change to end 109 | if starts_charged: 110 | discharge_cap = max_discharge 111 | charge_cap = capacity_change[-1] + max_discharge 112 | discharge_eng = -energy_change.min() 113 | charge_eng = energy_change[-1] + discharge_eng 114 | else: 115 | charge_cap = max_charge 116 | discharge_cap = max_charge - capacity_change[-1] 117 | charge_eng = energy_change.max() 118 | discharge_eng = charge_eng - energy_change[-1] 119 | 120 | cycle_data.loc[cyc, 'energy_charge'] = charge_eng / 3600. # To W-hr 121 | cycle_data.loc[cyc, 'energy_discharge'] = discharge_eng / 3600. 122 | cycle_data.loc[cyc, 'capacity_charge'] = charge_cap / 3600. # To A-hr 123 | cycle_data.loc[cyc, 'capacity_discharge'] = discharge_cap / 3600. 124 | 125 | 126 | class StateOfCharge(RawDataEnhancer): 127 | """Compute the change in capacity and system energy over each cycle 128 | 129 | The capacity change for a cycle is determined by integrating the 130 | current as a function of time between the start of the cycle 131 | and the first point of the next cycle. 132 | The energy change is determined by integrating the product 133 | of current and voltage.
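In symbols (restating the integrals evaluated in :meth:`enhance` below), with ``t0`` the start of the cycle::

    cycled_charge(t) = (1 / 3600) * integral(current, from t0 to t)            # A-hr
    cycled_energy(t) = (1 / 3600) * integral(current * voltage, from t0 to t)  # W-hr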
134 | 135 | Output dataframe has 3 new columns: 136 | 137 | - ``cycled_charge``: Amount of observed charge cycled since the beginning of the cycle, in A-hr 138 | - ``cycled_energy``: Amount of observed energy cycled since the beginning of the cycle, in W-hr 139 | - ``CE_adjusted_charge``: Amount of charge in the battery relative to the beginning of the cycle, accounting for Coulombic Efficiency (CE), in A-hr 140 | """ 141 | def __init__(self, coulombic_efficiency: float = 1.0): 142 | """ 143 | Args: 144 | coulombic_efficiency: Coulombic efficiency to use when computing the state of charge 145 | """ 146 | self.coulombic_efficiency = coulombic_efficiency 147 | 148 | @property 149 | def coulombic_efficiency(self) -> float: 150 | return self._ce 151 | 152 | @coulombic_efficiency.setter 153 | def coulombic_efficiency(self, value: float): 154 | if value < 0 or value > 1: 155 | raise ValueError('Coulombic efficiency must be between 0 and 1') 156 | self._ce = value 157 | 158 | @property 159 | def column_names(self) -> List[str]: 160 | return ['cycled_charge', 'cycled_energy', 'CE_adjusted_charge'] 161 | 162 | def _get_CE_adjusted_curr(self, current: np.ndarray) -> np.ndarray: 163 | """Adjust the current based on the coulombic efficiency 164 | 165 | Args: 166 | current: Current array in A 167 | 168 | Returns: 169 | Adjusted current array in A 170 | """ 171 | adjusted_current = np.where(current > 0, self.coulombic_efficiency * current, current) 172 | return adjusted_current.flatten() 173 | 174 | def enhance(self, data: pd.DataFrame): 175 | # Add columns for the capacity and energy 176 | for c in self.column_names: 177 | data.loc[:, (c,)] = np.nan 178 | 179 | # Compute the capacity and energy for each cycle 180 | ordered_copy = data.reset_index() # Ensure a sequential ordering from 0 181 | start_inds = ordered_copy.drop_duplicates('cycle_number', keep='first').index 182 | 183 | # Loop over each cycle 184 | for cyc, (start_ind, stop_ind) in enumerate(zip_longest(start_inds, start_inds[1:] + 1, fillvalue=len(ordered_copy) + 1)): 185 | cycle_subset = ordered_copy.iloc[start_ind:stop_ind] 186 | 187 | # Perform the integration 188 | ce_adj_curr = self._get_CE_adjusted_curr(cycle_subset['current'].to_numpy()) 189 | capacity_change = cumulative_trapezoid(cycle_subset['current'], x=cycle_subset['test_time'], initial=0) 190 | ce_charge = cumulative_trapezoid(ce_adj_curr, x=cycle_subset['test_time'], initial=0) 191 | energy_change = cumulative_trapezoid(cycle_subset['current'] * cycle_subset['voltage'], x=cycle_subset['test_time'], initial=0) 192 | 193 | # Store them in the raw data 194 | data.loc[cycle_subset['index'], 'cycled_charge'] = capacity_change / 3600 # To A-hr 195 | data.loc[cycle_subset['index'], 'CE_adjusted_charge'] = ce_charge / 3600 # To A-hr 196 | data.loc[cycle_subset['index'], 'cycled_energy'] = energy_change / 3600 # To W-hr 197 | -------------------------------------------------------------------------------- /battdat/postprocess/tagging.py: -------------------------------------------------------------------------------- 1 | """Methods which assign labels that are present in some testing machines yet absent in others.
2 | 3 | For example, :class:`AddMethod` determines whether the battery is being held at a constant voltage or current.""" 4 | import logging 5 | from typing import List, Literal 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from pandas import DataFrame 10 | from scipy.interpolate import interp1d 11 | from scipy.signal import find_peaks, savgol_filter 12 | 13 | from battdat.schemas.column import ChargingState, ControlMethod 14 | from .base import RawDataEnhancer 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class AddMethod(RawDataEnhancer): 20 | """Determine how the battery was being controlled 21 | 22 | Determines whether a charging step is composed of constant-current, constant-voltage, 23 | or mixed steps by first partitioning it into substeps based on points of maximum curvature, 24 | then assigning regions to constant voltage or constant current if one signal varies 25 | more than twice as much as the other. 26 | """ 27 | def __init__(self, short_period_threshold: float = 30.0): 28 | """ 29 | Args: 30 | short_period_threshold: Maximum duration of a step to be considered a short step, in seconds 31 | """ 32 | self.short_period_threshold = short_period_threshold 33 | 34 | @property 35 | def column_names(self) -> List[str]: 36 | return ['method'] 37 | 38 | def enhance(self, df: pd.DataFrame): 39 | # Insert a new column into the dataframe, starting with everything marked as other 40 | df.loc[:, ('method',)] = ControlMethod.other 41 | 42 | # Group the points by cycle and step 43 | cycles = df.groupby(["cycle_number", "step_index"]) 44 | logger.info('Identifying charging/discharging methods') 45 | for key, cycle in cycles: 46 | 47 | # pull out columns of interest and turn into numpy arrays 48 | t = cycle["test_time"].values 49 | voltage = cycle["voltage"].values 50 | current = cycle['current'].values 51 | ind = cycle.index.values 52 | state = cycle['state'].values 53 | 54 | if t[-1] - t[0] < self.short_period_threshold: 55 | # The step is shorter than the threshold 56 | if state[0] == ChargingState.rest: 57 | # If the step is a rest, we label it as a short rest 58 | df.loc[ind, 'method'] = ControlMethod.short_rest 59 | elif len(ind) < 5: 60 | # The step contains fewer than 5 data points, so it is inappropriate to label it as anything 61 | # definitive other than a short non-rest 62 | df.loc[ind, 'method'] = ControlMethod.short_nonrest 63 | else: 64 | # The step is a pulse 65 | df.loc[ind, 'method'] = ControlMethod.pulse 66 | elif state[0] == ChargingState.rest: 67 | # This is a standard rest, which lasts longer than the threshold 68 | df.loc[ind, 'method'] = ControlMethod.rest 69 | elif len(ind) < 5: 70 | # The step spans more than the threshold, but has fewer than 5 data points, rendering it inadequate for control 71 | # method determination 72 | df.loc[ind, 'method'] = ControlMethod.unknown 73 | 74 | else: 75 | # Normalize the voltage and current before determining which one moves "more" 76 | for x in [voltage, current]: 77 | x -= x.min() 78 | x /= max(x.max(), 1e-6) 79 | 80 | # First see if there are significant changes in the charging behavior 81 | # We use a Savitzky-Golay filter (https://en.wikipedia.org/wiki/Savitzky%E2%80%93Golay_filter) to get smooth 82 | # derivatives, which requires even spacing.
83 | # So, our first step will be to make sure that the spacings are relatively even, 84 | # and to make an interpolated version if not 85 | dt = t[1:] - t[:-1] 86 | noneven = dt.std() / dt.mean() > 1e-6 87 | if noneven: 88 | t_spaced = np.linspace(t.min(), t.max(), len(t) * 2) 89 | voltage_spaced = interp1d(t, voltage)(t_spaced) 90 | current_spaced = interp1d(t, current)(t_spaced) 91 | else: 92 | voltage_spaced = voltage 93 | current_spaced = current 94 | 95 | d2v_dt2 = savgol_filter(voltage_spaced, 5, 4, deriv=2) 96 | d2i_dt2 = savgol_filter(current_spaced, 5, 4, deriv=2) 97 | 98 | # If we had to interpolate, interpolate again to get the values of the derivative 99 | if noneven: 100 | d2v_dt2 = interp1d(t_spaced, d2v_dt2)(t) 101 | d2i_dt2 = interp1d(t_spaced, d2i_dt2)(t) 102 | 103 | current_peaks, _ = find_peaks(d2i_dt2, distance=5, prominence=10 ** -3) 104 | voltage_peaks, _ = find_peaks(d2v_dt2, distance=5, prominence=10 ** -3) 105 | 106 | # Assign a control method to the segment between each of these peaks 107 | extrema = [0] + sorted(set(current_peaks).union(set(voltage_peaks))) + [len(voltage)] 108 | 109 | methods = [] 110 | for i in range(len(extrema) - 1): 111 | # Get the segment between these two peaks 112 | low = extrema[i] 113 | high = extrema[i + 1] 114 | r = np.arange(low, high).tolist() 115 | 116 | # Measure the ratio between the change in current and the change in voltage 117 | s_i = current[r].std() 118 | s_v = voltage[r].std() 119 | val = s_i / max(s_i + s_v, 1e-6) 120 | 121 | if val > 0.66: # If the change in current is 2x as large as the change in voltage 122 | method = ControlMethod.constant_voltage 123 | elif val < 0.33: # If the change in voltage is 2x as large as the change in current 124 | method = ControlMethod.constant_current 125 | else: 126 | method = ControlMethod.other 127 | methods.extend([method] * len(r)) 128 | 129 | assert len(methods) == len(ind), (len(methods), len(ind)) 130 | df.loc[ind, 'method'] = methods 131 | 132 | return df[['method']] 133 | 134 | 135 | class AddState(RawDataEnhancer): 136 | """ 137 | Marks states in which battery is charging, discharging, or resting 138 | 139 | Args: 140 | rest_curr_threshold: threshold of current for a period to be considered a rest 141 | """ 142 | def __init__(self, rest_curr_threshold: float = 1.0e-04): 143 | self.rest_curr_threshold = rest_curr_threshold 144 | 145 | @property 146 | def column_names(self) -> List[str]: 147 | return ['state'] 148 | 149 | def enhance(self, data: pd.DataFrame) -> None: 150 | logger.debug('Adding states') 151 | data.loc[:, ('state',)] = data.apply(_determine_state, axis=1, args=(self.rest_curr_threshold,)) 152 | 153 | 154 | class AddSteps(RawDataEnhancer): 155 | """Mark points at which the battery changed state: charging, discharging, rest""" 156 | 157 | column_names = ['step_index'] 158 | 159 | def enhance(self, data: pd.DataFrame): 160 | logger.debug('Adding step indices') 161 | _determine_steps(data, 'state', 'step_index') 162 | 163 | 164 | class AddSubSteps(RawDataEnhancer): 165 | """Mark points at which the battery control method changed state 166 | 167 | See :class:`~AddMethod` for how control methods are determined.
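The tagging enhancers are typically applied in sequence, since later ones consume the columns added by earlier ones (an illustrative ordering which mirrors the MACCOR reader)::

    AddState().enhance(df)     # adds 'state'
    AddSteps().enhance(df)     # adds 'step_index' from changes in 'state'
    AddMethod().enhance(df)    # adds 'method', using 'step_index'
    AddSubSteps().enhance(df)  # adds 'substep_index' from changes in 'method'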
168 | """ 169 | 170 | def enhance(self, data: pd.DataFrame): 171 | logger.debug('Adding substep indices') 172 | _determine_steps(data, 'method', 'substep_index') 173 | 174 | 175 | def _determine_steps(df: DataFrame, column: str, output_col: str): 176 | """Assign step indices based on whether there is a change in the value of a certain column 177 | 178 | Also resets the 179 | 180 | Parameters 181 | ---------- 182 | df: pd.DataFrame 183 | Battery data 184 | column: str 185 | Column which to monitor for changes 186 | output_col: str 187 | Name in column which to store output results 188 | """ 189 | # A new step occurs when the previous step had a different value, so we compare against 190 | # the array shifted forward one index 191 | change = df[column].ne(df[column].shift(periods=1, fill_value=df[column].iloc[0])) 192 | 193 | # The step number is equal to the number of changes observed previously in a batch 194 | # Step 1: Compute the changes since the beginning of file 195 | df.loc[:, (output_col,)] = change.cumsum() 196 | 197 | # Step 2: Adjust so that each cycle starts with step 0 198 | for _, cycle in df.groupby("cycle_number"): 199 | df.loc[cycle.index, output_col] -= cycle[output_col].min() 200 | 201 | 202 | def _determine_state( 203 | row: pd.Series, 204 | zero_threshold: float = 1.0e-4 205 | ) -> Literal[ChargingState.charging, ChargingState.discharging, ChargingState.rest]: 206 | """ 207 | Function to help determine the state of the cell based on the current 208 | 209 | Args: 210 | row: Row that stores the value of current, following the convention established in this package 211 | zero_threshold: Maximum absolute value a current can take to be assigned rest. Defaults to 0.1 mA 212 | 213 | Returns 214 | State of the cell, which can be either 'charging', 'discharging', or 'rest' 215 | """ 216 | current = row['current'] 217 | if abs(current) <= zero_threshold: 218 | return ChargingState.rest 219 | elif current > 0.: 220 | return ChargingState.charging 221 | return ChargingState.discharging 222 | -------------------------------------------------------------------------------- /battdat/postprocess/timing.py: -------------------------------------------------------------------------------- 1 | """Features related to the relative to the start of cycles or the test, etc""" 2 | import warnings 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from battdat.postprocess.base import CycleSummarizer, RawDataEnhancer 8 | 9 | 10 | class CycleTimesSummarizer(CycleSummarizer): 11 | """Capture the start time and duration of a cycle 12 | 13 | The start of a cycle is the minimum test time for any point in the raw data. 14 | 15 | The duration of a cycle is the difference between the start of the next cycle and the start of the cycle. 16 | If the start time of the next cycle is unavailable, it is the difference between the test time of the 17 | last test time in the raw data and the start of the cycle. 
18 | """ 19 | 20 | column_names = ['cycle_start', 'cycle_duration'] 21 | 22 | def _summarize(self, raw_data: pd.DataFrame, cycle_data: pd.DataFrame): 23 | # Compute the starts and durations 24 | time_summary = raw_data.groupby('cycle_number')['test_time'].agg( 25 | cycle_start="min", cycle_duration=lambda x: max(x) - min(x), count=len 26 | ).reset_index() # reset_index makes `cycle_number` a regular column 27 | if time_summary['count'].min() == 1: 28 | warnings.warn('Some cycles have only one measurements.') 29 | 30 | # Compute the duration using the start of the next cycle, if known 31 | time_summary['next_diff'] = time_summary['cycle_number'].diff(-1).iloc[:-1] 32 | if (time_summary['next_diff'].iloc[:-1] != -1).any(): 33 | warnings.warn('Some cycles are missing from the dataframe. Time durations for those cycles may be too short') 34 | has_next_cycle = time_summary.query('next_diff == -1') 35 | time_summary.loc[has_next_cycle.index, 'cycle_duration'] = -time_summary['cycle_start'].diff(-1)[has_next_cycle.index] 36 | 37 | # Update the cycle_data accordingly 38 | cycle_data[self.column_names] = np.nan 39 | cycle_data.update(time_summary) 40 | 41 | 42 | class TimeEnhancer(RawDataEnhancer): 43 | """Compute additional columns describing the time a measurement was taken""" 44 | 45 | column_names = ['test_time', 'cycle_time'] 46 | 47 | def enhance(self, data: pd.DataFrame): 48 | 49 | # Compute the test_time from the date_time 50 | if 'test_time' not in data.columns: 51 | if 'date_time' not in data.columns: 52 | raise ValueError('The data must contain a `date_time` column') 53 | data['test_time'] = (data['date_time'] - data['date_time'].min()).dt.total_seconds() 54 | 55 | # Compute the cycle_time from the test_time 56 | data['cycle_time'] = data['test_time'] 57 | data['cycle_time'] -= data.groupby('cycle_number')['test_time'].transform("min") 58 | return data 59 | -------------------------------------------------------------------------------- /battdat/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | """Schemas for battery data and metadata""" 2 | from typing import List, Tuple, Optional 3 | 4 | from pydantic import BaseModel, Field, AnyUrl 5 | 6 | from battdat.schemas.modeling import ModelMetadata 7 | from battdat.schemas.battery import BatteryDescription 8 | from battdat.schemas.cycling import CyclingProtocol 9 | from battdat.version import __version__ 10 | 11 | 12 | class BatteryMetadata(BaseModel, extra='allow'): 13 | """Representation for the metadata about a battery 14 | 15 | The metadata captures the information about what experiment was run 16 | on what battery. A complete set of metadata should be sufficient to 17 | reproduce an experiment. 18 | """ 19 | 20 | # Miscellaneous fields 21 | name: Optional[str] = Field(None, description="Name of the cell. Any format for the name is acceptable," 22 | " as it is intended to be used by the battery data provider.") 23 | comments: Optional[str] = Field(None, description="Long form comments describing the test") 24 | version: str = Field(__version__, description="Version of this metadata. 
Set by the battery-data-toolkit") 25 | is_measurement: bool = Field(True, description="Whether the data was created observationally as opposed to a computer simulation", 26 | json_schema_extra=dict( 27 | iri="https://w3id.org/emmo#EMMO_463bcfda_867b_41d9_a967_211d4d437cfb" 28 | )) 29 | 30 | # Fields that describe the test protocol 31 | test_protocol: Optional[CyclingProtocol] = Field(None, description="Method used to cycle the battery") 32 | 33 | # Field that describe the battery assembly 34 | battery: Optional[BatteryDescription] = Field(None, description="Description of the battery being cycled") 35 | 36 | # Fields that describe source of synthetic data 37 | modeling: Optional[ModelMetadata] = Field(None, description="Description of simulation approach") 38 | 39 | # Fields that describe the source of data 40 | source: Optional[str] = Field(None, description="Organization who created this data") 41 | dataset_name: Optional[str] = Field(None, description="Name of a larger dataset this data is associated with") 42 | authors: Optional[List[Tuple[str, str]]] = Field(None, description="Name and affiliation of each of the authors of the data. First and last names") 43 | associated_ids: Optional[List[AnyUrl]] = Field(None, description="Any identifiers associated with this data file." 44 | " Identifiers can be any URI, such as DOIs of associated" 45 | " paper or HTTP addresses of associated websites") 46 | -------------------------------------------------------------------------------- /battdat/schemas/battery.py: -------------------------------------------------------------------------------- 1 | """Schemas associated with the components of a battery""" 2 | from typing import Optional, List 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class ElectrodeDescription(BaseModel, extra='allow'): 8 | """Description of an electrode""" 9 | 10 | name: str = Field(..., description='Short description of the electrolyte type') 11 | 12 | # Relating to sourcing information 13 | supplier: Optional[str] = Field(None, description='Manufacturer of the material') 14 | product: Optional[str] = Field(None, description='Name of the product. 
Unique to the supplier') 15 | 16 | # Relating to the microstructure of the electrode 17 | thickness: Optional[float] = Field(None, description='Thickness of the material', ge=0, 18 | json_schema_extra=dict(units='um')) 19 | area: Optional[float] = Field(None, description='Total area of the electrode', ge=0, 20 | json_schema_extra=dict(units='cm^2')) 21 | loading: Optional[float] = Field(None, description='Amount of active material per area', ge=0, 22 | json_schema_extra=dict(units='mg/cm^2')) 23 | porosity: Optional[float] = Field(None, description='Relative volume of the electrode occupied by gas', 24 | ge=0, le=100, json_schema_extra=dict(units='%')) 25 | 26 | 27 | class ElectrolyteAdditive(BaseModel, extra='allow'): 28 | """Additive to the electrolyte""" 29 | 30 | name: str = Field(..., description='Name of the additive') 31 | amount: Optional[float] = Field(None, description='Amount added to the solution') 32 | units: Optional[str] = Field(None, description='Units of the amount') 33 | 34 | 35 | class ElectrolyteDescription(BaseModel, extra='allow'): 36 | """Description of the electrolyte""" 37 | 38 | name: str = Field(..., description='Short description of the electrolyte type') 39 | additives: List[ElectrolyteAdditive] = Field(default_factory=list, 40 | description='Any additives present in the electrolyte') 41 | 42 | 43 | class BatteryDescription(BaseModel, extra='allow'): 44 | """Description of the entire battery""" 45 | 46 | # Overall design information 47 | manufacturer: Optional[str] = Field(None, description="Manufacturer of the battery") 48 | design: Optional[str] = Field(None, description="Name of the battery type, such as the battery product ID") 49 | 50 | # Geometry information 51 | layer_count: Optional[int] = Field(None, description="Number of layers within the battery", gt=1) 52 | form_factor: Optional[str] = Field(None, description="The general shape of the battery", 53 | json_schema_extra=dict( 54 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_1586ef26_6d30_49e3_ae32_b4c9fc181941" 55 | )) 56 | mass: Optional[float] = Field(None, description="Mass of the entire battery", 57 | json_schema_extra=dict(units='kg')) 58 | dimensions: Optional[str] = Field(None, description='Dimensions of the battery in plain text.') 59 | 60 | # Materials description 61 | anode: Optional[ElectrodeDescription] = Field(None, description="Name of the anode material", 62 | json_schema_extra=dict( 63 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_b6319c74_d2ce_48c0_a75a_63156776b302" 64 | )) 65 | cathode: Optional[ElectrodeDescription] = Field( 66 | None, description="Name of the cathode material", 67 | json_schema_extra=dict( 68 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_35c650ab_3b23_4938_b312_1b0dede2e6d5" 69 | )) 70 | electrolyte: Optional[ElectrolyteDescription] = Field( 71 | None, description="Name of the electrolyte material", 72 | json_schema_extra=dict( 73 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_fb0d9eef_92af_4628_8814_e065ca255d59" 74 | )) 75 | 76 | # Performance information 77 | nominal_capacity: Optional[float] = Field( 78 | None, description="Rated capacity of the battery", 79 | json_schema_extra=dict( 80 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_9b3b4668_0795_4a35_9965_2af383497a26", 81 | units='A-hr' 82 | )) 83 | -------------------------------------------------------------------------------- /battdat/schemas/cycling.py:
-------------------------------------------------------------------------------- 1 | """Describing cycling protocol""" 2 | from datetime import date 3 | from typing import Optional 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | 8 | class CyclingProtocol(BaseModel, extra='allow'): 9 | """Test protocol for cell cycling""" 10 | cycler: Optional[str] = Field(None, description='Name of the cycling machine') 11 | start_date: Optional[date] = Field(None, description="Date the initial test on the cell began") 12 | set_temperature: Optional[float] = Field(None, description="Set temperature for the battery testing equipment", 13 | json_schema_extra=dict(units='C')) 14 | schedule: Optional[str] = Field(None, description="Schedule file used for the cycling machine") 15 | -------------------------------------------------------------------------------- /battdat/schemas/eis.py: -------------------------------------------------------------------------------- 1 | """Schemas associated with Electrochemical Impedance Spectroscopy""" 2 | from pandas import DataFrame 3 | import numpy as np 4 | 5 | from .column import ColumnSchema, ColumnInfo, DataType 6 | 7 | 8 | class EISData(ColumnSchema): 9 | """Measurements for a specific EIS test""" 10 | 11 | test_id: ColumnInfo = ColumnInfo(description='Integer used to identify rows belonging to the same experiment.', required=True, type=DataType.INTEGER) 12 | test_time: ColumnInfo = ColumnInfo(description="Time from the beginning of measurements.", units="s", monotonic=True, type=DataType.FLOAT) 13 | time: ColumnInfo = ColumnInfo(description="Time as a UNIX timestamp. Assumed to be in UTC", type=DataType.FLOAT) 14 | frequency: ColumnInfo = ColumnInfo(description="Applied frequency", units="Hz", required=True, type=DataType.FLOAT) 15 | z_real: ColumnInfo = ColumnInfo(description="Real component of impedance", units="Ohm", required=True, type=DataType.FLOAT) 16 | z_imag: ColumnInfo = ColumnInfo(description="Imaginary component of impedance", units="Ohm", required=True, type=DataType.FLOAT) 17 | z_mag: ColumnInfo = ColumnInfo(description="Magnitude of impedance", units="Ohm", required=True, type=DataType.FLOAT) 18 | z_phase: ColumnInfo = ColumnInfo(description="Phase angle of the impedance", units="Degree", required=True, type=DataType.FLOAT) 19 | 20 | def validate_dataframe(self, data: DataFrame, allow_extra_columns: bool = True): 21 | # Check that the schema is supported 22 | super().validate_dataframe(data, allow_extra_columns) 23 | 24 | # Ensure that the cartesian coordinates for the impedance agree with the magnitude 25 | cart = { 26 | 'real': np.multiply(data['z_mag'], np.cos(np.deg2rad(data['z_phase']))), 27 | 'imag': np.multiply(data['z_mag'], np.sin(np.deg2rad(data['z_phase']))) 28 | } 29 | for k, values in cart.items(): 30 | largest_diff = (np.abs(values - data[f'z_{k}']) / np.clip(np.abs(values), a_min=1e-6, a_max=None)).max() 31 | if largest_diff > 0.01: 32 | raise ValueError(f'Polar and cartesian forms of impedance disagree for {k} component.
Largest difference: {largest_diff * 100:.1f}%') 33 | -------------------------------------------------------------------------------- /battdat/schemas/modeling.py: -------------------------------------------------------------------------------- 1 | """Metadata which describes how data produced by models were generated""" 2 | from typing import Optional, List 3 | from enum import Enum 4 | 5 | from pydantic import BaseModel, Field, AnyUrl 6 | 7 | 8 | class ModelTypes(str, Enum): 9 | """Type of computational method""" 10 | 11 | physics = 'physics' 12 | """A computational application that uses a physical model to predict the behaviour of a system, 13 | providing an identifiable analogy with the original object. 14 | 15 | IRI: https://w3id.org/emmo#EMMO_8d4962d7_9608_44f7_a2f1_82a4bb173f4a""" 16 | data = 'data' 17 | """A computational application that uses existing data to predict the behaviour of a system 18 | without providing an identifiable analogy with the original object. 19 | 20 | IRI: https://w3id.org/emmo#EMMO_a4b14b83_9392_4a5f_a2e8_b2b58793f59b""" 21 | 22 | empirical = 'empirical' 23 | """A computational application that uses an empiric equation to predict the behaviour of a system 24 | without relying on the knowledge of the actual physical phenomena occurring in the object. 25 | 26 | IRI: https://w3id.org/emmo#EMMO_67c70dcd_2adf_4e6c_b3f8_f33dd1512487""" 27 | 28 | 29 | class ModelMetadata(BaseModel, extra='allow'): 30 | """Describe the type and version of a computational tool used to generate battery data""" 31 | 32 | # High-level information about the code 33 | name: str = Field(..., description='Name of the software') 34 | version: Optional[str] = Field(None, description='Version of the software if known') 35 | type: Optional[ModelTypes] = Field(None, description='Type of the computational method it implements.') 36 | references: Optional[List[AnyUrl]] = Field(None, description='List of references associated with the software') 37 | 38 | # Details for physics based simulation 39 | models: Optional[List[str]] = Field( 40 | None, description='Type of mathematical model(s) being used in physics simulation. ' 41 | 'Use terms defined in BattINFO, such as "BatteryEquivalentCircuitModel".', 42 | json_schema_extra=dict( 43 | root_iri='https://w3id.org/emmo#EMMO_f7ed665b_c2e1_42bc_889b_6b42ed3a36f0' 44 | )) 45 | simulation_type: Optional[str] = Field( 46 | None, description='Type of simulation being performed.
Use terms defined in BattINFO, such as "TightlyCoupledModelsSimulation"', 47 | json_schema_extra=dict( 48 | root_iri='https://w3id.org/emmo#EMMO_e97af6ec_4371_4bbc_8936_34b76e33302f' 49 | )) 50 | -------------------------------------------------------------------------------- /battdat/schemas/ontology.py: -------------------------------------------------------------------------------- 1 | """Tools used for linking terms in our data format to the BattINFO ontology""" 2 | from dataclasses import dataclass, field 3 | from functools import cache 4 | from typing import Type, List, Optional, Union 5 | 6 | from ontopy import World 7 | from owlready2 import Thing 8 | from pydantic import BaseModel 9 | 10 | _battinfo_url = 'https://raw.githubusercontent.com/emmo-repo/domain-battery/master/battery-inferred.ttl' 11 | 12 | 13 | @cache 14 | def load_battinfo(): 15 | """Download the latest ontology and store it in an in-memory world""" 16 | return World().get_ontology(_battinfo_url).load() 17 | 18 | 19 | @dataclass 20 | class TermInfo: 21 | """Information about a term as referenced from the BattINFO ontology""" 22 | 23 | name: str 24 | """Name of the matching term""" 25 | iri: str = field(repr=False) 26 | """IRI of the term""" 27 | elucidation: Optional[str] = field(repr=False) 28 | """Explanation of the term""" 29 | 30 | @classmethod 31 | def from_thing(cls, thing: Thing): 32 | # Retrieve the description, as provided by EMMO 33 | eluc = thing.get_annotations().get('elucidation') 34 | if eluc is not None: 35 | eluc = str(eluc[0]) 36 | return TermInfo(name=str(thing), iri=thing.iri, elucidation=eluc) 37 | 38 | 39 | def cross_reference_terms(model: Type[BaseModel]) -> dict[str, TermInfo]: 40 | """Gather the descriptions of fields from our schema which 41 | are cross-referenced to a term within the BattINFO/EMMO ontologies 42 | 43 | Args: 44 | model: Schema object to be cross-referenced 45 | Returns: 46 | Mapping from each metadata field to the elucidation of its term in the ontology 47 | """ 48 | 49 | # Load the BattINFO ontology 50 | battinfo = load_battinfo() 51 | 52 | # Loop over each field in the schema 53 | terms = {} 54 | for name, attr in model.model_fields.items(): 55 | # Map to the term in the ontology if known 56 | if attr.json_schema_extra is not None and (iri := attr.json_schema_extra.get('iri')) is not None: 57 | term = battinfo.search_one(iri=iri) 58 | if term is None: 59 | raise ValueError(f'Could not find matching term for {name} with iri={iri}') 60 | terms[name] = TermInfo.from_thing(term) 61 | 62 | return terms 63 | 64 | 65 | def resolve_term(name_or_iri: str) -> Thing: 66 | """Resolve the Term object associated with a string 67 | 68 | Args: 69 | name_or_iri: The preferred label or the IRI of a term in the ontology 70 | Returns: 71 | Thing matching the term 72 | """ 73 | 74 | # Attempt to find it 75 | bi = load_battinfo() 76 | if name_or_iri.startswith('https://'): 77 | term = bi.search_one(iri=name_or_iri) 78 | t = 'IRI' 79 | else: 80 | term = bi.search_one(prefLabel=name_or_iri) 81 | t = 'name' 82 | 83 | if term is None: 84 | raise ValueError(f'Could not find the {t}={name_or_iri}') 85 | return term 86 | 87 | 88 | def gather_descendants(term: Union[Type[Thing], str]) -> List[TermInfo]: 89 | """Get descriptions of the descendants of a certain base type 90 | 91 | Args: 92 | term: Term for which to gather all descendants.
Either the class object itself or its preferred label or IRI 93 | Returns: 94 | List of descriptions of the descendants 95 | """ 96 | 97 | # Resolve the term object, if needed 98 | if isinstance(term, str): 99 | term = resolve_term(term) 100 | 101 | return [ 102 | TermInfo.from_thing(d) for d in term.descendants(include_self=False) 103 | ] 104 | -------------------------------------------------------------------------------- /battdat/streaming/__init__.py: -------------------------------------------------------------------------------- 1 | """Retrieve data in smaller chunks from a large HDF5 file""" 2 | from typing import Union, Iterator, Dict, Collection 3 | from itertools import groupby 4 | from pathlib import Path 5 | 6 | import pandas as pd 7 | from pandas import HDFStore 8 | from tables import File, Table 9 | 10 | from battdat.data import BatteryDataset 11 | from battdat.io.hdf import as_hdf5_object 12 | 13 | RecordType = Dict[str, Union[str, float, int]] 14 | 15 | 16 | def _get_raw_data_iterator_h5(hdf5_path: Union[Path, str, File], key: str) -> Iterator[RecordType]: 17 | """Open an iterator over rows of an HDF5 Table""" 18 | 19 | with as_hdf5_object(hdf5_path) as file: 20 | table: Table = file.get_node(f'/{key}') 21 | names = table.dtype.fields.keys() 22 | for row in table.iterrows(): 23 | out = dict((n, row[n]) for n in names) 24 | yield out 25 | 26 | 27 | def iterate_records_from_file(hdf5_path: Union[Path, str, HDFStore], key: str = 'raw_data') -> Iterator[RecordType]: 28 | """Stream individual records from a file 29 | 30 | Args: 31 | hdf5_path: Path to the data file 32 | key: Which table to read 33 | Yields: 34 | Individual rows from the requested table of the HDF5 file 35 | """ 36 | 37 | yield from _get_raw_data_iterator_h5(hdf5_path, key=key) 38 | 39 | 40 | def iterate_cycles_from_file(hdf5_path: Union[Path, str, HDFStore], 41 | make_dataset: bool = False, 42 | key: str | Collection[str] | None = 'raw_data') -> Iterator[Union[pd.DataFrame, Dict[str, pd.DataFrame], BatteryDataset]]: 43 | """Stream single-cycle datasets from the HDF5 file 44 | 45 | Args: 46 | hdf5_path: Path to the data file 47 | make_dataset: Whether to form a :class:`~battdat.data.BatteryDataset` for each cycle, 48 | including the metadata from the source file. 49 | key: Which table(s) to read. Supply either a single key, a list of keys, or ``None`` to read all tables 50 | 51 | Yields: 52 | All rows belonging to each cycle from the requested table of the HDF5 file. 53 | Generates a ``BatteryDataset`` if ``make_dataset`` is ``True``. 54 | Otherwise, yields a single DataFrame if ``key`` is a single string 55 | or a dictionary of DataFrames if ``key`` is a list.
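For example (assuming ``cell.h5`` is an HDF5 file written by this package)::

    for cycle_df in iterate_cycles_from_file('cell.h5', key='raw_data'):
        print(cycle_df['cycle_number'].iloc[0], len(cycle_df))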
56 | """ 57 | 58 | # Get the metadata out of the file, if needed 59 | metadata = None 60 | if make_dataset or key is None: 61 | metadata, _, schemas = BatteryDataset.inspect_hdf(hdf5_path) 62 | 63 | # Determine the keys to read from the file 64 | single = False 65 | if isinstance(key, str): 66 | single = True 67 | keys = [key] 68 | elif key is None: 69 | keys = list(schemas.keys()) 70 | else: 71 | keys = list(key) 72 | 73 | iterators = [ 74 | groupby(_get_raw_data_iterator_h5(hdf5_path, k), lambda x: x['cycle_number']) for k in keys 75 | ] 76 | 77 | for batch in zip(*iterators): 78 | cycle_ids, chunks = zip(*batch) 79 | if len(set(cycle_ids)) != 1: 80 | raise ValueError(f'Different cycle indices across entries: {" ".join(f"{k}={i}" for k, i in zip(keys, cycle_ids))}') 81 | 82 | # Produce the desired output file 83 | chunks = [pd.DataFrame(chunk) for chunk in chunks] 84 | if single and not make_dataset: 85 | yield chunks[0] 86 | elif make_dataset: 87 | yield BatteryDataset( 88 | metadata=metadata, 89 | schemas=schemas, 90 | tables=dict(zip(keys, chunks)) 91 | ) 92 | else: 93 | yield dict(zip(keys, chunks)) 94 | -------------------------------------------------------------------------------- /battdat/streaming/hdf5.py: -------------------------------------------------------------------------------- 1 | """Streaming tools related to the HDF5 format""" 2 | from typing import Union, Dict, Optional, List 3 | from contextlib import AbstractContextManager 4 | from dataclasses import field, dataclass 5 | from pathlib import Path 6 | import logging 7 | 8 | import numpy as np 9 | import pandas as pd 10 | from tables import File, Table, Filters 11 | 12 | from battdat.io.hdf import write_df_to_table 13 | from battdat.schemas.column import ColumnSchema, RawData 14 | from battdat.schemas import BatteryMetadata 15 | from battdat import __version__ 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class HDF5Writer(AbstractContextManager): 22 | """Tool to write raw time series data to an HDF5 file incrementally 23 | 24 | Writes data to the ``raw_data`` key of a different dataset.""" 25 | 26 | # Attributes defining where and how to write 27 | hdf5_output: Union[Path, str, File] 28 | """File or already-open HDF5 file in which to store data""" 29 | write_mode: str = 'a' 30 | """Mode to use when opening the HDF5 file. Ignored if :attr:`hdf5_output` is a ``File``.""" 31 | metadata: BatteryMetadata = field(default_factory=BatteryMetadata) 32 | """Metadata describing the cell""" 33 | schema: ColumnSchema = field(default_factory=RawData) 34 | """Schema describing columns of the cell""" 35 | complevel: int = 0 36 | """Compression level. Can be between 0 (no compression) and 9 (maximum compression). Ignored if data table already exists""" 37 | complib: str = 'zlib' 38 | """Compression algorithm. Consult :func:`~pandas.read_hdf` for available options. Ignored if data table already exists""" 39 | key: str = '' 40 | """Name of the root group in which to store the data. 
Ignored if :attr:`hdf5_output` is a ``File``.""" 41 | buffer_size: int = 32768 42 | """Number of rows to collect in memory before writing to disk""" 43 | 44 | # State used only while in writing mode 45 | _file: Optional[File] = None 46 | """Handle to an open file""" 47 | _dtype: Optional[np.dtype] = None 48 | """Dtype of records to be written""" 49 | _table: Optional[Table] = None 50 | """Pointer to the table being written""" 51 | _write_buffer: Optional[List[Dict]] = None 52 | """Buffer of rows waiting to be written""" 53 | 54 | def __enter__(self): 55 | self._write_buffer = list() 56 | 57 | # Open the store, if needed 58 | if isinstance(self.hdf5_output, File): 59 | self._file = self.hdf5_output 60 | else: 61 | self._file = File( 62 | self.hdf5_output, 63 | root_uep='/' + self.key, 64 | mode=self.write_mode 65 | ) 66 | 67 | # Write metadata to the store's root's attributes 68 | root = self._file.root 69 | root._v_attrs.metadata = self.metadata.model_dump_json(exclude_none=True) 70 | root._v_attrs.json_schema = self.metadata.model_json_schema() 71 | root._v_attrs.battdat_version = __version__ 72 | 73 | # Get the table if it exists already 74 | if 'raw_data' in root: 75 | self._table = root['raw_data'] 76 | return self 77 | 78 | def __exit__(self, exc_type, exc_val, exc_tb): 79 | if len(self._write_buffer) > 0: # Ensure last rows are written 80 | self.flush() 81 | if not isinstance(self.hdf5_output, File): # Close file if a path was provided 82 | self._file.close() 83 | self._table = self._file = self._write_buffer = None 84 | 85 | def write_row(self, row: Dict[str, Union[str, float, int]]) -> int: 86 | """Add a row to the data file 87 | 88 | Args: 89 | row: Row to be added to the HDF5 file 90 | Returns: 91 | Number of rows written to file. Writes only occur when a write buffer has filled 92 | """ 93 | self._write_buffer.append(row.copy()) 94 | if len(self._write_buffer) >= self.buffer_size: 95 | return self.flush() 96 | return 0 97 | 98 | def flush(self) -> int: 99 | """Write the current row buffer to the file 100 | 101 | Returns: 102 | Number of rows written 103 | """ 104 | 105 | if self._table is None: 106 | # Make the table the first time 107 | filters = Filters(complevel=self.complevel, complib=self.complib) 108 | df = pd.DataFrame(self._write_buffer) 109 | self._table = write_df_to_table(self._file, self._file.root, name='raw_data', filters=filters, df=df) 110 | 111 | # Store the metadata 112 | self._table.attrs.metadata = self.schema.model_dump_json() 113 | self._table.attrs.json_schema = self.schema.model_json_schema() 114 | else: 115 | # Append rows to the "raw_data" key 116 | row = np.empty((1,), dtype=self._table.dtype) 117 | known_names = set(self._table.dtype.names) 118 | for new_row in self._write_buffer: 119 | if (new_keys := set(new_row.keys())) != known_names: 120 | logger.warning(f'Row has different keys than the Table. New keys: {", ".join(new_keys.difference(known_names))}.' 121 | f' Missing: {", ".join(known_names.difference(new_keys))}') 122 | for c in known_names: 123 | row[c] = new_row[c] 124 | self._table.append(row) 125 | 126 | written = len(self._write_buffer) 127 | self._write_buffer.clear() 128 | return written 129 | -------------------------------------------------------------------------------- /battdat/utils.py: -------------------------------------------------------------------------------- 1 | from pandas import DataFrame 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | # TODO (wardlt): Move to post-processing?
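# Illustrative use (assuming ``df`` follows battdat's raw-data column conventions):
#
#     df = drop_cycles(df, digit=2)
#
# which removes rows whose rounded time, voltage, and current duplicate an earlier row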
8 | def drop_cycles(df: DataFrame, digit: int = 2): 9 | """ 10 | Drop duplicate rows from a dataframe. 11 | 12 | Rows are considered duplicates if they meet the following criteria: 13 | the voltage and current must be exactly the same, 14 | and the time must be identical after rounding to ``digit`` digits, 15 | as times can sometimes vary by some epsilon in the Arbin data. 16 | 17 | 18 | Parameters 19 | ---------- 20 | df : Pandas DataFrame 21 | input dataframe 22 | digit : int 23 | number of digits to round to in time index (in seconds) 24 | 25 | Returns 26 | ------- 27 | df : Pandas DataFrame 28 | dataframe without the duplicate rows 29 | 30 | 31 | Examples 32 | -------- 33 | none yet 34 | 35 | """ 36 | 37 | # NOTE: we have already converted time to seconds 38 | 39 | # add rounded time to dataframe 40 | df['TMP'] = df['test_time'] 41 | logger.debug('Removing duplicates from dataframe') 42 | 43 | # round time to specified number of digits 44 | df = df.round({'TMP': digit}) 45 | len1 = len(df) 46 | 47 | # drop points where the rounded time, voltage and current are identical 48 | # keep only first instance 49 | df.drop_duplicates(subset=['TMP', 'voltage', 'current'], keep='first', inplace=True) 50 | 51 | # re-index dataframe with points dropped 52 | df.reset_index(drop=True, inplace=True) 53 | 54 | # calculate number of rows dropped 55 | dropped = len1 - len(df) 56 | logger.debug(f'Dropped {dropped} lines') 57 | 58 | # remove the now-unneeded column 59 | df.drop(columns=['TMP'], inplace=True) 60 | 61 | return df 62 | -------------------------------------------------------------------------------- /battdat/version.py: -------------------------------------------------------------------------------- 1 | # we target 3.8+, so this should be okay without fallback to importlib_metadata 2 | import importlib.metadata 3 | 4 | # single source of truth for package version, 5 | # see https://packaging.python.org/en/latest/single_source_version/ 6 | 7 | __version__ = importlib.metadata.version('battery-data-toolkit') 8 | -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | # Development Files 2 | 3 | Files useful for setting up a development environment 4 | -------------------------------------------------------------------------------- /dev/environment.yml: -------------------------------------------------------------------------------- 1 | # Conda environment file 2 | name: batdata 3 | channels: 4 | - defaults 5 | dependencies: 6 | - python==3.10.* 7 | - pandoc # Needed for building the documentation 8 | 9 | # For now, use Pip for everything major 10 | - pip 11 | - pip: 12 | # Install jupyter 13 | - jupyterlab 14 | - matplotlib 15 | - -e ..[test,docs] 16 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option.
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | cd user-guide/schemas; python export-schemas.py 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | Built using Sphinx and the pydata style 4 | 5 | Compile the documentation using Make 6 | ```commandline 7 | make html 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'Battery Data Toolkit' 10 | copyright = '2024' 11 | author = 'ROVI Team' 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = ['nbsphinx'] 17 | 18 | templates_path = ['_templates'] 19 | exclude_patterns = ['_build'] 20 | 21 | 22 | # -- Options for HTML output ------------------------------------------------- 23 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 24 | 25 | html_theme = 'pydata_sphinx_theme' 26 | html_static_path = ['_static'] 27 | html_theme_options = { 28 | "logo": { 29 | "text": "BattData", 30 | "image_light": "_static/logo.png", 31 | "image_dark": "_static/logo.png", 32 | } 33 | } 34 | html_logo = '_static/logo.png' 35 | 36 | 37 | # -- Options for NBSphinx ----------------------------------------------------- 38 | 39 | nbsphinx_execute = 'never' 40 | 41 | # -- API Documentation -------------------------------------------------------- 42 | 43 | extensions.extend([ 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.autosummary', 46 | 'sphinx.ext.intersphinx', 47 | 'sphinx.ext.napoleon', 48 | 'sphinxcontrib.autodoc_pydantic', 49 | 'sphinx_design' 50 | ]) 51 | 52 | autodoc_pydantic_model_show_json = False 53 | autodoc_pydantic_settings_show_json = False 54 | 55 | autoclass_content = 'both' 56 | 57 | intersphinx_mapping = { 58 | 'python': ('https://docs.python.org/3/', None), 59 | 'pandas': ('https://pandas.pydata.org/docs/', None), 60 | 'pyarrow': ('https://arrow.apache.org/docs/', None), 61 | 'h5py': ('https://docs.h5py.org/en/stable/', None), 62 | 'tables': ('https://www.pytables.org/', None) 63 | } 64 | -------------------------------------------------------------------------------- /docs/getting-started.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | Battery-Data-Toolkit is a Python toolkit for storing and manipulating data from battery systems. 
5 | Most operations are based on `Pandas `_ to simplify using 6 | common data-science libraries for battery science. 7 | 8 | Installation 9 | ------------ 10 | 11 | Battery Data Toolkit is available on PyPI and is pure Python. 12 | Installing via Pip will work on most systems: 13 | 14 | .. code-block:: shell 15 | 16 | pip install battery-data-toolkit 17 | 18 | Build the toolkit for development by cloning the repository 19 | then installing with the "test" and "docs" optional packages: 20 | 21 | .. code-block:: shell 22 | 23 | git clone git@github.com:ROVI-org/battery-data-toolkit.git 24 | cd battery-data-toolkit 25 | pip install -e .[test,docs] 26 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Battery Data Toolkit 2 | ==================== 3 | 4 | The battery-data-toolkit, ``battdat``, creates consistently-formatted collections of battery data. 5 | The library has three main purposes: 6 | 7 | 1. *Storing battery data in standardized formats.* ``battdat`` stores data in 8 | `high-performance file formats <./user-guide/formats.html>`_ and includes 9 | `extensive metadata <./user-guide/schemas/index.html>`_ alongside data. 10 | 2. *Interfacing battery data with the PyData ecosystem*. The core data model, 11 | `BatteryDataset <./user-guide/dataset.html>`_, 12 | is built atop Pandas DataFrames. 13 | 3. *Providing standard implementations of common analysis techniques*. ``battdat`` implements functions which 14 | `ensure quality <./user-guide/consistency/index.html>`_ 15 | or `perform common analyses <./user-guide/post-processing/index.html>`_. 16 | 17 | Source code: https://github.com/ROVI-org/battery-data-toolkit 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | :caption: Contents: 22 | 23 | getting-started 24 | user-guide/index 25 | source/modules 26 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | cd user-guide/schemas 29 | python export-schemas.py 30 | cd ../.. 
31 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 32 | goto end 33 | 34 | :help 35 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 36 | 37 | :end 38 | popd 39 | -------------------------------------------------------------------------------- /docs/pptx-files/logo.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/docs/pptx-files/logo.pptx -------------------------------------------------------------------------------- /docs/source/consistency.rst: -------------------------------------------------------------------------------- 1 | Error Checking (``battdat.consistency``) 2 | ======================================== 3 | 4 | .. automodule:: battdat.consistency 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | Base (``b.consistency.base``) 11 | ----------------------------- 12 | 13 | .. automodule:: battdat.consistency.base 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | Current (``b.consistency.current``) 19 | ------------------------------------ 20 | 21 | .. automodule:: battdat.consistency.current 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /docs/source/data.rst: -------------------------------------------------------------------------------- 1 | Dataset (``battdat.data``) 2 | ========================== 3 | 4 | .. automodule:: battdat.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/io.rst: -------------------------------------------------------------------------------- 1 | Extractors (``battdat.io``) 2 | =========================== 3 | 4 | .. automodule:: battdat.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Base Classes (``b.io.base``) 10 | ---------------------------- 11 | 12 | .. automodule:: battdat.io.base 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Arbin (``b.io.arbin``) 18 | ---------------------- 19 | 20 | .. automodule:: battdat.io.arbin 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Battery Archive (``b.io.ba``) 26 | ----------------------------- 27 | 28 | .. automodule:: battdat.io.ba 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | 34 | Battery Data Hub (``b.io.batterydata``) 35 | --------------------------------------- 36 | 37 | .. automodule:: battdat.io.batterydata 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | 43 | HDF5 (``b.io.hdf``) 44 | ------------------- 45 | 46 | .. automodule:: battdat.io.hdf 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | 52 | MACCOR (``b.io.maccor``) 53 | ------------------------ 54 | 55 | .. automodule:: battdat.io.maccor 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | 61 | Parquet (``b.io.parquet``) 62 | -------------------------- 63 | 64 | .. automodule:: battdat.io.parquet 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | battdat API 2 | =========== 3 | 4 | API documentation for each module 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | :caption: Contents: 9 | 10 | data 11 | schemas 12 | io 13 | postprocess 14 | consistency 15 | streaming 16 | -------------------------------------------------------------------------------- /docs/source/postprocess.rst: -------------------------------------------------------------------------------- 1 | Postprocessing (``battdat.postprocess``) 2 | ======================================== 3 | 4 | .. automodule:: battdat.postprocess 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | Base (``b.postprocess.base``) 11 | ----------------------------- 12 | 13 | .. automodule:: battdat.postprocess.base 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | Integral (``b.postprocess.integral``) 19 | ------------------------------------- 20 | 21 | .. automodule:: battdat.postprocess.integral 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | Tagging (``b.postprocess.tagging``) 27 | ----------------------------------- 28 | 29 | .. automodule:: battdat.postprocess.tagging 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | Timing (``b.postprocess.timing``) 35 | ---------------------------------- 36 | 37 | .. automodule:: battdat.postprocess.timing 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | -------------------------------------------------------------------------------- /docs/source/schemas.rst: -------------------------------------------------------------------------------- 1 | Schemas (``battdat.schemas``) 2 | ============================= 3 | 4 | .. automodule:: battdat.schemas 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Battery Description (``b.schemas.battery``) 10 | ------------------------------------------- 11 | 12 | .. automodule:: battdat.schemas.battery 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Metadata: Computation (``b.schemas.modeling``) 18 | ---------------------------------------------- 19 | 20 | .. automodule:: battdat.schemas.modeling 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Metadata: Cycling Protocol (``b.schemas.cycling``) 26 | -------------------------------------------------- 27 | 28 | .. automodule:: battdat.schemas.cycling 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | 34 | Data: Time Series (``b.schemas.column``) 35 | ---------------------------------------- 36 | 37 | .. automodule:: battdat.schemas.column 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | Data: EIS (``b.schemas.eis``) 43 | -------------------------------- 44 | 45 | .. automodule:: battdat.schemas.eis 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | Utility: Ontologies (``b.schemas.ontology``) 51 | -------------------------------------------- 52 | 53 | .. automodule:: battdat.schemas.ontology 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /docs/source/streaming.rst: -------------------------------------------------------------------------------- 1 | Streaming (``battdat.streaming``) 2 | ================================= 3 | 4 | .. automodule:: battdat.streaming 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | HDF5 Streaming (``b.streaming.hdf5``) 10 | ------------------------------------- 11 | 12 | .. 
automodule:: battdat.streaming.hdf5 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | -------------------------------------------------------------------------------- /docs/user-guide/consistency/index.rst: -------------------------------------------------------------------------------- 1 | Consistency Checks 2 | ================== 3 | 4 | Many problems, such as sign convention mishaps or unit conversion issues, can be detected from inconsistencies between 5 | or within columns in a dataset. 6 | The :mod:`battdat.consistency` module provides algorithms that check whether there may be problems within a battery dataset. 7 | 8 | All algorithms are based on :class:`~battdat.consistency.base.ConsistencyChecker`, 9 | which creates a list of warnings given a dataset. 10 | 11 | .. code-block:: python 12 | 13 | computer = ConsistencyChecker() 14 | warnings = computer.check(data) 15 | 16 | if len(warnings) > 0: 17 | print(f'There are {len(warnings)} warnings, which includes: {warnings[0]}') 18 | 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: Available consistency checks: 23 | 24 | check-sign-convention 25 | 26 | -------------------------------------------------------------------------------- /docs/user-guide/dataset.rst: -------------------------------------------------------------------------------- 1 | The `BatteryDataset` Object 2 | =========================== 3 | 4 | The :class:`~battdat.data.BatteryDataset` object is the central object for the battery data toolkit. 5 | Extractors render vendor-specific data into the `BatteryDataset`, 6 | schemas describe its contents, 7 | and post-processing codes manipulate its datasets. 8 | 9 | 10 | Structure of a ``BatteryDataset`` 11 | --------------------------------- 12 | 13 | The :class:`~battdat.data.BatteryDataset` holds all information about a battery system together in the same Python object. 14 | Every dataset holds three attributes: 15 | 16 | #. :attr:`~battdat.data.BatteryDataset.metadata`: Information describing the source of the data 17 | (see `Source Metadata `_) 18 | #. :attr:`~battdat.data.BatteryDataset.tables`: A named collection of data tables as Pandas :class:`~pandas.DataFrame`. 19 | #. :attr:`~battdat.data.BatteryDataset.schemas`: Descriptions of the columns in each data table 20 | (see `Column Schema `_) 21 | 22 | The types of tables held in each dataset depend on the type of battery. 23 | Datasets describing a single cell may only include a single time series of the measurements, 24 | whereas a dataset describing an entire system may have time series for each cell in each module 25 | and those for multiple power conversion systems. 26 | 27 | Access the data tables within the dataset by indexing the dataset: 28 | 29 | .. code-block:: python 30 | 31 | dataset = BatteryDataset.from_hdf('example.h5') 32 | 33 | # These two ways for accessing a table are equivalent 34 | df = dataset['raw_data'] 35 | df = dataset.tables['raw_data'] 36 | df['voltage'].max() # Compute the maximum voltage 37 | 38 | 39 | Creating a ``BatteryDataset`` 40 | ----------------------------- 41 | 42 | Load data from another file format using battdat's `dataset readers `_. 43 | If there is no available reader, 44 | build one by passing a collection of tables as :class:`~pandas.DataFrame` and their schemas along with the metadata to the constructor. 45 | Once assembled, all component tables will be saved and loaded together. 46 | 47 | .. 
code-block:: python 48 | 49 | from battdat.schemas import BatteryMetadata 50 | from battdat.schemas.column import RawData 51 | from battdat.data import BatteryDataset 52 | 53 | metadata = BatteryMetadata(name='2_cell_module') 54 | col_schema = RawData() # Use the same schema for both tables 55 | dataset = BatteryDataset( 56 | data={'cell_1': cell1_df, 'cell_2': cell2_df}, 57 | schemas={'cell_1': col_schema, 'cell_2': col_schema}, 58 | metadata=metadata 59 | ) 60 | 61 | Columns of the dataframes can be any `NumPy data type `_ 62 | except timedeltas (m), timestamps (M), or voids (v). 63 | Battery data toolkit does not yet support storing these types in HDF5 or Parquet formats. 64 | Columns where all values are arrays of the same size are also supported. 65 | 66 | Check that your data and metadata agree using the :meth:`~battdat.data.BatteryDataset.validate` method. 67 | 68 | .. code-block:: python 69 | 70 | dataset.validate() 71 | 72 | The validate function will raise errors if the tables do not match the column schema 73 | and will return names of columns without descriptions, if desired. 74 | 75 | Factory Methods 76 | +++++++++++++++ 77 | 78 | :class:`~battdat.data.BatteryDataset` contains factory methods that build datasets from 79 | tables with pre-defined names and schemas. 80 | All are named ``make_*_dataset``. 81 | 82 | For example, :meth:`~battdat.data.BatteryDataset.make_cell_dataset` creates a dataset 83 | which represents a single-cell battery. 84 | 85 | .. code-block:: python 86 | 87 | from battdat.data import BatteryDataset 88 | 89 | dataset = BatteryDataset.make_cell_dataset(raw_data=df) 90 | 91 | Each table will be associated with a default schema. 92 | Describe columns not yet present in the schema by adding them after assembly: 93 | 94 | .. code-block:: python 95 | 96 | from battdat.schemas.column import ColumnInfo 97 | dataset.schemas['raw_data'].add_column( 98 | name='new_col', 99 | description='Information not already included in RawData', 100 | units='ohm', 101 | ) 102 | 103 | The current factory methods are: 104 | 105 | .. _type-table: 106 | 107 | .. list-table:: 108 | :header-rows: 1 109 | 110 | * - Method 111 | - Description 112 | * - :meth:`~battdat.data.BatteryDataset.make_cell_dataset` 113 | - Single battery cell with measurements of voltage, current, and other data at specific times 114 | or averaged over entire cycles. Tables (and their schemas) include: 115 | 116 | - ``raw_data`` (`RawData `_): Measurements of system state at specific points in time. 117 | - ``cycle_stats`` (`CycleLevelData `_): Descriptive statistics about state over entire cycles. 118 | - ``eis_data`` (`EISData `_): EIS measurements at different frequencies, over time. 119 | 120 | Loading and Saving 121 | ------------------ 122 | 123 | The battery data and metadata can be saved in a few different styles, each with different advantages. 124 | 125 | Functions to save are named ``to_[format]`` and 126 | functions for loading data are named ``from_[format]``. 127 | 128 | See the `formats `_ documentation page for more detail. 129 | 130 | Loading functions load the entire dataset. See `streaming `_ for 131 | how to load large datasets incrementally. 
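For example, a minimal save-and-reload round trip looks like the following sketch (the file name here is hypothetical):

.. code-block:: python

    # Save the assembled dataset to HDF5, then read it back (hypothetical path)
    dataset.to_hdf('module.h5')
    dataset = BatteryDataset.from_hdf('module.h5')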
132 | -------------------------------------------------------------------------------- /docs/user-guide/formats.rst: -------------------------------------------------------------------------------- 1 | File Formats 2 | ============ 3 | 4 | The battery data toolkit stores data and metadata in two formats: 5 | 6 | - *HDF5*: A format for saving all available information about a battery into a single file 7 | - *Parquet*: A format optimized for storing column data, but requires saving separate files for each type of data (cycle vs raw) 8 | 9 | .. contents:: 10 | :local: 11 | :depth: 1 12 | 13 | :class:`~battdat.data.BatteryDataset` objects support reading and writing these formats via ``to_[format]`` and ``from_[format]`` 14 | methods, such as :meth:`~battdat.data.BatteryDataset.to_hdf` and :meth:`~battdat.data.BatteryDataset.from_parquet`. 15 | 16 | .. _hdf5: 17 | 18 | HDF5 19 | ---- 20 | 21 | The `HDF5 format `_ stores array data as a nested series of dictionaries. 22 | ``battdat`` stores each type of data known about a battery in separate groups 23 | and the metadata for the battery as attributes of the root group. 24 | 25 | .. code-block:: python 26 | 27 | from tables import File 28 | import json 29 | 30 | with File('example.h5') as f: 31 | metadata = json.loads(f.root._v_attrs['metadata']) # Data describing the cell and how it was tested 32 | version = json.loads(f.root._v_attrs['battdat_version']) # BattDat version used to save dataset 33 | raw_data = f.root['raw_data'] # HDF5 group holding raw data 34 | schema = raw_data._v_attrs['metadata'] # Description of each column 35 | 36 | The internal structure of each group (e.g., ``f['raw_data']``) is that of 37 | the `PyTables Table format `_: 38 | a one-dimensional chunked array with a compound data type. 39 | 40 | .. dropdown:: HDF5 content 41 | 42 | .. code-block:: 43 | 44 | $ h5ls -rv single-resistor-complex-charge_from-discharged.hdf 45 | Opened ".\single-resistor-complex-charge_from-discharged.hdf" with sec2 driver. 
46 | / Group 47 | Attribute: CLASS scalar 48 | Type: 5-byte null-terminated UTF-8 string 49 | Attribute: PYTABLES_FORMAT_VERSION scalar 50 | Type: 3-byte null-terminated UTF-8 string 51 | Attribute: TITLE null 52 | Type: 1-byte null-terminated UTF-8 string 53 | Attribute: VERSION scalar 54 | Type: 3-byte null-terminated UTF-8 string 55 | Attribute: battdat_version scalar 56 | Type: 5-byte null-terminated UTF-8 string 57 | Attribute: json_schema scalar 58 | Type: 8816-byte null-terminated ASCII string 59 | Attribute: metadata scalar 60 | Type: 242-byte null-terminated UTF-8 string 61 | Location: 1:96 62 | Links: 1 63 | /raw_data Dataset {3701/Inf} 64 | Attribute: CLASS scalar 65 | Type: 5-byte null-terminated UTF-8 string 66 | Attribute: FIELD_0_FILL scalar 67 | Type: native double 68 | Attribute: FIELD_0_NAME scalar 69 | Type: 9-byte null-terminated UTF-8 string 70 | Attribute: FIELD_1_FILL scalar 71 | Type: native double 72 | Attribute: FIELD_1_NAME scalar 73 | Type: 7-byte null-terminated UTF-8 string 74 | Attribute: FIELD_2_FILL scalar 75 | Type: native double 76 | Attribute: FIELD_2_NAME scalar 77 | Type: 7-byte null-terminated UTF-8 string 78 | Attribute: FIELD_3_FILL scalar 79 | Type: native long long 80 | Attribute: FIELD_3_NAME scalar 81 | Type: 12-byte null-terminated UTF-8 string 82 | Attribute: NROWS scalar 83 | Type: native long long 84 | Attribute: TITLE null 85 | Type: 1-byte null-terminated UTF-8 string 86 | Attribute: VERSION scalar 87 | Type: 3-byte null-terminated UTF-8 string 88 | Attribute: json_schema scalar 89 | Type: 2824-byte null-terminated UTF-8 string 90 | Attribute: metadata scalar 91 | Type: 2824-byte null-terminated UTF-8 string 92 | Location: 1:10240 93 | Links: 1 94 | Chunks: {2048} 65536 bytes 95 | Storage: 118432 logical bytes, 6670 allocated bytes, 1775.59% utilization 96 | Filter-0: shuffle-2 OPT {32} 97 | Filter-1: deflate-1 OPT {9} 98 | Type: struct { 99 | "test_time" +0 native double 100 | "current" +8 native double 101 | "voltage" +16 native double 102 | "cycle_number" +24 native long long 103 | } 32 bytes 104 | 105 | Multiple Batteries per File 106 | +++++++++++++++++++++++++++ 107 | 108 | Data from multiple batteries can share a single HDF5 file as long as they share the same metadata. 109 | 110 | Add multiple batteries into an HDF5 file by providing a "prefix" to name each cell. 111 | 112 | .. code-block:: python 113 | 114 | test_a.to_hdf('test.h5', prefix='a') 115 | test_b.to_hdf('test.h5', prefix='b', overwrite=False) # overwrite=False is required when adding to an existing file 116 | 117 | 118 | Load a specific cell by providing a specific prefix on load 119 | 120 | .. code-block:: python 121 | 122 | test_a = BatteryDataset.from_hdf('test.h5', prefix='a') 123 | 124 | 125 | or load any of the included cells by providing an index 126 | 127 | .. code-block:: python 128 | 129 | test_a = BatteryDataset.from_hdf('test.h5', prefix=0) 130 | 131 | Load all cells by iterating over them: 132 | 133 | .. code-block:: python 134 | 135 | for name, cell in BatteryDataset.all_cells_from_hdf('test.h5'): 136 | do_some_processing(cell) 137 | 138 | 139 | Appending to an Existing File 140 | ++++++++++++++++++++++++++++++ 141 | 142 | The :class:`~battdat.io.hdf.HDF5Writer` class facilitates adding to existing datasets. 143 | Start by creating the writer with the desired compression settings: 144 | 145 | .. 
code-block:: python 146 | 147 | from battdat.io.hdf import HDF5Writer 148 | 149 | writer = HDF5Writer(complevel=9) 150 | 151 | Add a new table to an existing dataset with :meth:`~battdat.io.hdf.HDF5Writer.add_table`, 152 | which requires the name of a dataset and a `column schema `_. 153 | 154 | .. code-block:: python 155 | 156 | import pandas as pd 157 | import tables 158 | from battdat.schemas.column import ColumnSchema 159 | 160 | # Make dataset and column 161 | df = pd.DataFrame({'a': [1., 0.]}) 162 | schema = ColumnSchema() 163 | schema.add_column('a', 'A column') 164 | 165 | with tables.open_file('example.h5', mode='a') as file: 166 | writer.add_table(file, 'example_table', df, schema) 167 | 168 | Add data to an existing table with :meth:`~battdat.io.hdf.HDF5Writer.append_to_table` 169 | 170 | .. code-block:: python 171 | 172 | with tables.open_file('example.h5', mode='a') as file: 173 | writer.append_to_table(file, 'example_table', df) 174 | 175 | The appended data must match the existing table's columns exactly. 176 | Any compression settings or metadata from the existing table will be re-used. 177 | 178 | Parquet 179 | ------- 180 | 181 | The `Apache Parquet format `_ is designed for high-performance I/O of tabular data. 182 | ``battdat`` stores each type of data in a separate file and the metadata in `file-level metadata `_ 183 | of each file. 184 | 185 | .. code-block:: python 186 | 187 | from pyarrow import parquet as pq 188 | import json 189 | 190 | # Reading the metadata 191 | file_metadata = pq.read_metadata('raw_data.parquet') # Parquet metadata 192 | metadata = json.loads(file_metadata.metadata[b'battery_metadata']) # For the battery 193 | schema = json.loads(file_metadata.metadata[b'table_metadata']) # For the columns 194 | 195 | # Reading the data 196 | table = pq.read_table('raw_data.parquet') # In pyarrow's native Table format 197 | df = table.to_pandas() # As a dataframe 198 | 199 | The internal structure of a Parquet file saved by ``battdat`` has column names and data types which match those provided when saving the file. 200 | Any numeric types will be the same format (e.g., ``float32`` vs ``float64``) 201 | and times are stored as floating point numbers, rather than Parquet's time format. 202 | -------------------------------------------------------------------------------- /docs/user-guide/index.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | Start to learn the battery-data-toolkit by understanding the 5 | :class:`~battdat.data.BatteryDataset` object. 6 | Either continue with schemas if planning to make a new dataset, 7 | or post-processing if using already-existing data. 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | dataset 14 | io 15 | formats 16 | schemas/index 17 | post-processing/index 18 | consistency/index 19 | streaming 20 | -------------------------------------------------------------------------------- /docs/user-guide/io.rst: -------------------------------------------------------------------------------- 1 | Reading and Writing Datasets 2 | ============================ 3 | 4 | The :mod:`battdat.io` module provides tools to read and write :class:`~battdat.data.BatteryDataset` objects. 5 | 6 | .. 
list-table:: 7 | :align: center 8 | :header-rows: 1 9 | 10 | * - Format 11 | - Module 12 | - Reading 13 | - Writing 14 | * - Arbin 15 | - :mod:`~battdat.io.arbin` 16 | - ✔️ 17 | - ✖️ 18 | * - Battery Archive (https://www.batteryarchive.org) 19 | - :mod:`~battdat.io.ba` 20 | - ✖️ 21 | - ✔️ 22 | * - Battery Data Hub (https://batterydata.energy.gov) 23 | - :mod:`~battdat.io.batterydata` 24 | - ✔️ 25 | - ✖️ 26 | * - `HDF5 `_ 27 | - :mod:`~battdat.io.hdf` 28 | - ✔️ 29 | - ✔️ 30 | * - MACCOR 31 | - :mod:`~battdat.io.maccor` 32 | - ✔️ 33 | - ✖️ 34 | * - `Parquet `_ 35 | - :mod:`~battdat.io.parquet` 36 | - ✔️ 37 | - ✔️ 38 | 39 | 40 | .. note:: 41 | 42 | The parquet and HDF5 formats write to the `battery-data-toolkit file formats `_. 43 | 44 | Reading Data 45 | ------------ 46 | 47 | :class:`~battdat.io.base.DatasetReader` classes provide the ability to create a dataset 48 | through the :meth:`~battdat.io.base.DatasetReader.read_dataset` method. 49 | The inputs to ``read_dataset`` always include a :class:`~battdat.schemas.BatteryMetadata` object 50 | containing information beyond what is available in the files. 51 | 52 | Most :class:`~battdat.io.base.DatasetReader` classes read data from a filesystem and are based on :class:`~battdat.io.base.DatasetFileReader`. 53 | These readers take a list of paths to data files alongside the metadata and also include methods (e.g., :meth:`~battdat.io.base.DatasetFileReader.group`) to 54 | find files: 55 | 56 | .. code-block:: python 57 | 58 | from battdat.io.batterydata import BDReader 59 | 60 | extractor = BDReader(store_all=True) 61 | group = next(extractor.identify_files('./example-path/')) 62 | dataset = extractor.read_dataset(group) 63 | 64 | The :ref:`type of output dataset ` is defined by the :attr:`~battdat.io.base.DatasetFileReader.output_class` attribute. 65 | Most uses of readers do not require modifying this attribute. 66 | 67 | Writing Data 68 | ------------ 69 | 70 | :class:`~battdat.io.base.DatasetWriter` classes write :class:`battdat.data.BatteryDataset` objects into forms usable by other tools. 71 | 72 | For example, the :class:`~battdat.io.ba.BatteryArchiveWriter` converts the metadata into the schema used by `Battery Archive `_ 73 | and writes the data into the preferred format: CSV files no longer than 100k rows. 74 | 75 | 76 | .. code-block:: python 77 | 78 | from battdat.io.ba import BatteryArchiveWriter 79 | exporter = BatteryArchiveWriter() 80 | exporter.export(example_data, './to-upload') 81 | -------------------------------------------------------------------------------- /docs/user-guide/post-processing/figures/explain-capacities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/docs/user-guide/post-processing/figures/explain-capacities.png -------------------------------------------------------------------------------- /docs/user-guide/post-processing/index.rst: -------------------------------------------------------------------------------- 1 | Post-Processing 2 | =============== 3 | 4 | Most sources of battery data provide the voltage and current over time, 5 | but the other properties which are derived from them may be missing. 6 | The battery data toolkit provides "post-processing" classes which 7 | compute these derived data sources. 
8 | 9 | All post-processing tools are based on the :class:`~battdat.postprocess.base.BaseFeatureComputer` class 10 | and, as a result, provide a :meth:`~battdat.postprocess.base.BaseFeatureComputer.compute_features` function that adds 11 | new information to a battery dataset. 12 | Use them by first creating the tool and invoking that method with 13 | a :class:`~battdat.data.BatteryDataset`: 14 | 15 | .. code-block:: python 16 | 17 | computer = FeatureComputer() 18 | new_columns = computer.compute_features(data) 19 | 20 | New columns will be added to a part of the dataset (e.g., the cycle-level statistics) and those new columns 21 | will be returned from the function. 22 | 23 | The feature computers fall into two categories: 24 | 25 | - :class:`~battdat.postprocess.base.RawDataEnhancer`, which adds information to the raw data as a function of time 26 | - :class:`~battdat.postprocess.base.CycleSummarizer`, which summarizes the raw data and adds new columns to the ``cycle_stats`` 27 | 28 | 29 | .. note:: 30 | 31 | Post-processing assumes the table named ``raw_data`` follows the :class:`~battdat.schemas.column.RawData` schema. 32 | 33 | Integral Quantities 34 | ------------------- 35 | 36 | Functions which add columns associated with the accumulated values of data in other columns. 37 | 38 | .. toctree:: 39 | :maxdepth: 1 40 | 41 | cell-capacity 42 | 43 | 44 | Time 45 | ---- 46 | 47 | Compute columns which are derived fields associated with the relative time or timespans of data. 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | 52 | cycle-times 53 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/column-schema.rst: -------------------------------------------------------------------------------- 1 | Column Schemas 2 | ============== 3 | 4 | The contents of each data table available with a dataset are described using a :class:`~battdat.schemas.column.ColumnSchema`. 5 | The schema is a collection of :class:`~battdat.schemas.column.ColumnInfo` objects detailing each column, 6 | which includes 7 | 8 | 1. **Description**: An English description of the contents 9 | 2. **Type**: Type of each record (e.g., integer, string) 10 | 3. **Units**: Units for the values, if applicable 11 | 4. **Required**: Whether the column *must* be present in the table 12 | 5. **Monotonic**: Whether values never decrease between sequential rows 13 | 14 | Using a Column Schema 15 | --------------------- 16 | 17 | The :class:`~battdat.schemas.column.ColumnSchema` stored inside the `HDF5 and Parquet files <../formats.html>`_ 18 | provided by the battery data toolkit is used to describe existing data and to validate new data. 19 | 20 | List the column names with the :attr:`~battdat.schemas.column.ColumnSchema.columns` attribute 21 | and access information for a single column through the get item method: 22 | 23 | .. code-block:: python 24 | 25 | data = BatteryDataset.from_hdf(out_path) 26 | schema = data.schemas['eis_data'] # ColumnSchema for the ``eis_data`` table 27 | print(schema['test_id'].model_dump()) 28 | 29 | The above code prints the data for a specific column. 30 | 31 | .. code-block:: python 32 | 33 | {'required': True, 34 | 'type': , 35 | 'description': 'Integer used to identify rows belonging to the same experiment.', 36 | 'units': None, 37 | 'monotonic': False} 38 | 39 | 40 | Use the :meth:`~battdat.schemas.column.ColumnSchema.validate_dataframe` method to check 41 | whether a dataframe matches the requirements for each column. 
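A minimal sketch of that check, assuming ``data`` was loaded as above (the exact signature of ``validate_dataframe`` may differ):

.. code-block:: python

    # Check a table against its stored schema (sketch; signature assumed)
    schema = data.schemas['eis_data']
    schema.validate_dataframe(data.tables['eis_data'])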
42 | 43 | Pre-defined Schema 44 | ------------------ 45 | 46 | The battery-data-toolkit provides schemas for common types of data (e.g., cycling data for single cells, EIS). 47 | 48 | .. include:: rendered-column-schema.rst 49 | 50 | Defining a New Column Schema 51 | ---------------------------- 52 | 53 | Document a new type of data by either creating a subclass of :class:`~battdat.schemas.column.ColumnSchema` 54 | or adding individual columns to an existing schema. 55 | 56 | .. code-block:: python 57 | 58 | from battdat.schemas.column import RawData, ColumnInfo 59 | 60 | schema = RawData() # Schema for sensor measurements of cell 61 | schema.extra_columns['room_temp'] = ColumnInfo( 62 | description='Temperature of the room as measured by the HVAC system', 63 | units='C', data_type='float', 64 | ) 65 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/export-schemas.py: -------------------------------------------------------------------------------- 1 | """Write schemas to an RST-compatible table format""" 2 | from typing import TextIO, get_args 3 | 4 | from pydantic import BaseModel 5 | 6 | from battdat.schemas.column import RawData, CycleLevelData 7 | from battdat.schemas import BatteryMetadata, BatteryDescription, ModelMetadata, CyclingProtocol 8 | from battdat.schemas.eis import EISData 9 | 10 | print('Exporting column schemas to RST...') 11 | 12 | with open('rendered-column-schema.rst', 'w') as fp: 13 | for data_type in [RawData(), CycleLevelData(), EISData()]: 14 | class_name = data_type.__class__.__name__ 15 | print(f'``{class_name}``\n++{"+" * len(class_name)}++', file=fp) 16 | print(f'\n**Source Object**: :class:`{data_type.__module__}.{class_name}`\n', file=fp) 17 | print(f'\n{data_type.__doc__}\n', file=fp) 18 | 19 | print('.. list-table::', file=fp) 20 | print(' :header-rows: 1\n', file=fp) 21 | print(' * - Column', file=fp) 22 | print(' - Description', file=fp) 23 | print(' - Units', file=fp) 24 | for name, field in data_type.columns.items(): 25 | print(f' * - {name}', file=fp) 26 | print(f' - {field.description}', file=fp) 27 | print(f' - {field.units}', file=fp) 28 | print(file=fp) 29 | 30 | # Export the metadata schemas recursively 31 | print('Exporting metadata formats') 32 | 33 | 34 | def expand_terms(metadata_cls: type[BaseModel], fo: TextIO, recurse: bool): 35 | """Export the data in column format""" 36 | 37 | to_recurse = set() 38 | 39 | class_name = metadata_cls.__name__ 40 | print(f'``{class_name}``\n~~{"~" * len(class_name)}~~', file=fo) 41 | print(f'\n**Source Object**: :class:`{metadata_cls.__module__}.{class_name}`\n', file=fo) 42 | doc_string = "\n".join(map(str.strip, metadata_cls.__doc__.split("\n"))) 43 | print(f'\n{doc_string}\n', file=fo) 44 | 45 | print('.. 
list-table::', file=fo) 46 | print(' :header-rows: 1\n', file=fo) 47 | print(' * - Column', file=fo) 48 | print(' - Type', file=fo) 49 | print(' - Description', file=fo) 50 | print(' - Units', file=fo) 51 | print(' - Definition', file=fo) 52 | for name, field in metadata_cls.model_fields.items(): 53 | print(f' * - {name}', file=fo) 54 | 55 | # Expand the type annotation 56 | is_optional = not field.is_required() 57 | if (subtypes := get_args(field.annotation)) != (): 58 | is_optional = True 59 | print(f' - {", ".join(x.__name__ if isinstance(x, type(object)) else str(x) for x in subtypes if not x == type(None))}', file=fo) 60 | else: 61 | print(f' - {field.annotation.__name__}', file=fo) 62 | 63 | # Prepare to recurse 64 | for cls_type in [field.annotation, *subtypes]: 65 | if isinstance(cls_type, BaseModel.__class__): 66 | to_recurse.add(cls_type) 67 | 68 | print(f' - {"(**Required**) " if not is_optional else ""}{str(field.description)}', file=fo) 69 | 70 | # Print units 71 | if field.json_schema_extra is not None and (units := field.json_schema_extra.get('units')) is not None: 72 | print(f' - {units}', file=fo) 73 | else: 74 | print(' -', file=fo) 75 | 76 | # Print metadata source 77 | if field.json_schema_extra is not None and (iri := field.json_schema_extra.get('iri')) is not None: 78 | assert 'emmo' in iri.lower(), 'Found an IRI that is not from EMMO!?' 79 | print(f' - `EMMO <{iri}>`_', file=fo) 80 | else: 81 | print(' -', file=fo) 82 | 83 | print(file=fo) 84 | 85 | if recurse: 86 | for cls_type in to_recurse: 87 | expand_terms(cls_type, fo, recurse) 88 | 89 | 90 | with open('rendered-metadata-schema.rst', 'w', encoding='utf-8') as fp: 91 | print('High-level Data', file=fp) 92 | print('+++++++++++++++', file=fp) 93 | print('All metadata starts with the :class:`~battdat.schemas.BatteryMetadata` object.\n', file=fp) 94 | 95 | expand_terms(BatteryMetadata, fp, False) 96 | 97 | print('Describing Batteries', file=fp) 98 | print('++++++++++++++++++++', file=fp) 99 | print(':class:`~battdat.schemas.battery.BatteryDescription` and its related classes capture details about the structure of a battery.\n', file=fp) 100 | 101 | expand_terms(BatteryDescription, fp, True) 102 | 103 | print('Simulation Data', file=fp) 104 | print('+++++++++++++++', file=fp) 105 | print(':class:`~battdat.schemas.modeling.ModelMetadata` and its related classes capture details about data produced using computational methods.\n', file=fp) 106 | 107 | expand_terms(ModelMetadata, fp, True) 108 | 109 | print('Cycling Data', file=fp) 110 | print('++++++++++++', file=fp) 111 | print('Annotate how batteries were cycled following protocol description objects.\n', file=fp) 112 | 113 | expand_terms(CyclingProtocol, fp, True) 114 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/index.rst: -------------------------------------------------------------------------------- 1 | Describing Battery Data 2 | ======================= 3 | 4 | The metadata schemas used by ``battdat`` standardize how we describe the source of battery datasets 5 | and annotate what the data are. 6 | Metadata are held as part of the :class:`battdat.data.BatteryDataset` object and saved within the file formats 7 | produced by ``battdat`` to ensure that the provenance of a dataset is kept alongside the actual data. 8 | 9 | Descriptions are defined in two parts: 10 | 11 | 1. **Source Metadata**: Information about a battery dataset applicable to all measurements. 12 | 2. 
**Column Schemas**: Details about a specific table of measurements. 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: Contents: 17 | 18 | source-metadata 19 | column-schema 20 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/source-metadata.rst: -------------------------------------------------------------------------------- 1 | Source Metadata 2 | =============== 3 | 4 | "Source Metadata" captures high-level information about a battery dataset 5 | in the :class:`~battdat.schemas.BatteryMetadata` object. 6 | Information included in ``BatteryMetadata``, in contrast to `Column Schemas `_, is relevant to 7 | all measurements performed on a battery, such as: 8 | 9 | 1. The type of battery (e.g., NMC Li-ion, Pb acid) 10 | 2. The simulation code used, if the data is from a model 11 | 3. How the battery was cycled 12 | 4. The authors of the data and any related publications 13 | 14 | Metadata Structure 15 | ------------------ 16 | 17 | :class:`~battdat.schemas.BatteryMetadata` objects have a hierarchical structure where 18 | each record is composed of a single document that has fields which can correspond 19 | to single values, collections of values, or entire sub-documents. 20 | 21 | Create new metadata through the Python interface by first creating a ``BatteryMetadata`` object. 22 | 23 | .. code-block:: python 24 | 25 | from battdat.schemas import BatteryMetadata 26 | 27 | metadata = BatteryMetadata( 28 | name='test-cell', 29 | ) 30 | 31 | Different types of information are grouped together into subdocuments, 32 | such as details about the battery in :class:`~battdat.schemas.battery.BatteryDescription`: 33 | 34 | .. code-block:: python 35 | 36 | from battdat.schemas.battery import BatteryDescription 37 | from battdat.schemas import BatteryMetadata 38 | 39 | metadata = BatteryMetadata( 40 | name='test-cell', 41 | battery=BatteryDescription( 42 | manufacturer='famous', 43 | nominal_capacity=1., 44 | ) 45 | ) 46 | 47 | :class:`~battdat.schemas.BatteryMetadata` automatically validates inputs 48 | and can convert to and from JSON (thanks to `Pydantic `_!). 49 | 50 | See :mod:`battdat.schemas` for a full accounting of the available fields in our schema. 51 | 52 | .. note:: 53 | 54 | Validation only checks that already-defined fields are specified properly. 55 | Add metadata beyond what is described in battery-data-toolkit as desired. 56 | 57 | Source of Terminology 58 | --------------------- 59 | 60 | The `BattINFO ontology `_ is the core source of terms. 61 | 62 | Fields in the schema whose names correspond to a BattINFO term are marked 63 | with the "IRI" of the field, which points to a website containing the description. 64 | 65 | Fields whose values should be terms from the BattINFO ontology are marked with the root of the terms. 66 | For example, the ``model_type`` field of `ModelMetadata` can be any type of 67 | `MathematicalModel `_. 68 | Look them up using some utilities in ``battdat``. 69 | 70 | .. code-block:: python 71 | 72 | from battdat.schemas.ontology import gather_descendants 73 | 74 | print(gather_descendants('MathematicalModel')) 75 | 76 | 77 | .. note:: 78 | The schema will be a continual work in progress. 79 | Consider adding `an Issue `_ to the GitHub repository 80 | if you find you use a term often enough that it should be part of the schema. 81 | 82 | Metadata Objects 83 | ---------------- 84 | 85 | The battery-data-toolkit expresses the metadata schema using `Pydantic BaseModel objects `_. 86 | 87 | .. 
include:: rendered-metadata-schema.rst -------------------------------------------------------------------------------- /docs/user-guide/streaming.rst: -------------------------------------------------------------------------------- 1 | Streaming Battery Data 2 | ====================== 3 | 4 | Many battery datasets are too large to fit in memory in a single computer at once. 5 | Such data can be read or written incrementally using the streaming module of battery data toolkit, 6 | :mod:`battdat.streaming`. 7 | 8 | Reading Data as a Stream 9 | ------------------------ 10 | 11 | The battery-data-toolkit allows streaming the raw time series data from an :ref:`HDF5 file format `. 12 | 13 | Stream the data either as individual rows or all rows belonging to each cycle 14 | with the :func:`~battdat.streaming.iterate_records_from_file` 15 | or :func:`~battdat.streaming.iterate_cycles_from_file` functions. 16 | 17 | Both functions produce `a Python generator `_ 18 | which retrieves a chunk of data from the HDF5 file incrementally and can be used to retrieve rows individually 19 | 20 | .. code-block:: python 21 | 22 | row_iter = iterate_records_from_file('example.h5') 23 | row = next(row_iter) 24 | do_something_per_timestep(row) 25 | 26 | or as part of a for loop. 27 | 28 | .. code-block:: python 29 | 30 | for cycle in iterate_cycles_from_file('example.h5'): 31 | do_something_per_cycle(cycle) 32 | 33 | Reading full cycles from a file can produce either a single :class:`~pandas.DataFrame` when reading a single table, 34 | a dictionary of ``DataFrames``, or a full :class:`~battdat.data.BatteryDataset` depending on the 35 | options for ``key`` and ``make_dataset``. 36 | 37 | .. code-block:: python 38 | 39 | # Read as a single DataFrame 40 | df = next(iterate_cycles_from_file('example.h5', key='raw_data')) 41 | 42 | # Read multiple tables as a dictionary 43 | dict_of_df = next(iterate_cycles_from_file('example.h5', key=['raw_data', 'cycle_stats'])) 44 | 45 | # Read all tables as a Dataset 46 | dataset = next(iterate_cycles_from_file('example.h5', key=None, make_dataset=True)) 47 | 48 | 49 | Streaming Data to a File 50 | ------------------------ 51 | 52 | Write large datasets into battery-data-toolkit-compatible formats incrementally using the :class:`~battdat.streaming.hdf5.HDF5Writer`. 53 | 54 | Start the writer class by providing the path to the HDF5 file and the metadata to be written, 55 | then opening it via Python's ``with`` syntax. 56 | 57 | .. code-block:: python 58 | 59 | metadata = BatteryMetadata(name='example') 60 | with HDF5Writer('streamed.h5', metadata=metadata) as writer: 61 | for time, current, voltage in data_stream: 62 | writer.write_row({'test_time': time, 'current': current, 'voltage': voltage}) 63 | 64 | The writer only writes to disk after enough rows are collected or the end of a data stream is signaled by exiting the ``with`` block. 65 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Example Notebooks 2 | 3 | Notebooks showing specific features of battdat. 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "battery-data-toolkit" 3 | dynamic = ["version"] 4 | description = "Utilities for reading and manipulating battery testing data" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = { file = 'LICENSE' } 8 | keywords = ["batteries", "science", "data science"] 9 | authors = [ 10 | { name = "Logan Ward", email = "lward@anl.gov" }, 11 | { name = "Noah Paulson", email = "lward@anl.gov" }, 12 | { name = "Joseph Kubal", email = "kubal@anl.gov" }, 13 | ] 14 | dependencies = [ 15 | "pandas > 1.0", 16 | "scipy > 1.3", 17 | "pydantic == 2.*", 18 | "tables > 3.6", 19 | "pyarrow >= 15", 20 | "EMMOntoPy", 21 | "xlrd" 22 | ] 23 | classifiers = [ 24 | "Development Status :: 4 - Beta", 25 | "Intended Audience :: Science/Research", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: OS Independent", 28 | "Topic :: File Formats", 29 | "Topic :: Scientific/Engineering" 30 | ] 31 | 32 | [tool.setuptools.packages.find] 33 | include = ["battdat*"] 34 | 35 | [build-system] 36 | requires = ["setuptools>=64", "setuptools-scm>=8"] 37 | build-backend = "setuptools.build_meta" 38 | 39 | [tool.setuptools_scm] 40 | 41 | [project.optional-dependencies] 42 | test = [ 43 | 'flake8', 44 | 'pytest', 45 | 'pytest-cov' 46 | ] 47 | docs = [ 48 | 'sphinx', 49 | 'pydata-sphinx-theme', 50 | 'nbsphinx', 51 | 'autodoc-pydantic', 52 | 'sphinx-design' 53 | ] 54 | 55 | [project.urls] 56 | repository = "https://github.com/rovi-org/battery-data-toolkit" 57 | 58 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [tool:pytest] 5 | addopts = --ignore=setup.py --cov=battdat 6 | 7 | [flake8] 8 | exclude = .git,*.egg* 9 | max-line-length = 160 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pytest import fixture 4 | 5 | from battdat.data import BatteryMetadata, BatteryDataset 6 | from battdat.postprocess.timing import CycleTimesSummarizer 7 | 8 | 9 | @fixture() 10 | def file_path() -> Path: 11 | """Path to test-related files""" 12 | return Path(__file__).parent / 'files' 13 | 14 | 15 | @fixture() 16 | def example_data(file_path) -> BatteryDataset: 17 | """An example dataset which contains metadata and a few cycles of data""" 18 | 19 | # Load the simple cycling 20 | path = file_path / 'example-data' / 'single-resistor-constant-charge_from-discharged.hdf' 21 | data = BatteryDataset.from_hdf(path) 22 | 23 | # Compute basic cycling states 24 | for stats in [CycleTimesSummarizer()]: 25 | stats.compute_features(data) 26 | 27 | # Give the cell a name, at least 28 | data.metadata = BatteryMetadata(name='test') 29 | return data 30 | -------------------------------------------------------------------------------- /tests/consistency/test_sign.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pytest import fixture 4 | 5 | from battdat.data import BatteryDataset 6 | from battdat.consistency.current import SignConventionChecker 7 | 8 | 9 | @fixture() 10 | def 
example_dataset(): 11 | # Make a rest period followed by a charge where the voltage increases 12 | times = np.linspace(0, 1800, 256) 13 | current = np.zeros_like(times) 14 | current[128:] = 1. 15 | 16 | voltage = np.ones_like(times) 17 | voltage[128:] = np.linspace(1., 1.3, 128) 18 | 19 | return BatteryDataset.make_cell_dataset( 20 | raw_data=pd.DataFrame({ 21 | 'test_time': times, 22 | 'current': current, 23 | 'voltage': voltage 24 | }) 25 | ) 26 | 27 | 28 | def test_sign_checker(example_dataset): 29 | checker = SignConventionChecker() 30 | result = checker.check(example_dataset) 31 | assert len(result) == 0 32 | 33 | # Make sure swapping the sign breaks things 34 | example_dataset.tables['raw_data']['current'] *= -1 35 | result = checker.check(example_dataset) 36 | assert len(result) == 1 37 | -------------------------------------------------------------------------------- /tests/exporters/test_ba.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from datetime import datetime 3 | import json 4 | 5 | import pandas as pd 6 | 7 | from battdat.io.ba import BatteryArchiveWriter 8 | from battdat.schemas import BatteryMetadata, BatteryDescription 9 | from battdat.schemas.battery import ElectrodeDescription 10 | 11 | 12 | def test_export(example_data, tmpdir): 13 | # Add a datetime 14 | raw_data = example_data.tables['raw_data'] 15 | raw_data['time'] = raw_data['test_time'] + datetime(year=2024, month=7, day=1).timestamp() 16 | 17 | # Add some metadata to the file 18 | example_data.metadata = BatteryMetadata( 19 | battery=BatteryDescription( 20 | anode=ElectrodeDescription(name='graphite', supplier='big-one'), 21 | cathode=ElectrodeDescription(name='nmc') 22 | ) 23 | ) 24 | 25 | tmpdir = Path(tmpdir) 26 | tmpdir.mkdir(exist_ok=True) 27 | exporter = BatteryArchiveWriter() 28 | exporter.export(example_data, tmpdir) 29 | 30 | # Make sure the time series loaded correctly 31 | timeseries_path = tmpdir.joinpath('cycle-timeseries-0.csv') 32 | assert timeseries_path.is_file() 33 | timeseries = pd.read_csv(timeseries_path) 34 | assert 'v' in timeseries # Make sure a conversion occurred correctly 35 | assert 'cell_id' in timeseries 36 | assert timeseries['date_time'].iloc[0] == '07/01/2024 00:00:00.000000' 37 | assert timeseries['cycle_index'].iloc[1] == 1 38 | 39 | # Check that metadata was written 40 | metadata = json.loads(tmpdir.joinpath('metadata.json').read_text()) 41 | assert metadata['cathode'] == '{"name":"nmc"}' 42 | 43 | # Make sure the cycle statistics are written 44 | cycle_stats = pd.read_csv(tmpdir.joinpath('cycle-stats.csv')) 45 | assert cycle_stats['cycle_index'].iloc[0] == 1 46 | -------------------------------------------------------------------------------- /tests/files/batteryarchive/CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_cycle_data.csv: -------------------------------------------------------------------------------- 1 | Cycle_Index,Start_Time,End_Time,Test_Time (s),Min_Current (A),Max_Current (A),Min_Voltage (V),Max_Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh) 2 | 1.0,,,12923.434,-0.674,1.124,2.699,4.2,0.604,1.292,2.5,4.758 3 | 2.0,,,29533.288,-0.674,1.141,2.699,4.2,1.291,1.295,5.184,4.781 4 | 3.0,,,46063.491,-0.674,1.145,2.699,4.2,1.296,1.295,5.198,4.785 5 | 4.0,,,62601.426,-0.674,1.13,2.699,4.2,1.293,1.29,5.189,4.757 6 | 5.0,,,79120.977,-0.674,1.148,2.699,4.2,1.289,1.291,5.175,4.764 7 | 6.0,,,95621.926,-0.674,1.137,2.699,4.2,1.291,1.29,5.183,4.761 8 | 
7.0,,,112147.916,-0.674,1.139,2.699,4.2,1.289,1.291,5.175,4.767 9 | 8.0,,,128609.889,-0.674,1.147,2.699,4.2,1.292,1.292,5.182,4.775 10 | 9.0,,,145061.227,-0.674,1.131,2.699,4.2,1.292,1.291,5.18,4.773 11 | -------------------------------------------------------------------------------- /tests/files/batteryarchive/CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_timeseries.csv: -------------------------------------------------------------------------------- 1 | Date_Time,Test_Time (s),Cycle_Index,Current (A),Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Environment_Temperature (C),Cell_Temperature (C) 2 | 2010-09-02 14:35:40,30.009,1.0,0.0,3.843,0.0,0.0,0.0,0.0,, 3 | 2010-09-02 14:36:10,60.025,1.0,0.0,3.844,0.0,0.0,0.0,0.0,, 4 | 2010-09-02 14:36:40,90.04,1.0,0.0,3.843,0.0,0.0,0.0,0.0,, 5 | 2010-09-02 14:37:10,120.008,1.0,0.0,3.844,0.0,0.0,0.0,0.0,, 6 | 2010-09-02 14:37:40,150.024,1.0,0.674,3.963,0.002,0.0,0.01,0.0,, 7 | 2010-09-02 14:38:10,180.039,1.0,0.674,3.98,0.008,0.0,0.033,0.0,, 8 | 2010-09-02 14:38:40,210.055,1.0,0.674,3.993,0.014,0.0,0.055,0.0,, 9 | 2010-09-02 14:39:10,240.069,1.0,0.675,4.003,0.019,0.0,0.078,0.0,, 10 | 2010-09-02 14:39:40,270.084,1.0,0.675,4.011,0.025,0.0,0.1,0.0,, 11 | 2010-09-02 14:40:10,300.1,1.0,0.675,4.016,0.03,0.0,0.123,0.0,, 12 | 2010-09-02 14:40:40,330.115,1.0,0.675,4.02,0.036,0.0,0.146,0.0,, 13 | 2010-09-02 14:41:10,360.129,1.0,0.674,4.024,0.042,0.0,0.168,0.0,, 14 | 2010-09-02 14:41:40,390.13,1.0,0.675,4.027,0.047,0.0,0.191,0.0,, 15 | 2010-09-02 14:42:10,420.145,1.0,0.674,4.03,0.053,0.0,0.213,0.0,, 16 | 2010-09-02 14:42:40,450.16,1.0,0.675,4.032,0.059,0.0,0.236,0.0,, 17 | -------------------------------------------------------------------------------- /tests/files/batterydata/.gitattributes: -------------------------------------------------------------------------------- 1 | *.csv binary 2 | -------------------------------------------------------------------------------- /tests/files/example-data/README.md: -------------------------------------------------------------------------------- 1 | # Example Datasets for Post-Processing 2 | 3 | These examples create example battery-data-toolkit-format data which allow us to test post-processing code on idealized data. 4 | Each notebook produces an HDF5 file when run. 
5 | -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-complex-charge_from-discharged.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-complex-charge_from-discharged.hdf -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-complex-charge_from-discharged/raw_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-complex-charge_from-discharged/raw_data.parquet -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-constant-charge_from-charged.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-constant-charge_from-charged.hdf -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-constant-charge_from-discharged.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-constant-charge_from-discharged.hdf -------------------------------------------------------------------------------- /tests/files/maccor_example.001: -------------------------------------------------------------------------------- 1 | Today's Date 04/04/2016 Date of Test: 03/31/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. 
Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV) 2 | Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time 3 | 1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 03/31/2016 16:05:31 4 | 2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 03/31/2016 16:05:41 5 | 3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 03/31/2016 16:05:51 6 | 4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 03/31/2016 16:06:01 7 | 5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 03/31/2016 16:06:11 8 | 6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 03/31/2016 16:06:21 9 | 7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 03/31/2016 16:06:31 10 | 8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 03/31/2016 16:06:41 11 | -------------------------------------------------------------------------------- /tests/files/maccor_example.002: -------------------------------------------------------------------------------- 1 | Today's Date 04/04/2016 Date of Test: 04/01/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV) 2 | Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time 3 | 1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 16:05:31 4 | 2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:41 5 | 3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:51 6 | 4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:01 7 | 5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:11 8 | 6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:21 9 | 7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:31 10 | 8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 16:06:41 11 | -------------------------------------------------------------------------------- /tests/io/test_arbin.py: -------------------------------------------------------------------------------- 1 | """Tests related to the Arbin parser""" 2 | 3 | from battdat.io.arbin import ArbinReader 4 | 5 | 6 | def test_validation(file_path): 7 | """Make sure the parser generates valid outputs""" 8 | arbin = ArbinReader() 9 | test_file = file_path / 'arbin_example.csv' 10 | data = arbin.read_dataset([test_file]) 11 | data.validate_columns(allow_extra_columns=False) 12 | -------------------------------------------------------------------------------- /tests/io/test_batterydata.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from pytest import fixture 4 | 5 | from battdat.io.batterydata import BDReader, generate_metadata 6 | 7 | example_metadata = {'cell_type': ['Pouch cell'], 8 | 'creator_user_id': 'a853d711-0e37-44c9-80c9-a41d450c2da4', 9 | 'date_dataset_created': '2018-08-16', 10 | 'electrolyte_class_dataset': ['Organic liquid'], 11 | 'id': 'ef9dec93-17a2-445a-b58e-dc3eadb1f79d', 12 | 'isopen': False, 13 | 'manufacturer_supplier': 'CAMP', 14 | 
'maximum_voltage': '4.1', 15 | 'metadata_created': '2024-04-19T21:18:38.938069', 16 | 'metadata_modified': '2024-04-20T00:45:59.866451', 17 | 'minimum_voltage': '3', 18 | 'name': 'xcel-round-2-slpc_reupload_2', 19 | 'negative_electrode': ['Graphite'], 20 | 'nominal_cell_capacity': '0.037', 21 | 'notes': 'Single layer pouch cell from CAMP (2.5mAh/cm2) at various charge protocols (CCCV and Multi-step).', 22 | 'num_resources': 35, 23 | 'num_tags': 9, 24 | 'onec_cell_capacity': '0.032', 25 | 'organization': {'id': '67de8624-a528-43df-9b63-a65a410920bb', 26 | 'name': 'xcel', 27 | 'title': 'XCEL', 28 | 'type': 'project', 29 | 'description': 'XCEL Project ', 30 | 'image_url': '', 31 | 'created': '2023-06-08T17:38:37.007623', 32 | 'is_organization': True, 33 | 'approval_status': 'approved', 34 | 'state': 'active'}, 35 | 'owner_org': '67de8624-a528-43df-9b63-a65a410920bb', 36 | 'poc_email_address': 'Sangwook.Kim@inl.gov', 37 | 'poc_institution': ['INL'], 38 | 'poc_name': 'skim', 39 | 'positive_electrode': ['NMC532'], 40 | 'private': False, 41 | 'reference_electrode': ['No'], 42 | 'separator_class': ['PP polymer'], 43 | 'state': 'active', 44 | 'technology': ['Li-ion'], 45 | 'title': 'XCEL Round 2 SLPC', 46 | 'type': 'dataset', 47 | 'tags': [{'display_name': 'fast charge', 48 | 'id': '04f1dafd-24f0-496e-b263-96038a9da8f8', 49 | 'name': 'fast charge', 50 | 'state': 'active', 51 | 'vocabulary_id': None}]} 52 | 53 | 54 | @fixture() 55 | def test_files(file_path): 56 | return file_path / 'batterydata' 57 | 58 | 59 | def test_detect_then_convert(test_files): 60 | # Find two files 61 | extractor = BDReader(store_all=False) 62 | group = next(extractor.identify_files(test_files)) 63 | assert len(group) == 2 64 | 65 | # Parse them 66 | data = extractor.read_dataset(group) 67 | assert data.metadata.name == 'p492-13' 68 | 69 | # Test a few of columns which require conversion 70 | assert data.raw_data['cycle_number'].max() == 8 71 | first_measurement = datetime.fromtimestamp(data.raw_data['time'].iloc[0]) 72 | assert first_measurement.year == 2020 73 | assert first_measurement.day == 3 74 | 75 | # Ensure it validates 76 | data.validate() 77 | 78 | 79 | def test_store_all(test_files): 80 | """Make sure we get exactly one copy of all columns""" 81 | 82 | # Find two files 83 | extractor = BDReader(store_all=True) 84 | group = next(extractor.identify_files(test_files)) 85 | data = extractor.read_dataset(group) 86 | 87 | # Make sure we only have the renamed `cycle_number` and not original `Cycle_Index` 88 | for df in [data.raw_data, data.cycle_stats]: 89 | assert 'cycle_number' in df.columns 90 | assert 'Cycle_Index' not in df.columns 91 | 92 | # Make sure NREL-specific columns are stored 93 | assert 'datenum_d' in data.cycle_stats.columns 94 | assert 'Charge_Throughput_Ah' in data.raw_data.columns 95 | 96 | 97 | def test_metadata(): 98 | metadata = generate_metadata(example_metadata, ('https://test.url/',)) 99 | assert 'test.url' == metadata.associated_ids[0].host 100 | assert metadata.battery.cathode.name == 'NMC532' 101 | -------------------------------------------------------------------------------- /tests/io/test_cell_consistency.py: -------------------------------------------------------------------------------- 1 | """Run consistency checks for data corresponding to cells""" 2 | from battdat.consistency.current import SignConventionChecker 3 | 4 | from pytest import mark 5 | 6 | from battdat.io.arbin import ArbinReader 7 | from battdat.io.batterydata import BDReader 8 | from battdat.io.hdf import HDF5Reader 9 
| 10 | checkers = [ 11 | SignConventionChecker() 12 | ] 13 | 14 | 15 | @mark.parametrize( 16 | 'reader,example_data', 17 | [(ArbinReader(), ['arbin_example.csv']), 18 | (BDReader(), ['batterydata/p492-13-raw.csv']), 19 | (HDF5Reader(), 'example-data/single-resistor-complex-charge_from-discharged.hdf')] 20 | ) 21 | def test_consistency(reader, example_data, file_path): 22 | dataset = reader.read_dataset( 23 | [file_path / p for p in example_data] if isinstance(example_data, list) else file_path / example_data 24 | ) 25 | for checker in checkers: 26 | warnings = checker.check(dataset) 27 | assert len(warnings) == 0, warnings 28 | -------------------------------------------------------------------------------- /tests/io/test_hdf.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pytest import raises, mark 4 | import numpy as np 5 | import pandas as pd 6 | import tables 7 | 8 | from battdat.data import BatteryDataset 9 | from battdat.io.hdf import make_numpy_dtype_from_pandas, write_df_to_table, read_df_from_table, HDF5Writer, HDF5Reader 10 | from battdat.schemas.column import ColumnSchema 11 | 12 | example_df = pd.DataFrame({'a': [1, 2], 'b': [1., 3.], 'c': ['charge', 'discharge'], 'array': [[[1.]], [[0.]]]}) 13 | 14 | 15 | def test_dtype(): 16 | dtype = make_numpy_dtype_from_pandas(example_df) 17 | assert dtype.names == ('a', 'b', 'c', 'array') 18 | assert dtype['array'].shape == (1, 1) 19 | 20 | 21 | def test_store_df(tmpdir): 22 | with tables.open_file(tmpdir / "example.h5", "w") as file: 23 | group = file.create_group('/', name='base') 24 | table = write_df_to_table(file, group, 'table', example_df) 25 | assert tuple(table[0]) == (1, 1., b'charge', np.ones((1, 1))) 26 | 27 | with tables.open_file(tmpdir / "example.h5", "r") as file: 28 | table = file.get_node('/base/table') 29 | df_copy = read_df_from_table(table) 30 | assert (df_copy.columns == ['a', 'b', 'c', 'array']).all() 31 | assert np.allclose(df_copy['b'], [1., 3.]) 32 | 33 | 34 | def test_read_with_other_tables(tmpdir): 35 | writer = HDF5Writer() 36 | out_file = Path(tmpdir) / 'example.h5' 37 | 38 | # Write the same table through the writer (which puts metadata) and through the basic function (which does not) 39 | with tables.open_file(out_file, mode='w') as file: 40 | dataset = BatteryDataset(tables={'example_table': example_df}, 41 | schemas={'example_table': ColumnSchema()}) 42 | writer.write_to_hdf(dataset, file, None) 43 | write_df_to_table(file, file.root, 'extra_table', example_df) 44 | 45 | # Reading should only yield one table 46 | with tables.open_file(out_file) as file: 47 | dataset = HDF5Reader().read_from_hdf(file, None) 48 | assert set(dataset.tables.keys()) == {'example_table'} 49 | 50 | # Ensure error is raised if the schema is corrupted 51 | with tables.open_file(out_file, mode='a') as file: 52 | table = file.root['example_table'] 53 | for corrupted in ("asdf", '{"a": 1}'): 54 | table._v_attrs['metadata'] = corrupted 55 | with raises(ValueError, match='marked as a battdat dataset but schema fails to read'): 56 | HDF5Reader().read_from_hdf(file, None) 57 | 58 | 59 | @mark.parametrize('prefix', [None, 'a']) 60 | def test_append(tmpdir, prefix): 61 | writer = HDF5Writer() 62 | out_file = Path(tmpdir) / 'example.h5' 63 | 64 | # Write the initial data 65 | with tables.open_file(out_file, mode='w') as file: 66 | if prefix is not None: 67 | file.create_group(file.root, prefix) 68 | 69 | writer.add_table(file, 'example_table', example_df, 
ColumnSchema(), prefix) 70 | 71 | # Append the data again 72 | with tables.open_file(out_file, mode='a') as file: 73 | writer.append_to_table(file, 'example_table', example_df, prefix) 74 | 75 | table = file.get_node('/example_table' if prefix is None else f'/{prefix}/example_table') 76 | df_copy = read_df_from_table(table) 77 | assert len(df_copy) == len(example_df) * 2 78 | assert np.allclose(df_copy['a'], [1, 2, 1, 2]) 79 | assert np.equal(df_copy['c'], ['charge', 'discharge'] * 2).all() 80 | 81 | # Test data check 82 | with raises(ValueError, match='Existing and new'): 83 | writer.append_to_table(file, 'example_table', pd.DataFrame({'a': [1., 2.]}), prefix) 84 | 85 | # Test bad prefix 86 | with raises(ValueError, match='No data available for prefix'): 87 | writer.append_to_table(file, 'example_table', pd.DataFrame({'a': [1., 2.]}), prefix='b') 88 | 89 | 90 | def test_df_missing_strings(tmpdir): 91 | df = pd.DataFrame({'a': [None, 'a', 'bb']}) 92 | assert df.dtypes['a'] == object 93 | with tables.open_file(tmpdir / "example.h5", "w") as file: 94 | group = file.create_group('/', name='base') 95 | table = write_df_to_table(file, group, 'table', df) 96 | assert tuple(table[-1]) == (b'bb',) 97 | 98 | 99 | def test_df_strings(tmpdir): 100 | df = pd.DataFrame({'a': ['ccc', 'a', 'bb']}) 101 | assert df.dtypes['a'] == object 102 | with tables.open_file(tmpdir / "example.h5", "w") as file: 103 | group = file.create_group('/', name='base') 104 | table = write_df_to_table(file, group, 'table', df) 105 | assert tuple(table[-1]) == (b'bb',) 106 | assert tuple(table[0]) == (b'ccc',) 107 | 108 | 109 | def test_df_lists(tmpdir): 110 | df = pd.DataFrame({'a': [[1., 1.], [2., 2.]]}) 111 | assert df.dtypes['a'] == object 112 | with tables.open_file(tmpdir / "example.h5", "w") as file: 113 | group = file.create_group('/', name='base') 114 | table = write_df_to_table(file, group, 'table', df) 115 | assert np.array_equal(table[-1]['a'], [2., 2.]) 116 | -------------------------------------------------------------------------------- /tests/io/test_maccor.py: -------------------------------------------------------------------------------- 1 | """Tests related to the MACCOR parser""" 2 | from datetime import datetime 3 | from pytest import fixture, raises 4 | 5 | from battdat.io.maccor import MACCORReader 6 | 7 | 8 | @fixture() 9 | def test_file(file_path): 10 | return file_path / 'maccor_example.001' 11 | 12 | 13 | @fixture() 14 | def extractor(): 15 | return MACCORReader() 16 | 17 | 18 | def test_validation(extractor, test_file): 19 | """Make sure the parser generates valid outputs""" 20 | data = extractor.read_dataset([test_file]) 21 | data.validate_columns(allow_extra_columns=False) 22 | 23 | 24 | def test_grouping(extractor, tmp_path): 25 | # Make a file structure with two sets of experiments and a nonsense file 26 | for f in ['README', 'testA.002', 'testA.001', 'testB.001']: 27 | (tmp_path / f).write_text('junk') 28 | 29 | # Test the grouping 30 | groups = list(extractor.identify_files(tmp_path)) 31 | assert len(groups) == 2 32 | assert (str(tmp_path / 'testA.001'), str(tmp_path / 'testA.002')) in groups 33 | assert (str(tmp_path / 'testB.001'),) in groups 34 | 35 | 36 | def test_date_check(extractor, test_file): 37 | files = [test_file, test_file.with_suffix('.002')] 38 | data = extractor.read_dataset(files) 39 | data.validate() 40 | assert data.raw_data['file_number'].max() == 1 41 | 42 | with raises(ValueError, match='not in the correct order'): 43 | extractor.read_dataset(files[::-1]) 44 | 45 | 46 | 
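# The two MACCOR fixtures above cover both timestamp layouts: maccor_example.001
# records a full date and time per row ("03/31/2016 16:05:31"), while
# maccor_example.002 records only the time of day, so the date presumably has to
# be recovered from the file's "Date of Test" header. The checks below confirm
# both layouts yield sensible datetimes and that `ignore_time` drops the column.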
def test_time_parser(extractor, test_file): 47 | # With date and time in the time column 48 | df = extractor.read_file(test_file) 49 | assert datetime.fromtimestamp(df['time'].iloc[0]).month == 3 50 | 51 | # With only the time in the time column 52 | df = extractor.read_file(test_file.with_suffix('.002')) 53 | assert datetime.fromtimestamp(df['time'].iloc[0]).month == 4 54 | 55 | # Ignoring datetime 56 | extractor.ignore_time = True 57 | df = extractor.read_file(test_file) 58 | assert 'time' not in df.columns 59 | -------------------------------------------------------------------------------- /tests/postprocess/test_integral.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | from pytest import mark 5 | import numpy as np 6 | 7 | from battdat.data import BatteryDataset 8 | from battdat.io.batterydata import BDReader 9 | from battdat.postprocess.integral import CapacityPerCycle, StateOfCharge 10 | 11 | 12 | def get_example_data(file_path: Path, from_charged: bool) -> BatteryDataset: 13 | ex_file = file_path / 'example-data' / f'single-resistor-constant-charge_from-{"" if from_charged else "dis"}charged.hdf' 14 | return BatteryDataset.from_hdf(ex_file) 15 | 16 | 17 | def test_short_cycles(): 18 | """Make sure cycles that are too short for capacity measurements do not cause errors""" 19 | 20 | example_data = BatteryDataset.make_cell_dataset( 21 | raw_data=pd.DataFrame({'time': range(2), 'current': [1.] * 2, 'voltage': [2.] * 2, 'cycle_number': [0] * 2}) 22 | ) 23 | CapacityPerCycle().compute_features(example_data) 24 | assert np.isnan(example_data.tables['cycle_stats']['capacity_charge']).all() 25 | 26 | 27 | @mark.parametrize('from_charged', [True, False]) 28 | def test_cycle_stats(file_path, from_charged): 29 | example_data = get_example_data(file_path, from_charged) 30 | feat = CapacityPerCycle().compute_features(example_data) 31 | assert np.isclose([0], feat['cycle_number']).all() 32 | 33 | # Capacity is 1 A-hr 34 | assert np.isclose([1.0], feat['capacity_discharge'], rtol=1e-2).all() 35 | assert np.isclose([1.0], feat['capacity_charge'], rtol=1e-2).all() 36 | 37 | # Energy to charge is (2.1 V + 3.1 V) / 2 * 1 A * 3600 s = 9360 J 38 | # Energy produced during discharge is (1.9 V + 2.9 V) / 2 * 1 A * 3600 s = 8640 J 39 | assert np.isclose([9360. / 3600], feat['energy_charge'], rtol=1e-2).all() 40 | assert np.isclose([8640. / 3600], feat['energy_discharge'], rtol=1e-2).all()
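# (These numbers follow from the single-resistor fixture: the open-circuit
# voltage apparently ramps linearly from 2.0 to 3.0 V over the 1 A-hr window,
# and the 1 A current through a 0.1 ohm resistor shifts the terminal voltage
# by 0.1 V, so charge sweeps 2.1 to 3.1 V and discharge sweeps 2.9 to 1.9 V,
# giving mean powers of 2.6 W and 2.4 W.)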
41 | 42 | 43 | @mark.parametrize('from_charged', [True, False]) 44 | def test_capacity(file_path, from_charged): 45 | example_data = get_example_data(file_path, from_charged) 46 | soc = StateOfCharge() 47 | raw_data = example_data.tables['raw_data'] 48 | soc.enhance(raw_data) 49 | 50 | assert all(c in raw_data for c in soc.column_names) 51 | assert not any(raw_data[c].isna().any() for c in soc.column_names) 52 | 53 | # The first value of each cycle should be 0 54 | assert np.isclose(raw_data.drop_duplicates('cycle_number', keep='first')[soc.column_names], 0).all() 55 | 56 | # The final cycled charge should be zero for our test cases 57 | assert np.isclose(raw_data['cycled_charge'].iloc[-1], 0., atol=1e-3) 58 | 59 | # The capacity for the first few steps should be I*t/3600s 60 | first_steps = raw_data.iloc[:3] 61 | current = first_steps['current'].iloc[0] 62 | assert np.isclose(first_steps['cycled_charge'], current * first_steps['test_time'] / 3600).all() 63 | 64 | # The energy for the first few steps should be 65 | # discharging = I * \int_0^t (2.9 - t/3600) = I * (2.9t - t^2/7200) 66 | # charging = I * \int_0^t (2.1 + t/3600) = I * (2.1t + t^2/7200) 67 | if from_charged: 68 | answer = current * (2.9 * first_steps['test_time'] - first_steps['test_time'] ** 2 / 7200) 69 | assert (answer[1:] < 0).all() 70 | else: 71 | answer = current * (2.1 * first_steps['test_time'] + first_steps['test_time'] ** 2 / 7200) 72 | assert (answer[1:] > 0).all() 73 | assert np.isclose(first_steps['cycled_energy'], answer / 3600, rtol=1e-3).all() 74 | 75 | 76 | def test_against_battery_data_gov(file_path): 77 | """See if our capacities are similar to those computed in BatteryData.Energy.Gov""" 78 | 79 | cyc_id = 8 80 | data = BDReader().read_dataset(list((file_path / 'batterydata').glob('p492*'))) 81 | orig_data = \ 82 | data.tables['cycle_stats'][ 83 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge'] 84 | ].copy().iloc[cyc_id] 85 | 86 | # Recompute 87 | CapacityPerCycle().compute_features(data) 88 | new_data = data.tables['cycle_stats'][ 89 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge']].iloc[cyc_id] 90 | diff = np.abs(orig_data.values - new_data.values) 91 | agree = diff < 1e-3 92 | assert agree.all(), diff 93 | 94 | 95 | def test_reuse_integrals(file_path): 96 | example_data = get_example_data(file_path, True) 97 | 98 | # Get a baseline capacity 99 | CapacityPerCycle(reuse_integrals=False).compute_features(example_data) 100 | initial_data = example_data.tables['cycle_stats'][ 101 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge']].copy() 102 | 103 | # Compute the integrals then intentionally increase capacity and energy 2x 104 | StateOfCharge().compute_features(example_data) 105 | for c in ['cycled_energy', 'cycled_charge']: 106 | example_data.tables['raw_data'][c] *= 2 107 | 108 | # Recompute capacity and energy measurements, which should have increased by 2x 109 | CapacityPerCycle(reuse_integrals=True).compute_features(example_data) 110 | final_data = example_data.tables['cycle_stats'][ 111 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge']].copy() 112 | assert np.isclose(initial_data.values * 2, final_data.values, atol=1e-3).all() 113 | -------------------------------------------------------------------------------- /tests/postprocess/test_stats.py: -------------------------------------------------------------------------------- 1 | """Test for
features related to timing""" 2 | from datetime import datetime, timedelta 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from pytest import warns, fixture, raises 7 | 8 | from battdat.data import BatteryDataset 9 | from battdat.postprocess.timing import CycleTimesSummarizer, TimeEnhancer 10 | 11 | 12 | @fixture() 13 | def raw_data(): 14 | return pd.DataFrame({ 15 | 'cycle_number': [0, 0, 1, 1, 2, 2], 16 | 'test_time': [0, 0.99, 1, 1.99, 2., 2.99] 17 | }) 18 | 19 | 20 | def test_summary(raw_data): 21 | computer = CycleTimesSummarizer() 22 | data = BatteryDataset.make_cell_dataset(raw_data=raw_data) 23 | output = computer.compute_features(data) 24 | assert set(output.columns) == set(computer.column_names).union({'cycle_number'}) 25 | assert np.isclose(data.tables['cycle_stats']['cycle_start'], [0., 1., 2.]).all() 26 | assert np.isclose(data.tables['cycle_stats']['cycle_duration'], [1., 1., 0.99]).all() 27 | 28 | # Make sure it warns if the next cycle is unavailable 29 | raw_data = pd.DataFrame({ 30 | 'cycle_number': [0, 0, 1, 1, 3, 3], # As if cycle 2 is missing 31 | 'test_time': [0, 0.99, 1, 1.99, 2., 2.99] 32 | }) 33 | data = BatteryDataset.make_cell_dataset(raw_data=raw_data) 34 | with warns(UserWarning) as w: 35 | computer.compute_features(data) 36 | assert 'Some cycles are missing' in str(w[0]) 37 | assert len(w) == 1 38 | 39 | assert np.isclose(data.tables['cycle_stats']['cycle_start'], [0., 1., 2.]).all() 40 | assert np.isclose(data.tables['cycle_stats']['cycle_duration'], [1., 0.99, 0.99]).all() 41 | 42 | # Warns when a cycle contains only one point, which may be the case for rests 43 | raw_data = pd.DataFrame({ 44 | 'cycle_number': [0, 1, 1, 2, 2], 45 | 'test_time': [0, 1, 1.99, 2., 2.99] 46 | }) 47 | data = BatteryDataset.make_cell_dataset(raw_data=raw_data) 48 | with warns(UserWarning) as w: 49 | computer.compute_features(data) 50 | assert 'Some cycles have only one' in str(w[0]) 51 | assert len(w) == 1 52 | 53 | assert np.isclose(data.tables['cycle_stats']['cycle_start'], [0., 1., 2.]).all() 54 | assert np.isclose(data.tables['cycle_stats']['cycle_duration'], [1., 1., 0.99]).all() 55 | 56 | 57 | def test_enhance(raw_data): 58 | computer = TimeEnhancer() 59 | 60 | # Create a datetime series 61 | now = datetime.now() 62 | date_time = raw_data['test_time'].apply(lambda x: now + timedelta(seconds=x)) 63 | 64 | # Remove the time column, then make sure enhancement crashes without the datetime column 65 | orig_test_time = raw_data['test_time'] 66 | raw_data.drop(columns=['test_time'], inplace=True) 67 | 68 | with raises(ValueError, match='must contain a `date_time`'): 69 | computer.enhance(raw_data) 70 | 71 | # Add the datetime series to the dataframe then compute the cycle_stats 72 | raw_data['date_time'] = date_time 73 | computer.enhance(raw_data) 74 | 75 | assert np.allclose(raw_data['test_time'], orig_test_time) 76 | assert np.allclose(raw_data['cycle_time'], [0, 0.99] * 3) 77 | -------------------------------------------------------------------------------- /tests/postprocess/test_tagging.py: -------------------------------------------------------------------------------- 1 | """Tests that cover adding derived columns to the raw data""" 2 | import numpy as np 3 | import pandas as pd 4 | from pytest import fixture 5 | import pytest 6 | 7 | from battdat.data import BatteryDataset 8 | from battdat.postprocess.tagging import AddSteps, AddMethod, AddSubSteps, AddState 9 | from battdat.schemas.column import ChargingState, ControlMethod 10 | 11 | 12 | @fixture() 13 | def synthetic_data() ->
BatteryDataset: 14 | """Data which includes all of our types of steps""" 15 | 16 | # Make the segments 17 | rest_v = [3.5] * 16 18 | rest_i = [0.] * 16 19 | rest_s = [ChargingState.rest] * 16 20 | discharge_v = np.linspace(3.5, 3.25, 16) 21 | discharge_i = [-0.125] * 16 22 | discharge_s = [ChargingState.discharging] * 16 23 | shortrest_v = [3.25] * 4 24 | shortrest_i = [0] * 4 25 | shortrest_s = [ChargingState.rest] * 4 26 | shortnon_v = [3.25] * 4 27 | shortnon_i = [-0.1] * 4 28 | shortnon_s = [ChargingState.discharging] * 4 29 | pulse_v = [3.25] * 8 30 | pulse_i = [0.05] * 8 31 | pulse_s = [ChargingState.charging] * 8 32 | charge_v = [3.6] * 8 + np.linspace(3.6, 3.8, 8).tolist() 33 | charge_i = np.linspace(0.15, 0.1, 8).tolist() + [0.125] * 8 34 | charge_s = [ChargingState.charging] * 16 35 | 36 | # Combine them 37 | v = np.concatenate([rest_v, discharge_v, shortrest_v, shortnon_v, pulse_v, shortrest_v, charge_v]) 38 | i = np.concatenate([rest_i, discharge_i, shortrest_i, shortnon_i, pulse_i, shortrest_i, charge_i]) 39 | s = sum([rest_s, discharge_s, shortrest_s, shortnon_s, pulse_s, shortrest_s, charge_s], []) 40 | t = np.arange(len(v)) * 2. # Assume measurements every 2 seconds 41 | c = np.zeros_like(t, dtype=int) # All in the same cycle 42 | 43 | data = pd.DataFrame({ 44 | 'current': i, 45 | 'voltage': v, 46 | 'state': s, 47 | 'test_time': t, 48 | 'cycle_number': c 49 | }) 50 | # data.drop([62, 63, 64], inplace=True) 51 | return BatteryDataset.make_cell_dataset(raw_data=data) 52 | 53 | 54 | def test_example_data(synthetic_data): 55 | synthetic_data.validate_columns() 56 | 57 | 58 | def test_step_detection(synthetic_data): 59 | AddSteps().enhance(synthetic_data.raw_data) 60 | 61 | # Should detect steps 62 | assert (synthetic_data.raw_data['step_index'].iloc[:16] == 0).all() 63 | assert (synthetic_data.raw_data['step_index'].iloc[16:32] == 1).all() 64 | assert (synthetic_data.raw_data['step_index'].iloc[32:36] == 2).all() 65 | assert (synthetic_data.raw_data['step_index'].iloc[36:40] == 3).all() 66 | assert (synthetic_data.raw_data['step_index'].iloc[40:48] == 4).all() 67 | assert (synthetic_data.raw_data['step_index'].iloc[48:52] == 5).all() 68 | assert (synthetic_data.raw_data['step_index'].iloc[52:68] == 6).all() 69 | 70 | 71 | @pytest.mark.xfail 72 | def test_method_detection(synthetic_data): 73 | # Start assuming that the step detection worked 74 | AddSteps().enhance(synthetic_data.raw_data) 75 | 76 | # See if we can detect the steps 77 | AddMethod().enhance(synthetic_data.raw_data) 78 | assert (synthetic_data.raw_data['method'].iloc[:16] == ControlMethod.rest).all() 79 | assert (synthetic_data.raw_data['method'].iloc[16:32] == ControlMethod.constant_current).all() 80 | assert (synthetic_data.raw_data['method'].iloc[32:36] == ControlMethod.short_rest).all() 81 | assert (synthetic_data.raw_data['method'].iloc[36:40] == ControlMethod.short_nonrest).all() 82 | assert (synthetic_data.raw_data['method'].iloc[40:48] == ControlMethod.pulse).all() 83 | assert (synthetic_data.raw_data['method'].iloc[48:52] == ControlMethod.short_rest).all() 84 | assert (synthetic_data.raw_data['method'].iloc[52:60] == ControlMethod.constant_voltage).all() 85 | assert (synthetic_data.raw_data['method'].iloc[60:68] == ControlMethod.constant_current).all() 86 | 87 | 88 | @pytest.mark.xfail 89 | def test_substep_detect(synthetic_data): 90 | # Start assuming that the step and method detection worked 91 | AddSteps().enhance(synthetic_data.raw_data) 92 | AddMethod().enhance(synthetic_data.raw_data) 93 | 94 | # The 
substeps should be the same as the steps because we do not have two charging/rest cycles next to each other 95 | AddSubSteps().enhance(synthetic_data.raw_data) 96 | assert (synthetic_data.raw_data['step_index'].iloc[:60] == synthetic_data.raw_data['substep_index'].iloc[:60]).all() 97 | assert (synthetic_data.raw_data['substep_index'].iloc[60:] == 7).all() 98 | 99 | 100 | def test_state_detection(synthetic_data): 101 | # First, get only the data without the pre-defined state 102 | raw_data = synthetic_data.raw_data.drop(columns=['state']) 103 | 104 | # Enhance 105 | AddState().enhance(data=raw_data) 106 | 107 | # assert False, len(synthetic_data.raw_data) 108 | assert (raw_data['state'].iloc[:16] == ChargingState.rest).all(), raw_data['state'].iloc[:16] 109 | assert (raw_data['state'].iloc[16:32] == ChargingState.discharging).all(), raw_data['state'].iloc[16:32].to_numpy() 110 | assert (raw_data['state'].iloc[32:36] == ChargingState.rest).all() 111 | assert (raw_data['state'].iloc[36:40] == ChargingState.discharging).all() 112 | assert (raw_data['state'].iloc[40:48] == ChargingState.charging).all() 113 | assert (raw_data['state'].iloc[48:52] == ChargingState.rest).all() 114 | assert (raw_data['state'].iloc[52:] == ChargingState.charging).all() 115 | -------------------------------------------------------------------------------- /tests/schemas/test_cycling.py: -------------------------------------------------------------------------------- 1 | from battdat.schemas.column import RawData, DataType, ColumnSchema, ColumnInfo 2 | 3 | from pytest import raises, fixture, mark 4 | import pandas as pd 5 | 6 | 7 | @fixture() 8 | def example_df() -> pd.DataFrame: 9 | return pd.DataFrame({ 10 | 'cycle_number': [1, 2], 11 | 'test_time': [0, 0.1], 12 | 'voltage': [0.1, 0.2], 13 | 'current': [0.1, -0.1], 14 | 'state': ['charging', 'resting'] 15 | }) 16 | 17 | 18 | def test_json(): 19 | """Make sure we can serialize and deserialize classes""" 20 | 21 | as_json = RawData().model_dump_json() 22 | 23 | # Test deserialize using Pydantic, which requires knowing the base class 24 | schema = RawData.model_validate_json(as_json) 25 | assert schema.state.type == DataType.STATE 26 | 27 | # Test reading using the "unknown base" version 28 | schema = ColumnSchema.from_json(as_json) 29 | assert schema.state.type == DataType.STATE 30 | 31 | 32 | def test_required(): 33 | """Catch dataframe missing required columns""" 34 | 35 | d = pd.DataFrame() 36 | with raises(ValueError) as exc: 37 | RawData().validate_dataframe(d) 38 | assert 'missing a required column' in str(exc) 39 | 40 | 41 | def test_extra_cols(example_df): 42 | """Handle extra columns""" 43 | example_df['extra'] = [1, 1] 44 | 45 | # Passes with extra columns by default 46 | schema = RawData() 47 | schema.validate_dataframe(example_df) 48 | 49 | # Fails when desired 50 | with raises(ValueError) as exc: 51 | schema.validate_dataframe(example_df, allow_extra_columns=False) 52 | assert 'extra columns' in str(exc) 53 | 54 | # Passes when new column is defined 55 | schema.add_column('extra', 'An extra column') 56 | assert 'extra' in schema.extra_columns 57 | schema.validate_dataframe(example_df, allow_extra_columns=False) 58 | 59 | 60 | def test_get_item(): 61 | schema = RawData() 62 | schema.extra_columns['test'] = ColumnInfo(description='Test') 63 | assert schema['test'].description == 'Test' 64 | assert schema['test_time'].units == 's' 65 | with raises(KeyError, match='asdf'): 66 | schema['asdf'] 67 | 68 | 69 | @mark.parametrize( 70 | "col,values", 71 | 
[('temperature', [1, 2]), ('file_number', [0.1, 0.2]), ('state', [1, 2])] 72 | ) 73 | def test_type_failures(example_df, col, values): 74 | """Columns with the wrong type""" 75 | example_df[col] = values 76 | with raises(ValueError, match=col): 77 | RawData().validate_dataframe(example_df) 78 | 79 | 80 | def test_monotonic(example_df): 81 | """Columns that should be monotonic but are not""" 82 | example_df['cycle_number'] = [2, 1] 83 | with raises(ValueError) as exc: 84 | RawData().validate_dataframe(example_df) 85 | assert 'monotonic' in str(exc) 86 | 87 | example_df['cycle_number'] = [1, 1] 88 | RawData().validate_dataframe(example_df) 89 | -------------------------------------------------------------------------------- /tests/schemas/test_eis.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture, raises 2 | import pandas as pd 3 | import numpy as np 4 | 5 | from battdat.schemas.eis import EISData 6 | 7 | 8 | @fixture() 9 | def example_df() -> pd.DataFrame: 10 | output = pd.DataFrame({ 11 | 'test_id': [1, 1], 12 | 'frequency': [5e5, 4e5], 13 | 'z_real': [0.241, 0.237], 14 | 'z_imag': [0.431, 0.327], 15 | }) 16 | output['z_mag'] = np.linalg.norm(output.values[:, -2:], axis=1) 17 | output['z_phase'] = np.rad2deg(np.arcsin(output['z_imag'] / output['z_mag'])) 18 | return output 19 | 20 | 21 | def test_pass(example_df): 22 | EISData().validate_dataframe(example_df) 23 | 24 | 25 | def test_consistency(example_df): 26 | example_df['z_imag'] *= 2 27 | with raises(ValueError) as e: 28 | EISData().validate_dataframe(example_df) 29 | assert 'imag' in str(e.value) 30 | 31 | example_df['z_real'] *= 2 32 | with raises(ValueError) as e: 33 | EISData().validate_dataframe(example_df) 34 | assert 'real' in str(e.value) 35 | -------------------------------------------------------------------------------- /tests/schemas/test_ontology.py: -------------------------------------------------------------------------------- 1 | """Test the ability to resolve cross-references from the ontology""" 2 | 3 | from battdat.schemas import BatteryMetadata 4 | from battdat.schemas.ontology import cross_reference_terms, gather_descendants, load_battinfo, resolve_term 5 | 6 | 7 | def test_crossref(): 8 | terms = cross_reference_terms(BatteryMetadata) 9 | assert 'is_measurement' in terms 10 | assert terms['is_measurement'].name == 'emmo.Measurement' 11 | assert 'EMMO' in terms['is_measurement'].iri 12 | assert 'well defined mesurement procedure.' 
in terms['is_measurement'].elucidation 13 | 14 | 15 | def test_resolve(): 16 | assert resolve_term('PhysicsBasedSimulation') is not None 17 | assert resolve_term('https://w3id.org/emmo#EMMO_f7ed665b_c2e1_42bc_889b_6b42ed3a36f0') is not None 18 | 19 | 20 | def test_descendants(): 21 | bi = load_battinfo() 22 | desc = [t.name for t in gather_descendants(bi.PhysicsBasedSimulation)] 23 | assert 'emmo.StandaloneModelSimulation' in desc 24 | 25 | desc = [t.name for t in gather_descendants('PhysicsBasedSimulation')] 26 | assert 'emmo.StandaloneModelSimulation' in desc 27 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | """Tests for the Battery data frame""" 2 | import json 3 | import os 4 | 5 | import pytest 6 | import numpy as np 7 | import pandas as pd 8 | import pyarrow.parquet as pq 9 | from pydantic import ValidationError 10 | from pytest import fixture, raises 11 | from tables import File 12 | 13 | from battdat.schemas.column import ColumnInfo 14 | from battdat.data import BatteryDataset 15 | from battdat import __version__ 16 | 17 | 18 | @fixture() 19 | def test_df(): 20 | raw_data = pd.DataFrame({ 21 | 'test_time': [0, 1, 2.], 22 | 'current': [1., 0., -1.], 23 | 'voltage': [2., 2., 2.], 24 | 'other': [1, 2, 3], 25 | }) 26 | cycle_stats = pd.DataFrame({ 27 | 'cycle_number': [0], 28 | }) 29 | dataset = BatteryDataset.make_cell_dataset(raw_data=raw_data, cycle_stats=cycle_stats, metadata={'name': 'Test data'}) 30 | 31 | # Add an extra column in the schema 32 | dataset.schemas['raw_data'].extra_columns['new'] = ColumnInfo(description='An example column') 33 | return dataset 34 | 35 | 36 | def test_write_hdf(tmpdir, test_df): 37 | """Test whether the contents of the HDF5 file are reasonably understandable""" 38 | 39 | # Write the HDF file 40 | out_path = os.path.join(tmpdir, 'test.h5') 41 | test_df.to_hdf(out_path) 42 | 43 | # Investigate the contents 44 | with File(out_path) as f: 45 | attrs = f.root._v_attrs 46 | assert 'metadata' in attrs 47 | assert json.loads(attrs['metadata'])['name'] == 'Test data' 48 | assert 'raw_data' in f.root 49 | 50 | # Make sure we have a schema 51 | g = f.root['raw_data'] 52 | attrs = g._v_attrs 53 | assert 'metadata' in attrs 54 | assert json.loads(attrs['metadata'])['test_time']['units'] == 's' 55 | 56 | # Test writing to an already-open file 57 | with File(out_path, 'w') as file: 58 | test_df.to_hdf(file) 59 | 60 | 61 | def test_read_hdf(tmpdir, test_df): 62 | # Write it 63 | out_path = os.path.join(tmpdir, 'test.h5') 64 | test_df.to_hdf(out_path) 65 | 66 | # Test reading only the metadata 67 | metadata = BatteryDataset.get_metadata_from_hdf5(out_path) 68 | assert metadata.name == 'Test data' 69 | 70 | # Read it 71 | data = BatteryDataset.from_hdf(out_path) 72 | assert 'raw_data' in data 73 | assert 'test_time' in data['raw_data'].columns 74 | assert len(data) == 2 75 | assert len(list(data)) == 2 76 | assert data.metadata.name == 'Test data' 77 | assert data.get('raw_data') is not None 78 | assert data['cycle_stats'] is not None 79 | assert data.schemas['raw_data'].extra_columns['new'].description == 'An example column' 80 | 81 | # Test reading from an already-open file 82 | with File(out_path, 'r') as file: 83 | data = BatteryDataset.from_hdf(file) 84 | assert data.metadata.name == 'Test data' 85 | 86 | # Test requesting an unknown type of field 87 | with raises(ValueError) as exc: 88 | BatteryDataset.from_hdf(out_path, 
tables=('bad)_!~',)) 89 | assert 'bad)_!~' in str(exc) 90 | 91 | # Test reading an absent field 92 | del test_df.tables['cycle_stats'] 93 | test_df.to_hdf(out_path) 94 | with raises(ValueError) as exc: 95 | BatteryDataset.from_hdf(out_path, tables=('cycle_stats',)) 96 | assert 'File does not contain' in str(exc) 97 | 98 | 99 | def test_multi_cell_hdf5(tmpdir, test_df): 100 | out_path = os.path.join(tmpdir, 'test.h5') 101 | 102 | # Save the cell once, then multiply the current by 2 103 | test_df.to_hdf(out_path, 'a') 104 | test_df['raw_data']['current'] *= 2 105 | test_df.to_hdf(out_path, 'b', overwrite=False) 106 | 107 | # Make sure we can count two cells 108 | _, names, _ = BatteryDataset.inspect_hdf(out_path) 109 | assert names == {'a', 'b'} 110 | 111 | with File(out_path) as h: 112 | _, names, schemas = BatteryDataset.inspect_hdf(h) 113 | assert names == {'a', 'b'} 114 | 115 | # Check that there are schemas for the raw_data 116 | assert 'current' in schemas['raw_data'] 117 | 118 | # Load both 119 | test_a = BatteryDataset.from_hdf(out_path, prefix='a') 120 | test_b = BatteryDataset.from_hdf(out_path, prefix='b') 121 | assert np.isclose(test_a['raw_data']['current'] * 2, test_b['raw_data']['current']).all() 122 | 123 | # Test reading by index 124 | test_0 = BatteryDataset.from_hdf(out_path, prefix=0) 125 | assert np.isclose(test_0['raw_data']['current'], 126 | test_a['raw_data']['current']).all() 127 | 128 | # Iterate over all 129 | keys = dict(BatteryDataset.all_cells_from_hdf(out_path)) 130 | assert len(keys) 131 | assert np.isclose(keys['a']['raw_data']['current'] * 2, 132 | keys['b']['raw_data']['current']).all() 133 | 134 | 135 | def test_missing_prefix_warning(tmpdir, test_df): 136 | out_path = os.path.join(tmpdir, 'test.h5') 137 | 138 | test_df.to_hdf(out_path, 'a', overwrite=False) 139 | 140 | # Error if prefix not found 141 | with pytest.raises(ValueError, match='No data available'): 142 | BatteryDataset.from_hdf(out_path, prefix='b') 143 | 144 | 145 | def test_multicell_metadata_warning(tmpdir, test_df): 146 | out_path = os.path.join(tmpdir, 'test.h5') 147 | 148 | # Save the cell once, then alter metadata 149 | test_df.to_hdf(out_path, 'a', overwrite=False) 150 | test_df.metadata.name = 'Not test data' 151 | with pytest.warns(UserWarning, match='differs from new metadata'): 152 | test_df.to_hdf(out_path, 'b', overwrite=False) 153 | 154 | 155 | def test_validate(test_df): 156 | # Make sure the provided data passes 157 | warnings = test_df.validate() 158 | assert len(warnings) == 1 159 | assert 'other' in warnings[0] 160 | 161 | # Make sure we can define new columns 162 | test_df.schemas['raw_data'].extra_columns['other'] = ColumnInfo(description='Test') 163 | warnings = test_df.validate() 164 | assert len(warnings) == 0 165 | 166 | 167 | def test_parquet(test_df, tmpdir): 168 | write_dir = tmpdir / 'parquet-test' 169 | written = test_df.to_parquet(write_dir) 170 | assert len(written) == 2 171 | for file in written.values(): 172 | metadata = pq.read_metadata(file).metadata 173 | assert b'battery_metadata' in metadata 174 | assert b'table_metadata' in metadata 175 | 176 | # Read it back in, ensure data are recovered 177 | read_df = BatteryDataset.from_parquet(write_dir) 178 | assert (read_df.cycle_stats['cycle_number'] == test_df.cycle_stats['cycle_number']).all() 179 | assert (read_df.raw_data['voltage'] == test_df.raw_data['voltage']).all() 180 | assert read_df.metadata == test_df.metadata 181 | assert read_df.schemas['raw_data'].extra_columns['new'].description == 'An example 
column' 182 | 183 | # Test reading subsets 184 | read_df = BatteryDataset.from_parquet(write_dir, subsets=('cycle_stats',)) 185 | assert read_df.metadata is not None 186 | with raises(AttributeError, match='raw_data'): 187 | assert read_df.raw_data 188 | assert read_df.cycle_stats is not None 189 | 190 | with raises(ValueError) as e: 191 | BatteryDataset.from_parquet(tmpdir) 192 | assert 'No data available' in str(e) 193 | 194 | # Test reading only metadata 195 | metadata = BatteryDataset.inspect_parquet(write_dir) 196 | assert metadata == test_df.metadata 197 | BatteryDataset.inspect_parquet(write_dir / 'cycle_stats.parquet') 198 | with raises(ValueError) as e: 199 | BatteryDataset.inspect_parquet(tmpdir) 200 | assert 'No parquet files' in str(e) 201 | 202 | 203 | def test_version_warnings(test_df): 204 | # Alter the version number, then copy using to/from dict 205 | test_df.metadata.version = 'super.old.version' 206 | with pytest.warns() as w: 207 | BatteryDataset.make_cell_dataset(metadata=test_df.metadata, warn_on_mismatch=True) 208 | assert len(w) == 1 # Only the warning about the versions 209 | assert 'supplied=super.old.version' in str(w.list[0].message) 210 | 211 | # Make a change that will violate the schema 212 | test_df.metadata.name = 1 # Name cannot be an int 213 | 214 | with pytest.warns() as w: 215 | recovered = BatteryDataset.make_cell_dataset(metadata=test_df.metadata, warn_on_mismatch=True) 216 | assert len(w) == 3 # Warning during save, warning about mismatch, warning that schema failed 217 | assert 'supplied=super.old.version' in str(w.list[1].message) 218 | assert 'failed to validate, probably' in str(w.list[2].message) 219 | assert recovered.metadata.version == __version__ 220 | 221 | 222 | def test_bad_metadata(): 223 | """Ensure bad metadata causes an exception""" 224 | 225 | metadata = {'name': 1} 226 | with raises(ValidationError): 227 | BatteryDataset.make_cell_dataset(metadata=metadata) 228 | -------------------------------------------------------------------------------- /tests/test_stream.py: -------------------------------------------------------------------------------- 1 | """Evaluate streaming reads from files""" 2 | from itertools import zip_longest 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | from tables import File 7 | from pytest import fixture, mark, raises 8 | 9 | from battdat.data import BatteryDataset 10 | from battdat.io.batterydata import BDReader 11 | from battdat.postprocess.timing import CycleTimesSummarizer 12 | from battdat.streaming import iterate_records_from_file, iterate_cycles_from_file 13 | from battdat.streaming.hdf5 import HDF5Writer 14 | 15 | 16 | @fixture() 17 | def example_dataset(file_path): 18 | data = BDReader().read_dataset([file_path / 'batterydata' / 'p492-13-raw.csv']) 19 | data.metadata.name = 'test_name' 20 | return data 21 | 22 | 23 | @fixture() 24 | def example_h5_path(tmpdir, example_dataset): 25 | h5_path = Path(tmpdir) / 'example_h5' 26 | example_dataset.to_hdf(h5_path) 27 | return h5_path 28 | 29 | 30 | def test_stream_by_rows(example_h5_path): 31 | row_iter = iterate_records_from_file(example_h5_path) 32 | 33 | row_0 = next(row_iter) 34 | assert row_0['test_time'] == 0. 
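# These hard-coded values presumably mirror the first records of the
# p492-13-raw.csv fixture behind example_h5_path.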
35 | row_1 = next(row_iter) 36 | assert row_1['voltage'] == 3.27191577 37 | 38 | 39 | def test_stream_by_cycles(example_h5_path): 40 | test_data = BatteryDataset.from_hdf(example_h5_path) 41 | cycle_iter = iterate_cycles_from_file(example_h5_path) 42 | for streamed, (_, original) in zip_longest(cycle_iter, test_data.raw_data.groupby('cycle_number')): 43 | assert streamed is not None 44 | assert original is not None 45 | assert np.allclose(streamed['test_time'], original['test_time']) 46 | 47 | # Test reading a list of keys 48 | cycle_iter = iterate_cycles_from_file(example_h5_path, make_dataset=False, key=['raw_data']) 49 | cycle_0 = next(cycle_iter) 50 | assert 'raw_data' in cycle_0 51 | 52 | # Ensure we can generate chunks with metadata 53 | for key in ('raw_data', ['raw_data']): 54 | cycle_iter = iterate_cycles_from_file(example_h5_path, make_dataset=True, key=key) 55 | cycle_0 = next(cycle_iter) 56 | assert cycle_0.metadata == test_data.metadata 57 | 58 | 59 | def test_stream_by_cycles_with_stats(example_dataset, tmpdir): 60 | # Remove EIS data, add capacities 61 | example_dataset.tables.pop('eis_data') 62 | CycleTimesSummarizer().add_summaries(example_dataset) 63 | assert 'cycle_stats' in example_dataset 64 | h5_path = Path(tmpdir / 'test.h5') 65 | example_dataset.to_hdf(h5_path) 66 | 67 | # Test streaming a cycle 68 | cycle_iter = iterate_cycles_from_file(h5_path, make_dataset=False, key=None) 69 | cycle_0 = next(cycle_iter) 70 | assert cycle_0['cycle_stats'].iloc[0]['cycle_number'] == 0 71 | 72 | # Delete the first row in the cycle steps to cause an error 73 | example_dataset.cycle_stats.drop(index=0, inplace=True) 74 | h5_path = Path(tmpdir / 'test-fail.h5') 75 | example_dataset.to_hdf(h5_path) 76 | 77 | cycle_iter = iterate_cycles_from_file(h5_path, make_dataset=False, key=None) 78 | with raises(ValueError, match='cycle_stats=1'): 79 | next(cycle_iter) 80 | 81 | 82 | @mark.parametrize('buffer_size', [128, 400000000]) # Way smaller than data size, way larger 83 | def test_streaming_write(example_dataset, buffer_size, tmpdir): 84 | out_file = Path(tmpdir) / 'streamed.h5' 85 | writer = HDF5Writer(out_file, metadata=example_dataset.metadata, buffer_size=buffer_size) 86 | assert len(example_dataset.raw_data) > 0 87 | with writer: 88 | for _, row in example_dataset.raw_data.iterrows(): 89 | writer.write_row(row.to_dict()) 90 | 91 | # Make sure the data are identical 92 | copied_data = BatteryDataset.from_hdf(out_file) 93 | assert copied_data.metadata.name == example_dataset.metadata.name 94 | cols = ['test_time', 'current'] 95 | assert np.allclose(copied_data.raw_data[cols], example_dataset.raw_data[cols]) 96 | 97 | 98 | def test_streaming_write_existing_store(example_dataset, tmpdir): 99 | out_file = Path(tmpdir) / 'streamed.h5' 100 | with File(out_file, mode='a') as file, HDF5Writer(file, buffer_size=2, complevel=4) as writer: 101 | assert writer.write_row({'test_time': 0.}) == 0 # Written on close, so the number written here is zero 102 | 103 | with File(out_file, mode='a') as file, HDF5Writer(file, buffer_size=2, complevel=4) as writer: 104 | assert writer.write_row({'test_time': 1.}) == 0 105 | 106 | # Read it in 107 | data = BatteryDataset.from_hdf(out_file) 108 | assert np.allclose(data.raw_data['test_time'], [0., 1.]) 109 | --------------------------------------------------------------------------------
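Taken together, the tests above trace the library's core round trip: assemble a dataset from raw cycling data, post-process it, and move it through HDF5. The sketch below distills that flow using only calls exercised in these tests; the file name, metadata, and numeric values are illustrative, not part of the repository.

import pandas as pd

from battdat.data import BatteryDataset
from battdat.postprocess.integral import CapacityPerCycle

# Assemble a minimal cell dataset; columns follow the RawData schema used in the tests
raw = pd.DataFrame({
    'test_time': [0., 1., 2.],   # seconds
    'current': [1., 0., -1.],    # amps; positive current denotes charging per the sign-convention checks
    'voltage': [2., 2., 2.],     # volts
    'cycle_number': [0, 0, 0],
})
dataset = BatteryDataset.make_cell_dataset(raw_data=raw, metadata={'name': 'demo'})
print(dataset.validate())        # returns a list of warnings rather than raising

# Derive per-cycle statistics, then round-trip the whole dataset through HDF5
# ('demo.h5' is an illustrative path, not a repository fixture)
CapacityPerCycle().compute_features(dataset)
dataset.to_hdf('demo.h5')
copy = BatteryDataset.from_hdf('demo.h5')
assert 'cycle_stats' in copy.tables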