├── .coveragerc ├── .github └── workflows │ ├── gh-pages.yml │ ├── python-package.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── battdat ├── __init__.py ├── consistency │ ├── __init__.py │ ├── base.py │ └── current.py ├── data.py ├── io │ ├── __init__.py │ ├── arbin.py │ ├── ba.py │ ├── base.py │ ├── batterydata.py │ ├── hdf.py │ ├── maccor.py │ └── parquet.py ├── postprocess │ ├── __init__.py │ ├── base.py │ ├── integral.py │ ├── tagging.py │ └── timing.py ├── schemas │ ├── __init__.py │ ├── battery.py │ ├── column.py │ ├── cycling.py │ ├── eis.py │ ├── modeling.py │ └── ontology.py ├── streaming │ ├── __init__.py │ └── hdf5.py ├── utils.py └── version.py ├── dev ├── README.md └── environment.yml ├── docs ├── Makefile ├── README.md ├── _static │ └── logo.png ├── conf.py ├── getting-started.rst ├── index.rst ├── make.bat ├── pptx-files │ └── logo.pptx ├── source │ ├── consistency.rst │ ├── data.rst │ ├── io.rst │ ├── modules.rst │ ├── postprocess.rst │ ├── schemas.rst │ └── streaming.rst └── user-guide │ ├── consistency │ ├── check-sign-convention.ipynb │ └── index.rst │ ├── dataset.rst │ ├── formats.rst │ ├── index.rst │ ├── io.rst │ ├── post-processing │ ├── cell-capacity.ipynb │ ├── cycle-times.ipynb │ ├── figures │ │ └── explain-capacities.png │ └── index.rst │ ├── schemas │ ├── column-schema.rst │ ├── export-schemas.py │ ├── index.rst │ └── source-metadata.rst │ └── streaming.rst ├── notebooks ├── README.md └── extract-from-batterydata.ipynb ├── pyproject.toml ├── setup.cfg └── tests ├── conftest.py ├── consistency └── test_sign.py ├── exporters └── test_ba.py ├── files ├── arbin_example.csv ├── batteryarchive │ ├── CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_cycle_data.csv │ └── CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_timeseries.csv ├── batterydata │ ├── .gitattributes │ ├── p492-13-raw.csv │ └── p492-13-summary.csv ├── example-data │ ├── README.md │ ├── resistor-only_complex-cycling.ipynb │ ├── resistor-only_simple-cycling.ipynb │ ├── single-resistor-complex-charge_from-discharged.hdf │ ├── single-resistor-complex-charge_from-discharged │ │ └── raw_data.parquet │ ├── single-resistor-constant-charge_from-charged.hdf │ └── single-resistor-constant-charge_from-discharged.hdf ├── maccor_example.001 └── maccor_example.002 ├── io ├── test_arbin.py ├── test_batterydata.py ├── test_cell_consistency.py ├── test_hdf.py └── test_maccor.py ├── postprocess ├── test_integral.py ├── test_stats.py └── test_tagging.py ├── schemas ├── test_cycling.py ├── test_eis.py └── test_ontology.py ├── test_data.py └── test_stream.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = */tests/* 3 | -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docs 2 | on: 3 | push: 4 | branches: [ "main" ] 5 | pull_request: 6 | branches: [ "main" ] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.10' 17 | - name: Install Pandoc 18 | run: | 19 | sudo apt update 20 | sudo apt install -y pandoc 21 | - name: Install dependencies 22 | run: | 23 | pip install -e .[docs] 24 | - name: Sphinx build 25 | run: | 26 | cd docs 27 | make html 28 | - name: Upload artifact 29 | uses: actions/upload-pages-artifact@v3 30 | with: 31 | path: 
docs/_build/html 32 | 33 | deploy: 34 | if: github.ref == 'refs/heads/main' 35 | needs: build 36 | permissions: 37 | pages: write # to deploy to Pages 38 | id-token: write # to verify the deployment originates from an appropriate source 39 | 40 | environment: 41 | name: github-pages 42 | url: ${{ steps.deployment.outputs.page_url }} 43 | 44 | runs-on: ubuntu-latest 45 | steps: 46 | - name: Deploy to GitHub Pages 47 | id: deployment 48 | uses: actions/deploy-pages@v4 49 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python Package 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | os: [ ubuntu-latest, macos-latest, windows-latest ] 10 | max-parallel: 5 11 | runs-on: ${{ matrix.os }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.10 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.10' 18 | - name: Install package 19 | run: | 20 | pip install -e .[test] 21 | - name: Lint with flake8 22 | run: | 23 | flake8 battdat/ tests 24 | - name: Test with pytest 25 | run: | 26 | pytest --cov=battdat tests 27 | - name: Coveralls 28 | run: | 29 | pip install coveralls 30 | coveralls --service=github-actions 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} 34 | - name: Test example notebooks 35 | if: ${{ runner.os == 'Linux' }} 36 | run: | 37 | pip install jupyter matplotlib 38 | home_dir=`pwd` 39 | for notebook in `find . -name "*.ipynb"`; do 40 | cd `dirname $notebook` 41 | jupyter execute `basename $notebook` 42 | cd $home_dir 43 | done 44 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: Install pypa/build 14 | run: >- 15 | python -m 16 | pip install 17 | build 18 | --user 19 | 20 | - name: Build a binary wheel and a source tarball 21 | run: >- 22 | python -m 23 | build 24 | --sdist 25 | --wheel 26 | --outdir dist/ 27 | . 28 | 29 | - name: pypi-publish 30 | if: startsWith(github.ref, 'refs/tags') 31 | uses: pypa/gh-action-pypi-publish@release/v1 32 | with: 33 | password: ${{ secrets.PYPI_TOKEN }} 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # VSCode settings 114 | .vscode/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Battery Data Toolkit 2 | 3 | [![Python Package](https://github.com/rovi-org/battery-data-toolkit/actions/workflows/python-package.yml/badge.svg)](https://github.com/rovi-org/battery-data-toolkit/actions/workflows/python-package.yml) 4 | [![Deploy Docs](https://github.com/ROVI-org/battery-data-toolkit/actions/workflows/gh-pages.yml/badge.svg?branch=main)](https://rovi-org.github.io/battery-data-toolkit/) 5 | [![Coverage Status](https://coveralls.io/repos/github/ROVI-org/battery-data-toolkit/badge.svg?branch=main)](https://coveralls.io/github/ROVI-org/battery-data-toolkit?branch=main) 6 | [![PyPI version](https://badge.fury.io/py/battery-data-toolkit.svg)](https://badge.fury.io/py/battery-data-toolkit) 7 | 8 | The battery-data-toolkit, `battdat`, creates consistently-formatted collections of battery data. 9 | The library has three main purposes: 10 | 11 | 1. *Storing battery data in standardized formats.* ``battdat`` stores data in 12 | [HDF5 or Parquet files](https://rovi-org.github.io/battery-data-toolkit/user-guide/formats.html) which include 13 | [extensive metadata](https://rovi-org.github.io/battery-data-toolkit/user-guide/schemas/index.html). 14 | 2. *Interfacing battery data with the PyData ecosystem*. The core data model, 15 | [``BatteryDataset``](https://rovi-org.github.io/battery-data-toolkit/user-guide/dataset.html), 16 | is built atop Pandas DataFrames. 17 | 3. *Providing standard implementations of common analysis techniques*. ``battdat`` implements functions which 18 | [ensure quality](https://rovi-org.github.io/battery-data-toolkit/user-guide/consistency/index.html) 19 | or [perform common analyses](https://rovi-org.github.io/battery-data-toolkit/user-guide/post-processing/index.html). 20 | 21 | ## Installation 22 | 23 | Install ``battdat`` with pip: `pip install battery-data-toolkit` 24 | 25 | ## Documentation 26 | 27 | Find the documentation at: https://rovi-org.github.io/battery-data-toolkit/ 28 | 29 | ## Support 30 | 31 | The motivation and funding for this project came from the Rapid Operational Validation Initiative (ROVI) sponsored by the Office of Electricity. 32 | The focus of ROVI is "to greatly reduce time required for emerging energy storage technologies to go from lab to market by developing new tools that will accelerate the testing and validation process needed to ensure commercial success." 33 | If interested, you can read more about ROVI here. 
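A minimal usage sketch assembled from the reader and writer classes defined later in this repository (`ArbinReader`, `ParquetWriter`); the file names here are hypothetical:

```python
from battdat.io.arbin import ArbinReader
from battdat.io.parquet import ParquetWriter

# Parse an Arbin-format CSV into a BatteryDataset
dataset = ArbinReader().read_dataset(['cell-test.csv'])

# Time-series measurements are held as ordinary Pandas DataFrames
print(dataset.tables['raw_data'].head())

# Write to battdat's Parquet format, one file per table
ParquetWriter(overwrite=True).export(dataset, 'cell-parquet')
```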
34 | -------------------------------------------------------------------------------- /battdat/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ # noqa: 401 2 | -------------------------------------------------------------------------------- /battdat/consistency/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for checking whether data contained in a dataset is self-consistent""" 2 | -------------------------------------------------------------------------------- /battdat/consistency/base.py: -------------------------------------------------------------------------------- 1 | """Base class for consistency checkers""" 2 | from typing import List 3 | 4 | from battdat.data import BatteryDataset 5 | 6 | 7 | # TODO (wardlt): Consider standardizing the error messages: which table, how bad, possible remedy 8 | # TODO (wardlt): Make attributes defining which subsets to explore part of the base class 9 | class ConsistencyChecker: 10 | """Interface for classes which assess whether data in a :class:`~battdat.data.BatteryDataset` are self-consistent""" 11 | 12 | def check(self, dataset: BatteryDataset) -> List[str]: 13 | """Report possible inconsistencies within a dataset 14 | 15 | Args: 16 | dataset: Dataset to be evaluated 17 | Returns: 18 | List of observed inconsistencies 19 | """ 20 | raise NotImplementedError() 21 | -------------------------------------------------------------------------------- /battdat/consistency/current.py: -------------------------------------------------------------------------------- 1 | """Checks related to the current in time series data""" 2 | from dataclasses import dataclass 3 | from typing import List, Collection, Optional 4 | 5 | from datetime import datetime, timedelta 6 | 7 | import numpy as np 8 | import pandas as pd 9 | 10 | from battdat.data import BatteryDataset 11 | from battdat.consistency.base import ConsistencyChecker 12 | 13 | 14 | # TODO (wardlt): Check over different cycles 15 | @dataclass 16 | class SignConventionChecker(ConsistencyChecker): 17 | """Estimate whether the sign convention of a dataset is likely to be correct 18 | 19 | The concept underpinning this class is that the voltage of a cell should increase as it is charged. 20 | The algorithm looks for a period where the current is most consistent, then measures whether 21 | the voltage changes in the same direction as the current during that period. 22 | """ 23 | 24 | subsets_to_check: Collection[str] = ('raw_data',) 25 | """Which subsets within a dataset to evaluate""" 26 | window_length: float = 360.
27 | """Length of time period over which to assess voltage change (units: s)""" 28 | minimum_current: float = 1e-6 29 | """Minimum current used when determining periods of charge or discharge""" 30 | 31 | def check(self, dataset: BatteryDataset) -> List[str]: 32 | warnings = [] 33 | for subset in self.subsets_to_check: 34 | if (warning := self.check_subset(dataset.tables[subset])) is not None: 35 | warnings.append(warning) 36 | return warnings 37 | 38 | def check_subset(self, time_series: pd.DataFrame) -> Optional[str]: 39 | # Convert the test time (seconds) to a time object so that Panda's rolling window can use a time 40 | time_series['timestamp'] = time_series['test_time'].apply(datetime.fromtimestamp) 41 | nonzero_current = time_series.query(f'current > {self.minimum_current} or current < {-self.minimum_current}') # Only get nonzero currents 42 | windowed = nonzero_current[['timestamp', 'test_time', 'current', 'voltage']].rolling( 43 | window=timedelta(seconds=self.window_length), on='timestamp', min_periods=4, 44 | ) 45 | if len(nonzero_current) < 4: 46 | raise ValueError(f'Insufficient data to judge the sign convention (only {len(nonzero_current)}). Consider raising the minimum current threshold.') 47 | 48 | # Find the region with the lowest standard deviation 49 | most_stable_point = windowed['current'].std().idxmin() 50 | most_stable_time = nonzero_current['test_time'].loc[most_stable_point] 51 | stable_window = nonzero_current.query(f'test_time < {most_stable_time} and test_time > {most_stable_time - self.window_length}') 52 | curr_volt_cov = np.cov(stable_window['voltage'], stable_window['test_time'])[0, 1] 53 | if np.sign(curr_volt_cov) != np.sign(stable_window['current'].mean()): 54 | return (f'Potential sign error in current. Average current between test_time={most_stable_time - self.window_length:.1f}s and ' 55 | f'test_time={most_stable_time:.1f} is {stable_window["current"].mean():.1e} A and the covariance between the voltage and current ' 56 | f'is {curr_volt_cov:.1e} V-s. 
The current and this covariance should have the same sign.') 57 | -------------------------------------------------------------------------------- /battdat/io/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for reading external formats into :class:`~battdat.data.BatteryDataset` objects 2 | and exporting data to disk.""" 3 | -------------------------------------------------------------------------------- /battdat/io/arbin.py: -------------------------------------------------------------------------------- 1 | """Extractor for Arbin-format files""" 2 | from typing import Union, List, Iterator, Tuple 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from battdat.io.base import CycleTestReader 8 | from battdat.schemas.column import ChargingState 9 | from battdat.utils import drop_cycles 10 | from battdat.postprocess.tagging import AddMethod, AddSteps, AddSubSteps 11 | 12 | 13 | class ArbinReader(CycleTestReader): 14 | """Parser for reading from Arbin-format files 15 | 16 | Expects the files to be in CSV format""" 17 | 18 | def group(self, files: Union[str, List[str]], directories: List[str] = None, 19 | context: dict = None) -> Iterator[Tuple[str, ...]]: 20 | for file in files: 21 | if file.lower().endswith('.csv'): 22 | yield file 23 | 24 | def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0, 25 | start_time: float = 0) -> pd.DataFrame: 26 | 27 | # Read the file and rename the time column 28 | df = pd.read_csv(file) 29 | df = df.rename(columns={'DateTime': 'test_time'}) 30 | 31 | # create fresh dataframe 32 | df_out = pd.DataFrame() 33 | 34 | # Convert the column names 35 | df_out['cycle_number'] = df['Cycle_Index'] + start_cycle - df['Cycle_Index'].min() 36 | df_out['cycle_number'] = df_out['cycle_number'].astype('int64') 37 | df_out['file_number'] = file_number # df_out['cycle_number']*0 38 | df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0] + start_time, dtype=float) 39 | df_out['current'] = df['Current'] # TODO (wardlt): Check this!?
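        # Note (assumption): battdat's convention, checked by SignConventionChecker in battdat.consistency.current,
        # is that positive current indicates charging. This mapping assumes the Arbin export already follows
        # that convention, which is what the TODO above asks to verify.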
40 | df_out['temperature'] = df['Temperature'] 41 | df_out['internal_resistance'] = df['Internal_Resistance'] 42 | df_out['voltage'] = df['Voltage'] 43 | 44 | # Drop the duplicate rows 45 | df_out = drop_cycles(df_out) 46 | 47 | # Determine whether the battery is charging or discharging: 48 | # 0 is rest, 1 is charge, -1 is discharge 49 | # TODO (wardlt): This function should move to post-processing 50 | def compute_state(x): 51 | if abs(x) < 1e-6: 52 | return ChargingState.rest 53 | return ChargingState.charging if x > 0 else ChargingState.discharging 54 | 55 | df_out['state'] = df_out['current'].apply(compute_state) 56 | 57 | # Determine the method used to control charging/discharging 58 | AddSteps().enhance(df_out) 59 | AddMethod().enhance(df_out) 60 | AddSubSteps().enhance(df_out) 61 | return df_out 62 | -------------------------------------------------------------------------------- /battdat/io/ba.py: -------------------------------------------------------------------------------- 1 | """Tools for streamlining upload to `Battery Archive <https://batteryarchive.org/>`_""" 2 | 3 | from typing import Callable, Any, Optional 4 | from dataclasses import dataclass 5 | from datetime import datetime 6 | from pathlib import Path 7 | from uuid import uuid4 8 | import logging 9 | import json 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | from battdat.io.base import DatasetWriter 15 | from battdat.data import BatteryDataset 16 | from battdat.schemas import BatteryMetadata 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | # Mappings between our column names and theirs, with an optional function to perform conversion 21 | # TODO (wardlt): Standardize fields for the cumulative charge and discharge for each cycle separately (#75) 22 | # TODO (wardlt): Differentiate the cell temperature from the environment temperature (#76) 23 | # TODO (wardlt): Compute more derived fields from BatteryArchive (#77) 24 | _timeseries_reference: dict[str, tuple[str, Optional[Callable[[Any], Any]]]] = { 25 | 'current': ('i', None), # TODO (wardlt): Which sign convention does battery archive use? 26 | 'voltage': ('v', None), 27 | 'temperature': ('env_temperature', None), # TODO (wardlt): @ypreger, would you prefer unknown temps as env or cell? 28 | 'time': ('date_time', lambda x: datetime.fromtimestamp(x).strftime('%m/%d/%Y %H:%M:%S.%f')), 29 | 'cycle_number': ('cycle_index', lambda x: x + 1), # BA starts indices from 1 30 | 'test_time': ('test_time', None), 31 | } 32 | 33 | _battery_metadata_reference: dict[str, str] = { 34 | 'nominal_capacity': 'ah', # TODO (wardlt): Why is ah an integer? 35 | 'form_factor': 'form_factor', 36 | 'mass': 'weight', # TODO (wardlt): What units does batteryarchive use? 37 | 'dimensions': 'dimensions', # TODO (wardlt): How do you express shapes for different form factors 38 | } 39 | 40 | _cycle_stats_reference: dict[str, tuple[str, Callable[[Any], Any]]] = { 41 | 'V_maximum': ('v_max', None), 42 | 'V_minimum': ('v_min', None), 43 | 'capacity_discharge': ('ah_d', None), 44 | 'capacity_charge': ('ah_c', None), 45 | 'energy_discharge': ('e_d', None), 46 | 'energy_charge': ('e_c', None), 47 | 'discharge_V_average': ('v_d_mean', None), 48 | 'charge_V_average': ('v_c_mean', None), 49 | 'coulomb_efficiency': ('ah_eff', None), # TODO (wardlt): Is this correct? 50 | 'energy_efficiency': ('e_eff', None), 51 | 'cycle_start': ('test_time', None), # TODO (wardlt): Is test-time the beginning, duration, something else?
52 | 'cycle_number': ('cycle_index', lambda x: x + 1), # BA starts indices from 1 53 | } 54 | 55 | _metadata_reference: dict[str, str] = { 56 | 'source': 'source', 57 | } 58 | 59 | 60 | # TODO (wardlt): Reconsider saving in CSV. Parquet would preserve data types 61 | 62 | @dataclass 63 | class BatteryArchiveWriter(DatasetWriter): 64 | """Export data into CSV files that follow the format definitions used in BatteryArchive 65 | 66 | The exporter writes files for each table in the 67 | `Battery Archive SQL schema `_ 68 | with column names matched to their definitions. 69 | """ 70 | 71 | chunk_size: int = 100000 72 | """Maximum number of rows to write to disk in a single CSV file""" 73 | 74 | def write_timeseries(self, cell_id: str, data: pd.DataFrame, path: Path): 75 | """Write the time series dataset 76 | 77 | Args: 78 | cell_id: Name for the cell, used as a foreign key to map between tables 79 | data: Time series data to write to disk 80 | path: Root path for writing cycling data 81 | """ 82 | 83 | num_chunks = len(data) // self.chunk_size + 1 84 | logger.info(f'Writing time series data to disk in {num_chunks} chunks') 85 | for i, chunk in enumerate(np.array_split(data, num_chunks)): 86 | # Convert all of our columns 87 | out_chunk = pd.DataFrame() 88 | for my_col, (out_col, out_fun) in _timeseries_reference.items(): 89 | if my_col in chunk: 90 | out_chunk[out_col] = chunk[my_col] 91 | if out_fun is not None: 92 | out_chunk[out_col] = out_chunk[out_col].apply(out_fun) 93 | 94 | # Add a cell id to the frame 95 | out_chunk['cell_id'] = cell_id 96 | 97 | # Save to disk 98 | chunk_path = path / f'cycle-timeseries-{i}.csv' 99 | out_chunk.to_csv(chunk_path, index=False, encoding='utf-8') 100 | logger.debug(f'Wrote {len(out_chunk)} rows to {chunk_path}') 101 | 102 | def write_cycle_stats(self, cell_id: str, data: pd.DataFrame, path: Path): 103 | """Write the cycle stats to disk 104 | 105 | Args: 106 | cell_id: Name of the cell 107 | data: Cycle stats dataframe 108 | path: Path to the output directory 109 | """ 110 | 111 | # Convert the dataframe 112 | out_data = pd.DataFrame() 113 | for my_col, (out_col, out_fun) in _cycle_stats_reference.items(): 114 | if my_col in data: 115 | out_data[out_col] = data[my_col] 116 | if out_fun is not None: 117 | out_data[out_col] = out_data[out_col].apply(out_fun) 118 | 119 | # Write the cell ID in the output 120 | out_data['cell_id'] = cell_id 121 | 122 | out_data.to_csv(path / 'cycle-stats.csv', index=False) 123 | 124 | def write_metadata(self, cell_id: str, metadata: BatteryMetadata, path: Path): 125 | """Write the metadata into a JSON file 126 | 127 | Args: 128 | cell_id: ID for the cell 129 | metadata: Metadata to be written 130 | path: Path in which to write the data 131 | """ 132 | 133 | output = {'cell_id': cell_id} 134 | 135 | # Write the materials for the anode and cathode as dictionaries 136 | for terminal in ['anode', 'cathode']: 137 | attr = getattr(metadata.battery, terminal, None) 138 | if attr is not None: 139 | output[terminal] = attr.model_dump_json(exclude_unset=True) 140 | 141 | # Write the simple fields about the batteries and tester 142 | for my_field, ba_field in _battery_metadata_reference.items(): 143 | attr = getattr(metadata.battery, my_field, None) 144 | if attr is not None: 145 | output[ba_field] = attr 146 | 147 | for my_field, ba_field in _metadata_reference.items(): 148 | attr = getattr(metadata, my_field, None) 149 | if attr is not None: 150 | output[ba_field] = attr 151 | 152 | with open(path / 'metadata.json', 'w') as fp: 153 |
json.dump(output, fp) 154 | 155 | def export(self, dataset: BatteryDataset, path: Path): 156 | cell_name = dataset.metadata.name or str(uuid4()) # Default to UUID if none provided 157 | 158 | if (table := dataset.tables.get('raw_data')) is not None: 159 | self.write_timeseries(cell_name, table, path) 160 | 161 | if dataset.metadata is not None: 162 | self.write_metadata(cell_name, dataset.metadata, path) 163 | 164 | if (table := dataset.tables.get('cycle_stats')) is not None: 165 | self.write_cycle_stats(cell_name, table, path) 166 | -------------------------------------------------------------------------------- /battdat/io/base.py: -------------------------------------------------------------------------------- 1 | """Base classes for battery data import and export tools""" 2 | from typing import List, Optional, Union, Iterator, Sequence 3 | from pathlib import Path 4 | import os 5 | 6 | import pandas as pd 7 | 8 | from battdat.data import BatteryDataset 9 | from battdat.schemas import BatteryMetadata 10 | 11 | PathLike = Union[str, Path] 12 | 13 | 14 | class DatasetReader: 15 | """Base class for tools which read battery data as a :class:`~battdat.data.BatteryDataset` 16 | 17 | All readers must implement a function which receives battery metadata as input and produces 18 | a completed :class:`battdat.data.BatteryDataset` as an output. 19 | 20 | Subclasses provide additional suggested operations useful when working with data from 21 | common sources (e.g., file systems, web APIs) 22 | """ 23 | 24 | def read_dataset(self, metadata: Optional[Union[BatteryMetadata, dict]] = None, **kwargs) -> BatteryDataset: 25 | """Parse a set of files into a :class:`~battdat.data.BatteryDataset` 26 | 27 | Args: 28 | metadata: Metadata for the battery 29 | Returns: 30 | Dataset holding all available information about the battery 31 | """ 32 | raise NotImplementedError() 33 | 34 | 35 | class DatasetFileReader(DatasetReader): 36 | """Tool which reads datasets written to files 37 | 38 | Provide an :meth:`identify_files` to filter out files likely to be in this format, 39 | or :meth:`group` function to find related files if data are often split into multiple files. 40 | """ 41 | 42 | def identify_files(self, path: PathLike, context: dict = None) -> Iterator[tuple[PathLike]]: 43 | """Identify all groups of files likely to be compatible with this reader 44 | 45 | Uses the :meth:`group` function to determine groups of files that should be parsed together. 46 | 47 | Args: 48 | path: Root of directory to group together 49 | context: Context about the files 50 | Yields: 51 | Groups of eligible files 52 | """ 53 | 54 | # Walk through the directories 55 | for root, dirs, files in os.walk(path): 56 | # Generate the full paths 57 | dirs = [os.path.join(root, d) for d in dirs] 58 | files = [os.path.join(root, f) for f in files] 59 | 60 | # Get any groups from this directory 61 | for group in self.group(files, dirs, context): 62 | yield group 63 | 64 | def group(self, 65 | files: Union[PathLike, List[PathLike]], 66 | directories: List[PathLike] = None, 67 | context: dict = None) -> Iterator[tuple[PathLike, ...]]: 68 | """Identify groups of files and directories that should be parsed together 69 | 70 | Will create groups using only the files and directories included as input. 71 | 72 | The list of files contains *all* files that could be read by this extractor, 73 | which may include many false positives.
74 | 75 | Args: 76 | files: List of files to consider grouping 77 | directories: Any directories to consider grouping as well 78 | context: Context about the files 79 | Yields: 80 | Groups of files 81 | """ 82 | 83 | # Make sure file paths are strings or Path-like objects 84 | if isinstance(files, str): 85 | files = [files] 86 | files = [Path(p) for p in files] 87 | 88 | # Default: Every file is in its own group 89 | for f in files: 90 | yield f, 91 | 92 | 93 | class CycleTestReader(DatasetFileReader): 94 | """Template class for reading the files output by battery cell cyclers 95 | 96 | Adds logic for reading cycling time series from a list of files. 97 | """ 98 | 99 | def read_file(self, 100 | file: str, 101 | file_number: int = 0, 102 | start_cycle: int = 0, 103 | start_time: int = 0) -> pd.DataFrame: 104 | """Generate a DataFrame containing the data in this file 105 | 106 | The dataframe will be in our standard format 107 | 108 | Args: 109 | file: Path to the file 110 | file_number: Number of file, in case the test is spread across multiple files 111 | start_cycle: Index to use for the first cycle, in case test is spread across multiple files 112 | start_time: Test time to use for the start of the test, in case test is spread across multiple files 113 | 114 | Returns: 115 | Dataframe containing the battery data in a standard format 116 | """ 117 | raise NotImplementedError() 118 | 119 | def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[BatteryMetadata] = None) -> BatteryDataset: 120 | """Parse a set of files into a :class:`~battdat.data.BatteryDataset` 121 | 122 | Args: 123 | group: List of files to parse as part of the same test. Ordered sequentially 124 | metadata: Metadata for the battery, should adhere to the BatteryMetadata schema 125 | 126 | Returns: 127 | Dataset containing the information from all files 128 | """ 129 | 130 | # Initialize counters for the cycle numbers, etc. Used to determine offsets for the files read 131 | start_cycle = 0 132 | start_time = 0 133 | 134 | # Read the data for each file 135 | # Keep track of the ending index and ending time 136 | output_dfs = [] 137 | for file_number, file in enumerate(group): 138 | # Read the file 139 | df_out = self.read_file(file, file_number, start_cycle, start_time) 140 | output_dfs.append(df_out) 141 | 142 | # Increment the start cycle and time to determine starting point of next file 143 | start_cycle += df_out['cycle_number'].max() - df_out['cycle_number'].min() + 1 144 | start_time = df_out['test_time'].max() 145 | 146 | # Combine the data from all files 147 | df_out = pd.concat(output_dfs, ignore_index=True) 148 | 149 | # Attach the metadata and return the data 150 | return BatteryDataset.make_cell_dataset(raw_data=df_out, metadata=metadata) 151 | 152 | 153 | class DatasetWriter: 154 | """Tool which exports data from a :class:`~battdat.data.BatteryDataset` to disk in a specific format""" 155 | 156 | def export(self, dataset: BatteryDataset, path: PathLike): 157 | """Write the dataset to disk in a specific path 158 | 159 | All files from the dataset must be placed in the provided directory 160 | 161 | Args: 162 | dataset: Dataset to be exported 163 | path: Output path 164 | """ 165 | raise NotImplementedError() 166 | -------------------------------------------------------------------------------- /battdat/io/batterydata.py: -------------------------------------------------------------------------------- 1 | """Parse from the CSV formats of batterydata.energy.gov""" 2 | import re 3 | import logging 4 | from pathlib
import Path 5 | from dataclasses import dataclass 6 | from collections import defaultdict 7 | from datetime import datetime, timedelta 8 | from typing import Union, List, Iterator, Tuple, Optional, Iterable 9 | 10 | import pandas as pd 11 | 12 | from battdat.data import BatteryDataset 13 | from battdat.io.base import DatasetFileReader 14 | from battdat.schemas import BatteryMetadata, BatteryDescription 15 | 16 | _fname_match = re.compile(r'(?P<name>[-\w]+)[- ](?P<type>summary|raw)\.csv') 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def generate_metadata(desc: dict, associated_ids: Iterable[str] = ()) -> BatteryMetadata: 22 | """Assemble the battery metadata for a dataset 23 | 24 | The metadata for a single dataset are all the same and available by querying 25 | the ``https://batterydata.energy.gov/api/3/action/package_show?id={dataset_id}`` 26 | endpoint of `Battery Data Hub <https://batterydata.energy.gov/>`_. 27 | 28 | Args: 29 | desc: Data from the CKAN metadata response 30 | associated_ids: List of other resources associated with this dataset, such as the DOIs of papers. 31 | Returns: 32 | Metadata for the cell provenance and construction 33 | """ 34 | 35 | # Get the "result" pane if users didn't provide it 36 | if 'result' in desc: 37 | desc = desc['result'] 38 | 39 | # Describe the battery 40 | battery = BatteryDescription( 41 | manufacturer=desc['manufacturer_supplier'], 42 | design=", ".join(desc['cell_type']), 43 | anode={'name': ", ".join(desc['negative_electrode'])}, 44 | cathode={'name': ", ".join(desc['positive_electrode'])}, 45 | electrolyte={'name': ", ".join(desc['electrolyte_class_dataset'])}, 46 | nominal_capacity=desc['nominal_cell_capacity'], 47 | ) 48 | 49 | # Describe the context of when it was tested 50 | return BatteryMetadata( 51 | source=desc['organization']['title'], 52 | dataset_name=desc['title'], 53 | associated_ids=associated_ids, 54 | battery=battery, 55 | ) 56 | 57 | 58 | # TODO (wardlt): Columns that do not yet have a home in the schema: 59 | # - Cell2 60 | _name_map_raw = { 61 | 'Cycle_Index': 'cycle_number', 62 | 'Step': 'step_index', 63 | 'Time_s': 'test_time', 64 | 'Current_A': 'current', 65 | 'Voltage_V': 'voltage', 66 | 'Cell_Temperature_C': 'temperature', 67 | 'Datenum_d': 'time' 68 | } 69 | 70 | 71 | def convert_raw_signal(input_df: pd.DataFrame, store_all: bool) -> pd.DataFrame: 72 | """Convert a raw signal dataframe to one using battdat names and conventions 73 | 74 | Args: 75 | input_df: Initial NREL-format dataframe 76 | store_all: Whether to store columns even if we have not defined their names 77 | Returns: 78 | DataFrame in the battdat format 79 | """ 80 | output = pd.DataFrame() 81 | 82 | # Rename columns that are otherwise the same 83 | for orig, new in _name_map_raw.items(): 84 | output[new] = input_df[orig] 85 | 86 | # Decrement the indices from 1-indexed to 0-indexed 87 | output[['cycle_number', 'step_index']] -= 1 88 | 89 | # Convert the date to POSIX timestamp (ease of use in Python) from days since 1/1/0000 90 | begin_time = datetime(year=1, month=1, day=1) 91 | output['time'] = output['time'].apply(lambda x: (timedelta(days=x - 366) + begin_time).timestamp()) 92 | 93 | # Add all other columns as-is 94 | if store_all: 95 | for col in input_df.columns: 96 | if col not in _name_map_raw: 97 | output[col] = input_df[col] 98 | 99 | return output 100 | 101 | 102 | _name_map_summary = { 103 | 'Cycle_Index': 'cycle_number', 104 | 'Q_chg': 'capacity_charge', 105 | 'E_chg': 'energy_charge', 106 | 'Q_dis': 'capacity_discharge', 107 | 'E_dis': 'energy_discharge', 108 |
'CE': 'coulomb_efficiency', 109 | 'EE': 'energy_efficiency', 110 | 'tsecs_start': 'cycle_start', 111 | 'tsecs_cycle': 'cycle_duration', 112 | 'T_min': 'temperature_minimum', 113 | 'T_max': 'temperature_maximum', 114 | 'T_avg': 'temperature_average', 115 | } 116 | 117 | 118 | def convert_summary(input_df: pd.DataFrame, store_all: bool) -> pd.DataFrame: 119 | """Convert the summary dataframe to a format using battdat names and conventions 120 | 121 | Args: 122 | input_df: Initial NREL-format dataframe 123 | store_all: Whether to store columns even if we have not defined their names 124 | Returns: 125 | DataFrame in the battdat format 126 | """ 127 | 128 | output = pd.DataFrame() 129 | 130 | # Rename columns that are otherwise the same 131 | for orig, new in _name_map_summary.items(): 132 | output[new] = input_df[orig] 133 | 134 | # Add all other columns as-is 135 | if store_all: 136 | for col in input_df.columns: 137 | if col not in _name_map_summary: 138 | output[col] = input_df[col] 139 | 140 | return output 141 | 142 | 143 | def convert_eis_data(input_df: pd.DataFrame) -> pd.DataFrame: 144 | """Rename the columns from an NREL-standard set of EIS data to our names and conventions 145 | 146 | Args: 147 | input_df: NREL-format raw data 148 | Returns: 149 | EIS data in battdat format 150 | """ 151 | 152 | # Filter out the non-EIS data 153 | input_df = input_df[~input_df['Frequency_Hz'].isnull()] 154 | 155 | # Use the cycle index as a test index 156 | output = pd.DataFrame() 157 | output['test_id'] = input_df['Cycle_Index'] 158 | 159 | # Drop units off and make lower case 160 | cols = ['Frequency_Hz', 'Z_Imag_Ohm', 'Z_Real_Ohm', 'Z_Mag_Ohm', 'Z_Phase_Degree'] 161 | for col in cols: 162 | my_name = "_".join(col.lower().split("_")[:-1]) 163 | output[my_name] = input_df[col] 164 | return output 165 | 166 | 167 | @dataclass 168 | class BDReader(DatasetFileReader): 169 | """Read data from the batterydata.energy.gov CSV format 170 | 171 | Every cell in batterydata.energy.gov is stored as two separate CSV files for each battery, 172 | "<name>-summary.csv" for the cycle-level summaries 173 | and "<name>-raw.csv" for the time series measurements. 174 | Metadata is held in an Excel file, "metadata.xlsx," in the same directory.""" 175 | 176 | store_all: bool = False 177 | """Store all data from the original data, even if we have not defined it""" 178 | 179 | def group(self, files: Union[str, List[str]], directories: List[str] = None, 180 | context: dict = None) -> Iterator[Tuple[str, ...]]: 181 | 182 | # Find files that match the CSV naming convention 183 | groups = defaultdict(list) # Map of cell name to the output 184 | for file in files: 185 | if (match := _fname_match.match(Path(file).name)) is not None: 186 | groups[match.group('name')].append(file) 187 | 188 | yield from groups.values() 189 | 190 | def read_dataset(self, 191 | group: List[str], 192 | metadata: Optional[Union[BatteryMetadata, dict]] = None) -> BatteryDataset: 193 | # Make an empty metadata if none available 194 | if metadata is None: 195 | metadata = BatteryMetadata() 196 | 197 | # Process each file 198 | raw_data = cycle_stats = eis_data = None 199 | for path in group: 200 | match = _fname_match.match(Path(path).name) 201 | if match is None: 202 | raise ValueError(f'Filename convention broken for {path}. 
Should be <name>-<type>.csv') 203 | 204 | # Update the name in the metadata 205 | if metadata.name is None: 206 | metadata.name = match.group('name') 207 | 208 | # Different parsing logic by type 209 | data_type = match.group('type') 210 | if data_type == 'summary': 211 | cycle_stats = convert_summary(pd.read_csv(path), self.store_all) 212 | elif data_type == 'raw': 213 | nrel_data = pd.read_csv(path) 214 | raw_data = convert_raw_signal(nrel_data, self.store_all) 215 | 216 | # Get EIS data, if available 217 | if 'Z_Imag_Ohm' in nrel_data.columns and not (nrel_data['Z_Imag_Ohm'].isna()).all(): 218 | eis_data = convert_eis_data(nrel_data) 219 | else: 220 | raise ValueError(f'Data type unrecognized: {data_type}') 221 | 222 | # Assemble everything into a single dataset 223 | return BatteryDataset.make_cell_dataset(raw_data=raw_data, cycle_stats=cycle_stats, eis_data=eis_data, metadata=metadata) 224 | -------------------------------------------------------------------------------- /battdat/io/maccor.py: -------------------------------------------------------------------------------- 1 | """Extractor for MACCOR""" 2 | import re 3 | import itertools 4 | from dataclasses import dataclass 5 | from datetime import datetime 6 | from typing import Union, List, Iterator, Tuple, Sequence, Optional 7 | 8 | import pandas as pd 9 | import numpy as np 10 | 11 | from battdat.data import BatteryDataset 12 | from battdat.io.base import DatasetFileReader, CycleTestReader, PathLike 13 | from battdat.schemas import BatteryMetadata 14 | from battdat.schemas.column import ChargingState 15 | from battdat.postprocess.tagging import AddMethod, AddSteps, AddSubSteps 16 | from battdat.utils import drop_cycles 17 | 18 | _test_date_re = re.compile(r'Date of Test:\s+(\d{2}/\d{2}/\d{4})') 19 | 20 | 21 | @dataclass 22 | class MACCORReader(CycleTestReader, DatasetFileReader): 23 | """Parser for reading from MACCOR-format files 24 | 25 | Expects the files to be ASCII files with a .### extension. 26 | The :meth:`group` operation will consolidate files such that all with 27 | the same prefix (i.e., everything except the numerals in the extension) 28 | are treated as part of the same experiment.
29 | """ 30 | 31 | ignore_time: bool = False 32 | """Ignore the the time column, which can be problematic.""" 33 | 34 | def group(self, files: Union[str, List[str]], directories: List[str] = None, 35 | context: dict = None) -> Iterator[Tuple[str, ...]]: 36 | if isinstance(files, str): 37 | files = [files] 38 | 39 | # Get only the MACCOR-style names 40 | valid_names = filter(lambda x: x[-3:].isdigit(), files) 41 | 42 | # Split then sort based on the prefix 43 | split_filenames = sorted(name.rsplit(".", maxsplit=1) for name in valid_names) 44 | 45 | # Return groups 46 | for prefix, group in itertools.groupby(split_filenames, key=lambda x: x[0]): 47 | yield tuple('.'.join(x) for x in group) 48 | 49 | def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[BatteryMetadata] = None) -> BatteryDataset: 50 | # Verify the cells are ordered by test date 51 | start_dates = [] 52 | for file in group: 53 | with open(file, 'r') as fp: 54 | header = fp.readline() 55 | test_date = _test_date_re.findall(header)[0] 56 | start_dates.append(datetime.strptime(test_date, '%m/%d/%Y')) 57 | 58 | # Make sure they are in the correct order 59 | if not all(x >= y for x, y in zip(start_dates[1:], start_dates)): 60 | msg = "\n ".join(f'- {x} {y.strftime("%m/%d/%Y")}' for x, y in zip(group, start_dates)) 61 | raise ValueError(f'Files are not in the correct order by test date: {msg}\n') 62 | 63 | return super().read_dataset(group, metadata) 64 | 65 | def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0, 66 | start_time: int = 0) -> pd.DataFrame: 67 | 68 | # Pull the test date from the first line of the file 69 | with open(file, 'r') as fp: 70 | header = fp.readline() 71 | test_date = _test_date_re.findall(header)[0] 72 | 73 | # Read in the ASCII file (I found this notation works) 74 | df = pd.read_csv(file, skiprows=1, engine='python', sep='\t', index_col=False, encoding="ISO-8859-1") 75 | df = df.rename(columns={'DateTime': 'test_time'}) 76 | 77 | # create fresh dataframe 78 | df_out = pd.DataFrame() 79 | 80 | # fill in new dataframe 81 | df_out['cycle_number'] = df['Cyc#'] + start_cycle - df['Cyc#'].min() 82 | df_out['cycle_number'] = df_out['cycle_number'].astype('int64') 83 | df_out['file_number'] = file_number # df_out['cycle_number']*0 84 | df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60 + start_time 85 | df_out['state'] = df['State'] 86 | df_out['current'] = df['Amps'] 87 | df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current']) 88 | 89 | if not self.ignore_time: 90 | def _parse_time(time: str) -> float: 91 | if '/' in time: 92 | return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp() 93 | else: 94 | return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp() 95 | 96 | df_out['time'] = df['DPt Time'].apply(_parse_time) 97 | 98 | # 0 is rest, 1 is charge, -1 is discharge 99 | df_out.loc[df_out['state'] == 'R', 'state'] = ChargingState.rest 100 | df_out.loc[df_out['state'] == 'C', 'state'] = ChargingState.charging 101 | df_out.loc[df_out['state'] == 'D', 'state'] = ChargingState.discharging 102 | df_out.loc[df_out['state'].apply(lambda x: x not in {'R', 'C', 'D'}), 'state'] = ChargingState.unknown 103 | 104 | df_out['voltage'] = df['Volts'] 105 | df_out = drop_cycles(df_out) 106 | AddSteps().enhance(df_out) 107 | AddMethod().enhance(df_out) 108 | AddSubSteps().enhance(df_out) 109 | return df_out 110 | -------------------------------------------------------------------------------- 
/battdat/io/parquet.py: -------------------------------------------------------------------------------- 1 | """Read and write from `battery-data-toolkit's parquet format `_""" 2 | from dataclasses import dataclass, field 3 | from datetime import datetime 4 | from typing import Dict, Any, Optional, Union, Collection 5 | from pathlib import Path 6 | import warnings 7 | import logging 8 | import shutil 9 | 10 | from pyarrow import parquet as pq 11 | from pyarrow import Table 12 | 13 | from .base import DatasetWriter, DatasetFileReader, PathLike 14 | from ..data import BatteryDataset 15 | from ..schemas import BatteryMetadata 16 | from ..schemas.column import ColumnSchema 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def inspect_parquet_files(path: PathLike) -> BatteryMetadata: 22 | """Read the metadata from a collection of Parquet files 23 | 24 | Args: 25 | path: Path to a directory of parquet files 26 | 27 | Returns: 28 | Metadata from one of the files 29 | """ 30 | # Get a parquet file 31 | path = Path(path) 32 | if path.is_file(): 33 | pq_path = path 34 | else: 35 | pq_path = next(path.glob('*.parquet'), None) 36 | if pq_path is None: 37 | raise ValueError(f'No parquet files in {path}') 38 | 39 | # Read the metadata from the schema 40 | schema = pq.read_schema(pq_path) 41 | if b'battery_metadata' not in schema.metadata: 42 | raise ValueError(f'No metadata in {pq_path}') 43 | return BatteryMetadata.model_validate_json(schema.metadata[b'battery_metadata']) 44 | 45 | 46 | @dataclass 47 | class ParquetWriter(DatasetWriter): 48 | """Write to parquet files in the format specification of battery-data-toolkit 49 | 50 | Writes all data to the same directory with a separate parquet file for each table. 51 | The battery metadata, column schemas, and write date are all saved in the file-level metadata for each file. 52 | """ 53 | 54 | overwrite: bool = False 55 | """Whether to overwrite existing data""" 56 | write_options: Dict[str, Any] = field(default_factory=dict) 57 | """Options passed to :func:`~pyarrow.parquet.write_table`.""" 58 | 59 | def export(self, dataset: BatteryDataset, path: Path): 60 | # Handle existing paths 61 | path = Path(path) 62 | if path.exists(): 63 | if not self.overwrite: 64 | raise ValueError(f'Path already exists and overwrite is disabled: {path}') 65 | logger.info(f'Deleting existing directory at {path}') 66 | shutil.rmtree(path) 67 | 68 | # Make the output directory, then write each Parquet file 69 | path.mkdir(parents=True, exist_ok=False) 70 | my_metadata = { 71 | 'battery_metadata': dataset.metadata.model_dump_json(exclude_none=True), 72 | 'write_date': datetime.now().isoformat() 73 | } 74 | written = {} 75 | for key, schema in dataset.schemas.items(): 76 | if (data := dataset.tables.get(key)) is None: 77 | continue 78 | 79 | # Put the metadata for the battery and this specific table into the table's schema in the FileMetaData 80 | data_path = path / f'{key}.parquet' 81 | my_metadata['table_metadata'] = schema.model_dump_json() 82 | table = Table.from_pandas(data, preserve_index=False) 83 | new_schema = table.schema.with_metadata({**my_metadata, **table.schema.metadata}) 84 | table = table.cast(new_schema) 85 | pq.write_table(table, where=data_path, **self.write_options) 86 | 87 | written[key] = data_path 88 | return written 89 | 90 | 91 | class ParquetReader(DatasetFileReader): 92 | """Read parquet files formatted according to battery-data-toolkit standards 93 | 94 | Mirrors :class:`ParquetWriter`. 
Expects each constituent table to be in a separate parquet 95 | file and to have the metadata stored in the file-level metadata of the parquet file. 96 | """ 97 | 98 | def read_dataset(self, paths: Union[PathLike, Collection[PathLike]], metadata: Optional[Union[BatteryMetadata, dict]] = None) -> BatteryDataset: 99 | """Read a set of parquet files into a BatteryDataset 100 | 101 | Args: 102 | paths: Either the path to a single directory of files, or a list of files to parse 103 | metadata: Metadata which will overwrite what is available in the files 104 | 105 | Returns: 106 | Dataset including all subsets 107 | """ 108 | # Find the parquet files, if no specification is listed 109 | if isinstance(paths, (str, Path)): 110 | paths = [paths] 111 | paths = [Path(p) for p in paths] 112 | if len(paths) == 1 and paths[0].is_dir(): 113 | paths = list(paths[0].glob('*.parquet')) 114 | elif not all(is_file := [p.is_file() for p in paths]): 115 | not_files = [p for i, p in zip(is_file, paths) if not i] 116 | raise ValueError(f'Expected either a list of files or a single directory. The following are not files: {not_files}') 117 | 118 | if len(paths) == 0 and metadata is None: 119 | raise ValueError('No data available.') 120 | 121 | # Load each subset, tracking the battery metadata found in the files 122 | file_metadata = None 123 | data = {} 124 | schemas = {} 125 | for data_path in paths: 126 | subset = data_path.with_suffix('').name 127 | table = pq.read_table(data_path) 128 | 129 | # Load or check the metadata 130 | if b'battery_metadata' not in table.schema.metadata: 131 | warnings.warn(f'Battery metadata not found in {data_path}') 132 | else: 133 | # Load the metadata for the whole cell, warning if the files disagree 134 | my_metadata = table.schema.metadata[b'battery_metadata'] 135 | if file_metadata is None: 136 | file_metadata = my_metadata 137 | elif my_metadata != file_metadata: 138 | warnings.warn(f'Battery metadata in {data_path} differs from that in other files') 139 | 140 | # Load the battdat column schema for the table 141 | if b'table_metadata' not in table.schema.metadata: 142 | warnings.warn(f'Column schema not found in {data_path}') 143 | else: 144 | schemas[subset] = ColumnSchema.from_json(table.schema.metadata[b'table_metadata']) 145 | 146 | # Read it to a dataframe 147 | data[subset] = table.to_pandas() 148 | 149 | # Prefer user-provided metadata over what was read from the files 150 | if metadata is None: 151 | metadata = BatteryMetadata.model_validate_json(file_metadata) 152 | elif isinstance(metadata, dict): 153 | metadata = BatteryMetadata(**metadata) 154 | 155 | return BatteryDataset.make_cell_dataset( 156 | metadata=metadata, 157 | schemas=schemas, 158 | tables=data 159 | ) 160 | -------------------------------------------------------------------------------- /battdat/postprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/battdat/postprocess/__init__.py -------------------------------------------------------------------------------- /battdat/postprocess/base.py: -------------------------------------------------------------------------------- 1 | """Base class and utilities related to post-processing on battery data""" 2 | from typing import List 3 | 4 | import pandas as pd 5 | 6 | from battdat.data import BatteryDataset 7 | from battdat.schemas.column import ColumnSchema 8 | 9 | 10 | class BaseFeatureComputer: 11 | """Base class for methods that produce new features given battery data 12 | 13 | Features can be anything but are often statistics collected about a certain cycle.
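For example, a minimal subclass (an illustrative sketch, not one of the built-in computers) could count the measurements in each cycle::

    class CountPoints(BaseFeatureComputer):
        def compute_features(self, data: BatteryDataset) -> pd.DataFrame:
            raw = data.tables['raw_data']
            return raw.groupby('cycle_number').size().rename('num_points').reset_index()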
14 | """ 15 | 16 | def compute_features(self, data: BatteryDataset) -> pd.DataFrame: 17 | """Compute 18 | 19 | Args: 20 | data: Battery data object 21 | 22 | Returns: 23 | A dataframe of features where rows are different cycles or steps, columns are different features 24 | """ 25 | raise NotImplementedError() 26 | 27 | 28 | class RawDataEnhancer(BaseFeatureComputer): 29 | """Base class for methods derives new data from the existing columns in raw data""" 30 | 31 | column_names: List[str] = ... 32 | 33 | def compute_features(self, data: BatteryDataset) -> pd.DataFrame: 34 | self.enhance(data.tables['raw_data']) 35 | return data.tables['raw_data'][self.column_names] 36 | 37 | def enhance(self, data: pd.DataFrame): 38 | """Add additional columns to the raw data 39 | 40 | Args: 41 | data: Raw data to be modified 42 | """ 43 | raise NotImplementedError() 44 | 45 | 46 | class CycleSummarizer(BaseFeatureComputer): 47 | """Classes which produce a summary of certain cycles given the raw data from a cycle""" 48 | 49 | column_names: List[str] = ... 50 | 51 | def compute_features(self, data: BatteryDataset) -> pd.DataFrame: 52 | self.add_summaries(data) 53 | return data.tables['cycle_stats'][['cycle_number'] + self.column_names] 54 | 55 | def add_summaries(self, data: BatteryDataset): 56 | """Add cycle-level summaries to a battery dataset 57 | 58 | Args: 59 | data: Dataset to be modified 60 | """ 61 | 62 | # Add a cycle summary if not already available 63 | if 'cycle_stats' not in data.tables: 64 | data.tables['cycle_stats'] = pd.DataFrame({ 65 | 'cycle_number': sorted(set(data.tables['raw_data']['cycle_number'])) 66 | }) 67 | data.schemas['cycle_stats'] = ColumnSchema() 68 | 69 | # Perform the update 70 | self._summarize(data.tables['raw_data'], data.tables['cycle_stats']) 71 | 72 | def _summarize(self, raw_data: pd.DataFrame, cycle_data: pd.DataFrame): 73 | """Add additional data to a cycle summary dataframe 74 | 75 | Args: 76 | raw_data: Raw data describing the initial cycles. Is not modified 77 | cycle_data: Cycle data frame to be updated 78 | """ 79 | raise NotImplementedError() 80 | -------------------------------------------------------------------------------- /battdat/postprocess/integral.py: -------------------------------------------------------------------------------- 1 | """Features related to integral quantities (e.g., energy, capacity)""" 2 | import warnings 3 | from itertools import zip_longest 4 | from typing import List 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from scipy.integrate import cumulative_trapezoid 9 | 10 | from battdat.postprocess.base import RawDataEnhancer, CycleSummarizer 11 | 12 | 13 | class CapacityPerCycle(CycleSummarizer): 14 | """Compute the observed capacity and energy during both charge and discharge of each cycle 15 | 16 | Determines capacities based on the integral of current over each cycle: 17 | 18 | 1. Compute the change in state of charge from the start of the cycle 19 | by computing the integral of the capacity over time. 20 | We refer to this integral as the dSOC. 21 | 2. Determine whether the battery started from a charged state 22 | by determining if the largest capacity change is positive 23 | (i.e., if the point most different state of charge from the 24 | start is *more discharged* than the starting point). 25 | The code will raise a warning if the quantities are similar. 26 | 3. If starting from a charged state, the discharge capacity 27 | is the maximum change in state of charge (``dSOC.max()``). 
28 | The charge capacity is the amount of charge transferred to the 29 | battery between this maximally-discharged state and the end 30 | of the cycle (``dSOC.max() - dSOC[-1]``) 31 | 4. If starting from a discharged state, the charge capacity 32 | is the maximum change in state of charge and the discharge capacity 33 | is the amount transferred from the battery between this maximally-charged state and the end of the cycle. 34 | 35 | 36 | The energy is computed using a similar procedure, but by integrating 37 | the product of current and voltage instead of only current. 38 | 39 | .. note:: 40 | 41 | Measurements of capacity and energy assume a cycle returns 42 | the battery to the same state as it started the cycle. 43 | 44 | Output dataframe has 5 new columns. 45 | 46 | - ``capacity_discharge``: Discharge capacity per cycle in A-hr 47 | - ``capacity_charge``: Charge capacity per cycle in A-hr 48 | - ``energy_charge``: Charge energy per cycle in W-hr 49 | - ``energy_discharge``: Discharge energy per cycle in W-hr 50 | - ``max_cycled_capacity``: Maximum amount of charge cycled during the cycle, in A-hr 51 | 52 | The full definitions are provided in the :class:`~battdat.schemas.cycling.CycleLevelData` schema 53 | """ 54 | 55 | def __init__(self, reuse_integrals: bool = True): 56 | """ 57 | 58 | Args: 59 | reuse_integrals: Whether to reuse the ``cycled_charge`` and ``cycled_energy`` if they are available 60 | """ 61 | self.reuse_integrals = reuse_integrals 62 | 63 | @property 64 | def column_names(self) -> List[str]: 65 | output = [] 66 | for name in ['charge', 'discharge']: 67 | output.extend([f'energy_{name}', f'capacity_{name}']) 68 | output.extend(['max_cycled_capacity']) 69 | return output 70 | 71 | def _summarize(self, raw_data: pd.DataFrame, cycle_data: pd.DataFrame): 72 | # Initialize the output arrays 73 | # Rows of ``cycle_data`` are assumed to follow the sorted cycle order used below 74 | for name in self.column_names: 75 | cycle_data[name] = np.nan 76 | 77 | # Get the indices of the beginning of each cycle 78 | raw_data = raw_data.reset_index() # Ensure a sequential ordering from 0 79 | start_inds = raw_data.drop_duplicates('cycle_number', keep='first').index 80 | 81 | # Loop over each cycle,
using the starting point of this cycle and the first point of the next as end caps 82 | for cyc, (start_ind, stop_ind) in enumerate(zip_longest(start_inds, start_inds[1:] + 1, fillvalue=len(raw_data))): 83 | cycle_subset = raw_data.iloc[start_ind:stop_ind] 84 | 85 | # Skip cycles that are too short to have a capacity measurement 86 | if len(cycle_subset) < 3: 87 | continue 88 | 89 | # Perform the integration 90 | if self.reuse_integrals and 'cycled_energy' in cycle_subset.columns and 'cycled_charge' in cycle_subset.columns: 91 | capacity_change = cycle_subset['cycled_charge'].values * 3600 # To A-s 92 | energy_change = cycle_subset['cycled_energy'].values * 3600 # To J 93 | else: 94 | capacity_change = cumulative_trapezoid(cycle_subset['current'], x=cycle_subset['test_time']) 95 | energy_change = cumulative_trapezoid(cycle_subset['current'] * cycle_subset['voltage'], x=cycle_subset['test_time']) 96 | 97 | # Estimate if the battery starts as charged or discharged 98 | max_charge = capacity_change.max() 99 | max_discharge = -capacity_change.min() 100 | cycle_data.loc[cyc, 'max_cycled_capacity'] = (max_charge + max_discharge) / 3600 # To Amp-hour 101 | 102 | starts_charged = max_discharge > max_charge 103 | if np.isclose(max_discharge, max_charge, rtol=0.01): 104 | warnings.warn(f'Unable to clearly detect if battery started charged or discharged in cycle {cyc}. ' 105 | f'Amount discharged is {max_discharge:.2e} A-s, charged is {max_charge:.2e} A-s') 106 | 107 | # Assign the charge and discharge capacity 108 | # One capacity is beginning to maximum change, the other is maximum change to end 109 | if starts_charged: 110 | discharge_cap = max_discharge 111 | charge_cap = capacity_change[-1] + max_discharge 112 | discharge_eng = -energy_change.min() 113 | charge_eng = energy_change[-1] + discharge_eng 114 | else: 115 | charge_cap = max_charge 116 | discharge_cap = max_charge - capacity_change[-1] 117 | charge_eng = energy_change.max() 118 | discharge_eng = charge_eng - energy_change[-1] 119 | 120 | cycle_data.loc[cyc, 'energy_charge'] = charge_eng / 3600. # To W-hr 121 | cycle_data.loc[cyc, 'energy_discharge'] = discharge_eng / 3600. 122 | cycle_data.loc[cyc, 'capacity_charge'] = charge_cap / 3600. # To A-hr 123 | cycle_data.loc[cyc, 'capacity_discharge'] = discharge_cap / 3600. 124 | 125 | 126 | class StateOfCharge(RawDataEnhancer): 127 | """Compute the change in capacity and system energy over each cycle 128 | 129 | The capacity change for a cycle is determined by integrating the 130 | current as a function of time between the start of the cycle 131 | and the first point of the next cycle. 132 | The energy change is determined by integrating the product 133 | of current and voltage.
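In symbols (restating the integrals evaluated in :meth:`enhance` below), with ``t0`` the start of the cycle::

    cycled_charge(t) = (1 / 3600) * integral(current, from t0 to t)            # A-hr
    cycled_energy(t) = (1 / 3600) * integral(current * voltage, from t0 to t)  # W-hr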
134 | 135 | Output dataframe has 3 new columns: 136 | 137 | - ``cycled_charge``: Amount of observed charge cycled since the beginning of the cycle, in A-hr 138 | - ``cycled_energy``: Amount of observed energy cycled since the beginning of the cycle, in W-hr 139 | - ``CE_adjusted_charge``: Amount of charge in the battery relative to the beginning of the cycle, accounting for Coulombic Efficiency (CE), in A-hr 140 | """ 141 | def __init__(self, coulombic_efficiency: float = 1.0): 142 | """ 143 | Args: 144 | coulombic_efficiency: Coulombic efficiency to use when computing the state of charge 145 | """ 146 | self.coulombic_efficiency = coulombic_efficiency 147 | 148 | @property 149 | def coulombic_efficiency(self) -> float: 150 | return self._ce 151 | 152 | @coulombic_efficiency.setter 153 | def coulombic_efficiency(self, value: float): 154 | if value < 0 or value > 1: 155 | raise ValueError('Coulombic efficiency must be between 0 and 1') 156 | self._ce = value 157 | 158 | @property 159 | def column_names(self) -> List[str]: 160 | return ['cycled_charge', 'cycled_energy', 'CE_adjusted_charge'] 161 | 162 | def _get_CE_adjusted_curr(self, current: np.ndarray) -> np.ndarray: 163 | """Adjust the current based on the coulombic efficiency 164 | 165 | Args: 166 | current: Current array in A 167 | 168 | Returns: 169 | Adjusted current array in A 170 | """ 171 | adjusted_current = np.where(current > 0, self.coulombic_efficiency * current, current) 172 | return adjusted_current.flatten() 173 | 174 | def enhance(self, data: pd.DataFrame): 175 | # Add columns for the capacity and energy 176 | for c in self.column_names: 177 | data.loc[:, (c,)] = np.nan 178 | 179 | # Compute the capacity and energy for each cycle 180 | ordered_copy = data.reset_index() # Ensure a sequential ordering from 0 181 | start_inds = ordered_copy.drop_duplicates('cycle_number', keep='first').index 182 | 183 | # Loop over each cycle 184 | for cyc, (start_ind, stop_ind) in enumerate(zip_longest(start_inds, start_inds[1:] + 1, fillvalue=len(ordered_copy) + 1)): 185 | cycle_subset = ordered_copy.iloc[start_ind:stop_ind] 186 | 187 | # Perform the integration 188 | ce_adj_curr = self._get_CE_adjusted_curr(cycle_subset['current'].to_numpy()) 189 | capacity_change = cumulative_trapezoid(cycle_subset['current'], x=cycle_subset['test_time'], initial=0) 190 | ce_charge = cumulative_trapezoid(ce_adj_curr, x=cycle_subset['test_time'], initial=0) 191 | energy_change = cumulative_trapezoid(cycle_subset['current'] * cycle_subset['voltage'], x=cycle_subset['test_time'], initial=0) 192 | 193 | # Store them in the raw data 194 | data.loc[cycle_subset['index'], 'cycled_charge'] = capacity_change / 3600 # To A-hr 195 | data.loc[cycle_subset['index'], 'CE_adjusted_charge'] = ce_charge / 3600 # To A-hr 196 | data.loc[cycle_subset['index'], 'cycled_energy'] = energy_change / 3600 # To W-hr 197 | -------------------------------------------------------------------------------- /battdat/postprocess/tagging.py: -------------------------------------------------------------------------------- 1 | """Methods which assign labels that are present in some testing machines yet absent in others.
2 | 3 | For example, :class:`AddMethod` determines whether the battery is being held at a constant voltage or current.""" 4 | import logging 5 | from typing import List, Literal 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from pandas import DataFrame 10 | from scipy.interpolate import interp1d 11 | from scipy.signal import find_peaks, savgol_filter 12 | 13 | from battdat.schemas.column import ChargingState, ControlMethod 14 | from .base import RawDataEnhancer 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class AddMethod(RawDataEnhancer): 20 | """Determine how the battery was being controlled 21 | 22 | Determines whether a charging step is composed of constant-current, constant-voltage, 23 | or mixed steps by first partitioning it into substeps based on points of maximum curvature, 24 | then assigning regions to constant voltage or constant current if one signal varies 25 | more than twice as much as the other. 26 | """ 27 | def __init__(self, short_period_threshold: float = 30.0): 28 | """ 29 | Args: 30 | short_period_threshold: Maximum duration of a step to be considered a short step, in seconds 31 | """ 32 | self.short_period_threshold = short_period_threshold 33 | 34 | @property 35 | def column_names(self) -> List[str]: 36 | return ['method'] 37 | 38 | def enhance(self, df: pd.DataFrame): 39 | # Insert a new column into the dataframe, starting with everything marked as other 40 | df.loc[:, ('method',)] = ControlMethod.other 41 | 42 | # Group the points by cycle and step 43 | cycles = df.groupby(["cycle_number", "step_index"]) 44 | logger.info('Identifying charging/discharging methods') 45 | for key, cycle in cycles: 46 | 47 | # pull out columns of interest and turn into numpy arrays 48 | t = cycle["test_time"].values 49 | voltage = cycle["voltage"].values 50 | current = cycle['current'].values 51 | ind = cycle.index.values 52 | state = cycle['state'].values 53 | 54 | if t[-1] - t[0] < self.short_period_threshold: 55 | # The step is shorter than the threshold 56 | if state[0] == ChargingState.rest: 57 | # If the step is a rest, we label it as a short rest 58 | df.loc[ind, 'method'] = ControlMethod.short_rest 59 | elif len(ind) < 5: 60 | # The step contains fewer than 5 data points, so it is inappropriate to label it as anything 61 | # definitive other than a short non-rest 62 | df.loc[ind, 'method'] = ControlMethod.short_nonrest 63 | else: 64 | # The step is a pulse 65 | df.loc[ind, 'method'] = ControlMethod.pulse 66 | elif state[0] == ChargingState.rest: 67 | # This is a standard rest, which lasts longer than the threshold 68 | df.loc[ind, 'method'] = ControlMethod.rest 69 | elif len(ind) < 5: 70 | # The step spans more than the threshold, but has fewer than 5 data points, rendering it inadequate for control 71 | # method determination 72 | df.loc[ind, 'method'] = ControlMethod.unknown 73 | 74 | else: 75 | # Normalize the voltage and current before determining which one moves "more" 76 | for x in [voltage, current]: 77 | x -= x.min() 78 | x /= max(x.max(), 1e-6) 79 | 80 | # First see if there are significant changes in the charging behavior 81 | # We use a Savitzky-Golay filter (https://en.wikipedia.org/wiki/Savitzky%E2%80%93Golay_filter) to get smooth 82 | # derivatives, which requires even spacing.
83 | # So, our first step will be to make sure that the spacings are relatively even, 84 | # and to make an interpolated version if not 85 | dt = t[1:] - t[:-1] 86 | noneven = dt.std() / dt.mean() > 1e-6 87 | if noneven: 88 | t_spaced = np.linspace(t.min(), t.max(), len(t) * 2) 89 | voltage_spaced = interp1d(t, voltage)(t_spaced) 90 | current_spaced = interp1d(t, current)(t_spaced) 91 | else: 92 | voltage_spaced = voltage 93 | current_spaced = current 94 | 95 | d2v_dt2 = savgol_filter(voltage_spaced, 5, 4, deriv=2) 96 | d2i_dt2 = savgol_filter(current_spaced, 5, 4, deriv=2) 97 | 98 | # If we had to interpolate, interpolate again to get the values of the derivative 99 | if noneven: 100 | d2v_dt2 = interp1d(t_spaced, d2v_dt2)(t) 101 | d2i_dt2 = interp1d(t_spaced, d2i_dt2)(t) 102 | 103 | current_peaks, _ = find_peaks(d2i_dt2, distance=5, prominence=10 ** -3) 104 | voltage_peaks, _ = find_peaks(d2v_dt2, distance=5, prominence=10 ** -3) 105 | 106 | # Assign a control method to the segment between each of these peaks 107 | extrema = [0] + sorted(set(current_peaks).union(set(voltage_peaks))) + [len(voltage)] 108 | 109 | methods = [] 110 | for i in range(len(extrema) - 1): 111 | # Get the segment between these two peaks 112 | low = extrema[i] 113 | high = extrema[i + 1] 114 | r = np.arange(low, high).tolist() 115 | 116 | # Measure the ratio between the change in current and the change in voltage 117 | s_i = current[r].std() 118 | s_v = voltage[r].std() 119 | val = s_i / max(s_i + s_v, 1e-6) 120 | 121 | if val > 0.66: # If the change in current is 2x as large as the change in voltage 122 | method = ControlMethod.constant_voltage 123 | elif val < 0.33: # If the change in voltage is 2x as large as the change in current 124 | method = ControlMethod.constant_current 125 | else: 126 | method = ControlMethod.other 127 | methods.extend([method] * len(r)) 128 | 129 | assert len(methods) == len(ind), (len(methods), len(ind)) 130 | df.loc[ind, 'method'] = methods 131 | 132 | return df[['method']] 133 | 134 | 135 | class AddState(RawDataEnhancer): 136 | """ 137 | Marks states in which battery is charging, discharging, or resting 138 | 139 | Args: 140 | rest_curr_threshold: threshold of current for a period to be considered a rest 141 | """ 142 | def __init__(self, rest_curr_threshold: float = 1.0e-04): 143 | self.rest_curr_threshold = rest_curr_threshold 144 | 145 | @property 146 | def column_names(self) -> List[str]: 147 | return ['state'] 148 | 149 | def enhance(self, data: pd.DataFrame) -> None: 150 | logger.debug('Adding states') 151 | data.loc[:, ('state',)] = data.apply(_determine_state, axis=1, args=(self.rest_curr_threshold,)) 152 | 153 | 154 | class AddSteps(RawDataEnhancer): 155 | """Mark points at which the battery changed state: charging, discharging, rest""" 156 | 157 | column_names = ['step_index'] 158 | 159 | def enhance(self, data: pd.DataFrame): 160 | logger.debug('Adding step indices') 161 | _determine_steps(data, 'state', 'step_index') 162 | 163 | 164 | class AddSubSteps(RawDataEnhancer): 165 | """Mark points at which the battery control method changed state 166 | 167 | See :class:`~AddMethod` for how control methods are determined.
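The tagging enhancers are typically applied in sequence, since later ones consume the columns added by earlier ones (an illustrative ordering which mirrors the MACCOR reader)::

    AddState().enhance(df)     # adds 'state'
    AddSteps().enhance(df)     # adds 'step_index' from changes in 'state'
    AddMethod().enhance(df)    # adds 'method', using 'step_index'
    AddSubSteps().enhance(df)  # adds 'substep_index' from changes in 'method'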
168 | """ 169 | 170 | def enhance(self, data: pd.DataFrame): 171 | logger.debug('Adding substep indices') 172 | _determine_steps(data, 'method', 'substep_index') 173 | 174 | 175 | def _determine_steps(df: DataFrame, column: str, output_col: str): 176 | """Assign step indices based on whether there is a change in the value of a certain column 177 | 178 | Also resets the 179 | 180 | Parameters 181 | ---------- 182 | df: pd.DataFrame 183 | Battery data 184 | column: str 185 | Column which to monitor for changes 186 | output_col: str 187 | Name in column which to store output results 188 | """ 189 | # A new step occurs when the previous step had a different value, so we compare against 190 | # the array shifted forward one index 191 | change = df[column].ne(df[column].shift(periods=1, fill_value=df[column].iloc[0])) 192 | 193 | # The step number is equal to the number of changes observed previously in a batch 194 | # Step 1: Compute the changes since the beginning of file 195 | df.loc[:, (output_col,)] = change.cumsum() 196 | 197 | # Step 2: Adjust so that each cycle starts with step 0 198 | for _, cycle in df.groupby("cycle_number"): 199 | df.loc[cycle.index, output_col] -= cycle[output_col].min() 200 | 201 | 202 | def _determine_state( 203 | row: pd.Series, 204 | zero_threshold: float = 1.0e-4 205 | ) -> Literal[ChargingState.charging, ChargingState.discharging, ChargingState.rest]: 206 | """ 207 | Function to help determine the state of the cell based on the current 208 | 209 | Args: 210 | row: Row that stores the value of current, following the convention established in this package 211 | zero_threshold: Maximum absolute value a current can take to be assigned rest. Defaults to 0.1 mA 212 | 213 | Returns 214 | State of the cell, which can be either 'charging', 'discharging', or 'rest' 215 | """ 216 | current = row['current'] 217 | if abs(current) <= zero_threshold: 218 | return ChargingState.rest 219 | elif current > 0.: 220 | return ChargingState.charging 221 | return ChargingState.discharging 222 | -------------------------------------------------------------------------------- /battdat/postprocess/timing.py: -------------------------------------------------------------------------------- 1 | """Features related to the relative to the start of cycles or the test, etc""" 2 | import warnings 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from battdat.postprocess.base import CycleSummarizer, RawDataEnhancer 8 | 9 | 10 | class CycleTimesSummarizer(CycleSummarizer): 11 | """Capture the start time and duration of a cycle 12 | 13 | The start of a cycle is the minimum test time for any point in the raw data. 14 | 15 | The duration of a cycle is the difference between the start of the next cycle and the start of the cycle. 16 | If the start time of the next cycle is unavailable, it is the difference between the test time of the 17 | last test time in the raw data and the start of the cycle. 
18 | """ 19 | 20 | column_names = ['cycle_start', 'cycle_duration'] 21 | 22 | def _summarize(self, raw_data: pd.DataFrame, cycle_data: pd.DataFrame): 23 | # Compute the starts and durations 24 | time_summary = raw_data.groupby('cycle_number')['test_time'].agg( 25 | cycle_start="min", cycle_duration=lambda x: max(x) - min(x), count=len 26 | ).reset_index() # reset_index makes `cycle_number` a regular column 27 | if time_summary['count'].min() == 1: 28 | warnings.warn('Some cycles have only one measurements.') 29 | 30 | # Compute the duration using the start of the next cycle, if known 31 | time_summary['next_diff'] = time_summary['cycle_number'].diff(-1).iloc[:-1] 32 | if (time_summary['next_diff'].iloc[:-1] != -1).any(): 33 | warnings.warn('Some cycles are missing from the dataframe. Time durations for those cycles may be too short') 34 | has_next_cycle = time_summary.query('next_diff == -1') 35 | time_summary.loc[has_next_cycle.index, 'cycle_duration'] = -time_summary['cycle_start'].diff(-1)[has_next_cycle.index] 36 | 37 | # Update the cycle_data accordingly 38 | cycle_data[self.column_names] = np.nan 39 | cycle_data.update(time_summary) 40 | 41 | 42 | class TimeEnhancer(RawDataEnhancer): 43 | """Compute additional columns describing the time a measurement was taken""" 44 | 45 | column_names = ['test_time', 'cycle_time'] 46 | 47 | def enhance(self, data: pd.DataFrame): 48 | 49 | # Compute the test_time from the date_time 50 | if 'test_time' not in data.columns: 51 | if 'date_time' not in data.columns: 52 | raise ValueError('The data must contain a `date_time` column') 53 | data['test_time'] = (data['date_time'] - data['date_time'].min()).dt.total_seconds() 54 | 55 | # Compute the cycle_time from the test_time 56 | data['cycle_time'] = data['test_time'] 57 | data['cycle_time'] -= data.groupby('cycle_number')['test_time'].transform("min") 58 | return data 59 | -------------------------------------------------------------------------------- /battdat/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | """Schemas for battery data and metadata""" 2 | from typing import List, Tuple, Optional 3 | 4 | from pydantic import BaseModel, Field, AnyUrl 5 | 6 | from battdat.schemas.modeling import ModelMetadata 7 | from battdat.schemas.battery import BatteryDescription 8 | from battdat.schemas.cycling import CyclingProtocol 9 | from battdat.version import __version__ 10 | 11 | 12 | class BatteryMetadata(BaseModel, extra='allow'): 13 | """Representation for the metadata about a battery 14 | 15 | The metadata captures the information about what experiment was run 16 | on what battery. A complete set of metadata should be sufficient to 17 | reproduce an experiment. 18 | """ 19 | 20 | # Miscellaneous fields 21 | name: Optional[str] = Field(None, description="Name of the cell. Any format for the name is acceptable," 22 | " as it is intended to be used by the battery data provider.") 23 | comments: Optional[str] = Field(None, description="Long form comments describing the test") 24 | version: str = Field(__version__, description="Version of this metadata. 
Set by the battery-data-toolkit") 25 | is_measurement: bool = Field(True, description="Whether the data was created observationally as opposed to a computer simulation", 26 | json_schema_extra=dict( 27 | iri="https://w3id.org/emmo#EMMO_463bcfda_867b_41d9_a967_211d4d437cfb" 28 | )) 29 | 30 | # Fields that describe the test protocol 31 | test_protocol: Optional[CyclingProtocol] = Field(None, description="Method used to cycle the battery") 32 | 33 | # Field that describe the battery assembly 34 | battery: Optional[BatteryDescription] = Field(None, description="Description of the battery being cycled") 35 | 36 | # Fields that describe source of synthetic data 37 | modeling: Optional[ModelMetadata] = Field(None, description="Description of simulation approach") 38 | 39 | # Fields that describe the source of data 40 | source: Optional[str] = Field(None, description="Organization who created this data") 41 | dataset_name: Optional[str] = Field(None, description="Name of a larger dataset this data is associated with") 42 | authors: Optional[List[Tuple[str, str]]] = Field(None, description="Name and affiliation of each of the authors of the data. First and last names") 43 | associated_ids: Optional[List[AnyUrl]] = Field(None, description="Any identifiers associated with this data file." 44 | " Identifiers can be any URI, such as DOIs of associated" 45 | " paper or HTTP addresses of associated websites") 46 | -------------------------------------------------------------------------------- /battdat/schemas/battery.py: -------------------------------------------------------------------------------- 1 | """Schemas associated with the components of a battery""" 2 | from typing import Optional, List 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class ElectrodeDescription(BaseModel, extra='allow'): 8 | """Description of an electrode""" 9 | 10 | name: str = Field(..., description='Short description of the electrolyte type') 11 | 12 | # Relating to sourcing information 13 | supplier: Optional[str] = Field(None, description='Manufacturer of the material') 14 | product: Optional[str] = Field(None, description='Name of the product. 
Unique to the supplier') 15 | 16 | # Relating to the microstructure of the electrode 17 | thickness: Optional[float] = Field(None, description='Thickness of the material', ge=0, 18 | json_schema_extra=dict(units='um')) 19 | area: Optional[float] = Field(None, description='Total area of the electrode', ge=0, 20 | json_schema_extra=dict(units='cm^2')) 21 | loading: Optional[float] = Field(None, description='Amount of active material per area', ge=0, 22 | json_schema_extra=dict(units='mg/cm^2')) 23 | porosity: Optional[float] = Field(None, description='Relative volume of the electrode occupied by gas', 24 | ge=0, le=100, json_schema_extra=dict(units='%')) 25 | 26 | 27 | class ElectrolyteAdditive(BaseModel, extra='allow'): 28 | """Additive to the electrolyte""" 29 | 30 | name: str = Field(..., description='Name of the additive') 31 | amount: Optional[float] = Field(None, description='Amount added to the solution') 32 | units: Optional[str] = Field(None, description='Units of the amount') 33 | 34 | 35 | class ElectrolyteDescription(BaseModel, extra='allow'): 36 | """Description of the electrolyte""" 37 | 38 | name: str = Field(..., description='Short description of the electrolyte type') 39 | additives: List[ElectrolyteAdditive] = Field(default_factory=list, 40 | description='Any additives present in the electrolyte') 41 | 42 | 43 | class BatteryDescription(BaseModel, extra='allow'): 44 | """Description of the entire battery""" 45 | 46 | # Overall design information 47 | manufacturer: Optional[str] = Field(None, description="Manufacturer of the battery") 48 | design: Optional[str] = Field(None, description="Name of the battery type, such as the battery product ID") 49 | 50 | # Geometry information 51 | layer_count: Optional[int] = Field(None, description="Number of layers within the battery", gt=1) 52 | form_factor: Optional[str] = Field(None, description="The general shape of the battery", 53 | json_schema_extra=dict( 54 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_1586ef26_6d30_49e3_ae32_b4c9fc181941" 55 | )) 56 | mass: Optional[float] = Field(None, description="Mass of the entire battery", 57 | json_schema_extra=dict(units='kg')) 58 | dimensions: Optional[str] = Field(None, description='Dimensions of the battery in plain text.') 59 | 60 | # Materials description 61 | anode: Optional[ElectrodeDescription] = Field(None, description="Name of the anode material", 62 | json_schema_extra=dict( 63 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_b6319c74_d2ce_48c0_a75a_63156776b302" 64 | )) 65 | cathode: Optional[ElectrodeDescription] = Field( 66 | None, description="Name of the cathode material", 67 | json_schema_extra=dict( 68 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_35c650ab_3b23_4938_b312_1b0dede2e6d5" 69 | )) 70 | electrolyte: Optional[ElectrolyteDescription] = Field( 71 | None, description="Name of the electrolyte material", 72 | json_schema_extra=dict( 73 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_fb0d9eef_92af_4628_8814_e065ca255d59" 74 | )) 75 | 76 | # Performance information 77 | nominal_capacity: Optional[float] = Field( 78 | None, description="Rated capacity of the battery", 79 | json_schema_extra=dict( 80 | iri="https://w3id.org/emmo/domain/electrochemistry#electrochemistry_9b3b4668_0795_4a35_9965_2af383497a26", 81 | units='A-hr' 82 | )) 83 | -------------------------------------------------------------------------------- /battdat/schemas/cycling.py:
-------------------------------------------------------------------------------- 1 | """Describing cycling protocol""" 2 | from datetime import date 3 | from typing import Optional 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | 8 | class CyclingProtocol(BaseModel, extra='allow'): 9 | """Test protocol for cell cycling""" 10 | cycler: Optional[str] = Field(None, description='Name of the cycling machine') 11 | start_date: Optional[date] = Field(None, description="Date the initial test on the cell began") 12 | set_temperature: Optional[float] = Field(None, description="Set temperature for the battery testing equipment", 13 | json_schema_extra=dict(units='C')) 14 | schedule: Optional[str] = Field(None, description="Schedule file used for the cycling machine") 15 | -------------------------------------------------------------------------------- /battdat/schemas/eis.py: -------------------------------------------------------------------------------- 1 | """Schemas associated with Electrochemical Impedance Spectroscopy""" 2 | from pandas import DataFrame 3 | import numpy as np 4 | 5 | from .column import ColumnSchema, ColumnInfo, DataType 6 | 7 | 8 | class EISData(ColumnSchema): 9 | """Measurements for a specific EIS test""" 10 | 11 | test_id: ColumnInfo = ColumnInfo(description='Integer used to identify rows belonging to the same experiment.', required=True, type=DataType.INTEGER) 12 | test_time: ColumnInfo = ColumnInfo(description="Time from the beginning of measurements.", units="s", monotonic=True, type=DataType.FLOAT) 13 | time: ColumnInfo = ColumnInfo(description="Time as a UNIX timestamp. Assumed to be in UTC", type=DataType.FLOAT) 14 | frequency: ColumnInfo = ColumnInfo(description="Applied frequency", units="Hz", required=True, type=DataType.FLOAT) 15 | z_real: ColumnInfo = ColumnInfo(description="Real component of impedance", units="Ohm", required=True, type=DataType.FLOAT) 16 | z_imag: ColumnInfo = ColumnInfo(description="Imaginary component of impedance", units="Ohm", required=True, type=DataType.FLOAT) 17 | z_mag: ColumnInfo = ColumnInfo(description="Magnitude of impedance", units="Ohm", required=True, type=DataType.FLOAT) 18 | z_phase: ColumnInfo = ColumnInfo(description="Phase angle of the impedance", units="Degree", required=True, type=DataType.FLOAT) 19 | 20 | def validate_dataframe(self, data: DataFrame, allow_extra_columns: bool = True): 21 | # Check that the schema is supported 22 | super().validate_dataframe(data, allow_extra_columns) 23 | 24 | # Ensure that the cartesian coordinates for the impedance agree with the magnitude 25 | cart = { 26 | 'real': np.multiply(data['z_mag'], np.cos(np.deg2rad(data['z_phase']))), 27 | 'imag': np.multiply(data['z_mag'], np.sin(np.deg2rad(data['z_phase']))) 28 | } 29 | for k, values in cart.items(): 30 | largest_diff = (np.abs(values - data[f'z_{k}']) / np.clip(np.abs(values), a_min=1e-6, a_max=None)).max() 31 | if largest_diff > 0.01: 32 | raise ValueError(f'Polar and cartesian forms of impedance disagree for {k} component.
Largest difference: {largest_diff * 100:.1f}%') 33 | -------------------------------------------------------------------------------- /battdat/schemas/modeling.py: -------------------------------------------------------------------------------- 1 | """Metadata which describes how data produced by models were generated""" 2 | from typing import Optional, List 3 | from enum import Enum 4 | 5 | from pydantic import BaseModel, Field, AnyUrl 6 | 7 | 8 | class ModelTypes(str, Enum): 9 | """Type of computational method""" 10 | 11 | physics = 'physics' 12 | """A computational application that uses a physical model to predict the behaviour of a system, 13 | providing an identifiable analogy with the original object. 14 | 15 | IRI: https://w3id.org/emmo#EMMO_8d4962d7_9608_44f7_a2f1_82a4bb173f4a""" 16 | data = 'data' 17 | """A computational application that uses existing data to predict the behaviour of a system 18 | without providing an identifiable analogy with the original object. 19 | 20 | IRI: https://w3id.org/emmo#EMMO_a4b14b83_9392_4a5f_a2e8_b2b58793f59b""" 21 | 22 | empirical = 'empirical' 23 | """A computational application that uses an empiric equation to predict the behaviour of a system 24 | without relying on the knowledge of the actual physical phenomena occurring in the object. 25 | 26 | IRI: https://w3id.org/emmo#EMMO_67c70dcd_2adf_4e6c_b3f8_f33dd1512487""" 27 | 28 | 29 | class ModelMetadata(BaseModel, extra='allow'): 30 | """Describe the type and version of a computational tool used to generate battery data""" 31 | 32 | # High-level information about the code 33 | name: str = Field(..., description='Name of the software') 34 | version: Optional[str] = Field(None, description='Version of the software if known') 35 | type: Optional[ModelTypes] = Field(None, description='Type of the computational method it implements.') 36 | references: Optional[List[AnyUrl]] = Field(None, description='List of references associated with the software') 37 | 38 | # Details for physics based simulation 39 | models: Optional[List[str]] = Field( 40 | None, description='Type of mathematical model(s) being used in physics simulation. ' 41 | 'Use terms defined in BattINFO, such as "BatteryEquivalentCircuitModel".', 42 | json_schema_extra=dict( 43 | root_iri='https://w3id.org/emmo#EMMO_f7ed665b_c2e1_42bc_889b_6b42ed3a36f0' 44 | )) 45 | simulation_type: Optional[str] = Field( 46 | None, description='Type of simulation being performed.
Use terms defined in BattINFO, such as "TightlyCoupledModelsSimulation"', 47 | json_schema_extra=dict( 48 | root_iri='https://w3id.org/emmo#EMMO_e97af6ec_4371_4bbc_8936_34b76e33302f' 49 | )) 50 | -------------------------------------------------------------------------------- /battdat/schemas/ontology.py: -------------------------------------------------------------------------------- 1 | """Tools used for linking terms in our data format to the BattINFO ontology""" 2 | from dataclasses import dataclass, field 3 | from functools import cache 4 | from typing import Type, List, Optional, Union 5 | 6 | from ontopy import World 7 | from owlready2 import Thing 8 | from pydantic import BaseModel 9 | 10 | _battinfo_url = 'https://raw.githubusercontent.com/emmo-repo/domain-battery/master/battery-inferred.ttl' 11 | 12 | 13 | @cache 14 | def load_battinfo(): 15 | """Download the latest ontology and store it in an in-memory world""" 16 | return World().get_ontology(_battinfo_url).load() 17 | 18 | 19 | @dataclass 20 | class TermInfo: 21 | """Information about a term as referenced from the BattINFO ontology""" 22 | 23 | name: str 24 | """Name of the matching term""" 25 | iri: str = field(repr=False) 26 | """IRI of the term""" 27 | elucidation: Optional[str] = field(repr=False) 28 | """Explanation of the term""" 29 | 30 | @classmethod 31 | def from_thing(cls, thing: Thing): 32 | # Retrieve the description, as provided by EMMO 33 | eluc = thing.get_annotations().get('elucidation') 34 | if eluc is not None: 35 | eluc = str(eluc[0]) 36 | return TermInfo(name=str(thing), iri=thing.iri, elucidation=eluc) 37 | 38 | 39 | def cross_reference_terms(model: Type[BaseModel]) -> dict[str, TermInfo]: 40 | """Gather the descriptions of fields from our schema which 41 | are cross-referenced to a term within the BattINFO/EMMO ontologies 42 | 43 | Args: 44 | model: Schema object to be cross-referenced 45 | Returns: 46 | Mapping from each metadata field to the elucidation of its term in the ontology 47 | """ 48 | 49 | # Load the BattINFO ontology 50 | battinfo = load_battinfo() 51 | 52 | # Loop over each field in the schema 53 | terms = {} 54 | for name, attr in model.model_fields.items(): 55 | # Map to the term in the ontology if known 56 | if attr.json_schema_extra is not None and (iri := attr.json_schema_extra.get('iri')) is not None: 57 | term = battinfo.search_one(iri=iri) 58 | if term is None: 59 | raise ValueError(f'Could not find matching term for {name} with iri={iri}') 60 | terms[name] = TermInfo.from_thing(term) 61 | 62 | return terms 63 | 64 | 65 | def resolve_term(name_or_iri: str) -> Thing: 66 | """Resolve the Term object associated with a string 67 | 68 | Args: 69 | name_or_iri: The preferred label or the IRI of a term in the ontology 70 | Returns: 71 | Thing matching the term 72 | """ 73 | 74 | # Attempt to find it 75 | bi = load_battinfo() 76 | if name_or_iri.startswith('https://'): 77 | term = bi.search_one(iri=name_or_iri) 78 | t = 'IRI' 79 | else: 80 | term = bi.search_one(prefLabel=name_or_iri) 81 | t = 'name' 82 | 83 | if term is None: 84 | raise ValueError(f'Could not find the {t}={name_or_iri}') 85 | return term 86 | 87 | 88 | def gather_descendants(term: Union[Type[Thing], str]) -> List[TermInfo]: 89 | """Get descriptions of the descendants of a certain base type 90 | 91 | Args: 92 | term: Term for which to gather all descendants.
Either the class object itself or its preferred label or IRI 93 | Returns: 94 | List of descriptions of the descendants 95 | """ 96 | 97 | # Resolve the term object, if needed 98 | if isinstance(term, str): 99 | term = resolve_term(term) 100 | 101 | return [ 102 | TermInfo.from_thing(d) for d in term.descendants(include_self=False) 103 | ] 104 | -------------------------------------------------------------------------------- /battdat/streaming/__init__.py: -------------------------------------------------------------------------------- 1 | """Retrieve data in smaller chunks from a large HDF5 file""" 2 | from typing import Union, Iterator, Dict, Collection 3 | from itertools import groupby 4 | from pathlib import Path 5 | 6 | import pandas as pd 7 | from pandas import HDFStore 8 | from tables import File, Table 9 | 10 | from battdat.data import BatteryDataset 11 | from battdat.io.hdf import as_hdf5_object 12 | 13 | RecordType = Dict[str, Union[str, float, int]] 14 | 15 | 16 | def _get_raw_data_iterator_h5(hdf5_path: Union[Path, str, File], key: str) -> Iterator[RecordType]: 17 | """Open an iterator over rows of an HDF5 Table""" 18 | 19 | with as_hdf5_object(hdf5_path) as file: 20 | table: Table = file.get_node(f'/{key}') 21 | names = table.dtype.fields.keys() 22 | for row in table.iterrows(): 23 | out = dict((n, row[n]) for n in names) 24 | yield out 25 | 26 | 27 | def iterate_records_from_file(hdf5_path: Union[Path, str, HDFStore], key: str = 'raw_data') -> Iterator[RecordType]: 28 | """Stream individual records from a file 29 | 30 | Args: 31 | hdf5_path: Path to the data file 32 | key: Which table to read 33 | Yields: 34 | Individual rows from the requested table of the HDF5 file 35 | """ 36 | 37 | yield from _get_raw_data_iterator_h5(hdf5_path, key=key) 38 | 39 | 40 | def iterate_cycles_from_file(hdf5_path: Union[Path, str, HDFStore], 41 | make_dataset: bool = False, 42 | key: str | Collection[str] | None = 'raw_data') -> Iterator[Union[pd.DataFrame, Dict[str, pd.DataFrame], BatteryDataset]]: 43 | """Stream single-cycle datasets from the HDF5 file 44 | 45 | Args: 46 | hdf5_path: Path to the data file 47 | make_dataset: Whether to form a :class:`~battdat.data.BatteryDataset` for each cycle, 48 | including the metadata from the source file. 49 | key: Which table(s) to read. Supply either a single key, a list of keys, or ``None`` to read all tables 50 | 51 | Yields: 52 | All rows belonging to each cycle from the requested table of the HDF5 file. 53 | Generates a ``BatteryDataset`` if ``make_dataset`` is ``True``. 54 | Otherwise, yields a single DataFrame if ``key`` is a single string 55 | or a dictionary of DataFrames if ``key`` is a list.
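For example (assuming ``cell.h5`` is an HDF5 file written by this package)::

    for cycle_df in iterate_cycles_from_file('cell.h5', key='raw_data'):
        print(cycle_df['cycle_number'].iloc[0], len(cycle_df))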
56 | """ 57 | 58 | # Get the metadata out of the file, if needed 59 | metadata = None 60 | if make_dataset or key is None: 61 | metadata, _, schemas = BatteryDataset.inspect_hdf(hdf5_path) 62 | 63 | # Determine the keys to read from the file 64 | single = False 65 | if isinstance(key, str): 66 | single = True 67 | keys = [key] 68 | elif key is None: 69 | keys = list(schemas.keys()) 70 | else: 71 | keys = list(key) 72 | 73 | iterators = [ 74 | groupby(_get_raw_data_iterator_h5(hdf5_path, k), lambda x: x['cycle_number']) for k in keys 75 | ] 76 | 77 | for batch in zip(*iterators): 78 | cycle_ids, chunks = zip(*batch) 79 | if len(set(cycle_ids)) != 1: 80 | raise ValueError(f'Different cycle indices across entries: {" ".join(f"{k}={i}" for k, i in zip(keys, cycle_ids))}') 81 | 82 | # Produce the desired output file 83 | chunks = [pd.DataFrame(chunk) for chunk in chunks] 84 | if single and not make_dataset: 85 | yield chunks[0] 86 | elif make_dataset: 87 | yield BatteryDataset( 88 | metadata=metadata, 89 | schemas=schemas, 90 | tables=dict(zip(keys, chunks)) 91 | ) 92 | else: 93 | yield dict(zip(keys, chunks)) 94 | -------------------------------------------------------------------------------- /battdat/streaming/hdf5.py: -------------------------------------------------------------------------------- 1 | """Streaming tools related to the HDF5 format""" 2 | from typing import Union, Dict, Optional, List 3 | from contextlib import AbstractContextManager 4 | from dataclasses import field, dataclass 5 | from pathlib import Path 6 | import logging 7 | 8 | import numpy as np 9 | import pandas as pd 10 | from tables import File, Table, Filters 11 | 12 | from battdat.io.hdf import write_df_to_table 13 | from battdat.schemas.column import ColumnSchema, RawData 14 | from battdat.schemas import BatteryMetadata 15 | from battdat import __version__ 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class HDF5Writer(AbstractContextManager): 22 | """Tool to write raw time series data to an HDF5 file incrementally 23 | 24 | Writes data to the ``raw_data`` key of a different dataset.""" 25 | 26 | # Attributes defining where and how to write 27 | hdf5_output: Union[Path, str, File] 28 | """File or already-open HDF5 file in which to store data""" 29 | write_mode: str = 'a' 30 | """Mode to use when opening the HDF5 file. Ignored if :attr:`hdf5_output` is a ``File``.""" 31 | metadata: BatteryMetadata = field(default_factory=BatteryMetadata) 32 | """Metadata describing the cell""" 33 | schema: ColumnSchema = field(default_factory=RawData) 34 | """Schema describing columns of the cell""" 35 | complevel: int = 0 36 | """Compression level. Can be between 0 (no compression) and 9 (maximum compression). Ignored if data table already exists""" 37 | complib: str = 'zlib' 38 | """Compression algorithm. Consult :func:`~pandas.read_hdf` for available options. Ignored if data table already exists""" 39 | key: str = '' 40 | """Name of the root group in which to store the data. 
Ignored if :attr:`hdf5_output` is a ``File``.""" 41 | buffer_size: int = 32768 42 | """Number of rows to collect in memory before writing to disk""" 43 | 44 | # State used only while in writing mode 45 | _file: Optional[File] = None 46 | """Handle to an open file""" 47 | _dtype: Optional[np.dtype] = None 48 | """Dtype of records to be written""" 49 | _table: Optional[Table] = None 50 | """Pointer to the table being written""" 51 | _write_buffer: Optional[List[Dict]] = None 52 | """Buffer of rows waiting to be written""" 53 | 54 | def __enter__(self): 55 | self._write_buffer = list() 56 | 57 | # Open the store, if needed 58 | if isinstance(self.hdf5_output, File): 59 | self._file = self.hdf5_output 60 | else: 61 | self._file = File( 62 | self.hdf5_output, 63 | root_uep='/' + self.key, 64 | mode=self.write_mode 65 | ) 66 | 67 | # Write metadata to the store's root's attributes 68 | root = self._file.root 69 | root._v_attrs.metadata = self.metadata.model_dump_json(exclude_none=True) 70 | root._v_attrs.json_schema = self.metadata.model_json_schema() 71 | root._v_attrs.battdat_version = __version__ 72 | 73 | # Get the table if it exists already 74 | if 'raw_data' in root: 75 | self._table = root['raw_data'] 76 | return self 77 | 78 | def __exit__(self, exc_type, exc_val, exc_tb): 79 | if len(self._write_buffer) > 0: # Ensure last rows are written 80 | self.flush() 81 | if not isinstance(self.hdf5_output, File): # Close file if a path was provided 82 | self._file.close() 83 | self._table = self._file = self._write_buffer = None 84 | 85 | def write_row(self, row: Dict[str, Union[str, float, int]]) -> int: 86 | """Add a row to the data file 87 | 88 | Args: 89 | row: Row to be added to the HDF5 file 90 | Returns: 91 | Number of rows written to file. Writes only occur when a write buffer has filled 92 | """ 93 | self._write_buffer.append(row.copy()) 94 | if len(self._write_buffer) >= self.buffer_size: 95 | return self.flush() 96 | return 0 97 | 98 | def flush(self) -> int: 99 | """Write the current row buffer to the file 100 | 101 | Returns: 102 | Number of rows written 103 | """ 104 | 105 | if self._table is None: 106 | # Make the table the first time 107 | filters = Filters(complevel=self.complevel, complib=self.complib) 108 | df = pd.DataFrame(self._write_buffer) 109 | self._table = write_df_to_table(self._file, self._file.root, name='raw_data', filters=filters, df=df) 110 | 111 | # Store the metadata 112 | self._table.attrs.metadata = self.schema.model_dump_json() 113 | self._table.attrs.json_schema = self.schema.model_json_schema() 114 | else: 115 | # Append rows to the "raw_data" key 116 | row = np.empty((1,), dtype=self._table.dtype) 117 | known_names = set(self._table.dtype.names) 118 | for new_row in self._write_buffer: 119 | if (new_keys := set(new_row.keys())) != known_names: 120 | logger.warning(f'Row has different keys than the Table. New keys: {", ".join(new_keys.difference(known_names))}.' 121 | f' Missing: {", ".join(known_names.difference(new_keys))}') 122 | for c in known_names: 123 | row[c] = new_row[c] 124 | self._table.append(row) 125 | 126 | written = len(self._write_buffer) 127 | self._write_buffer.clear() 128 | return written 129 | -------------------------------------------------------------------------------- /battdat/utils.py: -------------------------------------------------------------------------------- 1 | from pandas import DataFrame 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | # TODO (wardlt): Move to post-processing?
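# Illustrative use (assuming ``df`` follows battdat's raw-data column conventions):
#
#     df = drop_cycles(df, digit=2)
#
# which removes rows whose rounded time, voltage, and current duplicate an earlier row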
8 | def drop_cycles(df: DataFrame, digit: int = 2): 9 | """ 10 | Drop duplicate rows from a dataframe. 11 | 12 | Rows are considered duplicates if they meet the following criteria: 13 | the voltage and current must be exactly the same, 14 | and the time must be identical after rounding to ``digit`` digits, 15 | as times can sometimes vary by some epsilon in the Arbin data. 16 | 17 | 18 | Parameters 19 | ---------- 20 | df : Pandas DataFrame 21 | input dataframe 22 | digit : int 23 | number of digits to round to in time index (in seconds) 24 | 25 | Returns 26 | ------- 27 | df : Pandas DataFrame 28 | dataframe without the duplicate rows 29 | 30 | 31 | Examples 32 | -------- 33 | none yet 34 | 35 | """ 36 | 37 | # NOTE: we have already converted time to seconds 38 | 39 | # add rounded time to dataframe 40 | df['TMP'] = df['test_time'] 41 | logger.debug('Removing duplicates from dataframe') 42 | 43 | # round time to specified number of digits 44 | df = df.round({'TMP': digit}) 45 | len1 = len(df) 46 | 47 | # drop points where the rounded time, voltage and current are identical 48 | # keep only first instance 49 | df.drop_duplicates(subset=['TMP', 'voltage', 'current'], keep='first', inplace=True) 50 | 51 | # re-index dataframe with points dropped 52 | df.reset_index(drop=True, inplace=True) 53 | 54 | # calculate number of rows dropped 55 | dropped = len1 - len(df) 56 | logger.debug(f'Dropped {dropped} lines') 57 | 58 | # remove the now-unneeded column 59 | df.drop(columns=['TMP'], inplace=True) 60 | 61 | return df 62 | -------------------------------------------------------------------------------- /battdat/version.py: -------------------------------------------------------------------------------- 1 | # we target 3.8+, so this should be okay without fallback to importlib_metadata 2 | import importlib.metadata 3 | 4 | # single source of truth for package version, 5 | # see https://packaging.python.org/en/latest/single_source_version/ 6 | 7 | __version__ = importlib.metadata.version('battery-data-toolkit') 8 | -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | # Development Files 2 | 3 | Files useful for setting up a development environment 4 | -------------------------------------------------------------------------------- /dev/environment.yml: -------------------------------------------------------------------------------- 1 | # Conda environment file 2 | name: batdata 3 | channels: 4 | - defaults 5 | dependencies: 6 | - python==3.10.* 7 | - pandoc # Needed for building the documentation 8 | 9 | # For now, use Pip for everything major 10 | - pip 11 | - pip: 12 | # Install jupyter 13 | - jupyterlab 14 | - matplotlib 15 | - -e ..[test,docs] 16 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option.
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | cd user-guide/schemas; python export-schemas.py 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | Built using Sphinx and the pydata style 4 | 5 | Compile the documentation using Make 6 | ```commandline 7 | make html 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'Battery Data Toolkit' 10 | copyright = '2024' 11 | author = 'ROVI Team' 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = ['nbsphinx'] 17 | 18 | templates_path = ['_templates'] 19 | exclude_patterns = ['_build'] 20 | 21 | 22 | # -- Options for HTML output ------------------------------------------------- 23 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 24 | 25 | html_theme = 'pydata_sphinx_theme' 26 | html_static_path = ['_static'] 27 | html_theme_options = { 28 | "logo": { 29 | "text": "BattData", 30 | "image_light": "_static/logo.png", 31 | "image_dark": "_static/logo.png", 32 | } 33 | } 34 | html_logo = '_static/logo.png' 35 | 36 | 37 | # -- Options for NBSphinx ----------------------------------------------------- 38 | 39 | nbsphinx_execute = 'never' 40 | 41 | # -- API Documentation -------------------------------------------------------- 42 | 43 | extensions.extend([ 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.autosummary', 46 | 'sphinx.ext.intersphinx', 47 | 'sphinx.ext.napoleon', 48 | 'sphinxcontrib.autodoc_pydantic', 49 | 'sphinx_design' 50 | ]) 51 | 52 | autodoc_pydantic_model_show_json = False 53 | autodoc_pydantic_settings_show_json = False 54 | 55 | autoclass_content = 'both' 56 | 57 | intersphinx_mapping = { 58 | 'python': ('https://docs.python.org/3/', None), 59 | 'pandas': ('https://pandas.pydata.org/docs/', None), 60 | 'pyarrow': ('https://arrow.apache.org/docs/', None), 61 | 'h5py': ('https://docs.h5py.org/en/stable/', None), 62 | 'tables': ('https://www.pytables.org/', None) 63 | } 64 | -------------------------------------------------------------------------------- /docs/getting-started.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | Battery-Data-Toolkit is a Python toolkit for storing and manipulating data from battery systems. 
5 | Most operations are based on `Pandas `_ to simplify using 6 | common data-science libraries for battery science. 7 | 8 | Installation 9 | ------------ 10 | 11 | Battery Data Toolkit is available on PyPI and is pure Python. 12 | Installing via Pip will work on most systems: 13 | 14 | .. code-block:: shell 15 | 16 | pip install battery-data-toolkit 17 | 18 | Build the toolkit for development by cloning the repository 19 | then installing with the "test" and "docs" optional packages: 20 | 21 | .. code-block:: shell 22 | 23 | git clone git@github.com:ROVI-org/battery-data-toolkit.git 24 | cd battery-data-toolkit 25 | pip install -e .[test,docs] 26 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Battery Data Toolkit 2 | ==================== 3 | 4 | The battery-data-toolkit, ``battdat``, creates consistently-formatted collections of battery data. 5 | The library has three main purposes: 6 | 7 | 1. *Storing battery data in standardized formats.* ``battdat`` stores data in 8 | `high-performance file formats <./user-guide/formats.html>`_ and includes 9 | `extensive metadata <./user-guide/schemas/index.html>`_ alongside data. 10 | 2. *Interfacing battery data with the PyData ecosystem*. The core data model, 11 | `BatteryDataset <./user-guide/dataset.html>`_, 12 | is built atop Pandas DataFrames. 13 | 3. *Providing standard implementations of common analysis techniques*. ``battdat`` implements functions which 14 | `ensure quality <./user-guide/consistency/index.html>`_ 15 | or `perform common analyses <./user-guide/post-processing/index.html>`_. 16 | 17 | Source code: https://github.com/ROVI-org/battery-data-toolkit 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | :caption: Contents: 22 | 23 | getting-started 24 | user-guide/index 25 | source/modules 26 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | cd user-guide/schemas 29 | python export-schemas.py 30 | cd ../.. 
31 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 32 | goto end 33 | 34 | :help 35 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 36 | 37 | :end 38 | popd 39 | -------------------------------------------------------------------------------- /docs/pptx-files/logo.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/docs/pptx-files/logo.pptx -------------------------------------------------------------------------------- /docs/source/consistency.rst: -------------------------------------------------------------------------------- 1 | Error Checking (``battdat.consistency``) 2 | ======================================== 3 | 4 | .. automodule:: battdat.consistency 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | Base (``b.consistency.base``) 11 | ----------------------------- 12 | 13 | .. automodule:: battdat.consistency.base 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | Current (``b.consistency.current``) 19 | ------------------------------------ 20 | 21 | .. automodule:: battdat.consistency.current 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | -------------------------------------------------------------------------------- /docs/source/data.rst: -------------------------------------------------------------------------------- 1 | Dataset (``battdat.data``) 2 | ========================== 3 | 4 | .. automodule:: battdat.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/io.rst: -------------------------------------------------------------------------------- 1 | Extractors (``battdat.io``) 2 | =========================== 3 | 4 | .. automodule:: battdat.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Base Classes (``b.io.base``) 10 | ---------------------------- 11 | 12 | .. automodule:: battdat.io.base 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Arbin (``b.io.arbin``) 18 | ---------------------- 19 | 20 | .. automodule:: battdat.io.arbin 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Battery Archive (``b.io.ba``) 26 | ----------------------------- 27 | 28 | .. automodule:: battdat.io.ba 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | 34 | Battery Data Hub (``b.io.batterydata``) 35 | --------------------------------------- 36 | 37 | .. automodule:: battdat.io.batterydata 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | 43 | HDF5 (``b.io.hdf``) 44 | ------------------- 45 | 46 | .. automodule:: battdat.io.hdf 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | 52 | MACCOR (``b.io.maccor``) 53 | ------------------------ 54 | 55 | .. automodule:: battdat.io.maccor 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | 61 | Parquet (``b.io.parquet``) 62 | -------------------------- 63 | 64 | .. automodule:: battdat.io.parquet 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | battdat API 2 | =========== 3 | 4 | API documentation for each module 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | :caption: Contents: 9 | 10 | data 11 | schemas 12 | io 13 | postprocess 14 | consistency 15 | streaming 16 | -------------------------------------------------------------------------------- /docs/source/postprocess.rst: -------------------------------------------------------------------------------- 1 | Postprocessing (``battdat.postprocess``) 2 | ======================================== 3 | 4 | .. automodule:: battdat.postprocess 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | Base (``b.postprocess.base``) 11 | ----------------------------- 12 | 13 | .. automodule:: battdat.postprocess.base 14 | :members: 15 | :undoc-members: 16 | :show-inheritance: 17 | 18 | Integral (``b.postprocess.integral``) 19 | ------------------------------------- 20 | 21 | .. automodule:: battdat.postprocess.integral 22 | :members: 23 | :undoc-members: 24 | :show-inheritance: 25 | 26 | Tagging (``b.postprocess.tagging``) 27 | ----------------------------------- 28 | 29 | .. automodule:: battdat.postprocess.tagging 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | Timing (``b.postprocess.timing``) 35 | ---------------------------------- 36 | 37 | .. automodule:: battdat.postprocess.timing 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | -------------------------------------------------------------------------------- /docs/source/schemas.rst: -------------------------------------------------------------------------------- 1 | Schemas (``battdat.schemas``) 2 | ============================= 3 | 4 | .. automodule:: battdat.schemas 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Battery Description (``b.schemas.battery``) 10 | ------------------------------------------- 11 | 12 | .. automodule:: battdat.schemas.battery 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | Metadata: Computation (``b.schemas.modeling``) 18 | ---------------------------------------------- 19 | 20 | .. automodule:: battdat.schemas.modeling 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | Metadata: Cycling Protocol (``b.schemas.cycling``) 26 | -------------------------------------------------- 27 | 28 | .. automodule:: battdat.schemas.cycling 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | 34 | Data: Time Series (``b.schemas.column``) 35 | ---------------------------------------- 36 | 37 | .. automodule:: battdat.schemas.column 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | Data: EIS (``b.schemas.eis``) 43 | -------------------------------- 44 | 45 | .. automodule:: battdat.schemas.eis 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | Utility: Ontologies (``b.schemas.ontology``) 51 | -------------------------------------------- 52 | 53 | .. automodule:: battdat.schemas.ontology 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /docs/source/streaming.rst: -------------------------------------------------------------------------------- 1 | Streaming (``battdat.streaming``) 2 | ================================= 3 | 4 | .. automodule:: battdat.streaming 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | HDF5 Streaming (``b.streaming.hdf5``) 10 | ------------------------------------- 11 | 12 | .. 
automodule:: battdat.streaming.hdf5 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | -------------------------------------------------------------------------------- /docs/user-guide/consistency/index.rst: -------------------------------------------------------------------------------- 1 | Consistency Checks 2 | ================== 3 | 4 | Many problems, such as sign convention mishaps or unit conversion issues, can be detected from inconsistencies between 5 | or within columns in a dataset. 6 | The :mod:`battdat.consistency` module provides algorithms that check whether there may be problems within a battery dataset. 7 | 8 | All algorithms are based on :class:`~battdat.consistency.base.ConsistencyChecker`, 9 | which creates a list of warnings given a dataset. 10 | 11 | .. code-block:: python 12 | 13 | computer = ConsistencyChecker() 14 | warnings = computer.check(data) 15 | 16 | if len(warnings) > 0: 17 | print(f'There are {len(warnings)} warnings, which includes: {warnings[0]}') 18 | 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: Available consistency checks: 23 | 24 | check-sign-convention 25 | 26 | -------------------------------------------------------------------------------- /docs/user-guide/dataset.rst: -------------------------------------------------------------------------------- 1 | The `BatteryDataset` Object 2 | =========================== 3 | 4 | The :class:`~battdat.data.BatteryDataset` object is the central object for the battery data toolkit. 5 | Extractors render vendor-specific data into the `BatteryDataset`, 6 | schemas describe its contents, 7 | and post-processing codes manipulate its datasets. 8 | 9 | 10 | Structure of a ``BatteryDataset`` 11 | --------------------------------- 12 | 13 | The :class:`~battdat.data.BatteryDataset` holds all information about a battery system together in the same Python object. 14 | Every dataset holds three attributes: 15 | 16 | #. :attr:`~battdat.data.BatteryDataset.metadata`: Information describing the source of the data 17 | (see `Source Metadata `_) 18 | #. :attr:`~battdat.data.BatteryDataset.tables`: A named collection of data tables as Pandas :class:`~pandas.DataFrame`. 19 | #. :attr:`~battdat.data.BatteryDataset.schemas`: Descriptions of the columns in each data table 20 | (see `Column Schema `_) 21 | 22 | The types of tables held in each dataset depend on the type of battery. 23 | Datasets describing a single cell may only include a single time series of the measurements, 24 | whereas a dataset describing an entire system may have time series for each cell in each module 25 | and those for multiple power conversion systems. 26 | 27 | Access the data tables within the dataset by indexing the dataset: 28 | 29 | .. code-block:: python 30 | 31 | dataset = BatteryDataset.from_hdf('example.h5') 32 | 33 | # These two ways for accessing a table are equivalent 34 | df = dataset['raw_data'] 35 | df = dataset.tables['raw_data'] 36 | df['voltage'].max() # Compute the maximum voltage 37 | 38 | 39 | Creating a ``BatteryDataset`` 40 | ----------------------------- 41 | 42 | Load data from another file format using battdat's `dataset readers `_. 43 | If there is no available reader, 44 | build one by passing a collection of tables as :class:`~pandas.DataFrame` and their schemas along with the metadata to the constructor. 45 | Once assembled, all component tables will be saved and loaded together. 46 | 47 | .. 
code-block:: python 48 | 49 | from battdat.schemas import BatteryMetadata 50 | from battdat.schemas.column import RawData 51 | from battdat.data import BatteryDataset 52 | 53 | metadata = BatteryMetadata(name='2_cell_module') 54 | col_schema = RawData() # Use the same schema for both tables 55 | dataset = BatteryDataset( 56 | data={'cell_1': cell1_df, 'cell_2': cell2_df}, 57 | schemas={'cell_1': col_schema, 'cell_2': col_schema}, 58 | metadata=metadata 59 | ) 60 | 61 | Columns of the dataframes can be any `NumPy data type `_ 62 | except timedeltas (m), timestamps (M), or voids (v). 63 | Battery data toolkit does not yet support storing these types in HDF5 or Parquet formats. 64 | Columns where all values are arrays of the same size are also supported. 65 | 66 | Check that your data and metadata agree using the :meth:`~battdat.data.BatteryDataset.validate` method. 67 | 68 | .. code-block:: python 69 | 70 | dataset.validate() 71 | 72 | The validate function will raise errors if the tables do not match the column schema 73 | and will return names of columns without descriptions, if desired. 74 | 75 | Factory Methods 76 | +++++++++++++++ 77 | 78 | :class:`~battdat.data.BatteryDataset` contains factory methods that build datasets from 79 | tables with pre-defined names and schemas. 80 | All are named ``make_*_dataset``. 81 | 82 | For example, :meth:`~battdat.data.BatteryDataset.make_cell_dataset` creates a dataset 83 | which represents a single-cell battery. 84 | 85 | .. code-block:: python 86 | 87 | from battdat.data import BatteryDataset 88 | 89 | dataset = BatteryDataset.make_cell_dataset(raw_data=df) 90 | 91 | Each table will be associated with a default schema. 92 | Describe columns not yet present in the schema by adding them after assembly: 93 | 94 | .. code-block:: python 95 | 96 | from battdat.schemas.column import ColumnInfo 97 | dataset.schemas['raw_data'].add_column( 98 | name='new_col', 99 | description='Information not already included in RawData', 100 | units='ohm', 101 | ) 102 | 103 | The current factory methods are: 104 | 105 | .. _type-table: 106 | 107 | .. list-table:: 108 | :header-rows: 1 109 | 110 | * - Method 111 | - Description 112 | * - :meth:`~battdat.data.BatteryDataset.make_cell_dataset` 113 | - Single battery cell with measurements of voltage, current, and other data at specific times 114 | or averaged over entire cycles. Tables (and their schemas) include: 115 | 116 | - ``raw_data`` (`RawData `_): Measurements of system state at specific points in time. 117 | - ``cycle_stats`` (`CycleLevelData `_): Descriptive statistics about state over entire cycles. 118 | - ``eis_data`` (`EISData `_): EIS measurements at different frequencies, over time. 119 | 120 | Loading and Saving 121 | ------------------ 122 | 123 | The battery data and metadata can be saved in a few different styles, each with different advantages. 124 | 125 | Functions to save are named ``to_[format]`` and 126 | functions for loading data are named ``from_[format]``. 127 | 128 | See the `formats `_ documentation page for more detail. 129 | 130 | Loading functions load the entire dataset. See `streaming `_ for 131 | how to load large datasets incrementally. 
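For example, a minimal save-and-reload round trip looks like the following sketch (the file name here is hypothetical):

.. code-block:: python

    # Save the assembled dataset to HDF5, then read it back (hypothetical path)
    dataset.to_hdf('module.h5')
    dataset = BatteryDataset.from_hdf('module.h5')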
132 | -------------------------------------------------------------------------------- /docs/user-guide/formats.rst: -------------------------------------------------------------------------------- 1 | File Formats 2 | ============ 3 | 4 | The battery data toolkit stores data and metadata in two formats: 5 | 6 | - *HDF5*: A format for saving all available information about a battery into a single file 7 | - *Parquet*: A format optimized for storing column data, but requires saving separate files for each type of data (cycle vs raw) 8 | 9 | .. contents:: 10 | :local: 11 | :depth: 1 12 | 13 | :class:`~battdat.data.BatteryDataset` objects support reading and writing these formats via ``to_[format]`` and ``from_[format]`` 14 | methods, such as :meth:`~battdat.data.BatteryDataset.to_hdf` and :meth:`~battdat.data.BatteryDataset.from_parquet`. 15 | 16 | .. _hdf5: 17 | 18 | HDF5 19 | ---- 20 | 21 | The `HDF5 format `_ stores array data as a nested series of dictionaries. 22 | ``battdat`` stores each type of data known about a battery in separate groups 23 | and the metadata for the battery as attributes of the root group. 24 | 25 | .. code-block:: python 26 | 27 | from tables import File 28 | import json 29 | 30 | with File('example.h5') as f: 31 | metadata = json.loads(f.root._v_attrs['metadata']) # Data describing the cell and how it was tested 32 | version = json.loads(f.root._v_attrs['battdat_version']) # BattDat version used to save dataset 33 | raw_data = f.root['raw_data'] # HDF5 group holding raw data 34 | schema = raw_data._v_attrs['metadata'] # Description of each column 35 | 36 | The internal structure of each group (e.g., ``f['raw_data']``) is that of 37 | the `PyTables Table format `_: 38 | a one-dimensional chunked array with a compound data type. 39 | 40 | .. dropdown:: HDF5 content 41 | 42 | .. code-block:: 43 | 44 | $ h5ls -rv single-resistor-complex-charge_from-discharged.hdf 45 | Opened ".\single-resistor-complex-charge_from-discharged.hdf" with sec2 driver. 
46 | / Group 47 | Attribute: CLASS scalar 48 | Type: 5-byte null-terminated UTF-8 string 49 | Attribute: PYTABLES_FORMAT_VERSION scalar 50 | Type: 3-byte null-terminated UTF-8 string 51 | Attribute: TITLE null 52 | Type: 1-byte null-terminated UTF-8 string 53 | Attribute: VERSION scalar 54 | Type: 3-byte null-terminated UTF-8 string 55 | Attribute: battdat_version scalar 56 | Type: 5-byte null-terminated UTF-8 string 57 | Attribute: json_schema scalar 58 | Type: 8816-byte null-terminated ASCII string 59 | Attribute: metadata scalar 60 | Type: 242-byte null-terminated UTF-8 string 61 | Location: 1:96 62 | Links: 1 63 | /raw_data Dataset {3701/Inf} 64 | Attribute: CLASS scalar 65 | Type: 5-byte null-terminated UTF-8 string 66 | Attribute: FIELD_0_FILL scalar 67 | Type: native double 68 | Attribute: FIELD_0_NAME scalar 69 | Type: 9-byte null-terminated UTF-8 string 70 | Attribute: FIELD_1_FILL scalar 71 | Type: native double 72 | Attribute: FIELD_1_NAME scalar 73 | Type: 7-byte null-terminated UTF-8 string 74 | Attribute: FIELD_2_FILL scalar 75 | Type: native double 76 | Attribute: FIELD_2_NAME scalar 77 | Type: 7-byte null-terminated UTF-8 string 78 | Attribute: FIELD_3_FILL scalar 79 | Type: native long long 80 | Attribute: FIELD_3_NAME scalar 81 | Type: 12-byte null-terminated UTF-8 string 82 | Attribute: NROWS scalar 83 | Type: native long long 84 | Attribute: TITLE null 85 | Type: 1-byte null-terminated UTF-8 string 86 | Attribute: VERSION scalar 87 | Type: 3-byte null-terminated UTF-8 string 88 | Attribute: json_schema scalar 89 | Type: 2824-byte null-terminated UTF-8 string 90 | Attribute: metadata scalar 91 | Type: 2824-byte null-terminated UTF-8 string 92 | Location: 1:10240 93 | Links: 1 94 | Chunks: {2048} 65536 bytes 95 | Storage: 118432 logical bytes, 6670 allocated bytes, 1775.59% utilization 96 | Filter-0: shuffle-2 OPT {32} 97 | Filter-1: deflate-1 OPT {9} 98 | Type: struct { 99 | "test_time" +0 native double 100 | "current" +8 native double 101 | "voltage" +16 native double 102 | "cycle_number" +24 native long long 103 | } 32 bytes 104 | 105 | Multiple Batteries per File 106 | +++++++++++++++++++++++++++ 107 | 108 | Data from multiple batteries can share a single HDF5 file as long as they share the same metadata. 109 | 110 | Add multiple batteries into an HDF5 file by providing a "prefix" to name each cell. 111 | 112 | .. code-block:: python 113 | 114 | test_a.to_hdf('test.h5', prefix='a') 115 | test_b.to_hdf('test.h5', prefix='b', overwrite=False) # overwrite=False is required when adding to an existing file 116 | 117 | 118 | Load a specific cell by providing a specific prefix on load 119 | 120 | .. code-block:: python 121 | 122 | test_a = BatteryDataset.from_hdf('test.h5', prefix='a') 123 | 124 | 125 | or load any of the included cells by providing an index 126 | 127 | .. code-block:: python 128 | 129 | test_a = BatteryDataset.from_hdf('test.h5', prefix=0) 130 | 131 | Load all cells by iterating over them: 132 | 133 | .. code-block:: python 134 | 135 | for name, cell in BatteryDataset.all_cells_from_hdf('test.h5'): 136 | do_some_processing(cell) 137 | 138 | 139 | Appending to an Existing File 140 | ++++++++++++++++++++++++++++++ 141 | 142 | The :class:`~battdat.io.hdf.HDF5Writer` class facilitates adding to existing datasets. 143 | Start by creating the writer with the desired compression settings: 144 | 145 | .. 
code-block:: python 146 | 147 | from battdat.io.hdf import HDF5Writer 148 | 149 | writer = HDF5Writer(complevel=9) 150 | 151 | Add a new table to an existing dataset with :meth:`~battdat.io.hdf.HDF5Writer.add_table`, 152 | which requires the name of a dataset and a `column schema `_. 153 | 154 | .. code-block:: python 155 | 156 | import pandas as pd 157 | import tables 158 | from battdat.schemas.column import ColumnSchema 159 | 160 | # Make dataset and column 161 | df = pd.DataFrame({'a': [1., 0.]}) 162 | schema = ColumnSchema() 163 | schema.add_column('a', 'A column') 164 | 165 | with tables.open_file('example.h5', mode='a') as file: 166 | writer.add_table(file, 'example_table', df, schema) 167 | 168 | Add data to an existing table with :meth:`~battdat.io.hdf.HDF5Writer.append_to_table` 169 | 170 | .. code-block:: python 171 | 172 | with tables.open_file('example.h5', mode='a') as file: 173 | writer.append_to_table(file, 'example_table', df) 174 | 175 | The appended data must match the existing table's columns exactly. 176 | Any compression settings or metadata from the existing table will be re-used. 177 | 178 | Parquet 179 | ------- 180 | 181 | The `Apache Parquet format `_ is designed for high-performance I/O of tabular data. 182 | ``battdat`` stores each type of data in a separate file and the metadata in `file-level metadata `_ 183 | of each file. 184 | 185 | .. code-block:: python 186 | 187 | from pyarrow import parquet as pq 188 | import json 189 | 190 | # Reading the metadata 191 | file_metadata = pq.read_metadata('raw_data.parquet') # Parquet metadata 192 | metadata = json.loads(file_metadata.metadata[b'battery_metadata']) # For the battery 193 | schema = json.loads(file_metadata.metadata[b'table_metadata']) # For the columns 194 | 195 | # Reading the data 196 | table = pq.read_table('raw_data.parquet') # In pyarrow's native Table format 197 | df = table.to_pandas() # As a dataframe 198 | 199 | The internal structure of a Parquet file saved by ``battdat`` has column names and data types which match those provided when saving the file. 200 | Any numeric types will be the same format (e.g., ``float32`` vs ``float64``) 201 | and times are stored as floating point numbers, rather than Parquet's time format. 202 | -------------------------------------------------------------------------------- /docs/user-guide/index.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | Start to learn the battery-data-toolkit by understanding the 5 | :class:`~battdat.data.BatteryDataset` object. 6 | Either continue with schemas if planning to make a new dataset, 7 | or post-processing if using already-existing data. 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | dataset 14 | io 15 | formats 16 | schemas/index 17 | post-processing/index 18 | consistency/index 19 | streaming 20 | -------------------------------------------------------------------------------- /docs/user-guide/io.rst: -------------------------------------------------------------------------------- 1 | Reading and Writing Datasets 2 | ============================ 3 | 4 | The :mod:`battdat.io` module provides tools to read and write :class:`~battdat.data.BatteryDataset` objects. 5 | 6 | .. 
list-table:: 7 | :align: center 8 | :header-rows: 1 9 | 10 | * - Format 11 | - Module 12 | - Reading 13 | - Writing 14 | * - Arbin 15 | - :mod:`~battdat.io.arbin` 16 | - ✔️ 17 | - ✖️ 18 | * - Battery Archive (https://www.batteryarchive.org) 19 | - :mod:`~battdat.io.ba` 20 | - ✖️ 21 | - ✔️ 22 | * - Battery Data Hub (https://batterydata.energy.gov) 23 | - :mod:`~battdat.io.batterydata` 24 | - ✔️ 25 | - ✖️ 26 | * - `HDF5 `_ 27 | - :mod:`~battdat.io.hdf` 28 | - ✔️ 29 | - ✔️ 30 | * - MACCOR 31 | - :mod:`~battdat.io.maccor` 32 | - ✔️ 33 | - ✖️ 34 | * - `Parquet `_ 35 | - :mod:`~battdat.io.parquet` 36 | - ✔️ 37 | - ✔️ 38 | 39 | 40 | .. note:: 41 | 42 | The parquet and HDF5 formats write to the `battery-data-toolkit file formats `_. 43 | 44 | Reading Data 45 | ------------ 46 | 47 | :class:`~battdat.io.base.DatasetReader` classes provide the ability to create a dataset 48 | through the :meth:`~battdat.io.base.DatasetReader.read_dataset` method. 49 | The inputs to ``read_dataset`` always include a :class:`~battdat.schemas.BatteryMetadata` object 50 | containing information beyond what is available in the files. 51 | 52 | Most :class:`~battdat.io.base.DatasetReader` classes read data from a filesystem and are based on :class:`~battdat.io.base.DatasetFileReader`. 53 | These readers take a list of paths to data files alongside the metadata and also include methods (e.g., :meth:`~battdat.io.base.DatasetFileReader.group`) to 54 | find files: 55 | 56 | .. code-block:: python 57 | 58 | from battdat.io.batterydata import BDReader 59 | 60 | extractor = BDReader(store_all=True) 61 | group = next(extractor.identify_files('./example-path/')) 62 | dataset = extractor.read_dataset(group) 63 | 64 | The :ref:`type of output dataset ` is defined by the :attr:`~battdat.io.base.DatasetFileReader.output_class` attribute. 65 | Most uses of readers do not require modifying this attribute. 66 | 67 | Writing Data 68 | ------------ 69 | 70 | :class:`~battdat.io.base.DatasetWriter` classes write :class:`battdat.data.BatteryDataset` objects into forms usable by other tools. 71 | 72 | For example, the :class:`~battdat.io.ba.BatteryArchiveWriter` converts the metadata into the schema used by `Battery Archive `_ 73 | and writes the data into the preferred format: CSV files no longer than 100k rows. 74 | 75 | 76 | .. code-block:: python 77 | 78 | from battdat.io.ba import BatteryArchiveWriter 79 | exporter = BatteryArchiveWriter() 80 | exporter.export(example_data, './to-upload') 81 | -------------------------------------------------------------------------------- /docs/user-guide/post-processing/figures/explain-capacities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/docs/user-guide/post-processing/figures/explain-capacities.png -------------------------------------------------------------------------------- /docs/user-guide/post-processing/index.rst: -------------------------------------------------------------------------------- 1 | Post-Processing 2 | =============== 3 | 4 | Most sources of battery data provide the voltage and current over time, 5 | but the other properties which are derived from them may be missing. 6 | The battery data toolkit provides "post-processing" classes which 7 | compute these derived data sources. 
8 | 9 | All post-processing tools are based on the :class:`~battdat.postprocess.base.BaseFeatureComputer` class 10 | and, as a result, provide a :meth:`~battdat.postprocess.base.BaseFeatureComputer.compute_features` function that adds 11 | new information to a battery dataset. 12 | Use them by first creating the tool and invoking that method with 13 | a :class:`~battdat.data.BatteryDataset`: 14 | 15 | .. code-block:: python 16 | 17 | computer = FeatureComputer() 18 | new_columns = computer.compute_features(data) 19 | 20 | New columns will be added to a part of the dataset (e.g., the cycle-level statistics) and those new columns 21 | will be returned from the function. 22 | 23 | The feature computers fall into two categories: 24 | 25 | - :class:`~battdat.postprocess.base.RawDataEnhancer`, which adds information to the raw data as a function of time 26 | - :class:`~battdat.postprocess.base.CycleSummarizer`, which summarizes the raw data and adds new columns to the ``cycle_stats`` 27 | 28 | 29 | .. note:: 30 | 31 | Post-processing assumes the table named ``raw_data`` follows the :class:`~battdat.schemas.column.RawData` schema. 32 | 33 | Integral Quantities 34 | ------------------- 35 | 36 | Functions which add columns associated with the accumulated values of data in other columns. 37 | 38 | .. toctree:: 39 | :maxdepth: 1 40 | 41 | cell-capacity 42 | 43 | 44 | Time 45 | ---- 46 | 47 | Compute columns which are derived fields associated with the relative time or timespans of data. 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | 52 | cycle-times 53 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/column-schema.rst: -------------------------------------------------------------------------------- 1 | Column Schemas 2 | ============== 3 | 4 | The contents of each data table available with a dataset are described using a :class:`~battdat.schemas.column.ColumnSchema`. 5 | The schema is a collection of :class:`~battdat.schemas.column.ColumnInfo` objects detailing each column, 6 | which includes 7 | 8 | 1. **Description**: An English description of the contents 9 | 2. **Type**: Type of each record (e.g., integer, string) 10 | 3. **Units**: Units for the values, if applicable 11 | 4. **Required**: Whether the column *must* be present in the table 12 | 5. **Monotonic**: Whether values never decrease between sequential rows 13 | 14 | Using a Column Schema 15 | --------------------- 16 | 17 | The :class:`~battdat.schemas.column.ColumnSchema` stored inside the `HDF5 and Parquet files <../formats.html>`_ 18 | provided by the battery data toolkit is used to describe existing data and to validate new data. 19 | 20 | List the column names with the :attr:`~battdat.schemas.column.ColumnSchema.columns` attribute 21 | and access information for a single column through the get item method: 22 | 23 | .. code-block:: python 24 | 25 | data = BatteryDataset.from_hdf(out_path) 26 | schema = data.schemas['eis_data'] # ColumnSchema for the ``eis_data`` table 27 | print(schema['test_id'].model_dump()) 28 | 29 | The above code prints the data for a specific column. 30 | 31 | .. code-block:: python 32 | 33 | {'required': True, 34 | 'type': , 35 | 'description': 'Integer used to identify rows belonging to the same experiment.', 36 | 'units': None, 37 | 'monotonic': False} 38 | 39 | 40 | Use the :meth:`~battdat.schemas.column.ColumnSchema.validate_dataframe` method to check 41 | whether a dataframe matches the requirements for each column. 
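A minimal sketch of that check, assuming ``data`` was loaded as above (the exact signature of ``validate_dataframe`` may differ):

.. code-block:: python

    # Check a table against its stored schema (sketch; signature assumed)
    schema = data.schemas['eis_data']
    schema.validate_dataframe(data.tables['eis_data'])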
42 | 43 | Pre-defined Schema 44 | ------------------ 45 | 46 | The battery-data-toolkit provides schemas for common types of data (e.g., cycling data for single cells, EIS). 47 | 48 | .. include:: rendered-column-schema.rst 49 | 50 | Defining a New Column Schema 51 | ---------------------------- 52 | 53 | Document a new type of data by either creating a subclass of :class:`~battdat.schemas.column.ColumnSchema` 54 | or adding individual columns to an existing schema. 55 | 56 | .. code-block:: python 57 | 58 | from battdat.schemas.column import RawData, ColumnInfo 59 | 60 | schema = RawData() # Schema for sensor measurements of cell 61 | schema.extra_columns['room_temp'] = ColumnInfo( 62 | description='Temperature of the room as measured by the HVAC system', 63 | units='C', data_type='float', 64 | ) 65 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/export-schemas.py: -------------------------------------------------------------------------------- 1 | """Write schemas to an RST-compatible table format""" 2 | from typing import TextIO, get_args 3 | 4 | from pydantic import BaseModel 5 | 6 | from battdat.schemas.column import RawData, CycleLevelData 7 | from battdat.schemas import BatteryMetadata, BatteryDescription, ModelMetadata, CyclingProtocol 8 | from battdat.schemas.eis import EISData 9 | 10 | print('Exporting column schemas to RST...') 11 | 12 | with open('rendered-column-schema.rst', 'w') as fp: 13 | for data_type in [RawData(), CycleLevelData(), EISData()]: 14 | class_name = data_type.__class__.__name__ 15 | print(f'``{class_name}``\n++{"+" * len(class_name)}++', file=fp) 16 | print(f'\n**Source Object**: :class:`{data_type.__module__}.{class_name}`\n', file=fp) 17 | print(f'\n{data_type.__doc__}\n', file=fp) 18 | 19 | print('.. list-table::', file=fp) 20 | print(' :header-rows: 1\n', file=fp) 21 | print(' * - Column', file=fp) 22 | print(' - Description', file=fp) 23 | print(' - Units', file=fp) 24 | for name, field in data_type.columns.items(): 25 | print(f' * - {name}', file=fp) 26 | print(f' - {field.description}', file=fp) 27 | print(f' - {field.units}', file=fp) 28 | print(file=fp) 29 | 30 | # Export the metadata schemas recursively 31 | print('Exporting metadata formats') 32 | 33 | 34 | def expand_terms(metadata_cls: type[BaseModel], fo: TextIO, recurse: bool): 35 | """Export the data in column format""" 36 | 37 | to_recurse = set() 38 | 39 | class_name = metadata_cls.__name__ 40 | print(f'``{class_name}``\n~~{"~" * len(class_name)}~~', file=fo) 41 | print(f'\n**Source Object**: :class:`{metadata_cls.__module__}.{class_name}`\n', file=fo) 42 | doc_string = "\n".join(map(str.strip, metadata_cls.__doc__.split("\n"))) 43 | print(f'\n{doc_string}\n', file=fo) 44 | 45 | print('.. 
list-table::', file=fo) 46 | print(' :header-rows: 1\n', file=fo) 47 | print(' * - Column', file=fo) 48 | print(' - Type', file=fo) 49 | print(' - Description', file=fo) 50 | print(' - Units', file=fo) 51 | print(' - Definition', file=fo) 52 | for name, field in metadata_cls.model_fields.items(): 53 | print(f' * - {name}', file=fo) 54 | 55 | # Expand the type annotation 56 | is_optional = not field.is_required() 57 | if (subtypes := get_args(field.annotation)) != (): 58 | is_optional = True 59 | print(f' - {", ".join(x.__name__ if isinstance(x, type(object)) else str(x) for x in subtypes if not x == type(None))}', file=fo) 60 | else: 61 | print(f' - {field.annotation.__name__}', file=fo) 62 | 63 | # Prepare to recurse 64 | for cls_type in [field.annotation, *subtypes]: 65 | if isinstance(cls_type, BaseModel.__class__): 66 | to_recurse.add(cls_type) 67 | 68 | print(f' - {"(**Required**) " if not is_optional else ""}{str(field.description)}', file=fo) 69 | 70 | # Print units 71 | if field.json_schema_extra is not None and (units := field.json_schema_extra.get('units')) is not None: 72 | print(f' - {units}', file=fo) 73 | else: 74 | print(' -', file=fo) 75 | 76 | # Print metadata source 77 | if field.json_schema_extra is not None and (iri := field.json_schema_extra.get('iri')) is not None: 78 | assert 'emmo' in iri.lower(), 'Found an IRI that is not from EMMO!?' 79 | print(f' - `EMMO <{iri}>`_', file=fo) 80 | else: 81 | print(' -', file=fo) 82 | 83 | print(file=fo) 84 | 85 | if recurse: 86 | for cls_type in to_recurse: 87 | expand_terms(cls_type, fo, recurse) 88 | 89 | 90 | with open('rendered-metadata-schema.rst', 'w', encoding='utf-8') as fp: 91 | print('High-level Data', file=fp) 92 | print('+++++++++++++++', file=fp) 93 | print('All metadata starts with the :class:`~battdat.schemas.BatteryMetadata` object.\n', file=fp) 94 | 95 | expand_terms(BatteryMetadata, fp, False) 96 | 97 | print('Describing Batteries', file=fp) 98 | print('++++++++++++++++++++', file=fp) 99 | print(':class:`~battdat.schemas.battery.BatteryDescription` and its related classes capture details about the structure of a battery.\n', file=fp) 100 | 101 | expand_terms(BatteryDescription, fp, True) 102 | 103 | print('Simulation Data', file=fp) 104 | print('+++++++++++++++', file=fp) 105 | print(':class:`~battdat.schemas.modeling.ModelMetadata` and its related classes capture details about data produced using computational methods.\n', file=fp) 106 | 107 | expand_terms(ModelMetadata, fp, True) 108 | 109 | print('Cycling Data', file=fp) 110 | print('++++++++++++', file=fp) 111 | print('Annotate how batteries were cycled following protocol description objects.\n', file=fp) 112 | 113 | expand_terms(CyclingProtocol, fp, True) 114 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/index.rst: -------------------------------------------------------------------------------- 1 | Describing Battery Data 2 | ======================= 3 | 4 | The metadata schemas used by ``battdat`` standardize how we describe the source of battery datasets 5 | and annotate what the data are. 6 | Metadata are held as part of the :class:`battdat.data.BatteryDataset` object and saved within the file formats 7 | produced by ``battdat`` to ensure that the provenance of a dataset is kept alongside the actual data. 8 | 9 | Descriptions are defined in two parts: 10 | 11 | 1. **Source Metadata**: Information about a battery dataset applicable to all measurements. 12 | 2. 
**Column Schemas**: Details about a specific table of measurements. 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: Contents: 17 | 18 | source-metadata 19 | column-schema 20 | -------------------------------------------------------------------------------- /docs/user-guide/schemas/source-metadata.rst: -------------------------------------------------------------------------------- 1 | Source Metadata 2 | =============== 3 | 4 | "Source Metadata" captures high-level information about a battery dataset 5 | in the :class:`~battdat.schemas.BatteryMetadata` object. 6 | Information included in ``BatteryMetadata``, in contrast to `Column Schemas `_, is relevant to 7 | all measurements performed on a battery, such as: 8 | 9 | 1. The type of battery (e.g., NMC Li-ion, Pb acid) 10 | 2. The simulation code used, if the data is from a model 11 | 3. How the battery was cycled 12 | 4. The authors of the data and any related publications 13 | 14 | Metadata Structure 15 | ------------------ 16 | 17 | :class:`~battdat.schemas.BatteryMetadata` objects have a hierarchical structure where 18 | each record is composed of a single document that has fields which can correspond 19 | to single values, collections of values, or entire sub-documents. 20 | 21 | Create new metadata through the Python interface by first creating a ``BatteryMetadata`` object. 22 | 23 | .. code-block:: python 24 | 25 | from battdat.schemas import BatteryMetadata 26 | 27 | metadata = BatteryMetadata( 28 | name='test-cell', 29 | ) 30 | 31 | Different types of information are grouped together into subdocuments, 32 | such as details about the battery in :class:`~battdat.schemas.battery.BatteryDescription`: 33 | 34 | .. code-block:: python 35 | 36 | from battdat.schemas.battery import BatteryDescription 37 | from battdat.schemas import BatteryMetadata 38 | 39 | metadata = BatteryMetadata( 40 | name='test-cell', 41 | battery=BatteryDescription( 42 | manufacturer='famous', 43 | nominal_capacity=1., 44 | ) 45 | ) 46 | 47 | :class:`~battdat.schemas.BatteryMetadata` automatically validates inputs 48 | and can convert to and from JSON (thanks to `Pydantic `_!). 49 | 50 | See :mod:`battdat.schemas` for a full accounting of the available fields in our schema. 51 | 52 | .. note:: 53 | 54 | Validation only checks that already-defined fields are specified properly. 55 | Add metadata beyond what is described in battery-data-toolkit as desired. 56 | 57 | Source of Terminology 58 | --------------------- 59 | 60 | The `BattINFO ontology `_ is the core source of terms. 61 | 62 | Fields in the schema whose names correspond to a BattINFO term are marked 63 | with the "IRI" of the field, which points to a website containing the description. 64 | 65 | Fields whose values should be terms from the BattINFO ontology are marked with the root of the terms. 66 | For example, the ``model_type`` field of `ModelMetadata` can be any type of 67 | `MathematicalModel `_. 68 | Look them up using some utilities in ``battdat``. 69 | 70 | .. code-block:: python 71 | 72 | from battdat.schemas.ontology import gather_descendants 73 | 74 | print(gather_descendants('MathematicalModel')) 75 | 76 | 77 | .. note:: 78 | The schema will be a continual work in progress. 79 | Consider adding `an Issue `_ to the GitHub repository 80 | if you find you use a term often enough that it should be part of the schema. 81 | 82 | Metadata Objects 83 | ---------------- 84 | 85 | The battery-data-toolkit expresses the metadata schema using `Pydantic BaseModel objects `_. 86 | 87 | .. 
include:: rendered-metadata-schema.rst -------------------------------------------------------------------------------- /docs/user-guide/streaming.rst: -------------------------------------------------------------------------------- 1 | Streaming Battery Data 2 | ====================== 3 | 4 | Many battery datasets are too large to fit in memory in a single computer at once. 5 | Such data can be read or written incrementally using the streaming module of battery data toolkit, 6 | :mod:`battdat.streaming`. 7 | 8 | Reading Data as a Stream 9 | ------------------------ 10 | 11 | The battery-data-toolkit allows streaming the raw time series data from an :ref:`HDF5 file format `. 12 | 13 | Stream the data either as individual rows or all rows belonging to each cycle 14 | with the :func:`~battdat.streaming.iterate_records_from_file` 15 | or :func:`~battdat.streaming.iterate_cycles_from_file` functions. 16 | 17 | Both functions produce `a Python generator `_ 18 | which retrieves a chunk of data from the HDF5 file incrementally and can be used to retrieve rows individually 19 | 20 | .. code-block:: python 21 | 22 | row_iter = iterate_records_from_file('example.h5') 23 | row = next(row_iter) 24 | do_something_per_timestep(row) 25 | 26 | or as part of a for loop. 27 | 28 | .. code-block:: python 29 | 30 | for cycle in iterate_cycles_from_file('example.h5'): 31 | do_something_per_cycle(cycle) 32 | 33 | Reading full cycles from a file can produce either a single :class:`~pandas.DataFrame` when reading a single table, 34 | a dictionary of ``DataFrames``, or a full :class:`~battdat.data.BatteryDataset` depending on the 35 | options for ``key`` and ``make_dataset``. 36 | 37 | .. code-block:: python 38 | 39 | # Read as a single DataFrame 40 | df = next(iterate_cycles_from_file('example.h5', key='raw_data')) 41 | 42 | # Read multiple tables as a dictionary 43 | dict_of_df = next(iterate_cycles_from_file('example.h5', key=['raw_data', 'cycle_stats'])) 44 | 45 | # Read all tables as a Dataset 46 | dataset = next(iterate_cycles_from_file('example.h5', key=None, make_dataset=True)) 47 | 48 | 49 | Streaming Data to a File 50 | ------------------------ 51 | 52 | Write large datasets into battery-data-toolkit-compatible formats incrementally using the :class:`~battdat.streaming.hdf5.HDF5Writer`. 53 | 54 | Start the writer class by providing the path to the HDF5 file and the metadata to be written, 55 | then opening it via Python's ``with`` syntax. 56 | 57 | .. code-block:: python 58 | 59 | metadata = BatteryMetadata(name='example') 60 | with HDF5Writer('streamed.h5', metadata=metadata) as writer: 61 | for time, current, voltage in data_stream: 62 | writer.write_row({'test_time': time, 'current': current, 'voltage': voltage}) 63 | 64 | The writer only writes to disk after enough rows are collected or the end of a data stream is signaled by exiting the ``with`` block. 65 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Example Notebooks 2 | 3 | Notebooks showing specific features of battdat. 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "battery-data-toolkit" 3 | dynamic = ["version"] 4 | description = "Utilities for reading and manipulating battery testing data" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = { file = 'LICENSE' } 8 | keywords = ["batteries", "science", "data science"] 9 | authors = [ 10 | { name = "Logan Ward", email = "lward@anl.gov" }, 11 | { name = "Noah Paulson", email = "lward@anl.gov" }, 12 | { name = "Joseph Kubal", email = "kubal@anl.gov" }, 13 | ] 14 | dependencies = [ 15 | "pandas > 1.0", 16 | "scipy > 1.3", 17 | "pydantic == 2.*", 18 | "tables > 3.6", 19 | "pyarrow >= 15", 20 | "EMMOntoPy", 21 | "xlrd" 22 | ] 23 | classifiers = [ 24 | "Development Status :: 4 - Beta", 25 | "Intended Audience :: Science/Research", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: OS Independent", 28 | "Topic :: File Formats", 29 | "Topic :: Scientific/Engineering" 30 | ] 31 | 32 | [tool.setuptools.packages.find] 33 | include = ["battdat*"] 34 | 35 | [build-system] 36 | requires = ["setuptools>=64", "setuptools-scm>=8"] 37 | build-backend = "setuptools.build_meta" 38 | 39 | [tool.setuptools_scm] 40 | 41 | [project.optional-dependencies] 42 | test = [ 43 | 'flake8', 44 | 'pytest', 45 | 'pytest-cov' 46 | ] 47 | docs = [ 48 | 'sphinx', 49 | 'pydata-sphinx-theme', 50 | 'nbsphinx', 51 | 'autodoc-pydantic', 52 | 'sphinx-design' 53 | ] 54 | 55 | [project.urls] 56 | repository = "https://github.com/rovi-org/battery-data-toolkit" 57 | 58 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [tool:pytest] 5 | addopts = --ignore=setup.py --cov=battdat 6 | 7 | [flake8] 8 | exclude = .git,*.egg* 9 | max-line-length = 160 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pytest import fixture 4 | 5 | from battdat.data import BatteryMetadata, BatteryDataset 6 | from battdat.postprocess.timing import CycleTimesSummarizer 7 | 8 | 9 | @fixture() 10 | def file_path() -> Path: 11 | """Path to test-related files""" 12 | return Path(__file__).parent / 'files' 13 | 14 | 15 | @fixture() 16 | def example_data(file_path) -> BatteryDataset: 17 | """An example dataset which contains metadata and a few cycles of data""" 18 | 19 | # Load the simple cycling 20 | path = file_path / 'example-data' / 'single-resistor-constant-charge_from-discharged.hdf' 21 | data = BatteryDataset.from_hdf(path) 22 | 23 | # Compute basic cycling states 24 | for stats in [CycleTimesSummarizer()]: 25 | stats.compute_features(data) 26 | 27 | # Give the cell a name, at least 28 | data.metadata = BatteryMetadata(name='test') 29 | return data 30 | -------------------------------------------------------------------------------- /tests/consistency/test_sign.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pytest import fixture 4 | 5 | from battdat.data import BatteryDataset 6 | from battdat.consistency.current import SignConventionChecker 7 | 8 | 9 | @fixture() 10 | def 
example_dataset(): 11 | # Make a rest period followed by a charge where the voltage increases 12 | times = np.linspace(0, 1800, 256) 13 | current = np.zeros_like(times) 14 | current[128:] = 1. 15 | 16 | voltage = np.ones_like(times) 17 | voltage[128:] = np.linspace(1., 1.3, 128) 18 | 19 | return BatteryDataset.make_cell_dataset( 20 | raw_data=pd.DataFrame({ 21 | 'test_time': times, 22 | 'current': current, 23 | 'voltage': voltage 24 | }) 25 | ) 26 | 27 | 28 | def test_sign_checker(example_dataset): 29 | checker = SignConventionChecker() 30 | result = checker.check(example_dataset) 31 | assert len(result) == 0 32 | 33 | # Make sure swapping the sign breaks things 34 | example_dataset.tables['raw_data']['current'] *= -1 35 | result = checker.check(example_dataset) 36 | assert len(result) == 1 37 | -------------------------------------------------------------------------------- /tests/exporters/test_ba.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from datetime import datetime 3 | import json 4 | 5 | import pandas as pd 6 | 7 | from battdat.io.ba import BatteryArchiveWriter 8 | from battdat.schemas import BatteryMetadata, BatteryDescription 9 | from battdat.schemas.battery import ElectrodeDescription 10 | 11 | 12 | def test_export(example_data, tmpdir): 13 | # Add a datetime 14 | raw_data = example_data.tables['raw_data'] 15 | raw_data['time'] = raw_data['test_time'] + datetime(year=2024, month=7, day=1).timestamp() 16 | 17 | # Add some metadata to the file 18 | example_data.metadata = BatteryMetadata( 19 | battery=BatteryDescription( 20 | anode=ElectrodeDescription(name='graphite', supplier='big-one'), 21 | cathode=ElectrodeDescription(name='nmc') 22 | ) 23 | ) 24 | 25 | tmpdir = Path(tmpdir) 26 | tmpdir.mkdir(exist_ok=True) 27 | exporter = BatteryArchiveWriter() 28 | exporter.export(example_data, tmpdir) 29 | 30 | # Make sure the time series loaded correctly 31 | timeseries_path = tmpdir.joinpath('cycle-timeseries-0.csv') 32 | assert timeseries_path.is_file() 33 | timeseries = pd.read_csv(timeseries_path) 34 | assert 'v' in timeseries # Make sure a conversion occurred correctly 35 | assert 'cell_id' in timeseries 36 | assert timeseries['date_time'].iloc[0] == '07/01/2024 00:00:00.000000' 37 | assert timeseries['cycle_index'].iloc[1] == 1 38 | 39 | # Check that metadata was written 40 | metadata = json.loads(tmpdir.joinpath('metadata.json').read_text()) 41 | assert metadata['cathode'] == '{"name":"nmc"}' 42 | 43 | # Make sure the cycle statistics are written 44 | cycle_stats = pd.read_csv(tmpdir.joinpath('cycle-stats.csv')) 45 | assert cycle_stats['cycle_index'].iloc[0] == 1 46 | -------------------------------------------------------------------------------- /tests/files/batteryarchive/CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_cycle_data.csv: -------------------------------------------------------------------------------- 1 | Cycle_Index,Start_Time,End_Time,Test_Time (s),Min_Current (A),Max_Current (A),Min_Voltage (V),Max_Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh) 2 | 1.0,,,12923.434,-0.674,1.124,2.699,4.2,0.604,1.292,2.5,4.758 3 | 2.0,,,29533.288,-0.674,1.141,2.699,4.2,1.291,1.295,5.184,4.781 4 | 3.0,,,46063.491,-0.674,1.145,2.699,4.2,1.296,1.295,5.198,4.785 5 | 4.0,,,62601.426,-0.674,1.13,2.699,4.2,1.293,1.29,5.189,4.757 6 | 5.0,,,79120.977,-0.674,1.148,2.699,4.2,1.289,1.291,5.175,4.764 7 | 6.0,,,95621.926,-0.674,1.137,2.699,4.2,1.291,1.29,5.183,4.761 8 | 
7.0,,,112147.916,-0.674,1.139,2.699,4.2,1.289,1.291,5.175,4.767 9 | 8.0,,,128609.889,-0.674,1.147,2.699,4.2,1.292,1.292,5.182,4.775 10 | 9.0,,,145061.227,-0.674,1.131,2.699,4.2,1.292,1.291,5.18,4.773 11 | -------------------------------------------------------------------------------- /tests/files/batteryarchive/CALCE_CX2-33_prism_LCO_25C_0-100_0.5-0.5C_d_timeseries.csv: -------------------------------------------------------------------------------- 1 | Date_Time,Test_Time (s),Cycle_Index,Current (A),Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Environment_Temperature (C),Cell_Temperature (C) 2 | 2010-09-02 14:35:40,30.009,1.0,0.0,3.843,0.0,0.0,0.0,0.0,, 3 | 2010-09-02 14:36:10,60.025,1.0,0.0,3.844,0.0,0.0,0.0,0.0,, 4 | 2010-09-02 14:36:40,90.04,1.0,0.0,3.843,0.0,0.0,0.0,0.0,, 5 | 2010-09-02 14:37:10,120.008,1.0,0.0,3.844,0.0,0.0,0.0,0.0,, 6 | 2010-09-02 14:37:40,150.024,1.0,0.674,3.963,0.002,0.0,0.01,0.0,, 7 | 2010-09-02 14:38:10,180.039,1.0,0.674,3.98,0.008,0.0,0.033,0.0,, 8 | 2010-09-02 14:38:40,210.055,1.0,0.674,3.993,0.014,0.0,0.055,0.0,, 9 | 2010-09-02 14:39:10,240.069,1.0,0.675,4.003,0.019,0.0,0.078,0.0,, 10 | 2010-09-02 14:39:40,270.084,1.0,0.675,4.011,0.025,0.0,0.1,0.0,, 11 | 2010-09-02 14:40:10,300.1,1.0,0.675,4.016,0.03,0.0,0.123,0.0,, 12 | 2010-09-02 14:40:40,330.115,1.0,0.675,4.02,0.036,0.0,0.146,0.0,, 13 | 2010-09-02 14:41:10,360.129,1.0,0.674,4.024,0.042,0.0,0.168,0.0,, 14 | 2010-09-02 14:41:40,390.13,1.0,0.675,4.027,0.047,0.0,0.191,0.0,, 15 | 2010-09-02 14:42:10,420.145,1.0,0.674,4.03,0.053,0.0,0.213,0.0,, 16 | 2010-09-02 14:42:40,450.16,1.0,0.675,4.032,0.059,0.0,0.236,0.0,, 17 | -------------------------------------------------------------------------------- /tests/files/batterydata/.gitattributes: -------------------------------------------------------------------------------- 1 | *.csv binary 2 | -------------------------------------------------------------------------------- /tests/files/example-data/README.md: -------------------------------------------------------------------------------- 1 | # Example Datasets for Post-Processing 2 | 3 | These examples create example battery-data-toolkit-format data which allow us to test post-processing code on idealized data. 4 | Each notebook produces an HDF5 file when run. 
5 | -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-complex-charge_from-discharged.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-complex-charge_from-discharged.hdf -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-complex-charge_from-discharged/raw_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-complex-charge_from-discharged/raw_data.parquet -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-constant-charge_from-charged.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-constant-charge_from-charged.hdf -------------------------------------------------------------------------------- /tests/files/example-data/single-resistor-constant-charge_from-discharged.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROVI-org/battery-data-toolkit/19961e6bbb2d0cfe0bff9c129144fcf8f3dd3be6/tests/files/example-data/single-resistor-constant-charge_from-discharged.hdf -------------------------------------------------------------------------------- /tests/files/maccor_example.001: -------------------------------------------------------------------------------- 1 | Today's Date 04/04/2016 Date of Test: 03/31/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. 
Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV) 2 | Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time 3 | 1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 03/31/2016 16:05:31 4 | 2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 03/31/2016 16:05:41 5 | 3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 03/31/2016 16:05:51 6 | 4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 03/31/2016 16:06:01 7 | 5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 03/31/2016 16:06:11 8 | 6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 03/31/2016 16:06:21 9 | 7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 03/31/2016 16:06:31 10 | 8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 03/31/2016 16:06:41 11 | -------------------------------------------------------------------------------- /tests/files/maccor_example.002: -------------------------------------------------------------------------------- 1 | Today's Date 04/04/2016 Date of Test: 04/01/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV) 2 | Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time 3 | 1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 16:05:31 4 | 2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:41 5 | 3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:51 6 | 4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:01 7 | 5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:11 8 | 6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:21 9 | 7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:31 10 | 8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 16:06:41 11 | -------------------------------------------------------------------------------- /tests/io/test_arbin.py: -------------------------------------------------------------------------------- 1 | """Tests related to the Arbin parser""" 2 | 3 | from battdat.io.arbin import ArbinReader 4 | 5 | 6 | def test_validation(file_path): 7 | """Make sure the parser generates valid outputs""" 8 | arbin = ArbinReader() 9 | test_file = file_path / 'arbin_example.csv' 10 | data = arbin.read_dataset([test_file]) 11 | data.validate_columns(allow_extra_columns=False) 12 | -------------------------------------------------------------------------------- /tests/io/test_batterydata.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from pytest import fixture 4 | 5 | from battdat.io.batterydata import BDReader, generate_metadata 6 | 7 | example_metadata = {'cell_type': ['Pouch cell'], 8 | 'creator_user_id': 'a853d711-0e37-44c9-80c9-a41d450c2da4', 9 | 'date_dataset_created': '2018-08-16', 10 | 'electrolyte_class_dataset': ['Organic liquid'], 11 | 'id': 'ef9dec93-17a2-445a-b58e-dc3eadb1f79d', 12 | 'isopen': False, 13 | 'manufacturer_supplier': 'CAMP', 14 | 
'maximum_voltage': '4.1', 15 | 'metadata_created': '2024-04-19T21:18:38.938069', 16 | 'metadata_modified': '2024-04-20T00:45:59.866451', 17 | 'minimum_voltage': '3', 18 | 'name': 'xcel-round-2-slpc_reupload_2', 19 | 'negative_electrode': ['Graphite'], 20 | 'nominal_cell_capacity': '0.037', 21 | 'notes': 'Single layer pouch cell from CAMP (2.5mAh/cm2) at various charge protocols (CCCV and Multi-step).', 22 | 'num_resources': 35, 23 | 'num_tags': 9, 24 | 'onec_cell_capacity': '0.032', 25 | 'organization': {'id': '67de8624-a528-43df-9b63-a65a410920bb', 26 | 'name': 'xcel', 27 | 'title': 'XCEL', 28 | 'type': 'project', 29 | 'description': 'XCEL Project ', 30 | 'image_url': '', 31 | 'created': '2023-06-08T17:38:37.007623', 32 | 'is_organization': True, 33 | 'approval_status': 'approved', 34 | 'state': 'active'}, 35 | 'owner_org': '67de8624-a528-43df-9b63-a65a410920bb', 36 | 'poc_email_address': 'Sangwook.Kim@inl.gov', 37 | 'poc_institution': ['INL'], 38 | 'poc_name': 'skim', 39 | 'positive_electrode': ['NMC532'], 40 | 'private': False, 41 | 'reference_electrode': ['No'], 42 | 'separator_class': ['PP polymer'], 43 | 'state': 'active', 44 | 'technology': ['Li-ion'], 45 | 'title': 'XCEL Round 2 SLPC', 46 | 'type': 'dataset', 47 | 'tags': [{'display_name': 'fast charge', 48 | 'id': '04f1dafd-24f0-496e-b263-96038a9da8f8', 49 | 'name': 'fast charge', 50 | 'state': 'active', 51 | 'vocabulary_id': None}]} 52 | 53 | 54 | @fixture() 55 | def test_files(file_path): 56 | return file_path / 'batterydata' 57 | 58 | 59 | def test_detect_then_convert(test_files): 60 | # Find two files 61 | extractor = BDReader(store_all=False) 62 | group = next(extractor.identify_files(test_files)) 63 | assert len(group) == 2 64 | 65 | # Parse them 66 | data = extractor.read_dataset(group) 67 | assert data.metadata.name == 'p492-13' 68 | 69 | # Test a few of columns which require conversion 70 | assert data.raw_data['cycle_number'].max() == 8 71 | first_measurement = datetime.fromtimestamp(data.raw_data['time'].iloc[0]) 72 | assert first_measurement.year == 2020 73 | assert first_measurement.day == 3 74 | 75 | # Ensure it validates 76 | data.validate() 77 | 78 | 79 | def test_store_all(test_files): 80 | """Make sure we get exactly one copy of all columns""" 81 | 82 | # Find two files 83 | extractor = BDReader(store_all=True) 84 | group = next(extractor.identify_files(test_files)) 85 | data = extractor.read_dataset(group) 86 | 87 | # Make sure we only have the renamed `cycle_number` and not original `Cycle_Index` 88 | for df in [data.raw_data, data.cycle_stats]: 89 | assert 'cycle_number' in df.columns 90 | assert 'Cycle_Index' not in df.columns 91 | 92 | # Make sure NREL-specific columns are stored 93 | assert 'datenum_d' in data.cycle_stats.columns 94 | assert 'Charge_Throughput_Ah' in data.raw_data.columns 95 | 96 | 97 | def test_metadata(): 98 | metadata = generate_metadata(example_metadata, ('https://test.url/',)) 99 | assert 'test.url' == metadata.associated_ids[0].host 100 | assert metadata.battery.cathode.name == 'NMC532' 101 | -------------------------------------------------------------------------------- /tests/io/test_cell_consistency.py: -------------------------------------------------------------------------------- 1 | """Run consistency checks for data corresponding to cells""" 2 | from battdat.consistency.current import SignConventionChecker 3 | 4 | from pytest import mark 5 | 6 | from battdat.io.arbin import ArbinReader 7 | from battdat.io.batterydata import BDReader 8 | from battdat.io.hdf import HDF5Reader 9 
| 10 | checkers = [ 11 | SignConventionChecker() 12 | ] 13 | 14 | 15 | @mark.parametrize( 16 | 'reader,example_data', 17 | [(ArbinReader(), ['arbin_example.csv']), 18 | (BDReader(), ['batterydata/p492-13-raw.csv']), 19 | (HDF5Reader(), 'example-data/single-resistor-complex-charge_from-discharged.hdf')] 20 | ) 21 | def test_consistency(reader, example_data, file_path): 22 | dataset = reader.read_dataset( 23 | [file_path / p for p in example_data] if isinstance(example_data, list) else file_path / example_data 24 | ) 25 | for checker in checkers: 26 | warnings = checker.check(dataset) 27 | assert len(warnings) == 0, warnings 28 | -------------------------------------------------------------------------------- /tests/io/test_hdf.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pytest import raises, mark 4 | import numpy as np 5 | import pandas as pd 6 | import tables 7 | 8 | from battdat.data import BatteryDataset 9 | from battdat.io.hdf import make_numpy_dtype_from_pandas, write_df_to_table, read_df_from_table, HDF5Writer, HDF5Reader 10 | from battdat.schemas.column import ColumnSchema 11 | 12 | example_df = pd.DataFrame({'a': [1, 2], 'b': [1., 3.], 'c': ['charge', 'discharge'], 'array': [[[1.]], [[0.]]]}) 13 | 14 | 15 | def test_dtype(): 16 | dtype = make_numpy_dtype_from_pandas(example_df) 17 | assert dtype.names == ('a', 'b', 'c', 'array') 18 | assert dtype['array'].shape == (1, 1) 19 | 20 | 21 | def test_store_df(tmpdir): 22 | with tables.open_file(tmpdir / "example.h5", "w") as file: 23 | group = file.create_group('/', name='base') 24 | table = write_df_to_table(file, group, 'table', example_df) 25 | assert tuple(table[0]) == (1, 1., b'charge', np.ones((1, 1))) 26 | 27 | with tables.open_file(tmpdir / "example.h5", "r") as file: 28 | table = file.get_node('/base/table') 29 | df_copy = read_df_from_table(table) 30 | assert (df_copy.columns == ['a', 'b', 'c', 'array']).all() 31 | assert np.allclose(df_copy['b'], [1., 3.]) 32 | 33 | 34 | def test_read_with_other_tables(tmpdir): 35 | writer = HDF5Writer() 36 | out_file = Path(tmpdir) / 'example.h5' 37 | 38 | # Write the same table through the writer (which puts metadata) and through the basic function (which does not) 39 | with tables.open_file(out_file, mode='w') as file: 40 | dataset = BatteryDataset(tables={'example_table': example_df}, 41 | schemas={'example_table': ColumnSchema()}) 42 | writer.write_to_hdf(dataset, file, None) 43 | write_df_to_table(file, file.root, 'extra_table', example_df) 44 | 45 | # Reading should only yield one table 46 | with tables.open_file(out_file) as file: 47 | dataset = HDF5Reader().read_from_hdf(file, None) 48 | assert set(dataset.tables.keys()) == {'example_table'} 49 | 50 | # Ensure error is raised if the schema is corrupted 51 | with tables.open_file(out_file, mode='a') as file: 52 | table = file.root['example_table'] 53 | for corrupted in ("asdf", '{"a": 1}'): 54 | table._v_attrs['metadata'] = corrupted 55 | with raises(ValueError, match='marked as a battdat dataset but schema fails to read'): 56 | HDF5Reader().read_from_hdf(file, None) 57 | 58 | 59 | @mark.parametrize('prefix', [None, 'a']) 60 | def test_append(tmpdir, prefix): 61 | writer = HDF5Writer() 62 | out_file = Path(tmpdir) / 'example.h5' 63 | 64 | # Write the initial data 65 | with tables.open_file(out_file, mode='w') as file: 66 | if prefix is not None: 67 | file.create_group(file.root, prefix) 68 | 69 | writer.add_table(file, 'example_table', example_df, 
ColumnSchema(), prefix) 70 | 71 | # Append the data again 72 | with tables.open_file(out_file, mode='a') as file: 73 | writer.append_to_table(file, 'example_table', example_df, prefix) 74 | 75 | table = file.get_node('/example_table' if prefix is None else f'/{prefix}/example_table') 76 | df_copy = read_df_from_table(table) 77 | assert len(df_copy) == len(example_df) * 2 78 | assert np.allclose(df_copy['a'], [1, 2, 1, 2]) 79 | assert np.equal(df_copy['c'], ['charge', 'discharge'] * 2).all() 80 | 81 | # Test data check 82 | with raises(ValueError, match='Existing and new'): 83 | writer.append_to_table(file, 'example_table', pd.DataFrame({'a': [1., 2.]}), prefix) 84 | 85 | # Test bad prefix 86 | with raises(ValueError, match='No data available for prefix'): 87 | writer.append_to_table(file, 'example_table', pd.DataFrame({'a': [1., 2.]}), prefix='b') 88 | 89 | 90 | def test_df_missing_strings(tmpdir): 91 | df = pd.DataFrame({'a': [None, 'a', 'bb']}) 92 | assert df.dtypes['a'] == object 93 | with tables.open_file(tmpdir / "example.h5", "w") as file: 94 | group = file.create_group('/', name='base') 95 | table = write_df_to_table(file, group, 'table', df) 96 | assert tuple(table[-1]) == (b'bb',) 97 | 98 | 99 | def test_df_strings(tmpdir): 100 | df = pd.DataFrame({'a': ['ccc', 'a', 'bb']}) 101 | assert df.dtypes['a'] == object 102 | with tables.open_file(tmpdir / "example.h5", "w") as file: 103 | group = file.create_group('/', name='base') 104 | table = write_df_to_table(file, group, 'table', df) 105 | assert tuple(table[-1]) == (b'bb',) 106 | assert tuple(table[0]) == (b'ccc',) 107 | 108 | 109 | def test_df_lists(tmpdir): 110 | df = pd.DataFrame({'a': [[1., 1.], [2., 2.]]}) 111 | assert df.dtypes['a'] == object 112 | with tables.open_file(tmpdir / "example.h5", "w") as file: 113 | group = file.create_group('/', name='base') 114 | table = write_df_to_table(file, group, 'table', df) 115 | assert np.array_equal(table[-1]['a'], [2., 2.]) 116 | -------------------------------------------------------------------------------- /tests/io/test_maccor.py: -------------------------------------------------------------------------------- 1 | """Tests related to the MACCOR parser""" 2 | from datetime import datetime 3 | from pytest import fixture, raises 4 | 5 | from battdat.io.maccor import MACCORReader 6 | 7 | 8 | @fixture() 9 | def test_file(file_path): 10 | return file_path / 'maccor_example.001' 11 | 12 | 13 | @fixture() 14 | def extractor(): 15 | return MACCORReader() 16 | 17 | 18 | def test_validation(extractor, test_file): 19 | """Make sure the parser generates valid outputs""" 20 | data = extractor.read_dataset([test_file]) 21 | data.validate_columns(allow_extra_columns=False) 22 | 23 | 24 | def test_grouping(extractor, tmp_path): 25 | # Make a file structure with two sets of experiments and a nonsense file 26 | for f in ['README', 'testA.002', 'testA.001', 'testB.001']: 27 | (tmp_path / f).write_text('junk') 28 | 29 | # Test the grouping 30 | groups = list(extractor.identify_files(tmp_path)) 31 | assert len(groups) == 2 32 | assert (str(tmp_path / 'testA.001'), str(tmp_path / 'testA.002')) in groups 33 | assert (str(tmp_path / 'testB.001'),) in groups 34 | 35 | 36 | def test_date_check(extractor, test_file): 37 | files = [test_file, test_file.with_suffix('.002')] 38 | data = extractor.read_dataset(files) 39 | data.validate() 40 | assert data.raw_data['file_number'].max() == 1 41 | 42 | with raises(ValueError, match='not in the correct order'): 43 | extractor.read_dataset(files[::-1]) 44 | 45 | 46 | 
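# The two MACCOR fixtures above cover both timestamp layouts: maccor_example.001
# records a full date and time per row ("03/31/2016 16:05:31"), while
# maccor_example.002 records only the time of day, so the date presumably has to
# be recovered from the file's "Date of Test" header. The checks below confirm
# both layouts yield sensible datetimes and that `ignore_time` drops the column.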
def test_time_parser(extractor, test_file): 47 | # With date and time in the time column 48 | df = extractor.read_file(test_file) 49 | assert datetime.fromtimestamp(df['time'].iloc[0]).month == 3 50 | 51 | # With only the time in the time column 52 | df = extractor.read_file(test_file.with_suffix('.002')) 53 | assert datetime.fromtimestamp(df['time'].iloc[0]).month == 4 54 | 55 | # Ignoring datetime 56 | extractor.ignore_time = True 57 | df = extractor.read_file(test_file) 58 | assert 'time' not in df.columns 59 | -------------------------------------------------------------------------------- /tests/postprocess/test_integral.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pandas as pd 4 | from pytest import mark 5 | import numpy as np 6 | 7 | from battdat.data import BatteryDataset 8 | from battdat.io.batterydata import BDReader 9 | from battdat.postprocess.integral import CapacityPerCycle, StateOfCharge 10 | 11 | 12 | def get_example_data(file_path: Path, from_charged: bool) -> BatteryDataset: 13 | ex_file = file_path / 'example-data' / f'single-resistor-constant-charge_from-{"" if from_charged else "dis"}charged.hdf' 14 | return BatteryDataset.from_hdf(ex_file) 15 | 16 | 17 | def test_short_cycles(): 18 | """Make sure cycles that are too short for capacity measurements do not cause errors""" 19 | 20 | example_data = BatteryDataset.make_cell_dataset( 21 | raw_data=pd.DataFrame({'time': range(2), 'current': [1.] * 2, 'voltage': [2.] * 2, 'cycle_number': [0] * 2}) 22 | ) 23 | CapacityPerCycle().compute_features(example_data) 24 | assert np.isnan(example_data.tables['cycle_stats']['capacity_charge']).all() 25 | 26 | 27 | @mark.parametrize('from_charged', [True, False]) 28 | def test_cycle_stats(file_path, from_charged): 29 | example_data = get_example_data(file_path, from_charged) 30 | feat = CapacityPerCycle().compute_features(example_data) 31 | assert np.isclose([0], feat['cycle_number']).all() 32 | 33 | # Capacity is 1 A-hr 34 | assert np.isclose([1.0], feat['capacity_discharge'], rtol=1e-2).all() 35 | assert np.isclose([1.0], feat['capacity_charge'], rtol=1e-2).all() 36 | 37 | # Energy to charge is (2.1 V + 3.1 V) / 2 * 1 A * 3600 s = 9360 J 38 | # Energy produced during discharge is (1.9 V + 2.9 V) / 2 * 1 A * 3600 s = 8640 J 39 | assert np.isclose([9360. / 3600], feat['energy_charge'], rtol=1e-2).all() 40 | assert np.isclose([8640. / 3600], feat['energy_discharge'], rtol=1e-2).all()
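# (These numbers follow from the single-resistor fixture: the open-circuit
# voltage apparently ramps linearly from 2.0 to 3.0 V over the 1 A-hr window,
# and the 1 A current through a 0.1 ohm resistor shifts the terminal voltage
# by 0.1 V, so charge sweeps 2.1 to 3.1 V and discharge sweeps 2.9 to 1.9 V,
# giving mean powers of 2.6 W and 2.4 W.)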
41 | 42 | 43 | @mark.parametrize('from_charged', [True, False]) 44 | def test_capacity(file_path, from_charged): 45 | example_data = get_example_data(file_path, from_charged) 46 | soc = StateOfCharge() 47 | raw_data = example_data.tables['raw_data'] 48 | soc.enhance(raw_data) 49 | 50 | assert all(c in raw_data for c in soc.column_names) 51 | assert not any(raw_data[c].isna().any() for c in soc.column_names) 52 | 53 | # The first value of each cycle should be 0 54 | assert np.isclose(raw_data.drop_duplicates('cycle_number', keep='first')[soc.column_names], 0).all() 55 | 56 | # The final cycled charge should be zero for our test cases 57 | assert np.isclose(raw_data['cycled_charge'].iloc[-1], 0., atol=1e-3) 58 | 59 | # The capacity for the first few steps should be I*t/3600s 60 | first_steps = raw_data.iloc[:3] 61 | current = first_steps['current'].iloc[0] 62 | assert np.isclose(first_steps['cycled_charge'], current * first_steps['test_time'] / 3600).all() 63 | 64 | # The energy for the first few steps should be 65 | # discharging = I * \int_0^t (2.9 - t/3600) = I * (2.9t - t^2/7200) 66 | # charging = I * \int_0^t (2.1 + t/3600) = I * (2.1t + t^2/7200) 67 | if from_charged: 68 | answer = current * (2.9 * first_steps['test_time'] - first_steps['test_time'] ** 2 / 7200) 69 | assert (answer[1:] < 0).all() 70 | else: 71 | answer = current * (2.1 * first_steps['test_time'] + first_steps['test_time'] ** 2 / 7200) 72 | assert (answer[1:] > 0).all() 73 | assert np.isclose(first_steps['cycled_energy'], answer / 3600, rtol=1e-3).all() 74 | 75 | 76 | def test_against_battery_data_gov(file_path): 77 | """See if our capacities are similar to those computed in BatteryData.Energy.Gov""" 78 | 79 | cyc_id = 8 80 | data = BDReader().read_dataset(list((file_path / 'batterydata').glob('p492*'))) 81 | orig_data = \ 82 | data.tables['cycle_stats'][ 83 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge'] 84 | ].copy().iloc[cyc_id] 85 | 86 | # Recompute 87 | CapacityPerCycle().compute_features(data) 88 | new_data = data.tables['cycle_stats'][ 89 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge']].iloc[cyc_id] 90 | diff = np.abs(orig_data.values - new_data.values) 91 | agree = diff < 1e-3 92 | assert agree.all(), diff 93 | 94 | 95 | def test_reuse_integrals(file_path): 96 | example_data = get_example_data(file_path, True) 97 | 98 | # Get a baseline capacity 99 | CapacityPerCycle(reuse_integrals=False).compute_features(example_data) 100 | initial_data = example_data.tables['cycle_stats'][ 101 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge']].copy() 102 | 103 | # Compute the integrals then intentionally increase capacity and energy 2x 104 | StateOfCharge().compute_features(example_data) 105 | for c in ['cycled_energy', 'cycled_charge']: 106 | example_data.tables['raw_data'][c] *= 2 107 | 108 | # Recompute capacity and energy measurements, which should have increased by 2x 109 | CapacityPerCycle(reuse_integrals=True).compute_features(example_data) 110 | final_data = example_data.tables['cycle_stats'][ 111 | ['capacity_discharge', 'capacity_charge', 'energy_discharge', 'energy_charge']].copy() 112 | assert np.isclose(initial_data.values * 2, final_data.values, atol=1e-3).all() 113 | -------------------------------------------------------------------------------- /tests/postprocess/test_stats.py: -------------------------------------------------------------------------------- 1 | """Test for
features related to timing""" 2 | from datetime import datetime, timedelta 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from pytest import warns, fixture, raises 7 | 8 | from battdat.data import BatteryDataset 9 | from battdat.postprocess.timing import CycleTimesSummarizer, TimeEnhancer 10 | 11 | 12 | @fixture() 13 | def raw_data(): 14 | return pd.DataFrame({ 15 | 'cycle_number': [0, 0, 1, 1, 2, 2], 16 | 'test_time': [0, 0.99, 1, 1.99, 2., 2.99] 17 | }) 18 | 19 | 20 | def test_summary(raw_data): 21 | computer = CycleTimesSummarizer() 22 | data = BatteryDataset.make_cell_dataset(raw_data=raw_data) 23 | output = computer.compute_features(data) 24 | assert set(output.columns) == set(computer.column_names).union({'cycle_number'}) 25 | assert np.isclose(data.tables['cycle_stats']['cycle_start'], [0., 1., 2.]).all() 26 | assert np.isclose(data.tables['cycle_stats']['cycle_duration'], [1., 1., 0.99]).all() 27 | 28 | # Make sure it warns if the next cycle is unavailable 29 | raw_data = pd.DataFrame({ 30 | 'cycle_number': [0, 0, 1, 1, 3, 3], # As if cycle 2 is missing 31 | 'test_time': [0, 0.99, 1, 1.99, 2., 2.99] 32 | }) 33 | data = BatteryDataset.make_cell_dataset(raw_data=raw_data) 34 | with warns(UserWarning) as w: 35 | computer.compute_features(data) 36 | assert 'Some cycles are missing' in str(w[0]) 37 | assert len(w) == 1 38 | 39 | assert np.isclose(data.tables['cycle_stats']['cycle_start'], [0., 1., 2.]).all() 40 | assert np.isclose(data.tables['cycle_stats']['cycle_duration'], [1., 0.99, 0.99]).all() 41 | 42 | # Warns when a cycle contains only one point, which may be the case for rests 43 | raw_data = pd.DataFrame({ 44 | 'cycle_number': [0, 1, 1, 2, 2], 45 | 'test_time': [0, 1, 1.99, 2., 2.99] 46 | }) 47 | data = BatteryDataset.make_cell_dataset(raw_data=raw_data) 48 | with warns(UserWarning) as w: 49 | computer.compute_features(data) 50 | assert 'Some cycles have only one' in str(w[0]) 51 | assert len(w) == 1 52 | 53 | assert np.isclose(data.tables['cycle_stats']['cycle_start'], [0., 1., 2.]).all() 54 | assert np.isclose(data.tables['cycle_stats']['cycle_duration'], [1., 1., 0.99]).all() 55 | 56 | 57 | def test_enhance(raw_data): 58 | computer = TimeEnhancer() 59 | 60 | # Create a datetime series 61 | now = datetime.now() 62 | date_time = raw_data['test_time'].apply(lambda x: now + timedelta(seconds=x)) 63 | 64 | # Remove the time column, then make sure enhancement crashes without the datetime column 65 | orig_test_time = raw_data['test_time'] 66 | raw_data.drop(columns=['test_time'], inplace=True) 67 | 68 | with raises(ValueError, match='must contain a `date_time`'): 69 | computer.enhance(raw_data) 70 | 71 | # Add the datetime series to the dataframe then compute the cycle_stats 72 | raw_data['date_time'] = date_time 73 | computer.enhance(raw_data) 74 | 75 | assert np.allclose(raw_data['test_time'], orig_test_time) 76 | assert np.allclose(raw_data['cycle_time'], [0, 0.99] * 3) 77 | -------------------------------------------------------------------------------- /tests/postprocess/test_tagging.py: -------------------------------------------------------------------------------- 1 | """Tests that cover adding derived columns to the raw data""" 2 | import numpy as np 3 | import pandas as pd 4 | from pytest import fixture 5 | import pytest 6 | 7 | from battdat.data import BatteryDataset 8 | from battdat.postprocess.tagging import AddSteps, AddMethod, AddSubSteps, AddState 9 | from battdat.schemas.column import ChargingState, ControlMethod 10 | 11 | 12 | @fixture() 13 | def synthetic_data() ->
BatteryDataset: 14 | """Data which includes all of our types of steps""" 15 | 16 | # Make the segments 17 | rest_v = [3.5] * 16 18 | rest_i = [0.] * 16 19 | rest_s = [ChargingState.rest] * 16 20 | discharge_v = np.linspace(3.5, 3.25, 16) 21 | discharge_i = [-0.125] * 16 22 | discharge_s = [ChargingState.discharging] * 16 23 | shortrest_v = [3.25] * 4 24 | shortrest_i = [0] * 4 25 | shortrest_s = [ChargingState.rest] * 4 26 | shortnon_v = [3.25] * 4 27 | shortnon_i = [-0.1] * 4 28 | shortnon_s = [ChargingState.discharging] * 4 29 | pulse_v = [3.25] * 8 30 | pulse_i = [0.05] * 8 31 | pulse_s = [ChargingState.charging] * 8 32 | charge_v = [3.6] * 8 + np.linspace(3.6, 3.8, 8).tolist() 33 | charge_i = np.linspace(0.15, 0.1, 8).tolist() + [0.125] * 8 34 | charge_s = [ChargingState.charging] * 16 35 | 36 | # Combine them 37 | v = np.concatenate([rest_v, discharge_v, shortrest_v, shortnon_v, pulse_v, shortrest_v, charge_v]) 38 | i = np.concatenate([rest_i, discharge_i, shortrest_i, shortnon_i, pulse_i, shortrest_i, charge_i]) 39 | s = sum([rest_s, discharge_s, shortrest_s, shortnon_s, pulse_s, shortrest_s, charge_s], []) 40 | t = np.arange(len(v)) * 2. # Assume measurements every 2 seconds 41 | c = np.zeros_like(t, dtype=int) # All in the same cycle 42 | 43 | data = pd.DataFrame({ 44 | 'current': i, 45 | 'voltage': v, 46 | 'state': s, 47 | 'test_time': t, 48 | 'cycle_number': c 49 | }) 50 | # data.drop([62, 63, 64], inplace=True) 51 | return BatteryDataset.make_cell_dataset(raw_data=data) 52 | 53 | 54 | def test_example_data(synthetic_data): 55 | synthetic_data.validate_columns() 56 | 57 | 58 | def test_step_detection(synthetic_data): 59 | AddSteps().enhance(synthetic_data.raw_data) 60 | 61 | # Should detect steps 62 | assert (synthetic_data.raw_data['step_index'].iloc[:16] == 0).all() 63 | assert (synthetic_data.raw_data['step_index'].iloc[16:32] == 1).all() 64 | assert (synthetic_data.raw_data['step_index'].iloc[32:36] == 2).all() 65 | assert (synthetic_data.raw_data['step_index'].iloc[36:40] == 3).all() 66 | assert (synthetic_data.raw_data['step_index'].iloc[40:48] == 4).all() 67 | assert (synthetic_data.raw_data['step_index'].iloc[48:52] == 5).all() 68 | assert (synthetic_data.raw_data['step_index'].iloc[52:68] == 6).all() 69 | 70 | 71 | @pytest.mark.xfail 72 | def test_method_detection(synthetic_data): 73 | # Start assuming that the step detection worked 74 | AddSteps().enhance(synthetic_data.raw_data) 75 | 76 | # See if we can detect the steps 77 | AddMethod().enhance(synthetic_data.raw_data) 78 | assert (synthetic_data.raw_data['method'].iloc[:16] == ControlMethod.rest).all() 79 | assert (synthetic_data.raw_data['method'].iloc[16:32] == ControlMethod.constant_current).all() 80 | assert (synthetic_data.raw_data['method'].iloc[32:36] == ControlMethod.short_rest).all() 81 | assert (synthetic_data.raw_data['method'].iloc[36:40] == ControlMethod.short_nonrest).all() 82 | assert (synthetic_data.raw_data['method'].iloc[40:48] == ControlMethod.pulse).all() 83 | assert (synthetic_data.raw_data['method'].iloc[48:52] == ControlMethod.short_rest).all() 84 | assert (synthetic_data.raw_data['method'].iloc[52:60] == ControlMethod.constant_voltage).all() 85 | assert (synthetic_data.raw_data['method'].iloc[60:68] == ControlMethod.constant_current).all() 86 | 87 | 88 | @pytest.mark.xfail 89 | def test_substep_detect(synthetic_data): 90 | # Start assuming that the step and method detection worked 91 | AddSteps().enhance(synthetic_data.raw_data) 92 | AddMethod().enhance(synthetic_data.raw_data) 93 | 94 | # The 
substeps should be the same as the steps because we do not have two charging/rest cycles next to each other 95 | AddSubSteps().enhance(synthetic_data.raw_data) 96 | assert (synthetic_data.raw_data['step_index'].iloc[:60] == synthetic_data.raw_data['substep_index'].iloc[:60]).all() 97 | assert (synthetic_data.raw_data['substep_index'].iloc[60:] == 7).all() 98 | 99 | 100 | def test_state_detection(synthetic_data): 101 | # First, get only the data without the pre-defined state 102 | raw_data = synthetic_data.raw_data.drop(columns=['state']) 103 | 104 | # Enhance 105 | AddState().enhance(data=raw_data) 106 | 107 | # assert False, len(synthetic_data.raw_data) 108 | assert (raw_data['state'].iloc[:16] == ChargingState.rest).all(), raw_data['state'].iloc[:16] 109 | assert (raw_data['state'].iloc[16:32] == ChargingState.discharging).all(), raw_data['state'].iloc[16:32].to_numpy() 110 | assert (raw_data['state'].iloc[32:36] == ChargingState.rest).all() 111 | assert (raw_data['state'].iloc[36:40] == ChargingState.discharging).all() 112 | assert (raw_data['state'].iloc[40:48] == ChargingState.charging).all() 113 | assert (raw_data['state'].iloc[48:52] == ChargingState.rest).all() 114 | assert (raw_data['state'].iloc[52:] == ChargingState.charging).all() 115 | -------------------------------------------------------------------------------- /tests/schemas/test_cycling.py: -------------------------------------------------------------------------------- 1 | from battdat.schemas.column import RawData, DataType, ColumnSchema, ColumnInfo 2 | 3 | from pytest import raises, fixture, mark 4 | import pandas as pd 5 | 6 | 7 | @fixture() 8 | def example_df() -> pd.DataFrame: 9 | return pd.DataFrame({ 10 | 'cycle_number': [1, 2], 11 | 'test_time': [0, 0.1], 12 | 'voltage': [0.1, 0.2], 13 | 'current': [0.1, -0.1], 14 | 'state': ['charging', 'resting'] 15 | }) 16 | 17 | 18 | def test_json(): 19 | """Make sure we can serialize and deserialize classes""" 20 | 21 | as_json = RawData().model_dump_json() 22 | 23 | # Test deserialize using Pydantic, which requires knowing the base class 24 | schema = RawData.model_validate_json(as_json) 25 | assert schema.state.type == DataType.STATE 26 | 27 | # Test reading using the "unknown base" version 28 | schema = ColumnSchema.from_json(as_json) 29 | assert schema.state.type == DataType.STATE 30 | 31 | 32 | def test_required(): 33 | """Catch dataframe missing required columns""" 34 | 35 | d = pd.DataFrame() 36 | with raises(ValueError) as exc: 37 | RawData().validate_dataframe(d) 38 | assert 'missing a required column' in str(exc) 39 | 40 | 41 | def test_extra_cols(example_df): 42 | """Handle extra columns""" 43 | example_df['extra'] = [1, 1] 44 | 45 | # Passes with extra columns by default 46 | schema = RawData() 47 | schema.validate_dataframe(example_df) 48 | 49 | # Fails when desired 50 | with raises(ValueError) as exc: 51 | schema.validate_dataframe(example_df, allow_extra_columns=False) 52 | assert 'extra columns' in str(exc) 53 | 54 | # Passes when new column is defined 55 | schema.add_column('extra', 'An extra column') 56 | assert 'extra' in schema.extra_columns 57 | schema.validate_dataframe(example_df, allow_extra_columns=False) 58 | 59 | 60 | def test_get_item(): 61 | schema = RawData() 62 | schema.extra_columns['test'] = ColumnInfo(description='Test') 63 | assert schema['test'].description == 'Test' 64 | assert schema['test_time'].units == 's' 65 | with raises(KeyError, match='asdf'): 66 | schema['asdf'] 67 | 68 | 69 | @mark.parametrize( 70 | "col,values", 71 | 
[('temperature', [1, 2]), ('file_number', [0.1, 0.2]), ('state', [1, 2])] 72 | ) 73 | def test_type_failures(example_df, col, values): 74 | """Columns with the wrong type""" 75 | example_df[col] = values 76 | with raises(ValueError, match=col): 77 | RawData().validate_dataframe(example_df) 78 | 79 | 80 | def test_monotonic(example_df): 81 | """Columns that should be monotonic but are not""" 82 | example_df['cycle_number'] = [2, 1] 83 | with raises(ValueError) as exc: 84 | RawData().validate_dataframe(example_df) 85 | assert 'monotonic' in str(exc) 86 | 87 | example_df['cycle_number'] = [1, 1] 88 | RawData().validate_dataframe(example_df) 89 | -------------------------------------------------------------------------------- /tests/schemas/test_eis.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture, raises 2 | import pandas as pd 3 | import numpy as np 4 | 5 | from battdat.schemas.eis import EISData 6 | 7 | 8 | @fixture() 9 | def example_df() -> pd.DataFrame: 10 | output = pd.DataFrame({ 11 | 'test_id': [1, 1], 12 | 'frequency': [5e5, 4e5], 13 | 'z_real': [0.241, 0.237], 14 | 'z_imag': [0.431, 0.327], 15 | }) 16 | output['z_mag'] = np.linalg.norm(output.values[:, -2:], axis=1) 17 | output['z_phase'] = np.rad2deg(np.arcsin(output['z_imag'] / output['z_mag'])) 18 | return output 19 | 20 | 21 | def test_pass(example_df): 22 | EISData().validate_dataframe(example_df) 23 | 24 | 25 | def test_consistency(example_df): 26 | example_df['z_imag'] *= 2 27 | with raises(ValueError) as e: 28 | EISData().validate_dataframe(example_df) 29 | assert 'imag' in str(e.value) 30 | 31 | example_df['z_real'] *= 2 32 | with raises(ValueError) as e: 33 | EISData().validate_dataframe(example_df) 34 | assert 'real' in str(e.value) 35 | -------------------------------------------------------------------------------- /tests/schemas/test_ontology.py: -------------------------------------------------------------------------------- 1 | """Test the ability to resolve cross-references from the ontology""" 2 | 3 | from battdat.schemas import BatteryMetadata 4 | from battdat.schemas.ontology import cross_reference_terms, gather_descendants, load_battinfo, resolve_term 5 | 6 | 7 | def test_crossref(): 8 | terms = cross_reference_terms(BatteryMetadata) 9 | assert 'is_measurement' in terms 10 | assert terms['is_measurement'].name == 'emmo.Measurement' 11 | assert 'EMMO' in terms['is_measurement'].iri 12 | assert 'well defined mesurement procedure.' 
in terms['is_measurement'].elucidation 13 | 14 | 15 | def test_resolve(): 16 | assert resolve_term('PhysicsBasedSimulation') is not None 17 | assert resolve_term('https://w3id.org/emmo#EMMO_f7ed665b_c2e1_42bc_889b_6b42ed3a36f0') is not None 18 | 19 | 20 | def test_descendants(): 21 | bi = load_battinfo() 22 | desc = [t.name for t in gather_descendants(bi.PhysicsBasedSimulation)] 23 | assert 'emmo.StandaloneModelSimulation' in desc 24 | 25 | desc = [t.name for t in gather_descendants('PhysicsBasedSimulation')] 26 | assert 'emmo.StandaloneModelSimulation' in desc 27 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- 1 | """Tests for the Battery data frame""" 2 | import json 3 | import os 4 | 5 | import pytest 6 | import numpy as np 7 | import pandas as pd 8 | import pyarrow.parquet as pq 9 | from pydantic import ValidationError 10 | from pytest import fixture, raises 11 | from tables import File 12 | 13 | from battdat.schemas.column import ColumnInfo 14 | from battdat.data import BatteryDataset 15 | from battdat import __version__ 16 | 17 | 18 | @fixture() 19 | def test_df(): 20 | raw_data = pd.DataFrame({ 21 | 'test_time': [0, 1, 2.], 22 | 'current': [1., 0., -1.], 23 | 'voltage': [2., 2., 2.], 24 | 'other': [1, 2, 3], 25 | }) 26 | cycle_stats = pd.DataFrame({ 27 | 'cycle_number': [0], 28 | }) 29 | dataset = BatteryDataset.make_cell_dataset(raw_data=raw_data, cycle_stats=cycle_stats, metadata={'name': 'Test data'}) 30 | 31 | # Add an extra column in the schema 32 | dataset.schemas['raw_data'].extra_columns['new'] = ColumnInfo(description='An example column') 33 | return dataset 34 | 35 | 36 | def test_write_hdf(tmpdir, test_df): 37 | """Test whether the contents of the HDF5 file are reasonably understandable""" 38 | 39 | # Write the HDF file 40 | out_path = os.path.join(tmpdir, 'test.h5') 41 | test_df.to_hdf(out_path) 42 | 43 | # Investigate the contents 44 | with File(out_path) as f: 45 | attrs = f.root._v_attrs 46 | assert 'metadata' in attrs 47 | assert json.loads(attrs['metadata'])['name'] == 'Test data' 48 | assert 'raw_data' in f.root 49 | 50 | # Make sure we have a schema 51 | g = f.root['raw_data'] 52 | attrs = g._v_attrs 53 | assert 'metadata' in attrs 54 | assert json.loads(attrs['metadata'])['test_time']['units'] == 's' 55 | 56 | # Test writing to an already-open file 57 | with File(out_path, 'w') as file: 58 | test_df.to_hdf(file) 59 | 60 | 61 | def test_read_hdf(tmpdir, test_df): 62 | # Write it 63 | out_path = os.path.join(tmpdir, 'test.h5') 64 | test_df.to_hdf(out_path) 65 | 66 | # Test reading only the metadata 67 | metadata = BatteryDataset.get_metadata_from_hdf5(out_path) 68 | assert metadata.name == 'Test data' 69 | 70 | # Read it 71 | data = BatteryDataset.from_hdf(out_path) 72 | assert 'raw_data' in data 73 | assert 'test_time' in data['raw_data'].columns 74 | assert len(data) == 2 75 | assert len(list(data)) == 2 76 | assert data.metadata.name == 'Test data' 77 | assert data.get('raw_data') is not None 78 | assert data['cycle_stats'] is not None 79 | assert data.schemas['raw_data'].extra_columns['new'].description == 'An example column' 80 | 81 | # Test reading from an already-open file 82 | with File(out_path, 'r') as file: 83 | data = BatteryDataset.from_hdf(file) 84 | assert data.metadata.name == 'Test data' 85 | 86 | # Test requesting an unknown type of field 87 | with raises(ValueError) as exc: 88 | BatteryDataset.from_hdf(out_path, 
tables=('bad)_!~',)) 89 | assert 'bad)_!~' in str(exc) 90 | 91 | # Test reading an absent field 92 | del test_df.tables['cycle_stats'] 93 | test_df.to_hdf(out_path) 94 | with raises(ValueError) as exc: 95 | BatteryDataset.from_hdf(out_path, tables=('cycle_stats',)) 96 | assert 'File does not contain' in str(exc) 97 | 98 | 99 | def test_multi_cell_hdf5(tmpdir, test_df): 100 | out_path = os.path.join(tmpdir, 'test.h5') 101 | 102 | # Save the cell once, then multiply the current by 2 103 | test_df.to_hdf(out_path, 'a') 104 | test_df['raw_data']['current'] *= 2 105 | test_df.to_hdf(out_path, 'b', overwrite=False) 106 | 107 | # Make sure we can count two cells 108 | _, names, _ = BatteryDataset.inspect_hdf(out_path) 109 | assert names == {'a', 'b'} 110 | 111 | with File(out_path) as h: 112 | _, names, schemas = BatteryDataset.inspect_hdf(h) 113 | assert names == {'a', 'b'} 114 | 115 | # Check that there are schemas for the raw_data 116 | assert 'current' in schemas['raw_data'] 117 | 118 | # Load both 119 | test_a = BatteryDataset.from_hdf(out_path, prefix='a') 120 | test_b = BatteryDataset.from_hdf(out_path, prefix='b') 121 | assert np.isclose(test_a['raw_data']['current'] * 2, test_b['raw_data']['current']).all() 122 | 123 | # Test reading by index 124 | test_0 = BatteryDataset.from_hdf(out_path, prefix=0) 125 | assert np.isclose(test_0['raw_data']['current'], 126 | test_a['raw_data']['current']).all() 127 | 128 | # Iterate over all 129 | keys = dict(BatteryDataset.all_cells_from_hdf(out_path)) 130 | assert len(keys) 131 | assert np.isclose(keys['a']['raw_data']['current'] * 2, 132 | keys['b']['raw_data']['current']).all() 133 | 134 | 135 | def test_missing_prefix_warning(tmpdir, test_df): 136 | out_path = os.path.join(tmpdir, 'test.h5') 137 | 138 | test_df.to_hdf(out_path, 'a', overwrite=False) 139 | 140 | # Error if prefix not found 141 | with pytest.raises(ValueError, match='No data available'): 142 | BatteryDataset.from_hdf(out_path, prefix='b') 143 | 144 | 145 | def test_multicell_metadata_warning(tmpdir, test_df): 146 | out_path = os.path.join(tmpdir, 'test.h5') 147 | 148 | # Save the cell once, then alter metadata 149 | test_df.to_hdf(out_path, 'a', overwrite=False) 150 | test_df.metadata.name = 'Not test data' 151 | with pytest.warns(UserWarning, match='differs from new metadata'): 152 | test_df.to_hdf(out_path, 'b', overwrite=False) 153 | 154 | 155 | def test_validate(test_df): 156 | # Make sure the provided data passes 157 | warnings = test_df.validate() 158 | assert len(warnings) == 1 159 | assert 'other' in warnings[0] 160 | 161 | # Make sure we can define new columns 162 | test_df.schemas['raw_data'].extra_columns['other'] = ColumnInfo(description='Test') 163 | warnings = test_df.validate() 164 | assert len(warnings) == 0 165 | 166 | 167 | def test_parquet(test_df, tmpdir): 168 | write_dir = tmpdir / 'parquet-test' 169 | written = test_df.to_parquet(write_dir) 170 | assert len(written) == 2 171 | for file in written.values(): 172 | metadata = pq.read_metadata(file).metadata 173 | assert b'battery_metadata' in metadata 174 | assert b'table_metadata' in metadata 175 | 176 | # Read it back in, ensure data are recovered 177 | read_df = BatteryDataset.from_parquet(write_dir) 178 | assert (read_df.cycle_stats['cycle_number'] == test_df.cycle_stats['cycle_number']).all() 179 | assert (read_df.raw_data['voltage'] == test_df.raw_data['voltage']).all() 180 | assert read_df.metadata == test_df.metadata 181 | assert read_df.schemas['raw_data'].extra_columns['new'].description == 'An example 
column' 182 | 183 | # Test reading subsets 184 | read_df = BatteryDataset.from_parquet(write_dir, subsets=('cycle_stats',)) 185 | assert read_df.metadata is not None 186 | with raises(AttributeError, match='raw_data'): 187 | assert read_df.raw_data 188 | assert read_df.cycle_stats is not None 189 | 190 | with raises(ValueError) as e: 191 | BatteryDataset.from_parquet(tmpdir) 192 | assert 'No data available' in str(e) 193 | 194 | # Test reading only metadata 195 | metadata = BatteryDataset.inspect_parquet(write_dir) 196 | assert metadata == test_df.metadata 197 | BatteryDataset.inspect_parquet(write_dir / 'cycle_stats.parquet') 198 | with raises(ValueError) as e: 199 | BatteryDataset.inspect_parquet(tmpdir) 200 | assert 'No parquet files' in str(e) 201 | 202 | 203 | def test_version_warnings(test_df): 204 | # Alter the version number, then copy using to/from dict 205 | test_df.metadata.version = 'super.old.version' 206 | with pytest.warns() as w: 207 | BatteryDataset.make_cell_dataset(metadata=test_df.metadata, warn_on_mismatch=True) 208 | assert len(w) == 1 # Only the warning about the versions 209 | assert 'supplied=super.old.version' in str(w.list[0].message) 210 | 211 | # Make a change that will violate the schema 212 | test_df.metadata.name = 1 # Name cannot be an int 213 | 214 | with pytest.warns() as w: 215 | recovered = BatteryDataset.make_cell_dataset(metadata=test_df.metadata, warn_on_mismatch=True) 216 | assert len(w) == 3 # Warning during save, warning about mismatch, warning that schema failed 217 | assert 'supplied=super.old.version' in str(w.list[1].message) 218 | assert 'failed to validate, probably' in str(w.list[2].message) 219 | assert recovered.metadata.version == __version__ 220 | 221 | 222 | def test_bad_metadata(): 223 | """Ensure bad metadata causes an exception""" 224 | 225 | metadata = {'name': 1} 226 | with raises(ValidationError): 227 | BatteryDataset.make_cell_dataset(metadata=metadata) 228 | -------------------------------------------------------------------------------- /tests/test_stream.py: -------------------------------------------------------------------------------- 1 | """Evaluate streaming reads from files""" 2 | from itertools import zip_longest 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | from tables import File 7 | from pytest import fixture, mark, raises 8 | 9 | from battdat.data import BatteryDataset 10 | from battdat.io.batterydata import BDReader 11 | from battdat.postprocess.timing import CycleTimesSummarizer 12 | from battdat.streaming import iterate_records_from_file, iterate_cycles_from_file 13 | from battdat.streaming.hdf5 import HDF5Writer 14 | 15 | 16 | @fixture() 17 | def example_dataset(file_path): 18 | data = BDReader().read_dataset([file_path / 'batterydata' / 'p492-13-raw.csv']) 19 | data.metadata.name = 'test_name' 20 | return data 21 | 22 | 23 | @fixture() 24 | def example_h5_path(tmpdir, example_dataset): 25 | h5_path = Path(tmpdir) / 'example_h5' 26 | example_dataset.to_hdf(h5_path) 27 | return h5_path 28 | 29 | 30 | def test_stream_by_rows(example_h5_path): 31 | row_iter = iterate_records_from_file(example_h5_path) 32 | 33 | row_0 = next(row_iter) 34 | assert row_0['test_time'] == 0. 
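# These hard-coded values presumably mirror the first records of the
# p492-13-raw.csv fixture behind example_h5_path.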
35 | row_1 = next(row_iter) 36 | assert row_1['voltage'] == 3.27191577 37 | 38 | 39 | def test_stream_by_cycles(example_h5_path): 40 | test_data = BatteryDataset.from_hdf(example_h5_path) 41 | cycle_iter = iterate_cycles_from_file(example_h5_path) 42 | for streamed, (_, original) in zip_longest(cycle_iter, test_data.raw_data.groupby('cycle_number')): 43 | assert streamed is not None 44 | assert original is not None 45 | assert np.allclose(streamed['test_time'], original['test_time']) 46 | 47 | # Test reading a list of keys 48 | cycle_iter = iterate_cycles_from_file(example_h5_path, make_dataset=False, key=['raw_data']) 49 | cycle_0 = next(cycle_iter) 50 | assert 'raw_data' in cycle_0 51 | 52 | # Ensure we can generate chunks with metadata 53 | for key in ('raw_data', ['raw_data']): 54 | cycle_iter = iterate_cycles_from_file(example_h5_path, make_dataset=True, key=key) 55 | cycle_0 = next(cycle_iter) 56 | assert cycle_0.metadata == test_data.metadata 57 | 58 | 59 | def test_stream_by_cycles_with_stats(example_dataset, tmpdir): 60 | # Remove EIS data, add capacities 61 | example_dataset.tables.pop('eis_data') 62 | CycleTimesSummarizer().add_summaries(example_dataset) 63 | assert 'cycle_stats' in example_dataset 64 | h5_path = Path(tmpdir / 'test.h5') 65 | example_dataset.to_hdf(h5_path) 66 | 67 | # Test streaming a cycle 68 | cycle_iter = iterate_cycles_from_file(h5_path, make_dataset=False, key=None) 69 | cycle_0 = next(cycle_iter) 70 | assert cycle_0['cycle_stats'].iloc[0]['cycle_number'] == 0 71 | 72 | # Delete the first row in the cycle steps to cause an error 73 | example_dataset.cycle_stats.drop(index=0, inplace=True) 74 | h5_path = Path(tmpdir / 'test-fail.h5') 75 | example_dataset.to_hdf(h5_path) 76 | 77 | cycle_iter = iterate_cycles_from_file(h5_path, make_dataset=False, key=None) 78 | with raises(ValueError, match='cycle_stats=1'): 79 | next(cycle_iter) 80 | 81 | 82 | @mark.parametrize('buffer_size', [128, 400000000]) # Way smaller than data size, way larger 83 | def test_streaming_write(example_dataset, buffer_size, tmpdir): 84 | out_file = Path(tmpdir) / 'streamed.h5' 85 | writer = HDF5Writer(out_file, metadata=example_dataset.metadata, buffer_size=buffer_size) 86 | assert len(example_dataset.raw_data) > 0 87 | with writer: 88 | for _, row in example_dataset.raw_data.iterrows(): 89 | writer.write_row(row.to_dict()) 90 | 91 | # Make sure the data are identical 92 | copied_data = BatteryDataset.from_hdf(out_file) 93 | assert copied_data.metadata.name == example_dataset.metadata.name 94 | cols = ['test_time', 'current'] 95 | assert np.allclose(copied_data.raw_data[cols], example_dataset.raw_data[cols]) 96 | 97 | 98 | def test_streaming_write_existing_store(example_dataset, tmpdir): 99 | out_file = Path(tmpdir) / 'streamed.h5' 100 | with File(out_file, mode='a') as file, HDF5Writer(file, buffer_size=2, complevel=4) as writer: 101 | assert writer.write_row({'test_time': 0.}) == 0 # Written on close, so the number written here is zero 102 | 103 | with File(out_file, mode='a') as file, HDF5Writer(file, buffer_size=2, complevel=4) as writer: 104 | assert writer.write_row({'test_time': 1.}) == 0 105 | 106 | # Read it in 107 | data = BatteryDataset.from_hdf(out_file) 108 | assert np.allclose(data.raw_data['test_time'], [0., 1.]) 109 | --------------------------------------------------------------------------------
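Taken together, the tests above trace the library's core round trip: assemble a dataset from raw cycling data, post-process it, and move it through HDF5. The sketch below distills that flow using only calls exercised in these tests; the file name, metadata, and numeric values are illustrative, not part of the repository.

import pandas as pd

from battdat.data import BatteryDataset
from battdat.postprocess.integral import CapacityPerCycle

# Assemble a minimal cell dataset; columns follow the RawData schema used in the tests
raw = pd.DataFrame({
    'test_time': [0., 1., 2.],   # seconds
    'current': [1., 0., -1.],    # amps; positive current denotes charging per the sign-convention checks
    'voltage': [2., 2., 2.],     # volts
    'cycle_number': [0, 0, 0],
})
dataset = BatteryDataset.make_cell_dataset(raw_data=raw, metadata={'name': 'demo'})
print(dataset.validate())        # returns a list of warnings rather than raising

# Derive per-cycle statistics, then round-trip the whole dataset through HDF5
# ('demo.h5' is an illustrative path, not a repository fixture)
CapacityPerCycle().compute_features(dataset)
dataset.to_hdf('demo.h5')
copy = BatteryDataset.from_hdf('demo.h5')
assert 'cycle_stats' in copy.tables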