├── .coveragerc
├── .github
│   ├── pull_request_template.md
│   └── workflows
│       ├── builddocs.yml
│       ├── lint-and-test.yml
│       └── pythonpublish.yml
├── .gitignore
├── .readthedocs.yml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── _static
│   │   └── css
│   │       └── my_style.css
│   ├── assets
│   │   ├── diode_param_extractor.png
│   │   ├── pvops_emblem.svg
│   │   ├── pvops_full_logo.svg
│   │   ├── vis_attr_connect_example.svg
│   │   ├── vis_attr_timeseries_example.svg
│   │   ├── vis_cat_scatter_example.svg
│   │   ├── vis_cluster_entropy_example.svg
│   │   ├── vis_counts_example.svg
│   │   ├── vis_doc_clusters_example.svg
│   │   ├── vis_freq_plot_example.svg
│   │   └── vis_overlap_example.png
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── pages
│   │   ├── abbreviations.rst
│   │   ├── apidoc
│   │   │   ├── iv.rst
│   │   │   ├── text.rst
│   │   │   ├── text2time.rst
│   │   │   └── timeseries.rst
│   │   ├── contributing.rst
│   │   ├── development.rst
│   │   ├── installation.rst
│   │   ├── moduleguides
│   │   │   ├── iv.rst
│   │   │   ├── text.rst
│   │   │   ├── text2time.rst
│   │   │   └── timeseries.rst
│   │   ├── modules.rst
│   │   ├── references.rst
│   │   ├── releasenotes.rst
│   │   ├── releasenotes
│   │   │   ├── 0.1.7.rst
│   │   │   ├── 0.1.8.rst
│   │   │   ├── 0.1.9.rst
│   │   │   ├── 0.2.0.rst
│   │   │   ├── 0.3.0.rst
│   │   │   ├── 0.4.0.rst
│   │   │   ├── 0.5.0.rst
│   │   │   ├── 0.5.1.rst
│   │   │   ├── 0.5.2.rst
│   │   │   ├── 0.5.3.rst
│   │   │   ├── 0.6.0.rst
│   │   │   ├── 0.6.1.rst
│   │   │   ├── alpha.rst
│   │   │   └── beta.rst
│   │   ├── tutorials.rst
│   │   ├── tutorials
│   │   │   ├── assets
│   │   │   │   └── diode_param_extractor.png
│   │   │   ├── tutorial_AIT_timeseries.nblink
│   │   │   ├── tutorial_iv_classifier.nblink
│   │   │   ├── tutorial_iv_diode_extractor.nblink
│   │   │   ├── tutorial_iv_simulator.nblink
│   │   │   ├── tutorial_text2time_module.nblink
│   │   │   ├── tutorial_textmodule.nblink
│   │   │   ├── tutorial_timeseries.nblink
│   │   │   ├── tutorial_timeseries_sim.nblink
│   │   │   └── tutorial_timeseries_survival_analysis.nblink
│   │   └── userguide.rst
│   └── refs
│       └── pvops.bib
├── noxfile.py
├── pvops
│   ├── __init__.py
│   ├── iv
│   │   ├── __init__.py
│   │   ├── extractor.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── nn.py
│   │   ├── physics_utils.py
│   │   ├── preprocess.py
│   │   ├── simulator.py
│   │   ├── timeseries_simulator.py
│   │   └── utils.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── om_data_update_pick.pkl
│   │   ├── om_summ_pick.pkl
│   │   ├── prod_data_clean_iec_pick.pkl
│   │   ├── prod_data_quant_pick.pkl
│   │   ├── prod_summ_pick.pkl
│   │   ├── test_iv.py
│   │   ├── test_text.py
│   │   ├── test_text2time.py
│   │   └── test_timeseries.py
│   ├── text
│   │   ├── __init__.py
│   │   ├── classify.py
│   │   ├── defaults.py
│   │   ├── nlp_utils.py
│   │   ├── preprocess.py
│   │   ├── stopwords.txt
│   │   ├── utils.py
│   │   └── visualize.py
│   ├── text2time
│   │   ├── __init__.py
│   │   ├── preprocess.py
│   │   ├── utils.py
│   │   └── visualize.py
│   └── timeseries
│       ├── __init__.py
│       ├── models
│       │   ├── AIT.py
│       │   ├── __init__.py
│       │   ├── iec.py
│       │   ├── linear.py
│       │   └── survival.py
│       └── preprocess.py
├── requirements.txt
├── setup.py
└── tutorials
    ├── __init__.py
    ├── assets
    │   └── diode_param_extractor.png
    ├── example_data
    │   ├── example_ML_ticket_data.csv
    │   ├── example_metadata2.csv
    │   ├── example_om_data.csv
    │   ├── example_om_data2.csv
    │   ├── example_om_survival_analysis_data.csv
    │   ├── example_perf_data.csv
    │   ├── example_prod_data_cumE2.csv
    │   ├── example_prod_with_covariates.csv
    │   ├── mappings_cause.csv
    │   ├── mappings_equipment.csv
    │   ├── mappings_pv_terms.csv
    │   ├── remappings_asset.csv
    │   └── remappings_response.csv
    ├── text_class_example.py
    ├── tutorial_AIT_timeseries.ipynb
    ├── tutorial_iv_classifier.ipynb
    ├── tutorial_iv_diode_extractor.ipynb
    ├── tutorial_iv_simulator.ipynb
    ├── tutorial_text2time_module.ipynb
    ├── tutorial_text_classify_regex_example.ipynb
    ├── tutorial_textmodule.ipynb
    ├── tutorial_timeseries.ipynb
    ├── tutorial_timeseries_sim.ipynb
    └── tutorial_timeseries_survival_analysis.ipynb
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     pvops/tests/conftest.py
4 | 
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Description
2 | *Thank you for your contribution! Please provide a brief description of the problem and the proposed solution or new feature (if not already fully described in a linked issue)*
3 | 
4 | 
5 | 
6 | ## Motivation and Context
7 | 
8 | 
9 | 
10 | ## How has this been tested?
11 | 
12 | 
13 | 
14 | 
15 | ## Screenshots (if appropriate):
16 | 
17 | ## Types of changes
18 | 
19 | - [ ] Bug fix (non-breaking change which fixes an issue)
20 | - [ ] New feature (non-breaking change which adds functionality)
21 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
22 | 
23 | ## Checklist:
24 | 
25 | 
26 | - [ ] My code follows the code style of this project.
27 | - [ ] My change requires a change to the documentation.
28 | - [ ] I have updated the documentation accordingly.
29 | 
30 | 
31 | 
--------------------------------------------------------------------------------
/.github/workflows/builddocs.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 | 
3 | name: docs build experiment
4 | 
5 | # Controls when the workflow will run
6 | on:
7 |   # Triggers the workflow on push or pull request events but only for the master branch
8 |   push:
9 |     branches: [ master, docstrings ]
10 | 
11 |   # Allows you to run this workflow manually from the Actions tab
12 |   workflow_dispatch:
13 | 
14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
15 | jobs:
16 |   # This workflow contains a single job called "build"
17 |   build:
18 |     # The type of runner that the job will run on
19 |     runs-on: ubuntu-latest
20 | 
21 |     # Steps represent a sequence of tasks that will be executed as part of the job
22 |     steps:
23 |       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
24 |       - uses: actions/checkout@v2
25 |         with:
26 |           fetch-depth: 0
27 |       - name: Install pandoc
28 |         run: sudo apt-get update -y && sudo apt-get install -y pandoc
29 |       - uses: actions/setup-python@v2
30 |         with:
31 |           python-version: '3.11'
32 |       - name: Install dependencies
33 |         run: |
34 |           python -m pip install --upgrade pip
35 |           python -m pip install -r requirements.txt
36 |           pip install --upgrade coverage pytest
37 |       - name: Install package
38 |         run: |
39 |           python -m pip install -e .
40 |       - name: Build documentation
41 |         run: sphinx-build -b html docs/ docs/_build/html
42 |       - uses: actions/upload-artifact@v4
43 |         with:
44 |           name: html-docs
45 |           path: docs/_build/html
46 | 
--------------------------------------------------------------------------------
/.github/workflows/lint-and-test.yml:
--------------------------------------------------------------------------------
1 | name: lint and test
2 | 
3 | on: [push, pull_request]
4 | 
5 | jobs:
6 |   test:
7 |     runs-on: ${{ matrix.os }}
8 |     strategy:
9 |       matrix:
10 |         os: [ubuntu-latest, macos-latest, windows-latest]
11 |         python-version: ['3.8', '3.9', '3.10', '3.11']
12 |       fail-fast: false
13 |     steps:
14 |       - uses: actions/checkout@v2
15 |       - name: Set up Python ${{ matrix.python-version }}
16 |         uses: actions/setup-python@v4
17 |         with:
18 |           python-version: ${{ matrix.python-version }}
19 |       - name: Install pvops
20 |         run: |
21 |           python -m pip install --upgrade pip
22 |           pip install .[iv]
23 |       - name: Test with pytest
24 |         run: |
25 |           pip install pytest pytest-cov
26 |           pytest --cov=pvops --cov-config=.coveragerc --cov-report term-missing pvops
27 | 
28 |   lint:
29 |     runs-on: ubuntu-latest
30 |     strategy:
31 |       matrix:
32 |         python-version: ['3.8', '3.9', '3.10', '3.11']
33 |     steps:
34 |       - uses: actions/checkout@v2
35 |       - name: Set up Python ${{ matrix.python-version }}
36 |         uses: actions/setup-python@v4
37 |         with:
38 |           python-version: ${{ matrix.python-version }}
39 |       - name: Install flake8
40 |         run: |
41 |           python -m pip install --upgrade pip
42 |           pip install flake8
43 |       - name: Lint with flake8
44 |         run: |
45 |           flake8 . --count --statistics --show-source --ignore=E402,E203,E266,E501,W503,F403,F401,W291,E302,W391,W292,F405,E722,W504,E121,E125,E712
46 | 
--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
1 | name: Upload to PyPI
2 | on:
3 |   release:
4 |     types: [published]
5 | 
6 | jobs:
7 |   deploy:
8 |     runs-on: ubuntu-latest
9 |     steps:
10 |       - uses: actions/checkout@v2
11 |       - name: Set up Python
12 |         uses: actions/setup-python@v2
13 |         with:
14 |           python-version: '3.x'
15 |       - name: Install dependencies
16 |         run: |
17 |           python -m pip install --upgrade pip
18 |           pip install setuptools wheel twine
19 |       - name: Build and publish
20 |         env:
21 |           TWINE_USERNAME: __token__
22 |           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
23 |         run: |
24 |           python setup.py sdist bdist_wheel
25 |           twine upload dist/*
26 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | __pycache__/ 163 | pvops/text/__pycache__/ 164 | pvops/text2time/__pycache__/ 165 | examples/*.npy 166 | docs/_build/ 167 | .pytest_cache/ 168 | *.py[cod] 169 | examples/analysis/ 170 | *.ipynb_checkpoints/ 171 | *~ 172 | pvops/.vscode/* 173 | pvops/text2time/.vscode 174 | .vscode/* 175 | .vscode/settings.json 176 | .coverage -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | formats: all 12 | 13 | python: 14 | install: 15 | - method: pip 16 | path: . 17 | extra_requirements: 18 | - doc, iv 19 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation 6 | in our community a harassment-free experience for everyone, regardless 7 | of age, body size, visible or invisible disability, ethnicity, sex 8 | characteristics, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, 10 | race, religion, or sexual identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, 13 | welcoming, diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for 18 | our community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our 24 | mistakes, and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or 33 | political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in 38 | a professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our 43 | standards of acceptable behavior and will take appropriate and fair 44 | corrective action in response to any behavior that they deem 45 | inappropriate, threatening, offensive, or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, 48 | or reject comments, commits, code, wiki edits, issues, and other 49 | contributions that are not aligned to this Code of Conduct, and will 50 | communicate reasons for moderation decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also 55 | applies when an individual is officially representing the community in 56 | public spaces. 
Examples of representing our community include using an 57 | official e-mail address, posting via an official social media account, 58 | or acting as an appointed representative at an online or offline 59 | event. 60 | 61 | ## Enforcement 62 | 63 | Instances of abusive, harassing, or otherwise unacceptable behavior 64 | may be reported to the community leaders responsible for enforcement 65 | at cwhanse@sandia.gov or wfvinin@sandia.gov. All complaints will be 66 | reviewed and investigated promptly and fairly. 67 | 68 | All community leaders are obligated to respect the privacy and 69 | security of the reporter of any incident. 70 | 71 | ## Enforcement Guidelines 72 | 73 | Community leaders will follow these Community Impact Guidelines in 74 | determining the consequences for any action they deem in violation of 75 | this Code of Conduct: 76 | 77 | ### 1. Correction 78 | 79 | **Community Impact**: Use of inappropriate language or other behavior 80 | deemed unprofessional or unwelcome in the community. 81 | 82 | **Consequence**: A private, written warning from community leaders, 83 | providing clarity around the nature of the violation and an 84 | explanation of why the behavior was inappropriate. A public apology 85 | may be requested. 86 | 87 | ### 2. Warning 88 | 89 | **Community Impact**: A violation through a single incident or series 90 | of actions. 91 | 92 | **Consequence**: A warning with consequences for continued 93 | behavior. No interaction with the people involved, including 94 | unsolicited interaction with those enforcing the Code of Conduct, for 95 | a specified period of time. This includes avoiding interactions in 96 | community spaces as well as external channels like social 97 | media. Violating these terms may lead to a temporary or permanent 98 | ban. 99 | 100 | ### 3. Temporary Ban 101 | 102 | **Community Impact**: A serious violation of community standards, 103 | including sustained inappropriate behavior. 104 | 105 | **Consequence**: A temporary ban from any sort of interaction or 106 | public communication with the community for a specified period of 107 | time. No public or private interaction with the people involved, 108 | including unsolicited interaction with those enforcing the Code of 109 | Conduct, is allowed during this period. Violating these terms may 110 | lead to a permanent ban. 111 | 112 | ### 4. Permanent Ban 113 | 114 | **Community Impact**: Demonstrating a pattern of violation of 115 | community standards, including sustained inappropriate behavior, 116 | harassment of an individual, or aggression toward or disparagement of 117 | classes of individuals. 118 | 119 | **Consequence**: A permanent ban from any sort of public interaction 120 | within the community. 121 | 122 | ## Attribution 123 | 124 | This Code of Conduct is adapted from the [Contributor 125 | Covenant][homepage], version 2.0, available at 126 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 127 | 128 | Community Impact Guidelines were inspired by [Mozilla's code of 129 | conduct enforcement ladder](https://github.com/mozilla/diversity). 130 | 131 | [homepage]: https://www.contributor-covenant.org 132 | 133 | For answers to common questions about this code of conduct, see the 134 | FAQ at https://www.contributor-covenant.org/faq. Translations are 135 | available at https://www.contributor-covenant.org/translations. 
136 | 
137 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
4 | Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains
5 | certain rights in this software.
6 | 
7 | Redistribution and use in source and binary forms, with or without modification,
8 | are permitted provided that the following conditions are met:
9 | 
10 | Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 | 
13 | Redistributions in binary form must reproduce the above copyright notice, this
14 | list of conditions and the following disclaimer in the documentation and/or
15 | other materials provided with the distribution.
16 | 
17 | Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | [![GitHub version](https://badge.fury.io/gh/sandialabs%2FpvOps.svg)](https://badge.fury.io/gh/sandialabs%2FpvOps)
4 | [![License](https://img.shields.io/pypi/l/pvOps?color=green)](https://github.com/sandialabs/pvOps/blob/master/LICENSE)
5 | [![ActionStatus](https://github.com/sandialabs/pvOps/workflows/lint%20and%20test/badge.svg)](https://github.com/sandialabs/pvOps/actions)
6 | [![status](https://joss.theoj.org/papers/6c3554c98b1771125613cff94241847c/status.svg)](https://joss.theoj.org/papers/6c3554c98b1771125613cff94241847c)
7 | 
8 | pvops contains a series of functions to facilitate fusion of text-based data with time series production data collected at photovoltaic sites. The package also contains example datasets and tutorials to help demonstrate how the functions can be used.
9 | 
10 | Installation
11 | =============
12 | pvops can be installed using `pip`. See more information at [readthedocs](https://pvops.readthedocs.io/en/latest/).
13 | 
14 | Tutorials
15 | =========
16 | To get started with pvops, we recommend working with the [tutorials](https://pvops.readthedocs.io/en/latest/pages/tutorials.html)
17 | 
18 | 
19 | Package Layout and Documentation
20 | ==============
21 | 
22 | The package is delineated into the following directories.
23 | ``` 24 | ├───docs : Documentation directory 25 | | 26 | ├───tutorials : Contains tutorials of functionality 27 | │ └─── example_data : └─── Example data 28 | | 29 | └───pvops : Source function library 30 | ├───tests : ├─── Library stability tests 31 | ├───text : ├─── Text processing functions 32 | ├───text2time : ├─── Text2Timeseries functions 33 | ├───timeseries : ├─── Timeseries functions 34 | └───iv : └─── Current-voltage functions 35 | ``` 36 | 37 | More information about these modules is available at [readthedocs](https://pvops.readthedocs.io/en/latest/). 38 | 39 | Citing 40 | ====== 41 | 42 | If using this package, please cite our [JOSS paper](https://joss.theoj.org/papers/10.21105/joss.05755#) using the following: 43 | 44 | **Citation:** 45 | 46 | ``` 47 | Bonney et al., (2023). pvOps: a Python package for empirical analysis of photovoltaic field data. 48 | Journal of Open Source Software, 8(91), 5755, https://doi.org/10.21105/joss.05755 49 | ``` 50 | 51 | **BibTex:** 52 | 53 | ``` 54 | @article{Bonney2023, 55 | doi = {10.21105/joss.05755}, 56 | url = {https://doi.org/10.21105/joss.05755}, 57 | year = {2023}, 58 | publisher = {The Open Journal}, 59 | volume = {8}, 60 | number = {91}, 61 | pages = {5755}, 62 | author = {Kirk L. Bonney and Thushara Gunda and Michael W. Hopwood and Hector Mendoza and Nicole D. Jackson}, 63 | title = {pvOps: a Python package for empirical analysis of photovoltaic field data}, 64 | journal = {Journal of Open Source Software} } 65 | ``` 66 | 67 | Contributing 68 | ============ 69 | 70 | The long-term success of pvops requires community support. Please see the [Contributing page](https://pvops.readthedocs.io/en/latest/) for more on how you can contribute. 71 | 72 | [![Contributors Display](https://badges.pufler.dev/contributors/sandialabs/pvOps?size=50&padding=5&bots=true)](https://badges.pufler.dev) 73 | 74 | Logo Credit: [Daniel Rubinstein](http://www.danielrubinstein.com/) 75 | 76 | Copyright and License 77 | ======= 78 | 79 | pvops is copyright through National Technology & Engineering Solutions of Sandia (NTESS). The software is distributed under the Revised BSD License. See the LICENSE file for more information. 80 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/css/my_style.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | .wy-nav-content { 4 | max-width: 1000px !important; 5 | } -------------------------------------------------------------------------------- /docs/assets/diode_param_extractor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/docs/assets/diode_param_extractor.png -------------------------------------------------------------------------------- /docs/assets/vis_overlap_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/docs/assets/vis_overlap_example.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | from pvops import __version__ 16 | 17 | sys.path.insert(0, os.path.abspath("../")) 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = u"pvops" 22 | copyright = u"2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS)" 23 | author = u"pvOps Developers" 24 | version = __version__ 25 | release = __version__ 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | 34 | language = 'en' 35 | 36 | extensions = [ 37 | "sphinx.ext.autodoc", 38 | # pull in documentation from docstrings in a semi-automatic way. 39 | "nbsphinx", 40 | # nbsphinx is a Sphinx extension that provides a source parser 41 | # for *.ipynb files 42 | "nbsphinx_link", 43 | # A sphinx extension for including notebook files from outside 44 | # the sphinx source root. 
45 | "sphinx_copybutton", 46 | # adds copy button to code blocks 47 | "sphinx.ext.coverage", 48 | # `make coverage` summarizes what has docstrings 49 | 'sphinx.ext.doctest', 50 | # allows for testing of code snippets 51 | 'sphinx.ext.viewcode', 52 | # add links to highlighted source code 53 | 'sphinx.ext.napoleon', 54 | # add parsing for google/numpy style docs 55 | 'sphinxcontrib.bibtex', 56 | # for bibtex referencing 57 | ] 58 | 59 | 60 | coverage_show_missing_items = True 61 | napoleon_numpy_docstring = True # use numpy style 62 | napoleon_google_docstring = False # not google style 63 | napoleon_use_rtype = False # option for return section formatting 64 | numpydoc_show_class_members = True 65 | numpydoc_show_inherited_class_members = False 66 | numpydoc_class_members_toctree = False 67 | napoleon_use_ivar = True # option for attribute section formatting 68 | napoleon_use_param = False # option for parameter section formatting 69 | viewcode_import = True # tries to find the source files 70 | bibtex_bibfiles = ['refs/pvops.bib'] 71 | 72 | # Add any paths that contain templates here, relative to this directory. 73 | templates_path = ["_templates"] 74 | 75 | # List of patterns, relative to source directory, that match files and 76 | # directories to ignore when looking for source files. 77 | # This pattern also affects html_static_path and html_extra_path. 78 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 79 | 80 | 81 | # -- Options for HTML output ------------------------------------------------- 82 | 83 | # The theme to use for HTML and HTML Help pages. See the documentation for 84 | # a list of builtin themes. 85 | # 86 | html_theme = "sphinx_rtd_theme" 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ["_static"] 92 | html_style = 'css/my_style.css' 93 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. _index: 2 | 3 | .. image:: assets/pvops_full_logo.svg 4 | :width: 400 5 | 6 | Overview 7 | ============ 8 | pvops is a python package for PV operators & researchers. 9 | It consists of a set of documented functions for supporting operations 10 | research of photovoltaic (PV) energy systems. 11 | The library leverages advances in machine learning, natural 12 | language processing and visualization 13 | tools to extract and visualize actionable information from common 14 | PV data including Operations & Maintenance (O&M) text data, timeseries 15 | production data, and current-voltage (IV) curves. 16 | 17 | .. 
.. list-table:: Module Overview
18 |    :widths: 25 25 50
19 |    :header-rows: 1
20 | 
21 |    * - Module
22 |      - Type of data
23 |      - Highlights of functions
24 |    * - text
25 |      - O&M records
26 |      - - fill data gaps in dates and categorical records
27 |        - visualize word clusters and patterns over time
28 |    * - timeseries
29 |      - Production data
30 |      - - estimate expected energy with multiple models
31 |        - evaluate inverter clipping
32 |        - survival analysis for O&M records
33 |    * - text2time
34 |      - O&M records and production data
35 |      - - analyze overlaps between O&M and production (timeseries) records
36 |        - visualize overlaps between O&M records and production data
37 |    * - iv
38 |      - IV records
39 |      - - simulate IV curves with physical faults
40 |        - extract diode parameters from IV curves
41 |        - classify faults using IV curves
42 | 
43 | Statement of Need
44 | =================
45 | 
46 | Continued interest in PV deployment across the world has resulted in increased awareness of needs associated
47 | with managing reliability and performance of these systems during operation. Current open-source packages for
48 | PV analysis focus on theoretical evaluations of solar power simulations (e.g., `pvlib`; :cite:p:`holmgren2018pvlib`),
49 | specific use cases of empirical evaluations (e.g., `RdTools`; :cite:p:`deceglie2018rdtools` and `Pecos`; :cite:p:`klise2016performance`
50 | for degradation analysis), or analysis of electroluminescence images (e.g., `PVimage`; :cite:p:`pierce2020identifying`). However,
51 | a general package that can support data-driven, exploratory evaluations of diverse field-collected information is currently lacking.
52 | To address this gap, we present `pvOps`, an open-source, Python package that can be used by researchers and industry
53 | analysts alike to evaluate different types of data routinely collected during PV field operations.
54 | 
55 | PV data collected in the field varies greatly in structure (i.e., timeseries and text records) and quality
56 | (i.e., completeness and consistency). The data available for analysis is frequently semi-structured.
57 | Furthermore, the level of detail collected between different owners/operators might vary.
58 | For example, some may capture a general start and end time for an associated event, whereas others might include
59 | additional time details for different resolution activities. This diversity in data types and structures often
60 | leads to data being under-utilized due to the amount of manual processing required. To address these issues,
61 | `pvOps` provides a suite of data processing, cleaning, and visualization methods to leverage insights across a
62 | broad range of data types, including operations and maintenance records, production timeseries, and IV curves.
63 | The functions within `pvOps` enable users to better parse available data to understand patterns in outages and production losses.
64 | 
65 | 
66 | .. toctree::
67 |    :maxdepth: 1
68 |    :caption: Available resources:
69 | 
70 |    Overview <self>
71 |    pages/userguide
72 |    pages/tutorials
73 |    pages/modules
74 |    pages/development
75 |    pages/contributing
76 |    pages/releasenotes
77 |    pages/references
78 | 
79 | 
80 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/pages/abbreviations.rst: -------------------------------------------------------------------------------- 1 | Abbreviations/Terminology 2 | ==================================== 3 | * AIT: Additive Interaction Model described in :cite:p:`app12041872` 4 | * CEC: California Energy Commission 5 | * WS: wind speed 6 | * Varr: Voltage array 7 | * T: Average cell temperature 8 | * Rsh_mult: Multiplier usually less than 1 to simulate a drop in RSH 9 | * Rs_mult: Multiplier usually less than 1 to simulate a drop in RS 10 | * Io_mult: Multiplier usually less than 1 to simulate a drop in IO 11 | * Il_mult: Multiplier usually less than 1 to simulate a drop in IL 12 | * nnsvth_mult: Multiplier usually less than 1 to simulate a drop in NNSVTH 13 | * E: Irradiance 14 | * Tc: Cell temp 15 | * gt: (G - Irradiation and T - temperature) 16 | * v_rbd: Reverse bias diode voltage 17 | * v_oc: Open circuit voltage -------------------------------------------------------------------------------- /docs/pages/apidoc/iv.rst: -------------------------------------------------------------------------------- 1 | iv module 2 | ========== 3 | 4 | iv.extractor module 5 | ------------------- 6 | 7 | .. automodule:: pvops.iv.extractor 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | iv.physics_utils module 13 | ----------------------- 14 | 15 | .. automodule:: pvops.iv.physics_utils 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | iv.preprocess module 21 | -------------------- 22 | 23 | .. automodule:: pvops.iv.preprocess 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | iv.simulator module 29 | ------------------- 30 | 31 | .. automodule:: pvops.iv.simulator 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | iv.utils module 37 | --------------- 38 | 39 | .. automodule:: pvops.iv.utils 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | iv.timeseries_simulator module 45 | ----------------------------------- 46 | 47 | .. automodule:: pvops.iv.timeseries_simulator 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | iv.models.nn module 53 | ------------------- 54 | 55 | .. automodule:: pvops.iv.models.nn 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: -------------------------------------------------------------------------------- /docs/pages/apidoc/text.rst: -------------------------------------------------------------------------------- 1 | text module 2 | ============ 3 | 4 | text.classify module 5 | ------------------------------------ 6 | 7 | .. 
automodule:: pvops.text.classify 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | text.defaults module 13 | ------------------------------------ 14 | 15 | .. automodule:: pvops.text.defaults 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 21 | text.nlp_utils module 22 | ------------------------------------ 23 | 24 | .. automodule:: pvops.text.nlp_utils 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | text.preprocess module 30 | ------------------------------------ 31 | 32 | .. automodule:: pvops.text.preprocess 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | text.utils module 38 | ------------------------------------ 39 | 40 | .. automodule:: pvops.text.utils 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | text.visualize module 46 | ------------------------------------ 47 | 48 | .. automodule:: pvops.text.visualize 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | -------------------------------------------------------------------------------- /docs/pages/apidoc/text2time.rst: -------------------------------------------------------------------------------- 1 | text2time module 2 | ================= 3 | 4 | text2time.preprocess module 5 | --------------------------- 6 | 7 | .. automodule:: pvops.text2time.preprocess 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | text2time.utils module 13 | ---------------------- 14 | 15 | .. automodule:: pvops.text2time.utils 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | text2time.visualize module 21 | -------------------------- 22 | 23 | .. automodule:: pvops.text2time.visualize 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | -------------------------------------------------------------------------------- /docs/pages/apidoc/timeseries.rst: -------------------------------------------------------------------------------- 1 | timeseries module 2 | ================== 3 | 4 | timeseries.preprocess module 5 | ---------------------------- 6 | 7 | .. automodule:: pvops.timeseries.preprocess 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | .. _timeseries models: 13 | 14 | timeseries models 15 | ----------------- 16 | 17 | timeseries.models.linear module 18 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 19 | 20 | .. automodule:: pvops.timeseries.models.linear 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | timeseries.models.AIT module 26 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 27 | 28 | .. automodule:: pvops.timeseries.models.AIT 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | timeseries.models.iec module 34 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 35 | 36 | .. automodule:: pvops.timeseries.models.iec 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | timeseries.models.survival module 42 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 43 | 44 | .. automodule:: pvops.timeseries.models.survival 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: -------------------------------------------------------------------------------- /docs/pages/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | Contributing 4 | ============ 5 | 6 | Thank you for wanting to contribute to this library! We will try to make this 7 | an easy process for you. It is recommended that you read 8 | the :ref:`development` page so that you can lint 9 | and test before submitting code. 
10 | Checking that your PR passes the required testing and linting procedures will speed up
11 | the acceptance of your PR.
12 | 
13 | Issues and bug reporting
14 | ------------------------
15 | 
16 | To report issues or bugs, please create a new issue on
17 | the `pvops issues page <https://github.com/sandialabs/pvOps/issues>`_.
18 | Before submitting your bug report, please perform a cursory search
19 | to see if the problem has already been reported. If it has been reported,
20 | and the issue is still open, add a comment to the existing issue instead of opening a new issue.
21 | 
22 | Guidelines for effective bug reporting
23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 | 
25 | - Use a clear, descriptive title for the issue.
26 | 
27 | - Describe the steps to reproduce the problem,
28 |   the behavior you observed after following the steps, and the expected behavior.
29 | 
30 | - If possible, provide a simple example of the bug using pvOps example data.
31 | 
32 | - When relevant, provide information on your computing environment
33 |   (operating system, Python version, pvOps version or commit).
34 | 
35 | - For runtime errors, provide a function call stack.
36 | 
37 | Contributing code
38 | -----------------
39 | 
40 | Software developers, within the core development team and external collaborators,
41 | are expected to follow standard practices to document and test new code.
42 | Software developers interested in contributing to the project are encouraged
43 | to create a Fork of the project and submit a Pull Request (PR) using GitHub.
44 | Pull requests will be reviewed by the core development team.
45 | Create a PR, or help with other open PRs in the library,
46 | via the `pvops PR page <https://github.com/sandialabs/pvOps/pulls>`_.
47 | 
48 | Guidelines for preparing and submitting pull-requests
49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
50 | 
51 | - Use a clear, descriptive title for your pull-requests.
52 | 
53 | - Describe whether your submission is a bugfix, documentation update, or a feature
54 |   enhancement. Provide a concise description of your proposed changes.
55 | 
56 | - Provide references to open issues, if applicable, to provide the necessary
57 |   context to understand your pull request.
58 | 
59 | - Make sure that your pull-request merges cleanly with the `master` branch of
60 |   pvOps. When working on a feature, always create your feature branch off of
61 |   the latest `master` commit.
62 | 
63 | - Ensure that appropriate documentation and tests accompany any added features.
64 | 
--------------------------------------------------------------------------------
/docs/pages/development.rst:
--------------------------------------------------------------------------------
1 | .. _development:
2 | 
3 | Developing pvOps
4 | =====================
5 | 
6 | Installation
7 | ------------
8 | 
9 | To maintain a local installation, developers should use the following commands::
10 | 
11 |     git clone https://github.com/sandialabs/pvOps.git
12 |     cd pvops
13 |     pip install -e .
14 | 
15 | Testing
16 | -------
17 | To test locally, run::
18 | 
19 |     pytest pvops
20 | 
21 | at the root of the repository. Note that this requires the installation
22 | of pytest.
23 | 
24 | Linting
25 | -------
26 | 
27 | pvOps uses flake8 to maintain code standards. To lint locally using
28 | the same filters required by the pvops CI/CD pipeline, run the following
29 | command at the root of the repository::
30 | 
31 |     flake8 . --count --statistics --show-source --ignore=E402,E203,E266,E501,W503,F403,F401,W291,E302,W391,W292,F405,E722,W504,E121,E125,E712
32 | 
33 | Note that this requires the installation of flake8.
34 | 
35 | Documentation
36 | ------------------
37 | 
38 | Building docs
39 | ^^^^^^^^^^^^^^^
40 | 
41 | To build docs locally, navigate to ``pvops/docs`` and run::
42 | 
43 |     make html
44 | 
45 | After building, the static html files can be found in ``_build/html``.
46 | 
47 | Docstrings
48 | ^^^^^^^^^^^
49 | 
50 | The pvOps documentation adheres to NumPy style docstrings. Not only does this
51 | help to keep a consistent style, but it is also necessary for the API documentation
52 | to be parsed and displayed correctly. For an example of what this should look like::
53 | 
54 |     def func(arg1, arg2):
55 |         """Summary line.
56 | 
57 |         Extended description of function.
58 | 
59 |         Parameters
60 |         ----------
61 |         arg1 : int
62 |             Description of arg1
63 |         arg2 : str
64 |             Description of arg2
65 | 
66 |         Returns
67 |         -------
68 |         bool
69 |             Description of return value
70 | 
71 |         """
72 |         return True
73 | 
74 | Additional examples can be found in the
75 | `napoleon documentation `_.
76 | 
77 | Extending Documentation
78 | ^^^^^^^^^^^^^^^^^^^^^^^
79 | 
80 | When adding new functionality to the repository, it is important
81 | to check that it is being properly documented in the API documentation.
82 | Most of this is automatic. For example, if a function is added to
83 | ``pvops.text.visualize`` with a proper docstring, there is no more work to do.
84 | However, when new files are created they must be added to the appropriate page
85 | in ``docs/pages/apidoc`` so that the automatic documentation recognizes them.
86 | 
87 | New pages should be placed into ``docs/pages``, and linked to in
88 | ``index.rst``, or another page. It is recommended to use absolute paths
89 | (starting from the root of the documentation) when linking anything.
90 | 
--------------------------------------------------------------------------------
/docs/pages/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | =============
3 | 
4 | pvops is tested on Python versions 3.8, 3.9, 3.10, and 3.11 and depends on a variety of
5 | packages.
6 | 
7 | The latest release of pvops is accessible via PyPI using the following
8 | command line prompt::
9 | 
10 |     $ pip install pvops
11 | 
12 | Alternatively, the package can be installed from GitHub::
13 | 
14 |     $ git clone https://github.com/sandialabs/pvOps.git
15 |     $ cd pvops
16 |     $ pip install .
17 | 
18 | NLTK data
19 | ----------
20 | 
21 | Functions in the text package rely on the "punkt_tab" dataset from the nltk package.
22 | After proper installation of pvops, run the commands::
23 | 
24 |     >>> import nltk
25 |     >>> nltk.download('punkt_tab')
26 |     >>> nltk.download('stopwords')
27 | 
28 | Those operating under a proxy may have difficulty with this installation.
29 | This `stack exchange post `_
30 | may help.
31 | 
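To verify the installation, check that the package imports cleanly and reports
a version:

.. code-block:: python

    >>> import pvops
    >>> print(pvops.__version__)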
--------------------------------------------------------------------------------
/docs/pages/moduleguides/iv.rst:
--------------------------------------------------------------------------------
1 | IV Guide
2 | ===============
3 | 
4 | Module Overview
5 | ----------------
6 | 
7 | These functions focus on current-voltage (IV) curve simulation and
8 | classification.
9 | 
10 | .. note::
11 |     To use the capabilities in this module, pvOps must be installed with the ``iv`` option:
12 |     ``pip install pvops[iv]``.
13 | 
14 | 
15 | Tutorials that exemplify usage can be found at:
16 |     - `tutorial_iv_classifier.ipynb `_.
17 |     - `tutorial_iv_diode_extractor.ipynb `_.
18 |     - `tutorial_iv_simulator.ipynb `_.
19 | 
20 | extractor
21 | ^^^^^^^^^^^^^^^^^^^^^
22 | 
23 | * :py:mod:`~pvops.iv.extractor` primarily features the
24 |   :py:class:`~pvops.iv.extractor.BruteForceExtractor` class, which
25 |   extracts diode parameters from IV curves (even outdoor-collected).
26 | 
27 | physics_utils
28 | ^^^^^^^^^^^^^^^^^^^^^
29 | 
30 | :py:mod:`~pvops.iv.physics_utils` contains methods which aid the IV
31 | Simulator's physics-based calculations and the preprocessing pipeline's
32 | correction calculations.
33 | 
34 | * :py:func:`~pvops.iv.physics_utils.calculate_IVparams` calculates
35 |   key parameters of an IV curve.
36 | * :py:func:`~pvops.iv.physics_utils.smooth_curve` smooths an
37 |   IV curve using a polyfit.
38 | * :py:func:`~pvops.iv.physics_utils.iv_cutoff` cuts off an IV curve
39 |   greater than a given voltage value.
40 | * :py:func:`~pvops.iv.physics_utils.intersection` computes
41 |   the intersection between two curves.
42 | * :py:func:`~pvops.iv.physics_utils.T_to_tcell` calculates
43 |   a cell temperature given ambient temperature via NREL weather-correction
44 |   tools.
45 | * :py:func:`~pvops.iv.physics_utils.bypass` limits voltage
46 |   to above a minimum value.
47 | * :py:func:`~pvops.iv.physics_utils.add_series` adds two
48 |   IV curves in series.
49 | * :py:func:`~pvops.iv.physics_utils.voltage_pts`
50 |   provides voltage points for an IV curve.
51 | * :py:func:`~pvops.iv.physics_utils.gt_correction` corrects an IV
52 |   trace for irradiance and temperature using one of three
53 |   available options.
54 | 
55 | preprocess
56 | ^^^^^^^^^^^^^^^^^^^^^
57 | 
58 | :py:mod:`~pvops.iv.preprocess` contains the preprocessing function
59 | :py:func:`~pvops.iv.preprocess.preprocess`, which
60 | corrects a set of data according to irradiance and temperature and
61 | normalizes the curves so they are comparable.
62 | 
63 | simulator
64 | ^^^^^^^^^^^^^^^^^^^^^
65 | 
66 | :py:mod:`~pvops.iv.simulator` holds the
67 | :py:class:`~pvops.iv.simulator.Simulator` class which can simulate
68 | current-voltage (IV) curves under different environmental and fault
69 | conditions. There is also a utility function
70 | :py:func:`~pvops.iv.simulator.create_df` for building an IV curve dataframe
71 | from a set of parameters.
72 | 
73 | utils
74 | ^^^^^^^^^^^^^^^^^^^^^
75 | 
76 | :py:mod:`~pvops.iv.utils` holds the utility function
77 | :py:func:`~pvops.iv.utils.get_CEC_params` which connects to the
78 | California Energy Commission (CEC)
79 | database hosted by pvLib for cell-level and module-level parameters.
80 | 
81 | timeseries_simulator
82 | ^^^^^^^^^^^^^^^^^^^^^
83 | 
84 | :py:mod:`~pvops.iv.timeseries_simulator` contains
85 | :py:class:`~pvops.iv.timeseries_simulator.IVTimeseriesGenerator`,
86 | a subclass of the IV Simulator,
87 | which allows users to specify time-based failure degradation
88 | patterns. The class
89 | :py:class:`~pvops.iv.timeseries_simulator.TimeseriesFailure`
90 | is used to define the time-based failures.
91 | 
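Example Code
--------------

The sketch below simulates IV curves for a string with partial shading,
following the pattern used in ``tutorial_iv_simulator.ipynb``. The condition
values and string layout here are illustrative assumptions; check the method
signatures against the API documentation before adapting this to your data.

.. code-block:: python

    from pvops.iv import simulator

    sim = simulator.Simulator()

    # Define an environmental/fault condition (values assumed for illustration)
    condition = {'identifier': 'light_shading', 'E': 400, 'Tc': 30}

    # Apply the condition across a module and save the faulted definition
    sim.add_preset_conditions('complete', condition, save_name='Shaded_module')

    # Build a string mixing pristine and shaded modules
    sim.build_strings({'Partial_shading': ['pristine'] * 6 + ['Shaded_module'] * 6})

    # Simulate and collect the string-level IV curves as a dataframe
    sim.simulate()
    df = sim.sims_to_df(focus=['string'], cutoff=True)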
--------------------------------------------------------------------------------
/docs/pages/moduleguides/text.rst:
--------------------------------------------------------------------------------
1 | Text Guide
2 | ============
3 | 
4 | Module Overview
5 | ----------------
6 | 
7 | This module aims to support the consistent extraction of key features
8 | in O&M data:
9 | 
10 | * timestamp information
11 | * characteristic categorical information
12 | * a concise synopsis of the issue for context
13 | 
14 | Implemented functions include those for filling in data gaps (text.preprocess submodule),
15 | machine learning analyses to fill in gaps in categorical information and to
16 | generate concise summary strings (text.classify submodule), functions
17 | to prepare data for natural language processing (text.nlp_utils submodule),
18 | and a visualization suite (text.visualize submodule).
19 | 
20 | An example implementation of all capabilities can be found in
21 | `text_class_example.py `_
22 | for specifics, and `tutorial_textmodule.ipynb `_ for basics.
23 | 
24 | Text pre-processing
25 | ^^^^^^^^^^^^^^^^^^^^^
26 | 
27 | :py:mod:`~pvops.text.preprocess`
28 | 
29 | These functions process the O&M data into concise, machine learning-ready documents.
30 | Additionally, there are options to extract dates from the text.
31 | 
32 | * :py:func:`~pvops.text.preprocess.preprocessor` acts as a wrapper function,
33 |   utilizing the other preprocessing functions, which prepares the data for machine learning.
34 | 
35 |   * See ``text_class_example.prep_data_for_ML`` for an example.
36 | 
37 | * :py:func:`~pvops.text.preprocess.preprocessor` should be used with the keyword argument
38 |   `extract_dates_only = True` if the primary interest is date extraction
39 |   instead of continuing to use the data for machine learning.
40 | 
41 |   * See the ``text_class_example.extract_dates`` module for an example.
42 | 
43 | 
44 | Text classification
45 | ^^^^^^^^^^^^^^^^^^^^^
46 | 
47 | :py:mod:`~pvops.text.classify`
48 | 
49 | These functions process the O&M data to make an inference on the specified event descriptor.
50 | 
51 | * :py:func:`~pvops.text.classify.classification_deployer` is used to conduct supervised
52 |   or unsupervised classification of text documents.
53 |   This function conducts a grid search across the passed classifiers and hyperparameters.
54 | 
55 |   * The :py:func:`~pvops.text.defaults.supervised_classifier_defs` and
56 |     :py:func:`~pvops.text.defaults.unsupervised_classifier_defs`
57 |     functions return default values for conducting the grid search.
58 | 
59 |   * See the ``text_class_example.classify_supervised`` or ``text_class_example.classify_unsupervised``
60 |     modules for an example.
61 | 
62 | * Once the model is built and selected, classification (for supervised ML)
63 |   or clustering (for unsupervised ML) analysis can be conducted on the best model returned from the pipeline object.
64 | 
65 |   * See the ``text_class_example.predict_best_model`` module for an example.
66 | 
67 | 
68 | Utils
69 | ^^^^^^^^^^^^^^^^^^^^^
70 | 
71 | :py:mod:`~pvops.text.utils`
72 | 
73 | These helper functions focus on performing exploratory or secondary processing activities for the O&M data.
74 | 
75 | * :py:func:`~pvops.text.utils.remap_attributes` is used to reorganize an attribute column into a new set of labels.
76 | 
77 | NLP Utils
78 | ^^^^^^^^^^^^
79 | 
80 | :py:mod:`~pvops.text.nlp_utils`
81 | 
82 | These helper functions focus on processing in preparation for NLP activities.
83 | 
84 | * :py:func:`~pvops.text.nlp_utils.summarize_text_data` prints summarized contents of the O&M data.
85 | * :py:class:`~pvops.text.nlp_utils.Doc2VecModel` performs a gensim Doc2Vec
86 |   transformation of the input documents to create embedded representations of the documents.
87 | * :py:class:`~pvops.text.nlp_utils.DataDensifier` is a data structure transformer which converts sparse data to dense data.
88 | * :py:func:`~pvops.text.nlp_utils.create_stopwords` concatenates a list of stopwords using both words drawn from nltk and user-specified words.
89 | 
90 | 
91 | Visualizations
92 | ^^^^^^^^^^^^^^^^^^^^^
93 | These functions create visualizations to get a better understanding of your documents.
94 | 
95 | * :py:func:`~pvops.text.visualize.visualize_attribute_connectivity` visualizes the connectivity of two attributes.
96 | 
97 | .. image:: ../../assets/vis_attr_connect_example.svg
98 |     :width: 600
99 | 
100 | * :py:func:`~pvops.text.visualize.visualize_attribute_timeseries` evaluates the density of an attribute over time.
101 | 
102 | .. image:: ../../assets/vis_attr_timeseries_example.svg
103 |     :width: 600
104 | 
105 | * :py:func:`~pvops.text.visualize.visualize_cluster_entropy` observes the performance of different text embeddings.
106 | 
107 | .. image:: ../../assets/vis_cluster_entropy_example.svg
108 |     :width: 600
109 | 
110 | * :py:func:`~pvops.text.visualize.visualize_document_clusters` visualizes popular words in clusters after a cluster analysis is run.
111 | 
112 | .. image:: ../../assets/vis_doc_clusters_example.svg
113 |     :width: 600
114 | 
115 | * :py:func:`~pvops.text.visualize.visualize_word_frequency_plot` visualizes word frequencies in the associated attribute column of O&M data.
116 | 
117 | .. image:: ../../assets/vis_freq_plot_example.svg
118 |     :width: 600
119 | 
120 | 
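Example Code
--------------

A minimal sketch of preparing O&M text for machine learning with
:py:func:`~pvops.text.preprocess.preprocessor`, mirroring ``text_class_example.py``.
The column names in ``col_dict`` are assumptions keyed to the example ticket
data in ``tutorials/example_data``; substitute the names used in your own records.

.. code-block:: python

    import pandas as pd
    from pvops.text import nlp_utils, preprocess

    # Path assumes the script is run from the tutorials/ directory
    om_df = pd.read_csv('example_data/example_ML_ticket_data.csv')

    # Map the generic keys expected by pvops onto this dataset's columns
    # (the column names here are assumptions for the example data)
    col_dict = {
        'data': 'CompletionDesc',              # raw text column
        'eventstart': 'Date_EventStart',       # timestamp column
        'save_data_column': 'processed_data',  # destination for cleaned text
        'save_date_column': 'processed_date',  # destination for extracted dates
    }

    lst_stopwords = nlp_utils.create_stopwords()

    # Tokenize, remove stopwords, and extract dates in one pass
    om_df = preprocess.preprocessor(
        om_df, lst_stopwords, col_dict,
        print_info=False, extract_dates_only=False)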
90 | 91 | Visualizations 92 | ^^^^^^^^^^^^^^^^^^^^^ 93 | These functions create visualizations to get a better understanding of your documents. 94 | 95 | * :py:func:`~pvops.text.visualize.visualize_attribute_connectivity` visualizes the connectivity of two attributes. 96 | 97 | .. image:: ../../assets/vis_attr_connect_example.svg 98 | :width: 600 99 | 100 | * :py:func:`~pvops.text.visualize.visualize_attribute_timeseries` evaluates the density of an attribute over time. 101 | 102 | .. image:: ../../assets/vis_attr_timeseries_example.svg 103 | :width: 600 104 | 105 | * :py:func:`~pvops.text.visualize.visualize_cluster_entropy` observes the performance of different text embeddings. 106 | 107 | .. image:: ../../assets/vis_cluster_entropy_example.svg 108 | :width: 600 109 | 110 | * :py:func:`~pvops.text.visualize.visualize_document_clusters` visualizes popular words in clusters after a cluster analysis is run. 111 | 112 | .. image:: ../../assets/vis_doc_clusters_example.svg 113 | :width: 600 114 | 115 | * :py:func:`~pvops.text.visualize.visualize_word_frequency_plot` visualizes word frequencies in the associated attribute column of O&M data. 116 | 117 | .. image:: ../../assets/vis_freq_plot_example.svg 118 | :width: 600 119 | 120 | 121 | .. Example Code 122 | .. -------------- -------------------------------------------------------------------------------- /docs/pages/moduleguides/text2time.rst: -------------------------------------------------------------------------------- 1 | Text2Time Guide 2 | ================ 3 | 4 | Module Overview 5 | ---------------- 6 | 7 | Aligning production data with O&M tickets is not a trivial task since 8 | the intersection of dates and the identification of anomalies depend on the nuances 9 | within the two datasets. This set of functions facilitates this 10 | data fusion. Key features include: 11 | 12 | * conducting quality checks and controls on data. 13 | * identification of overlapping periods between O&M and production data. 14 | * generation of baseline values for production loss estimations. 15 | * calculation of losses from production anomalies for specific time periods. 16 | 17 | An example of usage can be found in 18 | `tutorial_text2time_module.ipynb `_. 19 | 20 | 21 | The text2time module can be broken down into three main components: 22 | `data pre-processing`, `utils`, and `visualizations`. 23 | 24 | Data pre-processing 25 | ^^^^^^^^^^^^^^^^^^^^^ 26 | 27 | :py:mod:`text2time.preprocess module ` 28 | 29 | These functions pre-process user O&M and production data to prepare them for 30 | further analyses and visualizations. 31 | 32 | * :py:func:`~pvops.text2time.preprocess.om_date_convert` and 33 | :py:func:`~pvops.text2time.preprocess.prod_date_convert` 34 | convert dates in string format to date-time objects in the O&M and 35 | production data respectively. 36 | * :py:func:`~pvops.text2time.preprocess.data_site_na` 37 | handles missing site IDs in the user data. This function can 38 | be used for both O&M and production data. 39 | * :py:func:`~pvops.text2time.preprocess.om_datelogic_check` 40 | detects and handles issues with the logic of the O&M date, specifically 41 | when the conclusion of an event occurs before it begins. 42 | * :py:func:`~pvops.text2time.preprocess.om_nadate_process` and 43 | :py:func:`~pvops.text2time.preprocess.prod_nadate_process` 44 | detect and handle any missing time-stamps in the O&M and 45 | production data respectively. 46 | 47 | Utils 48 | ^^^^^^^^^^^^^^^^^^^^^ 49 | 50 | :py:mod:`text2time.utils module ` 51 | 52 | These functions perform secondary calculations 53 | on the O&M and production data to aid in data analyses and visualizations. 54 | 55 | * :py:func:`~pvops.text2time.utils.iec_calc` calculates a 56 | comparison dataset for the production data based on irradiance as 57 | calculated by the IEC standard. 58 | * :py:func:`~pvops.text2time.utils.summarize_overlaps` summarizes 59 | the overlapping production and O&M data. 60 | * :py:func:`~pvops.text2time.utils.om_summary_stats` summarizes 61 | statistics (e.g., event duration and month of occurrence) of O&M data. 62 | * :py:func:`~pvops.text2time.utils.overlapping_data` trims the 63 | production and O&M data frames and retains only the data where both datasets 64 | overlap in time. 65 | * :py:func:`~pvops.text2time.utils.prod_anomalies` detects and handles 66 | issues when the production data is input in cumulative format and unexpected 67 | dips show up in the data. 68 | * :py:func:`~pvops.text2time.utils.prod_quant` calculates a 69 | comparison between the actual production data and a baseline 70 | (e.g. from a model from :ref:`timeseries models`). 71 | 72 | Visualizations 73 | ^^^^^^^^^^^^^^^^^^^^^ 74 | 75 | :py:mod:`text2time.visualize module ` 76 | 77 | These functions visualize the processed O&M and production data: 78 | 79 | * :py:func:`~pvops.text2time.visualize.visualize_categorical_scatter` 80 | generates categorical scatter plots of a chosen variable based on a specified 81 | category (e.g. site ID) for the O&M data. 82 | 83 | .. image:: ../../assets/vis_cat_scatter_example.svg 84 | :width: 600 85 | 86 | * :py:func:`~pvops.text2time.visualize.visualize_counts` 87 | generates a count plot of categories based on a chosen categorical variable 88 | column for the O&M data. 89 | If that variable is the user's site ID for every ticket, a plot for total 90 | count of events can be generated. 91 | 92 | .. image:: ../../assets/vis_counts_example.svg 93 | :width: 600 94 | 95 | * :py:func:`~pvops.text2time.visualize.visualize_om_prod_overlap` 96 | creates a visualization that overlays the O&M data on top of the 97 | coinciding production data. 98 | 99 | .. image:: ../../assets/vis_overlap_example.png 100 | :width: 600 101 | 102 | Example Code 103 | -------------- 104 | 105 | Load in O&M data and convert dates to Python date-time objects. 106 | 107 | .. doctest:: 108 | 109 | >>> import pandas as pd 110 | >>> import os 111 | >>> from pvops.text2time import preprocess 112 | 113 | >>> example_OMpath = os.path.join('example_data', 'example_om_data2.csv') 114 | >>> om_data = pd.read_csv(example_OMpath, on_bad_lines='skip', engine='python') 115 | >>> om_col_dict = { 116 | ... 'siteid': 'randid', 117 | ... 'datestart': 'date_start', 118 | ... 'dateend': 'date_end', 119 | ... 'workID': 'WONumber', 120 | ... 'worktype': 'WOType', 121 | ... 'asset': 'Asset', 122 | ... 'eventdur': 'EventDur', #user's name choice for new column (Repair Duration) 123 | ... 'modatestart': 'MonthStart', #user's name choice for new column (Month when an event begins) 124 | ... 'agedatestart': 'AgeStart'} #user's name choice for new column (Age of system when event begins) 125 | >>> om_data_converted = preprocess.om_date_convert(om_data, om_col_dict)
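From here, the remaining pre-processing steps chain together in the same way. A hedged continuation of the example above (the two-value returns and the ``'swap'`` flag are assumptions patterned on the tutorial notebook, so verify them against the API reference):

.. code-block:: python

    # Handle rows with missing site IDs
    om_data_sitena, addressed = preprocess.data_site_na(om_data_converted, om_col_dict)

    # Swap start/end dates on tickets whose event "ends" before it begins
    om_data_checked, addressed = preprocess.om_datelogic_check(om_data_sitena, om_col_dict, 'swap')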
-------------------------------------------------------------------------------- /docs/pages/moduleguides/timeseries.rst: -------------------------------------------------------------------------------- 1 | Timeseries Guide 2 | ================== 3 | 4 | Module Overview 5 | ----------------- 6 | 7 | These functions provide processing and modelling capabilities for timeseries 8 | production data. Processing functions prepare data to train two 9 | types of expected energy models: 10 | 11 | * AIT: additive interaction trained model, see :cite:t:`app12041872` 12 | for more information. 13 | * Linear: a high flexibility linear regression model. 14 | 15 | Additionally, the ability to generate expected energy via IEC 16 | standards (IEC 61724-1) is implemented in the :py:mod:`~pvops.timeseries.models.iec` 17 | module. 18 | 19 | An example of usage can be found in 20 | `tutorial_timeseries_module.ipynb `_. 21 | 22 | Preprocess 23 | ^^^^^^^^^^^^^^^^^^^^^ 24 | * :py:func:`pvops.timeseries.preprocess.prod_inverter_clipping_filter` 25 | filters out production periods with inverter clipping. 26 | The core method was adopted from `pvlib/pvanalytics`. 27 | * :py:func:`pvops.timeseries.preprocess.normalize_production_by_capacity` 28 | normalizes power by site capacity. 29 | * :py:func:`pvops.timeseries.preprocess.prod_irradiance_filter` 30 | filters rows of the production data frame according to performance and data 31 | quality. NOTE: this method is currently in development. 32 | * :py:func:`pvops.timeseries.preprocess.establish_solar_loc` 33 | adds solar position data to production data using 34 | pvlib. 35 | 36 | Models 37 | ^^^^^^^^^^^^^^^^^^^^^ 38 | * :py:func:`pvops.timeseries.models.linear.modeller` is a wrapper method 39 | used to model timeseries data using a linear model. 40 | This method gives multiple options for the 41 | learned model structure. 42 | * :py:func:`pvops.timeseries.models.AIT.AIT_calc` calculates expected energy 43 | using measured irradiance based on a regression model trained on field data. 44 | * :py:func:`pvops.timeseries.models.iec.iec_calc` calculates expected energy using measured irradiance 45 | based on IEC calculations. 46 | 47 | Example Code 48 | -------------- 49 | 50 | Load in data and run some of the processing and modelling functions; a hedged sketch follows.
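The sketch below is illustrative only: the example file, its column names (``'date'``, ``'randid'``, ``'generated_kW'``, ``'irrad_poa'``), and the exact keyword arguments of ``linear.modeller`` are assumptions here, so defer to the tutorial notebook and API reference for authoritative usage:

.. code-block:: python

    import pandas as pd
    from pvops.timeseries.models import linear

    # Example production data with covariates (file and column names assumed)
    prod_df = pd.read_csv('example_data/example_prod_with_covariates.csv',
                          parse_dates=['date'])

    # Fit a linear expected-energy model and evaluate it on a held-out split
    model, train_df, test_df = linear.modeller(
        prod_col_dict={'siteid': 'randid',
                       'timestamp': 'date',
                       'powerprod': 'generated_kW',
                       'irradiance': 'irrad_poa'},
        kernel_type='polynomial',
        X_parameters=['irrad_poa'],
        Y_parameter='generated_kW',
        prod_df=prod_df,
        test_split=0.2)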
-------------------------------------------------------------------------------- /docs/pages/modules.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | apidoc/text 8 | apidoc/text2time 9 | apidoc/timeseries 10 | apidoc/iv 11 | -------------------------------------------------------------------------------- /docs/pages/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | Citing Us 5 | --------- 6 | 7 | If using this package, please cite us using the following: 8 | 9 | .. code-block:: text 10 | 11 | Bonney et al., (2023). pvOps: a Python package for empirical analysis of photovoltaic field data. 12 | Journal of Open Source Software, 8(91), 5755, https://doi.org/10.21105/joss.05755 13 | 14 | In BibTeX format: 15 | 16 | .. code-block:: text 17 | 18 | @article{Bonney2023, 19 | doi = {10.21105/joss.05755}, 20 | url = {https://doi.org/10.21105/joss.05755}, 21 | year = {2023}, 22 | publisher = {The Open Journal}, 23 | volume = {8}, 24 | number = {91}, 25 | pages = {5755}, 26 | author = {Kirk L. Bonney and Thushara Gunda and Michael W. Hopwood and Hector Mendoza and Nicole D. Jackson}, 27 | title = {pvOps: a Python package for empirical analysis of photovoltaic field data}, 28 | journal = {Journal of Open Source Software} } 29 | 30 | 31 | We also utilize content from other packages. See the NOTICE/ directory on our GitHub! 32 | 33 | Additionally, some of our own content comes from published papers. See the following external references. 34 | 35 | External references 36 | ------------------- 37 | 38 | .. bibliography:: 39 | :all: 40 | 41 | -------------------------------------------------------------------------------- /docs/pages/releasenotes.rst: -------------------------------------------------------------------------------- 1 | .. _whatsnew: 2 | 3 | What's New 4 | ========== 5 | 6 | These are new features and improvements of note in each release. 7 | 8 | .. include:: releasenotes/0.6.1.rst 9 | 10 | .. include:: releasenotes/0.6.0.rst 11 | 12 | .. include:: releasenotes/0.5.3.rst 13 | 14 | .. include:: releasenotes/0.5.2.rst 15 | 16 | .. include:: releasenotes/0.5.1.rst 17 | 18 | .. include:: releasenotes/0.5.0.rst 19 | 20 | .. include:: releasenotes/0.4.0.rst 21 | 22 | .. include:: releasenotes/0.3.0.rst 23 | 24 | .. include:: releasenotes/0.2.0.rst 25 | 26 | .. include:: releasenotes/0.1.9.rst 27 | 28 | .. include:: releasenotes/0.1.8.rst 29 | 30 | .. include:: releasenotes/0.1.7.rst 31 | 32 | .. include:: releasenotes/beta.rst 33 | 34 | .. include:: releasenotes/alpha.rst -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.1.7.rst: -------------------------------------------------------------------------------- 1 | 0.1.7 (September 20 2021) 2 | ------------------------- 3 | 4 | Updated functions for data processing (text and timeseries) analysis. Also includes IV curve functions. 5 | -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.1.8.rst: -------------------------------------------------------------------------------- 1 | 0.1.8 (Jan 14 2022) 2 | ----------------------- 3 | 4 | Includes a data-derived expected energy model trained using machine learning methods. An associated example is also within the documentation. 5 | 6 | Functionality 7 | ~~~~~~~~~~~~~ 8 | 9 | * Added AIT model 10 | 11 | Other 12 | ~~~~~~~~~~~~~ 13 | * Add citation.cif 14 | -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.1.9.rst: -------------------------------------------------------------------------------- 1 | 0.1.9 (November 21 2022) 2 | ------------------------- 3 | 4 | Includes updated documentation and fixes for dependency issues. 5 | 6 | 7 | Documentation 8 | ~~~~~~~~~~~~~ 9 | 10 | * Docstrings polished across the package. 11 | * Resolved documentation build errors and warnings. 12 | -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.2.0.rst: -------------------------------------------------------------------------------- 1 | 0.2.0 (August 9 2023) 2 | ----------------------- 3 | 4 | This release incorporates new functions and addresses deprecated commands in some of the package dependencies.
5 | 6 | Documentation 7 | ~~~~~~~~~~~~~ 8 | 9 | * Doc pages "makeover" in preparation for JOSS publication. 10 | * Added additional context and detail to example notebooks. 11 | * Added module guides. 12 | * Added contributing pages. 13 | 14 | New Features 15 | ~~~~~~~~~~~~ 16 | 17 | * Added `get_attributes_from_keywords` to text.classify 18 | * Added `get_keywords_of_interest` to text.preprocess 19 | * Added `remap_words_in_text` to text.visualize -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.3.0.rst: -------------------------------------------------------------------------------- 1 | 0.3.0 (November 9 2023) 2 | ----------------------- 3 | 4 | This release incorporates new functions and addresses deprecated commands in some of the package dependencies. 5 | 6 | Functionality 7 | ~~~~~~~~~~~~~~ 8 | 9 | * Updated visualize_attribute_connectivity to use bipartite graph layout (updated function). 10 | 11 | * IV related dependencies moved to an installation extra (install using `pip install pvops[iv]`). 12 | 13 | * Removed deprecated normalization parameters in ML pipeline (bug fix). 14 | 15 | * Updated code to fix deprecation/future warnings. 16 | 17 | Testing 18 | ~~~~~~~~~~~~~~ 19 | 20 | * Added Python 3.11 to the test environment. 21 | 22 | Documentation 23 | ~~~~~~~~~~~~~~ 24 | 25 | * Fixed small typos in index.rst. 26 | 27 | * Renamed references to examples as tutorials for consistency. 28 | 29 | * Updated docs to refer to modules as modules, rather than packages. 30 | 31 | * Updated RTD config to install doc requirements using the package installation extra. 32 | 33 | * Removed redundant boilerplate in development.rst. 34 | 35 | * Updated tested versions in documentation. 36 | 37 | * Added links to tutorials where appropriate in the user guide. 38 | 39 | * Added a simplified version of the module overview table from the JOSS manuscript to the homepage of the documentation. 40 | 41 | * Added statement of need to homepage. 42 | 43 | * Fixed image embed in tutorial. 44 | 45 | * Added dates to what's new sections. 46 | 47 | * Expanded patch notes to include recent tags. 48 | 49 | * Deleted WIP docs pages to remove "not included in any toctree" errors. 50 | 51 | * Added nbsphinx gallery view to tutorials page. 52 | 53 | * Added more content to abbreviations page. 54 | 55 | Tutorials 56 | ~~~~~~~~~~~~~~ 57 | 58 | * Renamed pvOps examples to tutorials for consistency throughout repository. 59 | 60 | * Linked to tutorials in README. 61 | 62 | * Added a description of data in timeseries tutorial. 63 | 64 | * Removed redundant plots in timeseries tutorial. 65 | 66 | Other 67 | ~~~~~~~~~~~~~~ 68 | 69 | * Added copyright and license attributes to pvops. 70 | 71 | * Removed manifest.in (not needed). 72 | 73 | * Removed docs/init.py (not a module). 74 | 75 | * Chose more appropriate author/copyright in setup.py and conf.py. 76 | 77 | * Added version to pvops (pvops.__version__ now exists). 78 | 79 | * Removed external licenses (determined to be unnecessary by legal). 80 | 81 | * Renamed citation file and updated version number. 82 | 83 | * Added noxfile for dev task running.
84 | 85 | * Removed unused docker files 86 | 87 | * Add standard python files to gitignore 88 | 89 | * Removed redundant requirements files 90 | 91 | * Pinned documentation related requirements -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.4.0.rst: -------------------------------------------------------------------------------- 1 | 0.4.0 (October 25 2024) 2 | ----------------------- 3 | 4 | This release primarily addresses deprecations and future warnings related to dependencies, including a significant security vulnerability. 5 | 6 | Documentation 7 | ~~~~~~~~~~~~~ 8 | 9 | * Updated README and documentation to point to the JOSS publication. 10 | 11 | Tutorials 12 | ~~~~~~~~~~~~~~ 13 | 14 | * Miscellaneous fixes relevant to text2time, time, timeseries AIT module tutorials. 15 | 16 | Other 17 | ~~~~~~~~~~~~~~ 18 | 19 | * Now requiring nltk>=3.9.1 and switching punkt to punkt_tab. This addresses a security vulnerability in nltk. -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.5.0.rst: -------------------------------------------------------------------------------- 1 | 0.5.0 (February 19 2025) 2 | ------------------------ 3 | 4 | This release adds a new tutorial demonstrating survival analysis on PV assets. 5 | 6 | Tutorials 7 | ~~~~~~~~~~~~~~ 8 | 9 | * Added a new timeseries survival analysis tutorial demonstrating Kaplan-Meier estimators and Weibull distribution fits. 10 | 11 | * Added a new example dataset to go along with the new tutorial. 12 | 13 | Other 14 | ~~~~~~~~~~~~~~ 15 | 16 | * Added scikit-survival as a new dependency. Used for Kaplan-Meier estimators in the new tutorial. -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.5.1.rst: -------------------------------------------------------------------------------- 1 | 0.5.1 (February 19 2025) 2 | ------------------------ 3 | 4 | This release addresses a deprecation preventing the documentation from building. 5 | 6 | Other 7 | ~~~~~~~~~~~~~~ 8 | 9 | * Updated artifact/upload-artifact in buildthedocs from v3 to v4 -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.5.2.rst: -------------------------------------------------------------------------------- 1 | 0.5.2 (February 21 2025) 2 | ------------------------ 3 | 4 | This release updates the documentation to reflect changes starting at v0.4.0 and fixes dependency requirements. 5 | 6 | Other 7 | ~~~~~~~~~~~~~~ 8 | 9 | * Updated release notes to include changes starting at v0.4.0. 10 | 11 | * Added new survival analysis notebook to the documentation. 12 | 13 | * In v0.5.0, scikit-survival was added to requirements.txt but not setup.py. That has been resolved. 14 | 15 | * Now requiring python<3.13 for tensorflow -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.5.3.rst: -------------------------------------------------------------------------------- 1 | 0.5.3 (March 5 2025) 2 | ------------------------ 3 | 4 | This release takes the existing survival analysis tutorial and formalizes parts of it 5 | into functions within the timeseries module. 6 | 7 | Functionality 8 | ~~~~~~~~~~~~~~ 9 | 10 | * Created a new function in `pvops.timeseries.preprocess` that identifies right-censored data. 
11 | 12 | * Created a new model under `pvops.timeseries.models` to fit survival analysis functions, namely, Kaplan-Meier and Weibull. 13 | 14 | Tutorials 15 | ~~~~~~~~~~~~~~ 16 | 17 | * Simplified the survival analysis tutorial now that the main functionality is incorporated into pvOps. -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.6.0.rst: -------------------------------------------------------------------------------- 1 | 0.6.0 (March 17 2025) 2 | ------------------------ 3 | 4 | This release removes the `nltk` dependency and implements analogous functionality where needed in pvops. 5 | 6 | Functionality 7 | ~~~~~~~~~~~~~~ 8 | 9 | * `pvops.text.preprocess.regex_tokenize` for tokenizing text documents (replaces instances of `nltk.tokenize.word_tokenize`) 10 | 11 | Other 12 | ~~~~~~ 13 | 14 | * Includes a static version of the nltk English stopwords in `stopwords.txt` under `pvops.text` 15 | 16 | * `pvops.text.nlp_utils.create_stopwords` modified to pull from this new stopwords file (breaking change: removed language argument) 17 | 18 | * `pvops.text.visualize.visualize_word_frequency_plot` functionality implemented manually rather than through `nltk`; previous calls should still work -------------------------------------------------------------------------------- /docs/pages/releasenotes/0.6.1.rst: -------------------------------------------------------------------------------- 1 | 0.6.1 (March 17 2025) 2 | ----------------------- 3 | 4 | This release makes minor documentation updates. 5 | 6 | Documentation 7 | ~~~~~~~~~~~~~ 8 | 9 | * The "Overview" page now includes the new survival analysis functionality added to the timeseries module. 10 | 11 | * The release date is corrected for version 0.6.0 on the "What's New" page. -------------------------------------------------------------------------------- /docs/pages/releasenotes/alpha.rst: -------------------------------------------------------------------------------- 1 | Alpha 2 | ----------------------- 3 | 4 | The original release of pvOps consists mostly of new features. 5 | 6 | New features 7 | ~~~~~~~~~~~~ 8 | 9 | * `text` module added which conducts natural language processing on Operations & Maintenance (O&M) tickets, or other similar text records. 10 | * `text2time` module investigates the relationship between the production timeseries data and the O&M tickets. 11 | * `timeseries` module conducts timeseries preprocessing and modeling. 12 | * `iv` module incorporates the ability to simulate current-voltage (IV) curves under different environmental, load, and failure conditions. 13 | 14 | 15 | Documentation 16 | ~~~~~~~~~~~~~ 17 | 18 | * Built original website 19 | * Added whatsnew 20 | * Added jupyter notebook embeddings 21 | 22 | Testing 23 | ~~~~~~~ 24 | 25 | * Built comprehensive tests with pytest 26 | * Connected tests to automated testing pipeline -------------------------------------------------------------------------------- /docs/pages/releasenotes/beta.rst: -------------------------------------------------------------------------------- 1 | Beta 2 | ----------------------- 3 | 4 | New features and bug fixes are predominant in the beta versions.
5 | 6 | New features 7 | ~~~~~~~~~~~~ 8 | 9 | * IV trace classification framework built according to literature (PR #25) 10 | * Timeseries IV simulation for highly customizable degradation of system parameters (PR #28) 11 | * Leverage pvlib solarposition package to populate content per site (PR #32) 12 | * Add coefficient-level evaluations to linear models (PR #32) 13 | * Give user ability to input own test-train splits to linear modeller (PR #32) 14 | * Remap attributes function updated to retain the unaltered attributes (PR #32) 15 | * Interpolate O&M data onto production data where overlaps exist (PR #32) 16 | 17 | Bug fixes 18 | ~~~~~~~~~ 19 | 20 | * Basic package fixes to README (PR #27) and documentation configuration (PR #24) 21 | * Fix IV simulator bug for edge case where two IV curves added have equal I_{sc} (PR #30) 22 | * Neural network configuration referencing in 1D CNN (PR #32) 23 | 24 | Docs 25 | ~~~~ 26 | 27 | * Update how to reference pvOps (PR #33) 28 | 29 | Tests 30 | ~~~~~ 31 | * Removed Python 3.6 test support due to https://github.com/actions/setup-python/issues/162. 32 | -------------------------------------------------------------------------------- /docs/pages/tutorials.rst: -------------------------------------------------------------------------------- 1 | pvOps Tutorials 2 | =============== 3 | 4 | Check out the tutorials below! 5 | 6 | .. nbgallery:: 7 | :caption: Text & Text2Time tutorials: 8 | 9 | tutorials/tutorial_text2time_module 10 | tutorials/tutorial_textmodule 11 | 12 | .. nbgallery:: 13 | :caption: Timeseries tutorials: 14 | 15 | tutorials/tutorial_timeseries 16 | tutorials/tutorial_AIT_timeseries 17 | tutorials/tutorial_timeseries_sim 18 | tutorials/tutorial_timeseries_survival_analysis 19 | 20 | .. nbgallery:: 21 | :caption: IV tutorials: 22 | 23 | tutorials/tutorial_iv_simulator 24 | tutorials/tutorial_iv_classifier 25 | tutorials/tutorial_iv_diode_extractor 26 | -------------------------------------------------------------------------------- /docs/pages/tutorials/assets/diode_param_extractor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/docs/pages/tutorials/assets/diode_param_extractor.png -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_AIT_timeseries.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_AIT_timeseries.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_iv_classifier.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_iv_classifier.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_iv_diode_extractor.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_iv_diode_extractor.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_iv_simulator.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_iv_simulator.ipynb" 3 | } --------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_text2time_module.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_text2time_module.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_textmodule.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_textmodule.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_timeseries.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_timeseries.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_timeseries_sim.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_timeseries_sim.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/tutorials/tutorial_timeseries_survival_analysis.nblink: -------------------------------------------------------------------------------- 1 | { 2 | "path": "../../../tutorials/tutorial_timeseries_survival_analysis.ipynb" 3 | } -------------------------------------------------------------------------------- /docs/pages/userguide.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Getting Started 7 | 8 | installation 9 | 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | :caption: Module Guides 14 | 15 | moduleguides/text 16 | moduleguides/text2time 17 | moduleguides/timeseries 18 | moduleguides/iv 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: Abbreviations 23 | 24 | abbreviations 25 | -------------------------------------------------------------------------------- /docs/refs/pvops.bib: -------------------------------------------------------------------------------- 1 | @Article{app12041872, 2 | AUTHOR = {Hopwood, Michael W. and Gunda, Thushara}, 3 | TITLE = {Generation of Data-Driven Expected Energy Models for Photovoltaic Systems}, 4 | JOURNAL = {Applied Sciences}, 5 | VOLUME = {12}, 6 | YEAR = {2022}, 7 | NUMBER = {4}, 8 | ARTICLE-NUMBER = {1872}, 9 | URL = {https://www.mdpi.com/2076-3417/12/4/1872}, 10 | ISSN = {2076-3417}, 11 | ABSTRACT = {Although unique expected energy models can be generated for a given photovoltaic (PV) site, a standardized model is also needed to facilitate performance comparisons across fleets. Current standardized expected energy models for PV work well with sparse data, but they have demonstrated significant over-estimations, which impacts accurate diagnoses of field operations and maintenance issues. This research addresses this issue by using machine learning to develop a data-driven expected energy model that can more accurately generate inferences for energy production of PV systems. Irradiance and system capacity information was used from 172 sites across the United States to train a series of models using Lasso linear regression. 
The trained models generally perform better than the commonly used expected energy model from international standard (IEC 61724-1), with the two highest performing models ranging in model complexity from a third-order polynomial with 10 parameters (Radj2 = 0.994) to a simpler, second-order polynomial with 4 parameters (Radj2=0.993), the latter of which is subject to further evaluation. Subsequently, the trained models provide a more robust basis for identifying potential energy anomalies for operations and maintenance activities as well as informing planning-related financial assessments. We conclude with directions for future research, such as using splines to improve model continuity and better capture systems with low (≤1000 kW DC) capacity.}, 12 | DOI = {10.3390/app12041872} 13 | } 14 | 15 | @INPROCEEDINGS{9518439, 16 | author={Mendoza, Hector and Hopwood, Michael and Gunda, Thushara}, 17 | booktitle={2021 IEEE 48th Photovoltaic Specialists Conference (PVSC)}, 18 | title={pvOps: Improving Operational Assessments through Data Fusion}, 19 | year={2021}, 20 | volume={}, 21 | number={}, 22 | pages={0112-0119}, 23 | doi={10.1109/PVSC43889.2021.9518439} 24 | } 25 | 26 | @Article{en15145085, 27 | AUTHOR = {Hopwood, Michael W. and Stein, Joshua S. and Braid, Jennifer L. and Seigneur, Hubert P.}, 28 | TITLE = {Physics-Based Method for Generating Fully Synthetic IV Curve Training Datasets for Machine Learning Classification of PV Failures}, 29 | JOURNAL = {Energies}, 30 | VOLUME = {15}, 31 | YEAR = {2022}, 32 | NUMBER = {14}, 33 | ARTICLE-NUMBER = {5085}, 34 | URL = {https://www.mdpi.com/1996-1073/15/14/5085}, 35 | ISSN = {1996-1073}, 36 | ABSTRACT = {Classification machine learning models require high-quality labeled datasets for training. Among the most useful datasets for photovoltaic array fault detection and diagnosis are module or string current-voltage (IV) curves. Unfortunately, such datasets are rarely collected due to the cost of high fidelity monitoring, and the data that is available is generally not ideal, often consisting of unbalanced classes, noisy data due to environmental conditions, and few samples. In this paper, we propose an alternate approach that utilizes physics-based simulations of string-level IV curves as a fully synthetic training corpus that is independent of the test dataset. In our example, the training corpus consists of baseline (no fault), partial soiling, and cell crack system modes. The training corpus is used to train a 1D convolutional neural network (CNN) for failure classification. The approach is validated by comparing the model’s ability to classify failures detected on a real, measured IV curve testing corpus obtained from laboratory and field experiments. Results obtained using a fully synthetic training dataset achieve identical accuracy to those obtained with use of a measured training dataset. When evaluating the measured data’s test split, a 100% accuracy was found both when using simulations or measured data as the training corpus. When evaluating all of the measured data, a 96% accuracy was found when using a fully synthetic training dataset. The use of physics-based modeling results as a training corpus for failure detection and classification has many advantages for implementation as each PV system is configured differently, and it would be nearly impossible to train using labeled measured data.}, 37 | DOI = {10.3390/en15145085} 38 | } 39 | 40 | @ARTICLE{9186596, 41 | author={Hopwood, Michael W. 
and Gunda, Thushara and Seigneur, Hubert and Walters, Joseph}, 42 | journal={IEEE Access}, 43 | title={Neural Network-Based Classification of String-Level IV Curves From Physically-Induced Failures of Photovoltaic Modules}, 44 | year={2020}, 45 | volume={8}, 46 | number={}, 47 | pages={161480-161487}, 48 | doi={10.1109/ACCESS.2020.3021577} 49 | } 50 | 51 | @article{BISHOP198873, 52 | title = {Computer simulation of the effects of electrical mismatches in photovoltaic cell interconnection circuits}, 53 | journal = {Solar Cells}, 54 | volume = {25}, 55 | number = {1}, 56 | pages = {73-89}, 57 | year = {1988}, 58 | issn = {0379-6787}, 59 | doi = {https://doi.org/10.1016/0379-6787(88)90059-2}, 60 | url = {https://www.sciencedirect.com/science/article/pii/0379678788900592}, 61 | author = {J.W. Bishop}, 62 | abstract = {A Pascal program, PVNet, has been developed at the Commission of the European Communities Joint Research Centre, Ispra, to model the electrical behaviour of solar cell interconnection circuits. The program calculates three-quadrant solar cell current-voltage (I–V) curves using a lumped parameter equivalent circuit model, combines them to obtain the resultant I–V curve of any interconnection circuit, and calculates the operating point of each circuit element, set by user-defined operating conditions. The numerical values of the equivalent circuit parameters are generated by the program, and are varied so that the electrical parameters (short-circuit current, open-circuit voltage, fill factor) of calculated I–V curves show the same variations as those of measured crystalline silicon solar cell I–V curves. Equivalent circuit parameters can be changed by the user, making it possible to simulate the effects of electrical mismatches on the performance of an interconnection circuit. To illustrate the operation of the program, the electrical mechanisms leading to hot-spot heating in photovoltaic arrays are analysed. Three types of interconnection circuit are considered: a simple series string, a series-parallel block and a series connection of series-parallel blocks. The operation of parallel bypass diodes (used to limit hot-spot heating in series strings) and of series blocking diodes (used to prevent current imbalance in series-parallel circuits) are explained.} 63 | } 64 | 65 | @article{osti_1078057, 66 | title = {Weather-Corrected Performance Ratio}, 67 | author = {Dierauf, T. and Growitz, A. and Kurtz, S. and Cruz, J. L. B. and Riley, E. and Hansen, C.}, 68 | abstractNote = {Photovoltaic (PV) system performance depends on both the quality of the system and the weather. One simple way to communicate the system performance is to use the performance ratio (PR): the ratio of the electricity generated to the electricity that would have been generated if the plant consistently converted sunlight to electricity at the level expected from the DC nameplate rating. The annual system yield for flat-plate PV systems is estimated by the product of the annual insolation in the plane of the array, the nameplate rating of the system, and the PR, which provides an attractive way to estimate expected annual system yield. Unfortunately, the PR is, again, a function of both the PV system efficiency and the weather. If the PR is measured during the winter or during the summer, substantially different values may be obtained, making this metric insufficient to use as the basis for a performance guarantee when precise confidence intervals are required. 
This technical report defines a way to modify the PR calculation to neutralize biases that may be introduced by variations in the weather, while still reporting a PR that reflects the annual PR at that site given the project design and the project weather file. This resulting weather-corrected PR gives more consistent results throughout the year, enabling its use as a metric for performance guarantees while still retaining the familiarity this metric brings to the industry and the value of its use in predicting actual annual system yield. A testing protocol is also presented to illustrate the use of this new metric with the intent of providing a reference starting point for contractual content.}, 69 | doi = {10.2172/1078057}, 70 | url = {https://www.osti.gov/biblio/1078057}, 71 | journal = {}, 72 | number = {}, 73 | volume = {}, 74 | place = {United States}, 75 | year = {2013}, 76 | month = {4} 77 | } 78 | 79 | @techreport{deceglie2018rdtools, 80 | title={RdTools: an open source python library for PV degradation analysis}, 81 | author={Deceglie, Michael G and Jordan, Dirk and Nag, Ambarish and Deline, Christopher A and Shinn, Adam}, 82 | year={2018}, 83 | institution={National Renewable Energy Lab.(NREL), Golden, CO (United States)} 84 | } 85 | 86 | @article{holmgren2018pvlib, 87 | title={pvlib python: A python package for modeling solar energy systems}, 88 | author={Holmgren, William F and Hansen, Clifford W and Mikofski, Mark A}, 89 | journal={Journal of Open Source Software}, 90 | volume={3}, 91 | number={29}, 92 | pages={884}, 93 | doi={10.21105/joss.00884}, 94 | year={2018} 95 | } 96 | 97 | @inproceedings{pierce2020identifying, 98 | title={Identifying Degradation Modes of Photovoltaic Modules Using Unsupervised Machine Learning on Electroluminescense Images}, 99 | author={Pierce, Benjamin G and Karimi, Ahmad Maroof and Liu, JiQi and French, Roger H and Braid, Jennifer L}, 100 | booktitle={2020 47th IEEE Photovoltaic Specialists Conference (PVSC)}, 101 | pages={1850--1855}, 102 | year={2020}, 103 | organization={IEEE}, 104 | doi = {10.1109/PVSC45281.2020.9301021} 105 | } 106 | 107 | @techreport{klise2016performance, 108 | title={Performance Monitoring using Pecos (V. 0.1)}, 109 | author={Klise, Katherine A and Stein, Joshua S}, 110 | year={2016}, 111 | institution={Sandia National Laboraties}, 112 | doi = {10.2172/1734479} 113 | } 114 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | @nox.session 4 | def tests(session): 5 | """Run tests.""" 6 | session.install(".") 7 | session.install("pytest") 8 | session.run("pytest") 9 | 10 | @nox.session 11 | def lint(session): 12 | """Lint.""" 13 | session.install("flake8") 14 | session.run("flake8", "--import-order-style", "google") 15 | 16 | @nox.session 17 | def docs(session): 18 | """Generate documentation.""" 19 | session.install(".[docs]") 20 | session.cd("docs/") 21 | session.run("make", "html") 22 | 23 | @nox.session 24 | def serve(session): 25 | """Serve documentation. 
Port can be specified as a positional argument.""" 26 | try: 27 | port = session.posargs[0] 28 | except IndexError: 29 | port = "8085" 30 | session.run("python", "-m", "http.server", "-b", "localhost", "-d", "docs/_build/html", port) 31 | 32 | @nox.session 33 | def check_style(session): 34 | """Check if code follows black style.""" 35 | session.install("black") 36 | session.run("black", "--check", "src") 37 | 38 | @nox.session 39 | def enforce_style(session): 40 | """Apply black style to code base.""" 41 | session.install("black") 42 | session.run("black", "src") -------------------------------------------------------------------------------- /pvops/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from pvops import text 3 | from pvops import text2time 4 | from pvops import timeseries 5 | try: 6 | from pvops import iv 7 | except ModuleNotFoundError: 8 | # warnings.warn("") 9 | pass 10 | 11 | __version__ = '0.6.1' 12 | 13 | __copyright__ = """Copyright 2023 National Technology & Engineering 14 | Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 15 | with NTESS, the U.S. Government retains certain rights in this software.""" 16 | 17 | __license__ = "BSD 3-Clause License" 18 | -------------------------------------------------------------------------------- /pvops/iv/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | iv module 3 | """ 4 | import pvops.iv.models 5 | import pvops.iv.extractor 6 | import pvops.iv.physics_utils 7 | import pvops.iv.preprocess 8 | import pvops.iv.simulator 9 | import pvops.iv.timeseries_simulator 10 | import pvops.iv.utils -------------------------------------------------------------------------------- /pvops/iv/extractor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Derive the effective diode parameters from a set of input curves. 3 | """ 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import scipy 8 | import sklearn 9 | from pvops.iv.simulator import Simulator 10 | import time 11 | from pvops.iv.physics_utils import iv_cutoff, T_to_tcell, \ 12 | calculate_IVparams, smooth_curve 13 | 14 | 15 | class BruteForceExtractor(): 16 | '''Process measured IV curves to extract diode parameters. 
17 | Requires a set of curves to create Isc vs Irr and Voc vs Temp vs Isc(Irr) 18 | 19 | Parameters 20 | ---------- 21 | input_df : DataFrame 22 | Contains IV curves with a datetime index 23 | current_col : string 24 | Indicates column where current values in IV curve are located; 25 | each cell is an array of current values in a single IV curve 26 | voltage_col : string 27 | Indicates column where voltage values in IV curve are located; 28 | each cell is an array of voltage values in a single IV curve 29 | irradiance_col : string 30 | Indicates column where irradiance value (W/m2) 31 | temperature_col : string 32 | Indicates column where temperature value (C) 33 | T_type : string 34 | Describe input temperature, either 'ambient' or 'module' or 'cell' 35 | ''' 36 | 37 | def __init__( 38 | self, 39 | input_df, 40 | current_col, 41 | voltage_col, 42 | irradiance_col, 43 | temperature_col, 44 | T_type, 45 | windspeed_col=None, 46 | Simulator_mod_specs=None, 47 | Simulator_pristine_condition=None): 48 | 49 | self.Simulator_mod_specs = Simulator_mod_specs 50 | self.Simulator_pristine_condition = Simulator_pristine_condition 51 | 52 | self.tstamps = input_df.index.tolist() 53 | self.Is = input_df[current_col].tolist() 54 | self.Vs = input_df[voltage_col].tolist() 55 | self.Irrs = input_df[irradiance_col].tolist() 56 | self.Temps = input_df[temperature_col].tolist() 57 | self.T_type = T_type 58 | self.Tcs = [] 59 | 60 | if self.T_type == 'ambient' and windspeed_col is None: 61 | raise Exception( 62 | "Wind speed must be specified if passing ambient temperature so that the cell temperature can be derived.") 63 | 64 | if windspeed_col is not None: 65 | self.WSs = input_df[windspeed_col].tolist() 66 | if self.T_type == 'ambient': 67 | for irr, temp, ws in zip(self.Irrs, self.Temps, self.WSs): 68 | Tc = T_to_tcell(irr, temp, ws, self.T_type) 69 | self.Tcs.append(Tc) 70 | 71 | if self.T_type == 'module': 72 | for irr, temp in zip(self.Irrs, self.Temps): 73 | Tc = T_to_tcell(irr, temp, [], self.T_type) 74 | self.Tcs.append(Tc) 75 | 76 | self.measured_info = [] 77 | for i in range(len(self.Is)): 78 | Varray = self.Vs[i] 79 | Iarray = self.Is[i] 80 | Irr = self.Irrs[i] 81 | T = self.Temps[i] 82 | self.measured_info.append({"V": Varray, "I": Iarray, "E": Irr, "T": T}) 83 | 84 | self.n_samples = len(input_df.index) 85 | 86 | self.params = {} 87 | 88 | def create_string_object(self, iph, io, rs, rsh, nnsvth): 89 | # TODO write docstring 90 | kwargs = {} 91 | if self.Simulator_mod_specs is not None: 92 | kwargs.update({'mod_specs': self.Simulator_mod_specs}) 93 | if self.Simulator_pristine_condition is not None: 94 | kwargs.update( 95 | {'pristine_condition': self.Simulator_pristine_condition}) 96 | kwargs.update({'replacement_5params': {'I_L_ref': iph, 97 | 'I_o_ref': io, 98 | 'R_s': rs, 99 | 'R_sh_ref': rsh, 100 | 'a_ref': nnsvth} 101 | }) 102 | 103 | sim = Simulator(**kwargs) 104 | 105 | # set new defaults 106 | for sample_i, sample in enumerate(self.measured_info): 107 | 108 | condition = {'identifier': f'case_{self.counter}_{sample_i}', 109 | 'E': sample['E'], 110 | 'Tc': sample['T'] 111 | } 112 | 113 | sim.add_preset_conditions( 114 | 'complete', condition, save_name=f'mod_case_{self.counter}_{sample_i}') 115 | 116 | if isinstance(self.n_mods, int): 117 | if self.n_mods > 1: 118 | sim.build_strings({f'str_case_{self.counter}_{sample_i}': [ 119 | f'mod_case_{self.counter}_{sample_i}'] * self.n_mods}) 120 | 121 | elif self.n_mods != 1: 122 | raise Exception( 123 | f"Input a valid number of modules, 
n_mods. You inputted {self.n_mods}") 124 | # elif isinstance(self.n_mods, (tuple, list, np.ndarray)): 125 | # sim.build_strings({f'str_case_{self.counter}_{sample_i}': [ 126 | # f'mod_case_{self.counter}_{sample_i}']*self.n_mods[0] + ['pristine'] * (self.n_mods[1]-self.n_mods[0])}) 127 | else: 128 | raise ValueError( 129 | f"Expected n_mods to be an integer. Got: {type(self.n_mods)}") 130 | 131 | start_t = time.time() 132 | sim.simulate() 133 | 134 | if self.verbose >= 2: 135 | print( 136 | f'\tSimulations completed after {round(time.time()-start_t,2)} seconds') 137 | 138 | return sim 139 | 140 | def f_multiple_samples(self, params): 141 | """Objective function: total mean squared error between the measured and simulated IV curves for the candidate diode parameters.""" 142 | iph, io, rs, rsh, nnsvth = params 143 | 144 | if self.user_func is None: 145 | sim = self.create_string_object(iph, io, rs, rsh, nnsvth) 146 | else: 147 | sim = self.user_func(self, iph, io, rs, rsh, nnsvth) 148 | 149 | msse_tot = 0 150 | 151 | if self.verbose >= 2: 152 | perc_diff = 100 * \ 153 | (np.array(params) - np.array(self.start_conds)) / \ 154 | np.array(self.start_conds) 155 | 156 | meas_Iscs = [] 157 | meas_Vocs = [] 158 | meas_Pmps = [] 159 | sim_Iscs = [] 160 | sim_Vocs = [] 161 | sim_Pmps = [] 162 | 163 | for sample_i, sample in enumerate(self.measured_info): 164 | 165 | if self.n_mods > 1: 166 | Varr = sim.multilevel_ivdata['string'][f'str_case_{self.counter}_{sample_i}']['V'][0] 167 | Iarr = sim.multilevel_ivdata['string'][f'str_case_{self.counter}_{sample_i}']['I'][0] 168 | elif self.n_mods == 1: 169 | Varr = sim.multilevel_ivdata['module'][f'mod_case_{self.counter}_{sample_i}']['V'][0] 170 | Iarr = sim.multilevel_ivdata['module'][f'mod_case_{self.counter}_{sample_i}']['I'][0] 171 | 172 | # resample to same voltage domain as measured 173 | simI_interp = np.interp(sample['V'], Varr, Iarr) 174 | 175 | msse = sklearn.metrics.mean_squared_error(sample['I'], simI_interp) 176 | msse_tot += msse 177 | 178 | if self.verbose >= 2: 179 | 180 | Vco, Ico = iv_cutoff(Varr, Iarr, 0) 181 | sim_params = calculate_IVparams(Vco, Ico) 182 | meas_params = calculate_IVparams(sample['V'], sample['I']) 183 | 184 | meas_Iscs.append(meas_params['isc']) 185 | meas_Vocs.append(meas_params['voc']) 186 | meas_Pmps.append(meas_params['pmp']) 187 | sim_Iscs.append(sim_params['isc']) 188 | sim_Vocs.append(sim_params['voc']) 189 | sim_Pmps.append(sim_params['pmp']) 190 | 191 | if self.verbose >= 2: 192 | 193 | minpmps_m = min(min(meas_Pmps), min(sim_Pmps)) 194 | maxpmps_m = max(max(meas_Pmps), max(sim_Pmps)) 195 | plt.plot(meas_Pmps, sim_Pmps, 'go') 196 | plt.plot(list(range(int(minpmps_m - 10), int(maxpmps_m + 10 + 1))), 197 | list(range(int(minpmps_m - 10), int(maxpmps_m + 10 + 1))), 'b--') 198 | plt.title('Measured v. Simulated Pmpp') 199 | plt.xlabel('Measured (W)') 200 | plt.ylabel('Simulated (W)') 201 | plt.xlim(minpmps_m - 5, maxpmps_m + 5) 202 | plt.ylim(minpmps_m - 5, maxpmps_m + 5) 203 | plt.show() 204 | 205 | minvocs_m = min(min(meas_Vocs), min(sim_Vocs)) 206 | maxvocs_m = max(max(meas_Vocs), max(sim_Vocs)) 207 | plt.plot(meas_Vocs, sim_Vocs, 'ro') 208 | plt.plot(list(range(int(minvocs_m - 10), int(maxvocs_m + 10 + 1))), 209 | list(range(int(minvocs_m - 10), int(maxvocs_m + 10 + 1))), 'b--') 210 | plt.title('Measured v.
Simulated Voc') 211 | plt.xlabel('Measured (V)') 212 | plt.ylabel('Simulated (V)') 213 | plt.xlim(minvocs_m - 5, maxvocs_m + 5) 214 | plt.ylim(minvocs_m - 5, maxvocs_m + 5) 215 | plt.show() 216 | 217 | miniscs_m = min(min(meas_Iscs), min(sim_Iscs)) 218 | maxiscs_m = max(max(meas_Iscs), max(sim_Iscs)) 219 | plt.plot(meas_Iscs, sim_Iscs, 'ko') 220 | plt.plot(list(range(int(miniscs_m - 0.5), int(maxiscs_m + 0.5 + 2))), 221 | list(range(int(miniscs_m - 0.5), int(maxiscs_m + 0.5 + 2))), 'b--') 222 | plt.title('Measured v. Simulated Isc') 223 | plt.xlabel('Measured (A)') 224 | plt.ylabel('Simulated (A)') 225 | plt.xlim(miniscs_m - 0.5, maxiscs_m + 0.5) 226 | plt.ylim(miniscs_m - 0.5, maxiscs_m + 0.5) 227 | plt.show() 228 | 229 | plt.plot(sample['V'], simI_interp, 'r', label='Simulated') 230 | plt.title("SIMULATED") 231 | plt.show() 232 | 233 | plt.plot(sample['V'], simI_interp, 'r', label='Simulated') 234 | plt.plot(sample['V'], sample['I'], 'k', label='Measured') 235 | plt.legend() 236 | plt.xlabel('Voltage (V)') 237 | plt.ylabel('Current (A)') 238 | plt.title( 239 | f'One example: case {self.counter} with % Diff.: {perc_diff}') 240 | plt.show() 241 | 242 | print('Params used in ^ iteration: ', params) 243 | 244 | self.counter += 1 245 | self.msses.append(msse_tot) 246 | return msse_tot 247 | 248 | def fit_params(self, cell_parameters, n_mods, bounds_func, user_func=None, verbose=0): 249 | """ 250 | Fit diode parameters from a set of IV curves. 251 | 252 | Parameters 253 | ---------- 254 | cell_parameters : dict 255 | Cell-level parameters, usually extracted from the CEC 256 | database, which will be used as the 257 | initial guesses in the optimization process. 258 | n_mods : int 259 | if int, defines the number of modules in a 260 | string(1=simulate a single module) 261 | bounds_func : function 262 | Function to establish the bounded search space 263 | See below for an example: 264 | 265 | .. code-block:: python 266 | 267 | def bounds_func(iph,io,rs,rsh,nnsvth,perc_adjust=0.5): 268 | return ((iph - 0.5*iph*perc_adjust, iph + 2*iph*perc_adjust), 269 | (io - 40*io*perc_adjust, io + 40*io*perc_adjust), 270 | (rs - 20*rs*perc_adjust, rs + 20*rs*perc_adjust), 271 | (rsh - 150*rsh*perc_adjust, rsh + 150*rsh*perc_adjust), 272 | (nnsvth - 10*nnsvth*perc_adjust, nnsvth + 10*nnsvth*perc_adjust)) 273 | 274 | user_func : function 275 | Optional, a function similar to `self.create_string_object` 276 | which has the following inputs: 277 | `self, iph, io, rs, rsh, nnsvth`. This can be used to 278 | extract unique failure parameterization. 
279 | verbose : int 280 | if verbose >= 1, print information about fitting 281 | if verbose >= 2, plot information about each iteration 282 | """ 283 | 284 | self.user_func = user_func 285 | self.verbose = verbose 286 | self.n_mods = n_mods 287 | self.g = 1000 288 | self.t = 25 289 | 290 | self.cell_parameters = cell_parameters 291 | 292 | self.counter = 0 293 | self.msses = [] 294 | 295 | iph = cell_parameters['I_L_ref'] 296 | io = cell_parameters['I_o_ref'] 297 | rs = cell_parameters['R_s'] 298 | rsh = cell_parameters['R_sh_ref'] 299 | nnsvth = cell_parameters['a_ref'] 300 | 301 | self.start_conds = (iph, io, rs, rsh, nnsvth) 302 | 303 | bounds = bounds_func(*self.start_conds) 304 | 305 | if self.verbose >= 1: 306 | print('Given 5params:', iph, io, rs, rsh, nnsvth) 307 | converged_solution = scipy.optimize.minimize(self.f_multiple_samples, 308 | (iph, io, rs, rsh, nnsvth), 309 | bounds=bounds, 310 | method='TNC') 311 | 312 | if self.verbose >= 1: 313 | print('bounds', bounds) 314 | print('initial: ', (iph, io, rs, rsh, nnsvth)) 315 | print('solution: ', converged_solution) 316 | 317 | return converged_solution['x'] 318 | -------------------------------------------------------------------------------- /pvops/iv/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | from pvops.iv.models.nn import get_diff_array, feature_generation, \ 5 | balance_df, plot_profiles, classify_curves, IVClassifier -------------------------------------------------------------------------------- /pvops/iv/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pvops.iv.physics_utils import gt_correction 4 | 5 | 6 | def preprocess(input_df, resmpl_resolution, iv_col_dict, resmpl_cutoff=0.03, 7 | correct_gt=False, normalize_y=True, CECmodule_parameters=None, 8 | n_mods=None, gt_correct_option=3): 9 | """IV processing function which supports irradiance & temperature correction 10 | 11 | Parameters 12 | ---------- 13 | input_df : DataFrame 14 | resmpl_resolution : float 15 | iv_col_dict : dict 16 | resmpl_cutoff : float 17 | correct_gt : bool 18 | normalize_y : bool 19 | CECmodule_parameters : None 20 | n_mods : int 21 | gt_correct_option : int 22 | 23 | Returns 24 | ------- 25 | df : DataFrame 26 | """ 27 | 28 | current_col = iv_col_dict["current"] 29 | voltage_col = iv_col_dict["voltage"] 30 | power_col = iv_col_dict["power"] 31 | failure_mode_col = iv_col_dict["mode"] 32 | irradiance_col = iv_col_dict["irradiance"] 33 | temperature_col = iv_col_dict["temperature"] 34 | 35 | # Correct for irradiance and temperature 36 | if correct_gt: 37 | Vs, Is = [], [] 38 | for ind, row in input_df.iterrows(): 39 | if CECmodule_parameters is None or n_mods is None: 40 | raise ValueError( 41 | "You must specify CECmodule_parameters and n_mods if you want to correct the IV curves for irradiance and temperature.") 42 | Vt, It = gt_correction(row[voltage_col], row[current_col], row[irradiance_col], row[temperature_col], 43 | cecparams=CECmodule_parameters, n_units=n_mods, option=gt_correct_option) 44 | Vs.append(Vt) 45 | Is.append(It) 46 | else: 47 | Is = input_df[current_col].tolist() 48 | Vs = input_df[voltage_col].tolist() 49 | 50 | v_interps = np.arange( 51 | resmpl_cutoff, 1, resmpl_resolution) 52 | v_interps = np.append(v_interps, 1.0) 53 | 54 | procVs = [] 55 | procIs = [] 56 | # Resample IV curve to static voltage domain 57 | for iii in range(len(Vs)): 58 | Voc 
= max(Vs[iii]) 59 | Vnorm = Vs[iii] / Voc 60 | procVs.append(v_interps) 61 | interpolated_I = np.interp(v_interps, Vnorm, Is[iii]) 62 | 63 | if normalize_y: 64 | isc_iter = interpolated_I.max() 65 | procIs.append(interpolated_I / isc_iter) 66 | 67 | else: 68 | procIs.append(interpolated_I) 69 | 70 | df = pd.DataFrame() 71 | df[failure_mode_col] = input_df[failure_mode_col] 72 | 73 | procIs = np.array(procIs) 74 | procVs = np.array(procVs) 75 | procPs = procIs * procVs 76 | 77 | df[current_col] = list(procIs) 78 | df[voltage_col] = list(procVs) 79 | df[power_col] = list(procPs) 80 | df[irradiance_col] = input_df[irradiance_col].tolist() 81 | df[temperature_col] = input_df[temperature_col].tolist() 82 | 83 | return df 84 | -------------------------------------------------------------------------------- /pvops/iv/timeseries_simulator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from datetime import timedelta 4 | from pvops.iv.simulator import Simulator 5 | 6 | 7 | class IVTimeseriesGenerator(Simulator): 8 | 9 | def __init__(self, **iv_sim_kwargs): 10 | """Simulate a PV System across time. 11 | 12 | Parameters 13 | ---------- 14 | iv_sim_kwargs : 15 | Optional, `simulator.Simulator` inputs 16 | """ 17 | super().__init__(**iv_sim_kwargs) 18 | 19 | def generate(self, env_df, failures, iv_col_dict, 20 | identifier_col, plot_trends=False): 21 | """Simulate a PV system 22 | 23 | Parameters 24 | ---------- 25 | env_df : dataframe 26 | DataFrame containing irradiance ("E") and temperature ("T") columns 27 | failures : list 28 | List of timeseries_simulator.TimeseriesFailure objects 29 | """ 30 | 31 | self.specs_df = env_df[[ 32 | identifier_col, iv_col_dict["irradiance"], 33 | iv_col_dict["temperature"]]].copy() 34 | for failure in failures: 35 | # Weigh all failure definitions together 36 | self.specs_df = failure.add_interpolation( 37 | self.specs_df, plot_trends) 38 | 39 | self.timeseries_condition_dicts = self._structure_Simulator_inputs( 40 | self.specs_df, iv_col_dict, identifier_col) 41 | return self.timeseries_condition_dicts 42 | 43 | def add_time_conditions(self, preset_mod_mapping, nmods=12): 44 | for condition_dict in self.timeseries_condition_dicts: 45 | self.add_preset_conditions(preset_mod_mapping, condition_dict, 46 | save_name=f"mod_{condition_dict['identifier']}") 47 | self.build_strings({f"str_{condition_dict['identifier']}": 48 | [f"mod_{condition_dict['identifier']}"] * nmods}) 49 | 50 | def _structure_Simulator_inputs(self, specs_df, 51 | iv_col_dict, identifier_col): 52 | keys = [] 53 | savekeys = [] 54 | spec_df_cols = specs_df.columns 55 | for key in ['identifier'] + self.acceptible_keys: 56 | if key == 'identifier': 57 | savekey = identifier_col 58 | elif key == 'E': 59 | savekey = iv_col_dict['irradiance'] 60 | elif key == 'Tc': 61 | savekey = iv_col_dict['temperature'] 62 | else: 63 | savekey = key 64 | if savekey in spec_df_cols: 65 | keys.append(key) 66 | savekeys.append(savekey) 67 | 68 | return [dict(zip(keys, vals)) 69 | for vals in specs_df[savekeys].values] 70 | 71 | 72 | class TimeseriesFailure: 73 | def __init__(self): 74 | """Define a failure in terms of the affected diode 75 | parameters and specify how the failure evolves over 76 | time (i.e. how quickly does it itensify? how fast is 77 | it detected? how fast is it fixed?) 
78 | """ 79 | self.longterm_fcn_dict = {} 80 | self.annual_fcn_dict = {} 81 | self.daily_fcn_dict = {} 82 | 83 | def trend(self, longterm_fcn_dict=None, 84 | annual_fcn_dict=None, 85 | daily_fcn_dict=None, 86 | **kwargs): 87 | """Define a failure's trend across intraday (trending 88 | with time of day) and longterm timeframes. 89 | 90 | Parameters 91 | ---------- 92 | longterm_fcn_dict : dict 93 | A dictionary where keys are the diode-multipliers in IVSimulator 94 | ('Rsh_mult', 'Rs_mult', 'Io_mult', 'Il_mult', 'nnsvth_mult') and 95 | values are either a function or a string. If a function, the 96 | function should be a mathematical operation as a `function of the 97 | number of float years since operation start`, a value on domain 98 | [0,inf), and outputs the chosen diode-multiplier's values across 99 | this timeseries. If a string, must use a pre-defined definition: 100 | 101 | * 'degrade' : degrade over time at specified rate. 102 | Specify rate by passing a definition for 103 | `degradation_rate` 104 | 105 | For example, 106 | 107 | .. code-block:: python 108 | 109 | # 2 Ways of Doing Same Thing 110 | 111 | # Method 1 112 | longterm_fcn_dict = { 113 | 'Rs_mult': lambda x : 1.005 * x 114 | } 115 | f = Failure() 116 | f.trend(longterm_fcn_dict) 117 | 118 | # Method 2 119 | longterm_fcn_dict = { 120 | 'Rs_mult': 'degrade' 121 | } 122 | f = Failure() 123 | f.trend(longterm_fcn_dict, 124 | degradation_rate=1.005) 125 | 126 | annual_fcn_dict : dict 127 | A dictionary where keys are the diode-multipliers in IVSimulator 128 | ('Rsh_mult', 'Rs_mult', 'Io_mult', 'Il_mult', 'nnsvth_mult') and 129 | values are either a function or a string. If a function, the 130 | function should be a mathematical operation as a `function of the 131 | percentage through this year`, a value on domain [0,1], and outputs 132 | the chosen diode-multiplier's values across this timeseries. If a 133 | string, must use a pre-defined definition: 134 | 135 | daily_fcn_dict : function or str 136 | A dictionary where keys are the diode-multipliers in IVSimulator 137 | ('Rsh_mult', 'Rs_mult', 'Io_mult', 'Il_mult', 'nnsvth_mult') and 138 | values are either a function or a string. If a function, the 139 | function should be a mathematical operation as a `function of the 140 | percentage through this day`, a value on domain [0,1], and outputs 141 | the chosen diode-multiplier's values across this timeseries. 
142 |             string, must use a pre-defined definition (none yet implemented).
143 |         """
144 | 
145 |         if not isinstance(longterm_fcn_dict, type(None)):
146 |             self.longterm_fcn_dict = longterm_fcn_dict
147 | 
148 |             for param, fcn in longterm_fcn_dict.items():
149 |                 if isinstance(fcn, str):
150 |                     self._predefined_trend(param, longterm_fcn=fcn, **kwargs)
151 | 
152 |         if not isinstance(annual_fcn_dict, type(None)):
153 |             self.annual_fcn_dict = annual_fcn_dict
154 | 
155 |             for param, fcn in annual_fcn_dict.items():
156 |                 if isinstance(fcn, str):
157 |                     self._predefined_trend(param, annual_fcn=fcn, **kwargs)
158 | 
159 |         if not isinstance(daily_fcn_dict, type(None)):
160 |             self.daily_fcn_dict = daily_fcn_dict
161 | 
162 |             for param, fcn in daily_fcn_dict.items():
163 |                 if isinstance(fcn, str):
164 |                     self._predefined_trend(param, daily_fcn=fcn, **kwargs)
165 | 
166 |     def _predefined_trend(self, param, longterm_fcn='degrade',
167 |                           annual_fcn='', daily_fcn='uniform',
168 |                           **kwargs):
169 | 
170 |         if longterm_fcn == 'degrade':
171 |             try:
172 |                 degr_rate = kwargs['degradation_rate']
173 |             except KeyError:
174 |                 raise KeyError("TimeseriesFailure.trend requires a "
175 |                                "passed parameter `degradation_rate` "
176 |                                "if using `degrade` longterm_fcn definition.")
177 |             self.longterm_fcn_dict[param] = lambda x: degr_rate * x
178 | 
179 |     def _combine(self, arr, specs_df, param):
180 |         if param not in specs_df.columns:
181 |             specs_df[param] = np.ones(len(specs_df))
182 | 
183 |         if param in ["Rsh_mult", "Io_mult", "Il_mult"]:
184 |             specs_df[param] -= arr
185 | 
186 |         elif param in ["Rs_mult", "nnsvth_mult"]:
187 |             specs_df[param] += arr
188 | 
189 |     def add_interpolation(self, specs_df, plot_trends=False):
190 |         """Add failure properties to specs_df
191 |         """
192 | 
193 |         # Degradation since start
194 |         float_years = np.array(
195 |             (specs_df.index - specs_df.index[0]) / timedelta(days=365.25))
196 |         for param, fcn in self.longterm_fcn_dict.items():
197 |             vals = fcn(float_years)
198 |             self._combine(vals, specs_df, param)
199 |             if plot_trends:
200 |                 plt.plot(specs_df.index, vals, 'o--', alpha=0.8, label=param)
201 |         if plot_trends:
202 |             if len(self.longterm_fcn_dict.keys()):
203 |                 plt.legend()
204 |                 plt.title("Longterm")
205 |                 plt.show()
206 | 
207 |         # Degradation cyclic per year
208 |         pct_of_year = np.array(specs_df.index.dayofyear) / 365
209 |         for param, fcn in self.annual_fcn_dict.items():
210 |             vals = fcn(pct_of_year)
211 |             self._combine(vals, specs_df, param)
212 |             if plot_trends:
213 |                 plt.plot(specs_df.index, vals, 'o--', alpha=0.8, label=param)
214 |         if plot_trends:
215 |             if len(self.annual_fcn_dict.keys()):
216 |                 plt.legend()
217 |                 plt.title("Annual")
218 |                 plt.show()
219 | 
220 |         # Degradation per day
221 |         pct_of_day = np.array(specs_df.index.hour) / 24
222 |         for param, fcn in self.daily_fcn_dict.items():
223 |             vals = fcn(pct_of_day)
224 |             self._combine(vals, specs_df, param)
225 |             if plot_trends:
226 |                 plt.plot(specs_df.index, vals, 'o--', alpha=0.8, label=param)
227 |         if plot_trends:
228 |             if len(self.daily_fcn_dict.keys()):
229 |                 plt.legend()
230 |                 plt.title("Daily")
231 |                 plt.show()
232 | 
233 |         return specs_df
234 | 
-------------------------------------------------------------------------------- /pvops/iv/utils.py: --------------------------------------------------------------------------------
1 | import pvlib
2 | import copy
3 | 
4 | 
5 | def get_CEC_params(name, mod_spec):
6 |     '''Query module-level parameters from CEC database and
7 |     derive cell-level parameters.
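
    A minimal sketch of a call (module name illustrative; any key in
    pvlib's CEC module database works):

    .. code-block:: python

        module_params, cell_params = get_CEC_params(
            'Canadian_Solar_Inc__CS5P_220M',
            {'ncols': 6, 'nsubstrings': 3})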
8 | 
9 |     Utilizing methods from pvsystem.retrieve_sam('CECMod')
10 | 
11 |     Parameters
12 |     ----------
13 |     name : string
14 |         Module name in the CEC database
15 | 
16 |     mod_spec : dict
17 |         Dictionary providing 'ncols' and 'nsubstrings'
18 | 
19 |     Returns
20 |     -------
21 |     module_parameters (dict), cell_parameters (dict)
22 |     '''
23 | 
24 |     moddb = pvlib.pvsystem.retrieve_sam('CECMod')
25 |     module_parameters = moddb[name].to_dict()
26 | 
27 |     # add reverse bias parameters
28 |     module_parameters['breakdown_factor'] = 1.e-4
29 |     module_parameters['breakdown_voltage'] = -30.  # -5.5
30 |     module_parameters['breakdown_exp'] = 3.28
31 |     module_parameters['ncols'] = mod_spec['ncols']
32 |     module_parameters['nsubstrings'] = mod_spec['nsubstrings']
33 |     module_parameters['ncells_substring'] = module_parameters['N_s'] / \
34 |         mod_spec['nsubstrings']
35 |     module_parameters['nrows'] = module_parameters['N_s'] / \
36 |         module_parameters['ncols']
37 |     # module_parameters['R_sh_ref'] *= rsh_premultiply  # What should this value be? Dynamic.
38 |     # TODO: Adjust Io smaller
39 | 
40 |     # set up cell-level parameters
41 |     cell_parameters = copy.copy(module_parameters)
42 |     cell_parameters['a_ref'] = module_parameters['a_ref'] / \
43 |         module_parameters['N_s']
44 |     cell_parameters['R_sh_ref'] = module_parameters['R_sh_ref'] / \
45 |         module_parameters['N_s']
46 |     cell_parameters['R_s'] = module_parameters['R_s'] / \
47 |         module_parameters['N_s']
48 |     return module_parameters, cell_parameters
49 | 
-------------------------------------------------------------------------------- /pvops/tests/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/__init__.py
-------------------------------------------------------------------------------- /pvops/tests/conftest.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/conftest.py
-------------------------------------------------------------------------------- /pvops/tests/om_data_update_pick.pkl: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/om_data_update_pick.pkl
-------------------------------------------------------------------------------- /pvops/tests/om_summ_pick.pkl: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/om_summ_pick.pkl
-------------------------------------------------------------------------------- /pvops/tests/prod_data_clean_iec_pick.pkl: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/prod_data_clean_iec_pick.pkl
-------------------------------------------------------------------------------- /pvops/tests/prod_data_quant_pick.pkl: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/prod_data_quant_pick.pkl
-------------------------------------------------------------------------------- /pvops/tests/prod_summ_pick.pkl:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/prod_summ_pick.pkl -------------------------------------------------------------------------------- /pvops/tests/test_iv.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import pandas as pd 4 | import numpy as np 5 | import pvops.iv.timeseries_simulator 6 | import pvops.iv.preprocess 7 | import pvops.iv.simulator 8 | from pvops.iv.models import nn 9 | 10 | datadir = os.path.join('tutorials', 'example_data') 11 | example_prodpath = os.path.join( 12 | datadir, 'example_prod_with_covariates.csv') 13 | 14 | 15 | def test_simulation(): 16 | random.seed(0) 17 | 18 | sim = pvops.iv.simulator.Simulator() 19 | 20 | # test adding presets 21 | heavy_shading = {'identifier': 'heavy_shade', 22 | 'E': 400, 23 | 'Tc': 20} 24 | light_shading = {'identifier': 'light_shade', 25 | 'E': 800} 26 | sim.add_preset_conditions('landscape', heavy_shading, rows_aff=2) 27 | sim.add_preset_conditions('portrait', heavy_shading, cols_aff=2) 28 | sim.add_preset_conditions('pole', heavy_shading, 29 | light_shading=light_shading, 30 | width=2, pos=None) 31 | 32 | # test adding manuals 33 | # Using 2D list (aka, multiple conditions as input) 34 | modcells = {'another_example': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 37 | 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 38 | 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 40 | [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42 | 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 43 | 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 44 | 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 45 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] 46 | } 47 | condition_dict = {0: {}, 48 | 1: {'identifier': 'heavy_shade', 49 | 'E': 405, 50 | } 51 | } 52 | sim.add_manual_conditions(modcells, condition_dict) 53 | 54 | # test generate many samples 55 | N = 2 56 | dicts = {'E': {'mean': 400, 57 | 'std': 500, 58 | 'low': 200, 59 | 'upp': 600 60 | }, 61 | 'Tc': {'mean': 30, 62 | 'std': 10, 63 | } 64 | } 65 | sim.generate_many_samples('heavy_shade', N, dicts) 66 | dicts = {'E': {'mean': 800, 67 | 'std': 500, 68 | 'low': 600, 69 | 'upp': 1000 70 | } 71 | } 72 | sim.generate_many_samples('light_shade', N, distributions=dicts) 73 | 74 | # test building strings 75 | sim.build_strings({'pole_bottom_mods': ['pristine', 'pristine', 'pristine', 76 | 'pristine', 'pristine', 'pristine', 77 | 'pole_2width', 'pole_2width', 78 | 'pole_2width', 'pole_2width', 79 | 'pole_2width', 'pole_2width'], 80 | 'portrait_2cols_3bottom_mods': ['pristine', 'pristine', 81 | 'pristine', 'pristine', 82 | 'pristine', 'pristine', 83 | 'pristine', 'pristine', 84 | 'pristine', 85 | 'portrait_2cols', 86 | 'portrait_2cols', 87 | 'portrait_2cols']}) 88 | 89 | # test simulating 90 | sim.simulate() 91 | 92 | df = sim.sims_to_df(focus=['string', 'module'], cutoff=True) 93 | 94 | n_str_samples = 16 95 | n_mod_samples = 29 96 | 97 | assert len(df[df['level'] == 'string']) == n_str_samples 98 | assert len(df[df['level'] == 'module']) == n_mod_samples 99 | 100 | 101 | def test_classification(): 102 | 103 | sim = pvops.iv.simulator.Simulator() 104 | 105 | condition = {'identifier': 'shade', 'Il_mult': 0.6} 106 | sim.add_preset_conditions('complete', condition, 107 | save_name='Complete_shading') 108 | dicts = {'Il_mult': {'mean': 0.6, 109 | 'std': 0.7, 110 | 'low': 0.33, 111 | 'upp': 
0.95, 112 | } 113 | } 114 | sim.generate_many_samples('shade', 100, dicts) 115 | 116 | sim.build_strings({'Pristine array': ['pristine'] * 12, 117 | 'Partial Soiling (1M)': ['pristine'] * 11 + 118 | ['Complete_shading'] * 1, 119 | 'Partial Soiling (6M)': ['pristine'] * 6 + 120 | ['Complete_shading'] * 6 121 | } 122 | ) 123 | 124 | sim.simulate() 125 | df = sim.sims_to_df(focus=['string'], cutoff=True) 126 | 127 | iv_col_dict = { 128 | "mode": "mode", 129 | "current": "current", # Populated in simulator 130 | "voltage": "voltage", # Populated in simulator 131 | "irradiance": "E", # Populated in simulator 132 | "temperature": "T", # Populated in simulator 133 | "power": "power", # Populated in preprocess 134 | "derivative": "derivative", # Populated in feature_generation 135 | "current_diff": "current_diff", # Populated in feature_generation 136 | } 137 | 138 | # Irradiance & Temperature correction, and normalize axes 139 | prep_df = pvops.iv.preprocess.preprocess(df, 0.05, iv_col_dict, 140 | resmpl_cutoff=0.03, correct_gt=True, 141 | normalize_y=False, 142 | CECmodule_parameters=sim.module_parameters, 143 | n_mods=12, gt_correct_option=3) 144 | # Shuffle 145 | bigdf = prep_df.sample(frac=1).reset_index(drop=True) 146 | bigdf.dropna(inplace=True) 147 | 148 | feat_df = nn.feature_generation(bigdf, iv_col_dict) 149 | 150 | nn_config = { 151 | # NN parameters 152 | "model_choice": "1DCNN", 153 | "params": ['current', 'power', 'derivative', 'current_diff'], 154 | "dropout_pct": 0.5, 155 | "verbose": 1, 156 | # Training parameters 157 | "train_size": 0.8, 158 | "shuffle_split": True, 159 | "balance_tactic": 'truncate', 160 | "n_CV_splits": 2, 161 | "batch_size": 10, 162 | "max_epochs": 100, 163 | # LSTM parameters 164 | "use_attention_lstm": False, 165 | "units": 50, 166 | # 1DCNN parameters 167 | "nfilters": 64, 168 | "kernel_size": 12, 169 | } 170 | 171 | iv_col_dict = {'mode': 'mode'} 172 | model, _, _ = nn.classify_curves(feat_df, iv_col_dict, nn_config) 173 | 174 | if model.test_accuracy > 0.9: 175 | assert True 176 | else: 177 | assert False 178 | 179 | 180 | def test_timeseries_simulator(): 181 | 182 | env_df = pd.read_csv(example_prodpath) 183 | env_df.index = pd.to_datetime(env_df["date"]) 184 | env_df = env_df.sort_index() 185 | 186 | # Only simulate where irradiance > 200 187 | env_df = env_df[env_df['irrad_poa_Wm2'] > 600] 188 | # Two sites have data here so we choose one 189 | env_df = env_df[env_df['randid'] == 'R10'] 190 | # Remove any NaN environmental specifications 191 | env_df = env_df.dropna(subset=['irrad_poa_Wm2', 'temp_amb_C']) 192 | 193 | # Reduce number of simulations for test 194 | env_df = env_df.iloc[0:100] 195 | 196 | failureA = pvops.iv.timeseries_simulator.TimeseriesFailure() 197 | longterm_fcn_dict = { 198 | 'Rs_mult': "degrade" 199 | } 200 | annual_fcn_dict = { 201 | 'Rs_mult': lambda x: (0.3 * np.sin(np.pi * x)) 202 | } 203 | 204 | failureA.trend(longterm_fcn_dict=longterm_fcn_dict, 205 | annual_fcn_dict=annual_fcn_dict, 206 | degradation_rate=1.005) 207 | 208 | iv_col_dict = {'irradiance': 'irrad_poa_Wm2', 209 | 'temperature': 'temp_amb_C' 210 | } 211 | 212 | env_df['identifier'] = env_df.index.strftime("%Y-%m-%d %H:%M:%S") 213 | 214 | time_simulator = pvops.iv.timeseries_simulator.IVTimeseriesGenerator() 215 | time_simulator.generate( 216 | env_df, [failureA], iv_col_dict, 'identifier', plot_trends=False) 217 | 218 | time_simulator.add_time_conditions('complete', nmods=12) 219 | time_simulator.simulate() 220 | 221 | sims_df = 
time_simulator.sims_to_df(focus=['string'], cutoff=True) 222 | 223 | assert len(sims_df) == 100 224 | -------------------------------------------------------------------------------- /pvops/tests/test_text.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from pvops.text import visualize, preprocess, nlp_utils 5 | 6 | import pandas as pd 7 | import numpy as np 8 | import datetime 9 | import matplotlib 10 | 11 | def test_text_remove_nondate_nums(): 12 | example = r"This is a test example https://www.google.com 10% #10 101 1-1-1 a-e4 13-1010 10.1 123456789 123/12 executed on 2/4/2020" 13 | answer = r" this is test example executed on 2/4/2020 " 14 | assert preprocess.text_remove_nondate_nums(example) == answer 15 | 16 | 17 | def test_text_remove_numbers_stopwords(): 18 | example = r"This is a test example 10% #10 101 1-1-1 13-1010 10.1 123456789 123/12 executed on 2/4/2020" 19 | answer = r"This test example executed" 20 | 21 | stopwords = nlp_utils.create_stopwords() 22 | assert preprocess.text_remove_numbers_stopwords(example, stopwords) == answer 23 | 24 | 25 | def test_get_dates(): 26 | df = pd.DataFrame( 27 | [ 28 | { 29 | "Date": "2020/01/23 12:34:56", 30 | "Document": "Find this date 2020/01/23 12:34:56", 31 | }, 32 | { 33 | "Date": np.nan, 34 | "Document": "Find this date March 5 2021 and April 7 2022", 35 | }, 36 | ] 37 | ) 38 | 39 | answer = [datetime.datetime.strptime( 40 | "2020/01/23 12:34:56", "%Y/%m/%d %H:%M:%S")] 41 | assert answer == preprocess.get_dates( 42 | df["Document"].iloc[0], df, 0, { 43 | "data": "Document", "eventstart": "Date"}, False 44 | ) 45 | 46 | answer = [ 47 | datetime.datetime.strptime("2021/03/05 00:00:00", "%Y/%m/%d %H:%M:%S"), 48 | datetime.datetime.strptime("2022/04/07 00:00:00", "%Y/%m/%d %H:%M:%S"), 49 | ] 50 | assert answer == preprocess.get_dates( 51 | df["Document"].iloc[1], df, 1, { 52 | "data": "Document", "eventstart": "Date"}, False 53 | ) 54 | 55 | 56 | def test_visualize_attribute_timeseries(): 57 | 58 | dates = pd.Series( 59 | [ 60 | "2020/01/23 12:34:56", 61 | "2020/01/24 12:34:56", 62 | "2020/01/25 12:34:56", 63 | ] 64 | ) 65 | 66 | dates = pd.to_datetime(dates).tolist() 67 | 68 | df = pd.DataFrame( 69 | {"labels": ["A word", "B word", "C word"], "date": dates}) 70 | 71 | fig = visualize.visualize_attribute_timeseries( 72 | df, {"label": "labels", "date": "date"}, date_structure="%Y-%m-%d" 73 | ) 74 | assert isinstance(fig, matplotlib.figure.Figure) 75 | 76 | 77 | def test_visualize_word_frequency_plot(): 78 | documents = ["A word", "B word", "C word"] 79 | words = " ".join(documents) 80 | tokenized_words = preprocess.regex_tokenize(words) 81 | 82 | result = visualize.visualize_word_frequency_plot(tokenized_words) 83 | 84 | assert isinstance(result[0], matplotlib.pyplot.Figure) 85 | assert isinstance(result[1], dict) 86 | 87 | 88 | def test_visualize_attribute_connectivity(): 89 | Attribute1 = ["A", "B", "C", "C"] 90 | Attribute2 = ["X", "X", "Y", "Z"] 91 | 92 | df = pd.DataFrame({"Attr1": Attribute1, "Attr2": Attribute2}) 93 | 94 | om_col_dict = {"attribute1_col": "Attr1", "attribute2_col": "Attr2"} 95 | 96 | fig, G = visualize.visualize_attribute_connectivity( 97 | df, 98 | om_col_dict, 99 | figsize=(10, 8), 100 | edge_width_scalar=2, 101 | graph_aargs={ 102 | "with_labels": True, 103 | "font_weight": "bold", 104 | }, 105 | ) 106 | 107 | assert isinstance(fig, matplotlib.pyplot.Figure) 108 | assert list(G.edges()) == [("A", "X"), ("B", "X"), ("C", "Y"), ("C", "Z")] 
109 | 110 | matplotlib.pyplot.close() 111 | 112 | 113 | def test_summarize_text_data(): 114 | 115 | df = pd.DataFrame( 116 | [ 117 | { 118 | "Date": "2020/01/23 12:34:56", 119 | "Document": "Find this date 2020/01/23 12:34:56", 120 | }, 121 | { 122 | "Date": np.nan, 123 | "Document": "Find this date March 5 2021 and April 7 2022", 124 | }, 125 | ] 126 | ) 127 | 128 | answer = { 129 | "n_samples": 2, 130 | "n_nan_docs": 0, 131 | "n_words_doc_average": 7.50, 132 | "n_unique_words": 12, 133 | "n_total_words": 15.00, 134 | } 135 | 136 | info = nlp_utils.summarize_text_data(df, "Document") 137 | 138 | assert answer == info 139 | -------------------------------------------------------------------------------- /pvops/tests/test_timeseries.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pandas as pd 4 | import numpy as np 5 | from pvops.timeseries.models import linear 6 | from pvops.timeseries import preprocess as tprep 7 | from pvops.text2time import preprocess as t2tprep 8 | 9 | # Define csv paths 10 | datadir = os.path.join('tutorials', 'example_data') 11 | example_OMpath = os.path.join(datadir, 'example_om_data2.csv') 12 | example_prodpath = os.path.join(datadir, 'example_perf_data.csv') 13 | example_metapath = os.path.join(datadir, 'example_metadata2.csv') 14 | example_prod2path = os.path.join(datadir, 'example_prod_with_covariates.csv') 15 | 16 | # Assigning dictionaries to connect pvops variables with User's column names 17 | # Format for dictionaries is {pvops variable: user-specific column names} 18 | prod_col_dict = {'siteid': 'randid', 19 | 'timestamp': 'Date', 20 | 'powerprod': 'AC_POWER', 21 | 'energyprod': 'Energy', 22 | 'irradiance': 'POAirradiance', 23 | 'baseline': 'IEC_pstep', 24 | 'dcsize': 'dcsize', 25 | 'compared': 'Compared', 26 | 'energy_pstep': 'Energy_pstep', 27 | 'clearsky_irr': 'clearsky_irr' 28 | } 29 | 30 | om_col_dict = {'siteid': 'randid', 31 | 'datestart': 'date_start', 32 | 'dateend': 'date_end', 33 | 'workID': 'WONumber', 34 | 'worktype': 'WOType', 35 | 'asset': 'Asset', 36 | 'eventdur': 'EventDur', 37 | 'modatestart': 'MonthStart', 38 | 'agedatestart': 'AgeStart'} 39 | 40 | metad_col_dict = {'siteid': 'randid', 41 | 'dcsize': 'DC_Size_kW', 42 | 'COD': 'COD', 43 | 'latitude': 'latitude', 44 | 'longitude': 'longitude'} 45 | 46 | 47 | def test_prod_irradiance_filter(): 48 | 49 | prod_df = pd.read_csv(example_prodpath) 50 | meta_df = pd.read_csv(example_metapath) 51 | 52 | prod_df = t2tprep.prod_date_convert(prod_df, prod_col_dict) 53 | prod_df.index = prod_df[prod_col_dict['timestamp']] 54 | prod_df['randid'] = 'R27' 55 | 56 | # Data is missing in the middle of this example, so only going to pass 57 | # The first set of rows 58 | prod_df = prod_df.iloc[0:200] 59 | 60 | prod_df_out, mask_series = tprep.prod_irradiance_filter(prod_df, 61 | prod_col_dict, 62 | meta_df, 63 | metad_col_dict) 64 | 65 | true_detection_irradiance = [0, 44] 66 | assert sum(mask_series) in true_detection_irradiance 67 | 68 | 69 | def test_prod_inverter_clipping_filter(): 70 | 71 | prod_df = pd.read_csv(example_prodpath) 72 | meta_df = pd.read_csv(example_metapath) 73 | 74 | prod_df = t2tprep.prod_date_convert(prod_df, prod_col_dict) 75 | prod_df.index = prod_df[prod_col_dict['timestamp']] 76 | prod_df['randid'] = 'R27' 77 | 78 | # Data is missing in the middle of this example, so only going to pass 79 | # The first set of rows 80 | prod_df = prod_df.iloc[0:200] 81 | 82 | geometric = 
tprep.prod_inverter_clipping_filter(prod_df, 83 | prod_col_dict, 84 | meta_df, metad_col_dict, 85 | model='geometric') 86 | 87 | threshold = tprep.prod_inverter_clipping_filter(prod_df, 88 | prod_col_dict, 89 | meta_df, metad_col_dict, 90 | model='threshold') 91 | 92 | levels = tprep.prod_inverter_clipping_filter(prod_df, 93 | prod_col_dict, 94 | meta_df, metad_col_dict, 95 | model='levels') 96 | 97 | true_detection_geometric = 0 98 | true_detection_threshold = 0 99 | true_detection_levels = 183 100 | 101 | assert sum(geometric['mask']) == true_detection_geometric 102 | assert sum(threshold['mask']) == true_detection_threshold 103 | assert sum(levels['mask']) == true_detection_levels 104 | 105 | 106 | def test_linear_model(): 107 | prod_df = pd.read_csv(example_prod2path) 108 | 109 | # Format for dictionaries is {pvops variable: user-specific column names} 110 | prod_col_dict = {'siteid': 'randid', 111 | 'timestamp': 'date', 112 | 'powerprod': 'generated_kW', 113 | 'irradiance': 'irrad_poa_Wm2', 114 | 'temperature': 'temp_amb_C', 115 | 'baseline': 'IEC_pstep', 116 | 'dcsize': 'dcsize', 117 | 'compared': 'Compared', 118 | 'energy_pstep': 'Energy_pstep'} 119 | 120 | prod_data_converted = t2tprep.prod_date_convert(prod_df, prod_col_dict) 121 | prod_data_datena_d, _ = t2tprep.prod_nadate_process( 122 | prod_data_converted, prod_col_dict, pnadrop=True) 123 | 124 | prod_data_datena_d.index = prod_data_datena_d[prod_col_dict['timestamp']] 125 | 126 | model_prod_data = prod_data_datena_d.dropna(subset=[ 127 | 'irrad_poa_Wm2', 'temp_amb_C', 'wind_speed_ms'] + 128 | [prod_col_dict['powerprod'] 129 | ]) 130 | model_prod_data = model_prod_data[model_prod_data['randid'] == 'R15'] 131 | 132 | model, train_df, test_df = linear.modeller(prod_col_dict, 133 | kernel_type='default', 134 | time_weighted='month', 135 | X_parameters=[ 136 | 'irrad_poa_Wm2', 137 | 'temp_amb_C'], 138 | prod_df=model_prod_data, 139 | test_split=0.05, 140 | degree=3, 141 | verbose=0) 142 | 143 | name = list(model.estimators.keys())[0] 144 | 145 | benchmark_r2 = 0.99 146 | benchmark_mse = 420000 147 | 148 | eval = model.estimators[name]['test_eval'] 149 | 150 | assert eval['r2'] > benchmark_r2 151 | assert eval['mse'] < benchmark_mse 152 | 153 | 154 | def test_establish_solar_loc(): 155 | prod_df = pd.read_csv(example_prod2path) 156 | meta_df = pd.read_csv(example_metapath) 157 | # Test-specific changes 158 | meta_df['randid'] = ["R10", "R15"] 159 | meta_df.index = meta_df['randid'] 160 | # Format for dictionaries is {pvops variable: user-specific column names} 161 | prod_col_dict = {'siteid': 'randid', 162 | 'timestamp': 'date', 163 | 'powerprod': 'generated_kW', 164 | 'irradiance': 'irrad_poa_Wm2', 165 | 'temperature': 'temp_amb_C', 166 | 'baseline': 'IEC_pstep', 167 | 'dcsize': 'dcsize', 168 | 'compared': 'Compared', 169 | 'energy_pstep': 'Energy_pstep'} 170 | prod_data_converted = t2tprep.prod_date_convert(prod_df, prod_col_dict) 171 | prod_data_datena_d, _ = t2tprep.prod_nadate_process( 172 | prod_data_converted, prod_col_dict, pnadrop=True) 173 | 174 | prod_data_datena_d.index = pd.to_datetime(prod_data_datena_d[prod_col_dict['timestamp']]) 175 | 176 | prod_with_solar_pos = tprep.establish_solar_loc(prod_data_datena_d, 177 | prod_col_dict, 178 | meta_df, 179 | metad_col_dict) 180 | 181 | positional_columns = ['apparent_zenith', 182 | 'zenith', 183 | 'apparent_elevation', 184 | 'elevation', 185 | 'azimuth', 186 | 'equation_of_time'] 187 | answer = [142.081554, 142.081554, -52.081554, 188 | -52.081554, 140.635657, -3.925820] 189 
| rounded_answer = [round(a, 2) for a in answer] 190 | 191 | expected = prod_with_solar_pos.iloc[0][positional_columns].values 192 | rounded_expected = [round(a, 2) for a in expected] 193 | 194 | assert len(rounded_answer) == len(rounded_expected) 195 | assert all([a == b for a, b in zip(rounded_answer, 196 | rounded_expected)]) 197 | -------------------------------------------------------------------------------- /pvops/text/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | text module 3 | """ 4 | 5 | import pvops.text.classify 6 | import pvops.text.defaults 7 | import pvops.text.nlp_utils 8 | import pvops.text.preprocess 9 | import pvops.text.utils 10 | import pvops.text.visualize -------------------------------------------------------------------------------- /pvops/text/classify.py: -------------------------------------------------------------------------------- 1 | # Classifiers 2 | from sklearn.pipeline import Pipeline 3 | from sklearn.model_selection import GridSearchCV 4 | 5 | from scipy.sparse import issparse 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import copy 10 | 11 | from pvops.text.preprocess import get_keywords_of_interest 12 | 13 | def classification_deployer( 14 | X, 15 | y, 16 | n_splits, 17 | classifiers, 18 | search_space, 19 | pipeline_steps, 20 | scoring, 21 | greater_is_better=True, 22 | verbose=3, 23 | ): 24 | """The classification deployer builds a classifier evaluator with an ingrained hyperparameter fine-tuning grid search protocol. 25 | The output of this function will be a data frame showing the performance of each classifier when utilizing a specific hyperparameter 26 | configuration. 27 | 28 | To see an example of this method's application, see ``tutorials//text_class_example.py`` 29 | 30 | Parameters 31 | ---------- 32 | X : list of str 33 | List of documents (str). The documents will be passed through the pipeline_steps, where they will be transformed into vectors. 34 | y : list 35 | List of labels corresponding with the documents in X 36 | n_splits : int 37 | Integer defining the number of splits in the cross validation split during training 38 | classifiers : dict 39 | Dictionary with key as classifier identifier (str) and value as classifier instance following sklearn's 40 | base model convention: sklearn_docs. 41 | 42 | .. sklearn_docs: https://scikit-learn.org/stable/modules/generated/sklearn.base.is_classifier.html 43 | .. code-block:: python 44 | 45 | classifiers = { 46 | 'LinearSVC' : LinearSVC(), 47 | 'AdaBoostClassifier' : AdaBoostClassifier(), 48 | 'RidgeClassifier' : RidgeClassifier() 49 | } 50 | 51 | See ``supervised_classifier_defs.py`` or ``unsupervised_classifier_defs.py`` for this package's defaults. 52 | search_space : dict 53 | Dictionary with classifier identifiers, as used in ``classifiers``, mapped to its hyperparameters. 54 | 55 | .. code-block:: python 56 | 57 | search_space = { 58 | 'LinearSVC' : { 59 | 'clf__C' : [1e-2,1e-1], 60 | 'clf__max_iter':[800,1000], 61 | }, 62 | 'AdaBoostClassifier' : { 63 | 'clf__n_estimators' : [50,100], 64 | 'clf__learning_rate':[1.,0.9,0.8], 65 | 'clf__algorithm' : ['SAMME.R'] 66 | }, 67 | 'RidgeClassifier' : { 68 | 'clf__alpha' : [0.,1e-3,1.], 69 | 'clf__normalize' : [False,True] 70 | } 71 | } 72 | 73 | See ``supervised_classifier_defs.py`` or ``unsupervised_classifier_defs.py`` for this package's defaults. 74 | pipeline_steps : list of tuples 75 | Define embedding and machine learning pipeline. 
The last tuple must be ``('clf', None)`` so that the output 76 | of the pipeline is a prediction. 77 | For supervised classifiers using a TFIDF embedding, one could specify 78 | 79 | .. code-block:: python 80 | 81 | pipeline_steps = [('tfidf', TfidfVectorizer()), 82 | ('clf', None)] 83 | 84 | For unsupervised clusterers using a TFIDF embedding, one could specify 85 | 86 | .. code-block:: python 87 | 88 | pipeline_steps = [('tfidf', TfidfVectorizer()), 89 | ('to_dense', DataDensifier.DataDensifier()), 90 | ('clf', None)] 91 | 92 | A densifier is required from some clusters, which fail if sparse data is passed. 93 | scoring : sklearn callable scorer (i.e., any statistic that summarizes predictions relative to observations). 94 | Example scorers include f1_score, accuracy, etc. 95 | Callable object that returns a scalar score created using sklearn.metrics.make_scorer 96 | For supervised classifiers, one could specify 97 | 98 | .. code-block:: python 99 | 100 | scoring = make_scorer(f1_score, average = 'weighted', pos_label = None) 101 | 102 | For unsupervised classifiers, one could specify 103 | 104 | .. code-block:: python 105 | 106 | scoring = make_scorer(homogeneity_score) 107 | 108 | greater_is_better : bool 109 | Whether the scoring parameter is better when greater (i.e. accuracy) or not. 110 | 111 | verbose : int 112 | Control the specificity of the prints. If greater than 1, a print out is shown when a new "best classifier" 113 | is found while iterating. Additionally, the verbosity during the grid search follows sklearn's definitions. 114 | The frequency of the messages increase with the verbosity level. 115 | 116 | Returns 117 | ------- 118 | DataFrame 119 | Summarization of results from all of the classifiers 120 | """ 121 | 122 | rows = [] 123 | 124 | if issparse(X): 125 | print("Converting passed data to dense array...") 126 | X = X.toarray() 127 | 128 | # get position of 'clf' in pipeline_steps 129 | idx_clf_pipeline = [i for i, it in enumerate( 130 | pipeline_steps) if it[0] == "clf"][0] 131 | 132 | best_gs_instance = None 133 | if greater_is_better: 134 | best_model_score = 0.0 135 | else: 136 | best_model_score = np.inf 137 | for iter_idx, key in enumerate(classifiers.keys()): 138 | clas = classifiers[key] 139 | space = search_space[key] 140 | 141 | iter_pipeline_steps = copy.deepcopy(pipeline_steps) 142 | iter_pipeline_steps[idx_clf_pipeline] = ("clf", clas) 143 | pipe = Pipeline(iter_pipeline_steps) 144 | 145 | gs_clf = GridSearchCV( 146 | pipe, 147 | space, 148 | scoring=scoring, 149 | cv=n_splits, 150 | n_jobs=-1, 151 | return_train_score=True, 152 | verbose=verbose, 153 | ) 154 | gs_clf.fit(X, y) 155 | params = gs_clf.cv_results_["params"] 156 | scores = [] 157 | for i in range(n_splits): 158 | r1 = gs_clf.cv_results_[f"split{i}_test_score"] 159 | scores.append(r1.reshape(len(params), 1)) 160 | 161 | r2 = gs_clf.cv_results_["mean_fit_time"] 162 | 163 | all_scores = np.hstack(scores) 164 | for param, score, time in zip(params, all_scores, r2): 165 | param["mean_fit_time"] = time 166 | d = { 167 | "estimator" : key, 168 | "min_score" : min(score), 169 | "max_score" : max(score), 170 | "mean_score" : np.mean(score), 171 | "std_score" : np.std(score), 172 | } 173 | rows.append((pd.Series({**param, **d}))) 174 | 175 | if greater_is_better: 176 | replacement_logic = gs_clf.best_score_ > best_model_score 177 | else: 178 | replacement_logic = gs_clf.best_score_ < best_model_score 179 | 180 | if replacement_logic: 181 | if verbose > 1: 182 | print( 183 | "Better score ({:.3f}) 
found on classifier: {}".format(
184 |                         gs_clf.best_score_, key
185 |                     )
186 |                 )
187 |             best_model_score = gs_clf.best_score_
188 |             best_gs_instance = gs_clf
189 | 
190 |     return pd.concat(rows, axis=1).T, best_gs_instance.best_estimator_
191 | 
192 | def get_attributes_from_keywords(om_df, col_dict, reference_df, reference_col_dict):
193 |     """Find keywords of interest in specified column of dataframe, return as new column value.
194 | 
195 |     If keywords of interest given in a reference dataframe are in the specified column of the
196 |     dataframe, return the keyword category, or categories.
197 |     For example, if the string 'inverter' is in the list of text, return ['inverter'].
198 | 
199 |     Parameters
200 |     ----------
201 |     om_df : pd.DataFrame
202 |         Dataframe to search for keywords of interest; must include the column named in col_dict['data'].
203 |     col_dict : dict of {str : str}
204 |         A dictionary that contains the column names needed:
205 | 
206 |         - data : string, should be assigned to associated column which stores the tokenized text logs
207 |         - predicted_col : string, will be used to create keyword search label column
208 |     reference_df : DataFrame
209 |         Holds columns that define the reference dictionary to search for keywords of interest.
210 |         Note: This function can currently only handle single words, no n-gram functionality.
211 |     reference_col_dict : dict of {str : str}
212 |         A dictionary that contains the column names that describe how
213 |         referencing is going to be done
214 | 
215 |         - reference_col_from : string, should be assigned to
216 |           associated column name in reference_df that are possible input reference values
217 |           Example: pd.Series(['inverter', 'invert', 'inv'])
218 |         - reference_col_to : string, should be assigned to
219 |           associated column name in reference_df that are the output reference values
220 |           of interest
221 |           Example: pd.Series(['inverter', 'inverter', 'inverter'])
222 | 
223 |     Returns
224 |     -------
225 |     om_df: pd.DataFrame
226 |         Input df with the col_dict['predicted_col'] column added, where each found keyword is its own row, which may
227 |         result in duplicate rows if more than one keyword of interest was found in the text column.
228 |     """
229 |     om_df[col_dict['predicted_col']] = om_df[col_dict['data']].apply(get_keywords_of_interest,
230 |                                                                      reference_df=reference_df,
231 |                                                                      reference_col_dict=reference_col_dict)
232 | 
233 |     # each multi-category now in its own row, some logs have multiple equipment issues
234 |     multiple_keywords_df = om_df[om_df[col_dict['predicted_col']].str.len() > 1]
235 |     om_df = om_df.explode(col_dict['predicted_col'])
236 | 
237 |     msg = f'{len(multiple_keywords_df)} entries had multiple keywords of interest. Reference: {multiple_keywords_df.index} in original dataframe.'
238 |     print(msg)
239 | 
240 |     return om_df
-------------------------------------------------------------------------------- /pvops/text/nlp_utils.py: --------------------------------------------------------------------------------
1 | from sklearn.base import BaseEstimator
2 | from gensim.models.doc2vec import TaggedDocument, Doc2Vec
3 | import scipy
4 | import numpy as np
5 | from importlib import resources
6 | from gensim.models import Word2Vec
7 | 
8 | from pvops.text import preprocess
9 | 
10 | 
11 | class Doc2VecModel(BaseEstimator):
12 |     """Performs a gensim Doc2Vec transformation of the input documents to create
13 |     embedded representations of the documents. See gensim's
14 |     Doc2Vec model for information regarding the hyperparameters.
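
    A minimal sketch of typical use (documents illustrative):

    .. code-block:: python

        docs = ["inverter tripped offline", "cleaned soiled modules"]
        d2v = Doc2VecModel(vector_size=50, epochs=5)
        X = d2v.fit_transform(docs)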
15 | """ 16 | 17 | def __init__( 18 | self, 19 | vector_size=100, 20 | dm_mean=None, 21 | dm=1, 22 | dbow_words=0, 23 | dm_concat=0, 24 | dm_tag_count=1, 25 | dv=None, 26 | dv_mapfile=None, 27 | comment=None, 28 | trim_rule=None, 29 | callbacks=(), 30 | window=5, 31 | epochs=10, 32 | ): 33 | self.d2v_model = None 34 | self.vector_size = vector_size 35 | self.dm_mean = dm_mean 36 | self.dm = dm 37 | self.dbow_words = dbow_words 38 | self.dm_concat = dm_concat 39 | self.dm_tag_count = dm_tag_count 40 | self.dv = dv 41 | self.dv_mapfile = dv_mapfile 42 | self.comment = comment 43 | self.trim_rule = trim_rule 44 | self.callbacks = callbacks 45 | self.window = window 46 | self.epochs = epochs 47 | 48 | def fit(self, raw_documents, y=None): 49 | """Fits the Doc2Vec model.""" 50 | # Initialize model 51 | self.d2v_model = Doc2Vec( 52 | vector_size=self.vector_size, 53 | dm_mean=self.dm_mean, 54 | dm=self.dm, 55 | dbow_words=self.dbow_words, 56 | dm_concat=self.dm_concat, 57 | dm_tag_count=self.dm_tag_count, 58 | dv=self.dv, 59 | dv_mapfile=self.dv_mapfile, 60 | comment=self.comment, 61 | trim_rule=self.trim_rule, 62 | window=self.window, 63 | epochs=self.epochs, 64 | ) 65 | # Tag docs 66 | tagged_documents = [ 67 | TaggedDocument(words=preprocess.regex_tokenize(_d.lower()), tags=[str(i)]) 68 | for i, _d in enumerate(raw_documents) 69 | ] 70 | # Build vocabulary 71 | self.d2v_model.build_vocab(tagged_documents) 72 | # Train model 73 | self.d2v_model.train( 74 | tagged_documents, 75 | total_examples=len(tagged_documents), 76 | epochs=self.d2v_model.epochs, 77 | ) 78 | return self 79 | 80 | def transform(self, raw_documents): 81 | """Transforms the documents into Doc2Vec vectors.""" 82 | X = [] 83 | for doc in raw_documents: 84 | X.append(self.d2v_model.infer_vector(preprocess.regex_tokenize(doc))) 85 | return X 86 | 87 | def fit_transform(self, raw_documents, y=None): 88 | """Utilizes the ``fit()`` and ``transform()`` methods in this class.""" 89 | self.fit(raw_documents) 90 | return self.transform(raw_documents) 91 | 92 | 93 | class DataDensifier(BaseEstimator): 94 | """A data structure transformer which converts sparse data to dense data. 95 | This process is usually incorporated in this library when doing unsupervised machine learning. 96 | This class is built specifically to work inside a sklearn pipeline. 97 | Therefore, it uses the default ``transform``, ``fit``, ``fit_transform`` method structure. 98 | """ 99 | 100 | def transform(self, X, y=None): 101 | """Return a dense array if the input array is sparse. 102 | 103 | Parameters 104 | ---------- 105 | X : array 106 | Input data of numerical values. For this package, these values could 107 | represent embedded representations of documents. 108 | 109 | Returns 110 | ------- 111 | dense array 112 | """ 113 | if scipy.sparse.issparse(X): 114 | return X.toarray() 115 | else: 116 | return X.copy() 117 | 118 | def fit(self, X, y=None): 119 | """Placeholder method to conform to the sklearn class structure. 120 | 121 | Parameters 122 | ---------- 123 | X : array 124 | Input data 125 | y : Not utilized. 126 | 127 | Returns 128 | ------- 129 | DataDensifier object 130 | """ 131 | return self 132 | 133 | def fit_transform(self, X, y=None): 134 | """Performs same action as ``DataDensifier.transform()``, 135 | which returns a dense array when the input is sparse. 136 | 137 | Parameters 138 | ---------- 139 | X : array 140 | Input data 141 | y : Not utilized. 
142 | 
143 |         Returns
144 |         -------
145 |         dense array
146 |         """
147 |         return self.transform(X=X, y=y)
148 | 
149 | 
150 | def create_stopwords(lst_add_words=[], lst_keep_words=[]):
151 |     """Concatenate a list of stopwords using both words grabbed from nltk and user-specified words.
152 |     The nltk stopwords are those that were current at the release of pvOps version 0.5.0 on
153 |     February 19th, 2025. See below for more on nltk.
154 | 
155 |     Bird, Steven, Edward Loper and Ewan Klein (2009), Natural Language Processing with Python. O'Reilly Media Inc.
156 | 
157 |     https://www.nltk.org/
158 | 
159 |     Parameters
160 |     ----------
161 |     lst_add_words : list
162 |         List of words (e.g., "road" or "street") to add to stopwords list. If these words are already included in the nltk list, a duplicate will not be added.
163 |     lst_keep_words : list
164 |         List of words (e.g., "before" or "until") to remove from stopwords list. This is usually used to modify default stop words that might be of interest to PV.
165 | 
166 |     Returns
167 |     -------
168 |     list
169 |         List of alphabetized stopwords
170 |     """
171 |     lst_stopwords = set()
172 | 
173 |     with resources.open_text('pvops.text', 'stopwords.txt') as file:
174 |         default_stopwords = file.read().split()
175 | 
176 |     lst_stopwords = lst_stopwords.union(default_stopwords)
177 |     lst_stopwords = lst_stopwords.union(lst_add_words)
178 |     lst_stopwords = list(set(lst_stopwords) - set(lst_keep_words))
179 |     return sorted(list(set(lst_stopwords)))
180 | 
181 | 
182 | def summarize_text_data(om_df, colname):
183 |     """Display information about a set of documents located in a dataframe, including
184 |     the number of samples, average number of words, vocabulary size, and number of words
185 |     in total.
186 | 
187 |     Parameters
188 |     ----------
189 |     om_df : DataFrame
190 |         A pandas dataframe containing O&M data, which contains at least the colname of interest
191 |     colname : str
192 |         Column name of column with text
193 | 
194 |     Returns
195 |     -------
196 |     dict
197 |         dictionary containing printed summary data
198 |     """
199 |     df = om_df.copy()
200 |     text = df[colname].tolist()
201 | 
202 |     nonan_text = [x for x in text if (str(x) != "nan" and x is not None)]
203 | 
204 |     tokenized = [sentence.split() for sentence in nonan_text]
205 |     avg_n_words = np.array([len(tokens) for tokens in tokenized]).mean()
206 |     sum_n_words = np.array([len(tokens) for tokens in tokenized]).sum()
207 |     model = Word2Vec(tokenized, min_count=1)
208 | 
209 |     # Total vocabulary
210 |     vocab = model.wv
211 | 
212 |     # Bold title.
213 |     print("\033[1m" + "DETAILS" + "\033[0m")
214 | 
215 |     info = {
216 |         "n_samples": len(df),
217 |         "n_nan_docs": len(df) - len(nonan_text),
218 |         "n_words_doc_average": avg_n_words,
219 |         "n_unique_words": len(vocab),
220 |         "n_total_words": sum_n_words,
221 |     }
222 | 
223 |     # Display information.
224 | print(f' {info["n_samples"]} samples') 225 | print(f' {info["n_nan_docs"]} invalid documents') 226 | print(" {:.2f} words per sample on average".format( 227 | info["n_words_doc_average"])) 228 | print(f' Number of unique words {info["n_unique_words"]}') 229 | print(" {:.2f} total words".format(info["n_total_words"])) 230 | 231 | return info 232 | -------------------------------------------------------------------------------- /pvops/text/stopwords.txt: -------------------------------------------------------------------------------- 1 | a 2 | about 3 | above 4 | after 5 | again 6 | against 7 | ain 8 | all 9 | am 10 | an 11 | and 12 | any 13 | are 14 | aren 15 | aren't 16 | as 17 | at 18 | be 19 | because 20 | been 21 | before 22 | being 23 | below 24 | between 25 | both 26 | but 27 | by 28 | can 29 | couldn 30 | couldn't 31 | d 32 | did 33 | didn 34 | didn't 35 | do 36 | does 37 | doesn 38 | doesn't 39 | doing 40 | don 41 | don't 42 | down 43 | during 44 | each 45 | few 46 | for 47 | from 48 | further 49 | had 50 | hadn 51 | hadn't 52 | has 53 | hasn 54 | hasn't 55 | have 56 | haven 57 | haven't 58 | having 59 | he 60 | her 61 | here 62 | hers 63 | herself 64 | him 65 | himself 66 | his 67 | how 68 | i 69 | if 70 | in 71 | into 72 | is 73 | isn 74 | isn't 75 | it 76 | it's 77 | its 78 | itself 79 | just 80 | ll 81 | m 82 | ma 83 | me 84 | mightn 85 | mightn't 86 | more 87 | most 88 | mustn 89 | mustn't 90 | my 91 | myself 92 | needn 93 | needn't 94 | no 95 | nor 96 | not 97 | now 98 | o 99 | of 100 | off 101 | on 102 | once 103 | only 104 | or 105 | other 106 | our 107 | ours 108 | ourselves 109 | out 110 | over 111 | own 112 | re 113 | s 114 | same 115 | shan 116 | shan't 117 | she 118 | she's 119 | should 120 | should've 121 | shouldn 122 | shouldn't 123 | so 124 | some 125 | such 126 | t 127 | than 128 | that 129 | that'll 130 | the 131 | their 132 | theirs 133 | them 134 | themselves 135 | then 136 | there 137 | these 138 | they 139 | this 140 | those 141 | through 142 | to 143 | too 144 | under 145 | until 146 | up 147 | ve 148 | very 149 | was 150 | wasn 151 | wasn't 152 | we 153 | were 154 | weren 155 | weren't 156 | what 157 | when 158 | where 159 | which 160 | while 161 | who 162 | whom 163 | why 164 | will 165 | with 166 | won 167 | won't 168 | wouldn 169 | wouldn't 170 | y 171 | you 172 | you'd 173 | you'll 174 | you're 175 | you've 176 | your 177 | yours 178 | yourself 179 | yourselves 180 | -------------------------------------------------------------------------------- /pvops/text/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def remap_attributes(om_df, remapping_df, remapping_col_dict, 6 | allow_missing_mappings=False, print_info=False): 7 | """A utility function which remaps the attributes of om_df using columns 8 | within remapping_df. 9 | 10 | Parameters 11 | ---------- 12 | om_df : DataFrame 13 | A pandas dataframe containing O&M data, which needs to be remapped. 
14 |     remapping_df : DataFrame
15 |         Holds columns that define the remappings
16 |     remapping_col_dict : dict of {str : str}
17 |         A dictionary that contains the column names that describe how
18 |         remapping is going to be done
19 | 
20 |         - attribute_col : string, should be assigned to associated
21 |           column name in om_df which will be remapped
22 |         - remapping_col_from : string, should be assigned
23 |           to associated column name in remapping_df that matches
24 |           original attribute of interest in om_df
25 |         - remapping_col_to : string, should be assigned to
26 |           associated column name in remapping_df that contains the
27 |           final mapped entries
28 |     allow_missing_mappings : bool
29 |         If True, allow attributes without specified mappings to exist in
30 |         the final dataframe.
31 |         If False, only attributes specified in `remapping_df` will be in
32 |         final dataframe.
33 |     print_info : bool
34 |         If True, print information about remapping.
35 | 
36 |     Returns
37 |     -------
38 |     DataFrame
39 |         dataframe with remapped columns populated
40 |     """
41 |     df = om_df.copy()
42 |     ATTRIBUTE_COL = remapping_col_dict["attribute_col"]
43 |     REMAPPING_COL_FROM = remapping_col_dict["remapping_col_from"]
44 |     REMAPPING_COL_TO = remapping_col_dict["remapping_col_to"]
45 | 
46 |     # Lowercase the attribute column
47 |     df[ATTRIBUTE_COL] = df[ATTRIBUTE_COL].str.lower()
48 | 
49 |     if print_info:
50 |         print("Initial value counts:")
51 |         print(df[ATTRIBUTE_COL].value_counts())
52 | 
53 |     remapping_df[REMAPPING_COL_FROM] = remapping_df[REMAPPING_COL_FROM].str.lower()
54 |     remapping_df[REMAPPING_COL_TO] = remapping_df[REMAPPING_COL_TO].str.lower()
55 | 
56 |     if allow_missing_mappings:
57 |         # Find attributes not considered in mapping
58 |         unique_words_in_data = set(df[ATTRIBUTE_COL].tolist())
59 |         missing_mappings = list(unique_words_in_data
60 |                                 ^ set(remapping_df[REMAPPING_COL_FROM]))
61 |         missing_mappings = [word for word in missing_mappings
62 |                             if word in unique_words_in_data]
63 |         temp_remapping_df = pd.DataFrame()
64 |         temp_remapping_df[REMAPPING_COL_FROM] = missing_mappings
65 |         temp_remapping_df[REMAPPING_COL_TO] = missing_mappings
66 |         remapping_df = pd.concat([remapping_df, temp_remapping_df])
67 | 
68 |     if print_info:
69 |         print("All mappings:\n", remapping_df)
70 |     renamer = dict(
71 |         zip(remapping_df[REMAPPING_COL_FROM], remapping_df[REMAPPING_COL_TO])
72 |     )
73 |     df[ATTRIBUTE_COL] = df[ATTRIBUTE_COL].map(renamer)
74 | 
75 |     if print_info:
76 |         print("Final attribute distribution:")
77 |         print(df[ATTRIBUTE_COL].value_counts())
78 | 
79 |         print(f"Number of nan definitions of {ATTRIBUTE_COL}: "
80 |               f"{sum(df[ATTRIBUTE_COL].isna())}")
81 | 
82 |     return df
83 | 
84 | def remap_words_in_text(om_df, remapping_df, remapping_col_dict):
85 |     """A utility function which remaps a text column of om_df using columns
86 |     within remapping_df.
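
    A minimal sketch of a call (column names illustrative):

    .. code-block:: python

        remapping_df = pd.DataFrame(
            {'in': ['inv', 'invrtr'], 'out': ['inverter', 'inverter']})
        remapping_col_dict = {'data': 'notes',
                              'remapping_col_from': 'in',
                              'remapping_col_to': 'out'}
        om_df = remap_words_in_text(om_df, remapping_df, remapping_col_dict)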
87 | 
88 |     Parameters
89 |     ----------
90 |     om_df : DataFrame
91 |         A pandas dataframe containing O&M note data
92 |     remapping_df : DataFrame
93 |         Holds columns that define the remappings
94 |     remapping_col_dict : dict of {str : str}
95 |         A dictionary that contains the column names that describe how
96 |         remapping is going to be done
97 | 
98 |         - data : string, should be assigned to associated
99 |           column name in om_df which will have its text tokenized and remapped
100 |         - remapping_col_from : string, should be assigned
101 |           to associated column name in remapping_df that matches
102 |           original attribute of interest in om_df
103 |         - remapping_col_to : string, should be assigned to
104 |           associated column name in remapping_df that contains the
105 |           final mapped entries
106 | 
107 |     Returns
108 |     -------
109 |     DataFrame
110 |         dataframe with remapped columns populated
111 |     """
112 |     df = om_df.copy()
113 |     TEXT_COL = remapping_col_dict["data"]
114 |     REMAPPING_COL_FROM = remapping_col_dict["remapping_col_from"]
115 |     REMAPPING_COL_TO = remapping_col_dict["remapping_col_to"]
116 | 
117 |     # drop any values where input value is equal to output value
118 |     remapping_df = remapping_df[remapping_df[REMAPPING_COL_FROM] != remapping_df[REMAPPING_COL_TO]]
119 | 
120 |     # lowercase both sides so that matching is case-insensitive
121 |     remapping_df[REMAPPING_COL_FROM] = remapping_df[REMAPPING_COL_FROM].str.lower()
122 |     remapping_df[REMAPPING_COL_TO] = remapping_df[REMAPPING_COL_TO].str.lower()
123 |     df[TEXT_COL] = df[TEXT_COL].str.lower()
124 | 
125 |     renamer = dict(
126 |         zip(remapping_df[REMAPPING_COL_FROM], remapping_df[REMAPPING_COL_TO])
127 |     )
128 | 
129 |     df[TEXT_COL] = df[TEXT_COL].replace(renamer, regex=True)
130 | 
131 |     return df
-------------------------------------------------------------------------------- /pvops/text2time/__init__.py: --------------------------------------------------------------------------------
1 | """
2 | text2time module
3 | """
4 | 
5 | import pvops.text2time.preprocess
6 | import pvops.text2time.utils
7 | import pvops.text2time.visualize
-------------------------------------------------------------------------------- /pvops/text2time/preprocess.py: --------------------------------------------------------------------------------
1 | """
2 | These functions focus on pre-processing user O&M and production data to
3 | create visualizations of the merged data
4 | """
5 | from datetime import datetime
6 | import pandas as pd
7 | 
8 | 
9 | def data_site_na(pom_df, df_col_dict):
10 |     """
11 |     Drops rows where site-ID is missing (NAN) within either production
12 |     or O&M data.
13 | 
14 |     Parameters
15 |     ----------
16 |     pom_df : DataFrame
17 |         A data frame corresponding to either the production or O&M
18 |         data.
19 |     df_col_dict : dict of {str : str}
20 |         A dictionary that contains the column names associated with
21 |         the input `pom_df`
22 |         and contains at least:
23 | 
24 |         - **siteid** (*string*), should be assigned to column name
25 |           for user's site-ID
26 | 
27 |     Returns
28 |     -------
29 |     pom_df : DataFrame
30 |         An updated version of the input data frame, where rows with
31 |         site-IDs of NAN are dropped.
32 |     addressed : DataFrame
33 |         A data frame showing rows from the input that were removed
34 |         by this function.
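
    A minimal sketch of a call (column name illustrative, matching
    this repository's example data):

    .. code-block:: python

        prod_df, dropped = data_site_na(prod_df, {'siteid': 'randid'})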
35 | """ 36 | 37 | df_site = df_col_dict["siteid"] 38 | 39 | pom_df = pom_df.copy() 40 | 41 | namask = pom_df.loc[:, df_site].isna() 42 | addressed = pom_df.loc[namask] 43 | 44 | pom_df.dropna(subset=[df_site], inplace=True) 45 | 46 | return pom_df, addressed 47 | 48 | 49 | def om_date_convert(om_df, om_col_dict, toffset=0.0): 50 | """ 51 | Converts dates from string format to date time object in O&M 52 | dataframe. 53 | 54 | Parameters 55 | ---------- 56 | om_df : DataFrame 57 | A data frame corresponding to O&M data. 58 | om_col_dict : dict of {str : str} 59 | A dictionary that contains the column names associated with 60 | the O&M data, which consist of at least: 61 | 62 | - **datestart** (*string*), should be assigned to column 63 | name for O&M event start date in om_df 64 | - **dateend** (*string*), should be assigned to column name 65 | for O&M event end date in om_df 66 | 67 | toffset : float 68 | Value that specifies how many hours the O&M data should be 69 | shifted by in case time-stamps in production data and O&M data 70 | don't align as they should 71 | 72 | Returns 73 | ------- 74 | DataFrame 75 | An updated version of the input dataframe, but with 76 | time-stamps converted to localized (time-zone agnostic) 77 | date-time objects. 78 | """ 79 | 80 | om_df = om_df.copy() 81 | 82 | om_date_s = om_col_dict["datestart"] 83 | om_date_e = om_col_dict["dateend"] 84 | 85 | # Converting date-data from string data to DateTime objects 86 | om_df[om_date_s] = pd.to_datetime( 87 | om_df[om_date_s]) + pd.Timedelta(hours=toffset) 88 | om_df[om_date_e] = pd.to_datetime( 89 | om_df[om_date_e]) + pd.Timedelta(hours=toffset) 90 | 91 | # localizing timestamp 92 | om_df[om_date_s] = om_df[om_date_s].dt.tz_localize(None) 93 | om_df[om_date_e] = om_df[om_date_e].dt.tz_localize(None) 94 | 95 | return om_df 96 | 97 | 98 | def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): 99 | """ 100 | Addresses issues with O&M dates where the start 101 | of an event is listed as occurring after its end. These row are 102 | either dropped or the dates are swapped, depending on the user's 103 | preference. 104 | 105 | Parameters 106 | ---------- 107 | om_df : DataFrame 108 | A data frame corresponding to O&M data. 109 | om_col_dict : dict of {str : str} 110 | A dictionary that contains the column names associated with 111 | the O&M data, which consist of at least: 112 | 113 | - **datestart** (*string*), should be assigned to column 114 | name for associated O&M event start date in om_df 115 | - **dateend** (*string*), should be assigned to column name 116 | for associated O&M event end date in om_df 117 | 118 | om_dflag : str 119 | A flag that specifies how to address rows where the start of 120 | an event occurs after its conclusion. A flag of 'drop' will 121 | drop those rows, and a flag of 'swap' swap the two dates for 122 | that row. 123 | 124 | Returns 125 | ------- 126 | om_df : DataFrame 127 | An updated version of the input dataframe, but with O&M data 128 | quality issues addressed to ensure the start of an event 129 | precedes the event end date. 130 | addressed : DataFrame 131 | A data frame showing rows from the input that were addressed 132 | by this function. 
133 | """ 134 | 135 | # assigning dictionary items to local variables for cleaner code 136 | om_date_s = om_col_dict["datestart"] 137 | om_date_e = om_col_dict["dateend"] 138 | 139 | om_df = om_df.copy() 140 | 141 | # addressing cases where Date_EventEnd ocurrs before Date_EventStart 142 | mask = om_df.loc[:, om_date_e] < om_df.loc[:, om_date_s] 143 | addressed = om_df.loc[mask] 144 | # swap dates for rows where End < Start 145 | if any(mask) and om_dflag == "swap": 146 | om_df.loc[mask, [om_date_s, om_date_e]] = om_df.loc[ 147 | mask, [om_date_e, om_date_s] 148 | ].values[0] 149 | # drop rows where End < Start 150 | elif any(mask) and om_dflag == "drop": 151 | om_df = om_df[~mask] 152 | 153 | return om_df, addressed 154 | 155 | 156 | def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): 157 | """ 158 | Addresses issues with O&M dataframe where dates are missing 159 | (NAN). Two operations are performed : 1) rows are dropped 160 | where start of an event is missing and (2) rows where the 161 | conclusion of an event is NAN can either be dropped or marked 162 | with the time at which program is run, depending on the user's 163 | preference. 164 | 165 | Parameters 166 | ---------- 167 | om_df : DataFrame 168 | A data frame corresponding to O&M data. 169 | 170 | om_col_dict : dict of {str : str} 171 | A dictionary that contains the column names associated with 172 | the O&M data, which consist of at least: 173 | 174 | - **datestart** (*string*), should be assigned to column 175 | name for user's O&M event start-date 176 | - **dateend** (*string*), should be assigned to column name 177 | for user's O&M event end-date 178 | 179 | om_dendflag : str 180 | A flag that specifies how to address rows where the conclusion 181 | of an event is missing (NAN). A flag of 'drop' will drop those 182 | rows, and a flag of 'today' will replace the NAN with the time 183 | at which the program is run. Any other value will leave the 184 | rows untouched. 185 | 186 | Returns 187 | ------- 188 | om_df : DataFrame 189 | An updated version of the input dataframe, but with no 190 | missing time-stamps in the O&M data. 191 | 192 | addressed : DataFrame 193 | A data frame showing rows from the input that were addressed 194 | by this function. 195 | """ 196 | 197 | om_df = om_df.copy() 198 | 199 | # assigning dictionary items to local variables for cleaner code 200 | om_date_s = om_col_dict["datestart"] 201 | om_date_e = om_col_dict["dateend"] 202 | 203 | # Dropping rows where om_date_s has values of NA in om_df 204 | mask1 = om_df.loc[:, om_date_s].isna() 205 | om_df.dropna( 206 | subset=[om_date_s], inplace=True 207 | ) # drops rows with om_date_e of NA in om_df 208 | 209 | # Addressing rows with 'om_date_e' values of NA in om_df 210 | mask2 = om_df.loc[:, om_date_e].isna() 211 | mask = mask1 | mask2 212 | addressed = om_df.loc[mask] 213 | 214 | if om_dendflag == "drop": 215 | om_df.dropna( 216 | subset=[om_date_e], inplace=True 217 | ) # drops rows with om_date_e of NA in om_df 218 | elif om_dendflag == "today": 219 | om_df[om_date_e].fillna( 220 | pd.to_datetime(str(datetime.now())[:20]), inplace=True 221 | ) # replacing NANs with today's date 222 | else: 223 | raise SyntaxError('Undefined om_dendflag') 224 | 225 | return om_df, addressed 226 | 227 | 228 | def prod_date_convert(prod_df, prod_col_dict, toffset=0.0): 229 | """Converts dates from string format to datetime format in 230 | production dataframe. 
233 | 
234 |     Parameters
235 |     ----------
236 |     prod_df : DataFrame
237 |         A data frame corresponding to production data.
238 | 
239 |     prod_col_dict : dict of {str : str}
240 |         A dictionary that contains the column names associated with
241 |         the production data, which consist of at least:
242 | 
243 |         - **timestamp** (*string*), should be assigned to user's
244 |           time-stamp column name
245 | 
246 |     toffset : float
247 |         Number of hours by which to shift the production data, for
248 |         cases where the time-stamps in the production data and O&M
249 |         data do not align as they should.
250 | 
251 |     Returns
252 |     -------
253 |     DataFrame
254 |         An updated version of the input dataframe, but with
255 |         time-stamps converted to time-zone-naive date-time
256 |         objects.
257 |     """
258 | 
259 |     # creating a local dataframe so as not to modify the original
260 |     prod_df = prod_df.copy()
261 | 
262 |     prod_ts = prod_col_dict["timestamp"]
263 | 
264 |     # Converting date-data from string data to DateTime objects
265 |     prod_df[prod_ts] = pd.to_datetime(
266 |         prod_df[prod_ts]) + pd.Timedelta(hours=toffset)
267 | 
268 |     # removing time-zone info so the timestamps are time-zone-naive
269 |     prod_df[prod_ts] = prod_df[prod_ts].dt.tz_localize(None)
270 | 
271 |     return prod_df
272 | 
273 | 
274 | def prod_nadate_process(prod_df, prod_col_dict, pnadrop=False):
275 |     """
276 |     Processes rows of the production data frame for missing
277 |     time-stamp info (NaN).
278 | 
279 |     Parameters
280 |     ----------
281 |     prod_df : DataFrame
282 |         A data frame corresponding to production data.
283 | 
284 |     prod_col_dict : dict of {str : str}
285 |         A dictionary that contains the column names associated with
286 |         the production data, which consist of at least:
287 | 
288 |         - **timestamp** (*string*), should be assigned to
289 |           associated time-stamp column name in prod_df
290 | 
291 |     pnadrop : bool
292 |         Boolean flag that determines what to do with rows where the
293 |         time-stamp is missing. A value of `True` will drop these
294 |         rows. Leaving the default value of `False` will identify
295 |         rows with missing time-stamps for the user, but the function
296 |         will output the same input data frame with no modifications.
297 | 
298 |     Returns
299 |     -------
300 |     prod_df : DataFrame
301 |         The output data frame. If ``pnadrop=True``, an updated
302 |         version of the input data frame is output, but rows with
303 |         missing time-stamps are removed. If the default value is
304 |         maintained, the input data frame is output with no
305 |         modifications.
306 | 
307 |     addressed : DataFrame
308 |         A data frame showing rows from the input that were addressed
309 |         or identified by this function.
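    Examples
    --------
    A minimal, illustrative sketch; the column name below is
    hypothetical:

    .. code-block:: python

        prod_col_dict = {"timestamp": "Date"}
        # flag rows with missing time-stamps without altering prod_df
        prod_df, addressed = prod_nadate_process(prod_df, prod_col_dict,
                                                 pnadrop=False)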
310 |     """
311 | 
312 |     # creating a local dataframe so as not to modify the original
313 |     prod_df = prod_df.copy()
314 | 
315 |     prod_ts = prod_col_dict["timestamp"]
316 | 
317 |     # Flagging (and optionally dropping) rows with missing time-stamps
318 |     mask = prod_df.loc[:, prod_ts].isna()
319 |     addressed = prod_df[mask]
320 |     if pnadrop:
321 |         prod_df.dropna(subset=[prod_ts], inplace=True)
322 | 
323 |     return prod_df, addressed
324 | 
-------------------------------------------------------------------------------- /pvops/timeseries/__init__.py: --------------------------------------------------------------------------------
1 | """
2 | timeseries module
3 | """
4 | 
5 | import pvops.timeseries.preprocess
6 | import pvops.timeseries.models
-------------------------------------------------------------------------------- /pvops/timeseries/models/AIT.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import mean_squared_error, r2_score
3 | 
4 | 
5 | class Predictor:
6 |     """
7 |     Template class providing prediction and evaluation utilities
8 |     for pvOps timeseries models.
9 |     """
10 |     def __init__(self):
11 |         super(Predictor, self).__init__()
12 | 
13 |     def apply_additive_polynomial_model(self, model_terms, Xs):
14 |         """Predict energy using a model derived by pvOps.
15 | 
16 |         Parameters
17 |         ----------
18 |         model_terms : list of tuples
19 |             Contain model coefficients and powers. For example,
20 | 
21 |             .. code-block:: python
22 | 
23 |                 [(0.29359785963294494, [1, 0]),
24 |                  (0.754806343190528, [0, 1]),
25 |                  (0.396833207207238, [1, 1]),
26 |                  (-0.0588375219110795, [0, 0])]
27 | 
28 |         Xs : list of arrays
29 |             Input predictor data (e.g., standardized irradiance and
30 |             capacity), ordered to match the powers listed in each
31 |             model term.
32 | 
33 |         Returns
34 |         -------
35 |         Array of predicted energy values
36 |         """
37 |         for idx, (coeff, powers) in enumerate(model_terms):
38 |             # each term is the product of the inputs raised to their powers
39 |             for i, (x, n) in enumerate(zip(Xs, powers)):
40 |                 if i == 0:
41 |                     term = x**n
42 |                 else:
43 |                     term *= x**n
44 |             if idx == 0:
45 |                 energy = coeff * term
46 |             else:
47 |                 energy += coeff * term
48 |         return energy
49 | 
50 |     def evaluate(self, real, pred):
51 |         logrmse = np.log(np.sqrt(mean_squared_error(real, pred)))
52 |         r2 = r2_score(real, pred)
53 |         print(f"The fit has an R-squared of {r2} and a log RMSE of {logrmse}")
54 |         return logrmse, r2
55 | 
56 | 
57 | class Processer:
58 |     def __init__(self):
59 |         super(Processer, self).__init__()
60 |         self._col_scaled_prefix = 'stdscaled_'
61 | 
62 |     def check_data(self, data, prod_col_dict):
63 |         self.do_eval = False
64 |         if 'energyprod' in prod_col_dict:
65 |             if prod_col_dict['energyprod'] in data.columns.tolist():
66 |                 self.do_eval = True
67 | 
68 |         if not self.do_eval:
69 |             print("Because the power production data is not"
70 |                   " passed, the fit will not be evaluated."
71 |                   " Predictions will still be rendered.")
72 | 
73 |     def _apply_transform(self, data,
74 |                          scaler_info):
75 |         data -= scaler_info["mean"]
76 |         data /= scaler_info["scale"]
77 |         return data
78 | 
79 |     def _apply_inverse_transform(self, data,
80 |                                  scaler_info):
81 |         data *= scaler_info["scale"]
82 |         data += scaler_info["mean"]
83 |         return data
84 | 
85 |     def _clean_columns(self, scaler, prod_df, prod_col_dict):
86 |         for k in scaler:
87 |             del prod_df[self._col_scaled_prefix + prod_col_dict[k]]
88 | 
89 | 
90 | # @dev: The 'AIT' class can be one of many models that inherit the
91 | # @dev: Processor and Predictor templates. When adding new models,
92 | # @dev: use the Processor and Predictor classes to hold general
93 | # @dev: functionality while having model-specific nuances in the
94 | # @dev: classes below. The above classes may be placed in a
95 | # @dev: different module if that seems fit.
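# Illustrative sketch (not executed; c0..c3, irr, and cap are
# hypothetical names): given
#     model_terms = [(c1, [1, 0]), (c2, [0, 1]),
#                    (c3, [1, 1]), (c0, [0, 0])]
# and Xs = [irr, cap], apply_additive_polynomial_model evaluates,
# element-wise over the input arrays,
#     energy = c1*irr + c2*cap + c3*irr*cap + c0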
96 | class AIT(Processer, Predictor):
97 |     def __init__(self):
98 |         super(AIT, self).__init__()
99 |         self._load_params()
100 | 
101 |     def _load_params(self):
102 |         self.scaler_highcap = {"irradiance": {"mean": 571.45952959,
103 |                                               "scale": 324.19905495},
104 |                                "dcsize": {"mean": 14916.2339917,
105 |                                           "scale": 20030.00088265},
106 |                                "energyprod": {"mean": 7449.15184666,
107 |                                               "scale": 12054.52533771}
108 |                                }
109 |         self.model_terms_highcap = [(0.29359785963294494, [1, 0]),
110 |                                     (0.754806343190528, [0, 1]),
111 |                                     (0.396833207207238, [1, 1]),
112 |                                     (-0.0588375219110795, [0, 0])]
113 | 
114 |         self.scaler_lowcap = {"irradiance": {"mean": 413.53334101,
115 |                                              "scale": 286.11031612},
116 |                               "dcsize": {"mean": 375.91883522,
117 |                                          "scale": 234.15141671},
118 |                               "energyprod": {"mean": 119.00787546,
119 |                                              "scale": 119.82927847}
120 |                               }
121 |         self.model_terms_lowcap = [(0.6866363032474436, [1, 0]),
122 |                                    (0.6473846301807609, [0, 1]),
123 |                                    (0.41926724219597955, [1, 1]),
124 |                                    (0.06624491753542901, [0, 0])]
125 | 
126 |     def predict_subset(self, prod_df, scaler, model_terms, prod_col_dict):
127 |         self.check_data(prod_df, prod_col_dict)
128 | 
129 |         # 1. Standardize the data using the same scales as training
130 |         for k, d in scaler.items():
131 |             data = prod_df[prod_col_dict[k]].copy()
132 |             scaled_data = self._apply_transform(data, d)
133 |             prod_df[self._col_scaled_prefix + prod_col_dict[k]] = scaled_data
134 | 
135 |         prod_irr = prod_col_dict["irradiance"]
136 |         prod_dcsize = prod_col_dict["dcsize"]
137 | 
138 |         irr = prod_df[self._col_scaled_prefix + prod_irr].values
139 |         capacity = prod_df[self._col_scaled_prefix + prod_dcsize].values
140 |         Xs = [irr, capacity]
141 | 
142 |         # 2. Predict energy
143 |         predicted_energy = self.apply_additive_polynomial_model(model_terms,
144 |                                                                 Xs)
145 |         # 3. Rescale predictions back to energy units
146 |         predicted_rescaled_energy = self._apply_inverse_transform(predicted_energy,
147 |                                                                   scaler['energyprod'])
148 | 
149 |         # 4. Evaluate, if production data was provided
150 |         if self.do_eval:
151 |             self.evaluate(prod_df[prod_col_dict["energyprod"]].values,
152 |                           predicted_rescaled_energy)
153 |         return predicted_rescaled_energy
154 | 
155 |     def predict(self, prod_df, prod_col_dict):
156 | 
157 |         # High-capacity systems
158 |         high_cap_mask = prod_df[prod_col_dict['dcsize']] > 1000
159 |         if sum(high_cap_mask) > 0:
160 |             predicted = self.predict_subset(prod_df.loc[high_cap_mask, :],
161 |                                             self.scaler_highcap,
162 |                                             self.model_terms_highcap,
163 |                                             prod_col_dict)
164 |             prod_df.loc[high_cap_mask, prod_col_dict["baseline"]] = predicted
165 | 
166 |         # Low-capacity systems
167 |         low_cap_mask = prod_df[prod_col_dict['dcsize']] <= 1000
168 |         if sum(low_cap_mask) > 0:
169 |             predicted = self.predict_subset(prod_df.loc[low_cap_mask, :],
170 |                                             self.scaler_lowcap,
171 |                                             self.model_terms_lowcap,
172 |                                             prod_col_dict)
173 |             prod_df.loc[low_cap_mask, prod_col_dict["baseline"]] = predicted
174 |         return prod_df
175 | 
176 | 
177 | def AIT_calc(prod_df, prod_col_dict):
178 |     """
179 |     Calculates expected energy from measured irradiance, using a
180 |     regression model trained on field data.
181 |     Plane-of-array irradiance is recommended when using the
182 |     pre-trained AIT model.
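    Schematically (a sketch of the internals; see the ``AIT`` class
    above for the exact pre-trained coefficients), the model evaluates
    an additive polynomial on standardized irradiance ``G`` and DC
    capacity ``P``, ``E_scaled = c1*G + c2*P + c3*G*P + c0``, with
    separate coefficient sets for systems above and below a capacity
    of 1000 (in the units of the ``dcsize`` column); the result is
    then rescaled to energy units.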
183 | 
184 |     Parameters
185 |     ----------
186 |     prod_df : DataFrame
187 |         A data frame corresponding to the production data
188 | 
189 |     prod_col_dict : dict of {str : str}
190 |         A dictionary that contains the column names relevant
191 |         for the production data
192 | 
193 |         - **irradiance** (*string*), should be assigned to
194 |           irradiance column name in prod_df, where data
195 |           should be in [W/m^2]
196 |         - **dcsize** (*string*), should be assigned to
197 |           preferred column name for site capacity in prod_df
198 |         - **energyprod** (*string*), should be assigned to
199 |           the column name holding the power or energy production.
200 |           If this is passed, an evaluation will be provided.
201 |         - **baseline** (*string*), should be assigned to
202 |           preferred column name to capture the calculations
203 |           in prod_df
204 | 
205 |     Example
206 |     -------
207 | 
208 |     .. code-block:: python
209 | 
210 |         production_col_dict = {'irradiance': 'irrad_poa_Wm2',
211 |                                'ambient_temperature': 'temp_amb_C',
212 |                                'dcsize': 'capacity_DC_kW',
213 |                                'energyprod': 'energy_generated_kWh',
214 |                                'baseline': 'predicted'
215 |                                }
216 |         data = AIT_calc(data, production_col_dict)
217 | 
218 | 
219 |     Returns
220 |     -------
221 |     DataFrame
222 |         A data frame for production data with a new column (named
223 |         per ``prod_col_dict['baseline']``) holding the predicted
224 |         energy
225 |     """
226 |     prod_df = prod_df.copy()
227 |     model = AIT()
228 |     prod_df = model.predict(prod_df, prod_col_dict)
229 |     return prod_df
230 | 
-------------------------------------------------------------------------------- /pvops/timeseries/models/__init__.py: --------------------------------------------------------------------------------
1 | """
2 | timeseries models
3 | """
4 | 
5 | import pvops.timeseries.models.AIT
6 | import pvops.timeseries.models.iec
7 | import pvops.timeseries.models.linear
8 | import pvops.timeseries.models.survival
-------------------------------------------------------------------------------- /pvops/timeseries/models/iec.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def iec_calc(prod_df, prod_col_dict, meta_df, meta_col_dict,
4 |              gi_ref=1000.0):
5 |     """Calculates expected energy using measured irradiance
6 |     based on IEC calculations.
7 | 
8 |     Parameters
9 |     ----------
10 |     prod_df : DataFrame
11 |         A data frame corresponding to the production data
12 |         after having been processed by the perf_om_NA_qc
13 |         and overlappingDFs functions. This data frame needs
14 |         at least the columns specified in prod_col_dict.
15 | 
16 |     prod_col_dict : dict of {str : str}
17 |         A dictionary that contains the column names relevant
18 |         for the production data
19 | 
20 |         - **siteid** (*string*), should be assigned to
21 |           site-ID column name in prod_df
22 |         - **timestamp** (*string*), should be assigned to
23 |           time-stamp column name in prod_df
24 |         - **irradiance** (*string*), **plane-of-array**. Should be assigned to
25 |           irradiance column name in prod_df, where data
26 |           should be in [W/m^2].
27 |         - **baseline** (*string*), should be assigned to
28 |           preferred column name to capture IEC calculations
29 |           in prod_df
30 |         - **dcsize** (*string*), should be assigned to
31 |           preferred column name for site capacity in prod_df
32 | 
33 |     meta_df : DataFrame
34 |         A data frame corresponding to site metadata.
35 |         At the least, the columns in meta_col_dict must be
36 |         present.
37 | 
38 |     meta_col_dict : dict of {str : str}
39 |         A dictionary that contains the column names relevant
40 |         for the meta-data
41 | 
42 |         - **siteid** (*string*), should be assigned to site-ID
43 |           column name
44 |         - **dcsize** (*string*), should be assigned to
45 |           column name corresponding to site capacity, where
46 |           data is in [kW]
47 | 
48 |     gi_ref : float
49 |         Reference plane-of-array irradiance in [W/m^2] at
50 |         which a site capacity is determined (default value
51 |         is 1000 [W/m^2])
52 | 
53 |     Returns
54 |     -------
55 |     DataFrame
56 |         A data frame for production data with a new column
57 |         (named per ``prod_col_dict['baseline']``) holding the
58 |         predicted energy calculated based on the IEC standard
59 |         using measured irradiance data
60 | 
61 |     """
62 |     # assigning dictionary items to local variables for cleaner code
63 |     prod_site = prod_col_dict["siteid"]
64 |     prod_ts = prod_col_dict["timestamp"]
65 |     prod_irr = prod_col_dict["irradiance"]
66 |     prod_iec = prod_col_dict["baseline"]
67 |     prod_dcsize = prod_col_dict["dcsize"]
68 | 
69 |     meta_site = meta_col_dict["siteid"]
70 |     meta_size = meta_col_dict["dcsize"]
71 | 
72 |     # creating local dataframes to not modify originals
73 |     prod_df = prod_df.copy()
74 |     meta_df = meta_df.copy()
75 | 
76 |     # setting index for metadata for alignment to production data
77 |     meta_df = meta_df.set_index(meta_site)
78 | 
79 |     # Creating new column in production data corresponding to site size [kW]
80 |     prod_df[prod_dcsize] = prod_df.loc[:, prod_site].apply(
81 |         lambda x: meta_df.loc[x, meta_size]
82 |     )
83 | 
84 |     # IEC calculation per site:
85 |     #     E [kWh] = capacity [kW] * irradiance [W/m^2] * tstep [h] / gi_ref [W/m^2]
86 |     for sid in prod_df.loc[:, prod_site].unique():
87 |         mask = prod_df.loc[:, prod_site] == sid
88 |         # infer the (assumed uniform) time step from the first two
89 |         # time-stamps of the site, which requires at least two rows
90 |         tstep = prod_df.loc[mask, prod_ts].iloc[1] - \
91 |             prod_df.loc[mask, prod_ts].iloc[0]
92 |         tstep = tstep / np.timedelta64(
93 |             1, "h"
94 |         )  # Converting the time-step to float (representing hours) to
95 |         # arrive at kWh for the IEC energy calculation
96 | 
97 |         prod_df.loc[mask, prod_iec] = (
98 |             prod_df.loc[mask, prod_dcsize]
99 |             * prod_df.loc[mask, prod_irr]
100 |             * tstep
101 |             / gi_ref
102 |         )
103 |     prod_df.drop(columns=[prod_dcsize], inplace=True)
104 | 
105 |     return prod_df
-------------------------------------------------------------------------------- /pvops/timeseries/models/survival.py: --------------------------------------------------------------------------------
1 | from scipy import stats
2 | from sksurv.nonparametric import kaplan_meier_estimator
3 | 
4 | def fit_survival_function(df, col_dict, method):
5 |     """
6 |     Calculate the survival function for different groups in a DataFrame using specified methods.
7 | 
8 |     This function computes the survival function for each unique group in the input DataFrame
9 |     based on the specified method. It supports the Kaplan-Meier estimator and Weibull distribution
10 |     fitting for survival analysis. The Kaplan-Meier estimator is a non-parametric statistic,
11 |     while the Weibull distribution is a parametric model.
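    A minimal usage sketch (the column names here are hypothetical,
    chosen only for illustration):

    .. code-block:: python

        col_dict = {"group_by": "site",
                    "time_to_fail": "days_to_fail",
                    "was_observed": "observed"}
        results = fit_survival_function(df, col_dict, method="kaplan-meier")
        # `results` maps each site to its estimated survival curve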
12 | 
13 |     Parameters
14 |     ----------
15 |     df : pandas.DataFrame
16 |         A DataFrame containing failure data with at least the three columns specified in `col_dict`:
17 |         one for grouping, one for the time to failure, and one indicating whether the failure was observed.
18 | 
19 |     col_dict : dict of {str : str}
20 |         A dictionary that contains the column names relevant for survival analysis
21 | 
22 |         - **group_by** (*string*), should be assigned to the column to group by
23 |         - **time_to_fail** (*string*), should be assigned to the column containing the time until failure
24 |         - **was_observed** (*string*), should be assigned to the column indicating whether the failure was observed
25 | 
26 |     method : str
27 |         The method to use for calculating the survival function. Must be one of:
28 | 
29 |         - 'kaplan-meier': Uses the Kaplan-Meier estimator for survival analysis.
30 |         - 'weibull': Fits a Weibull distribution to the data.
31 | 
32 |     Returns
33 |     -------
34 |     dict
35 |         A dictionary keyed by group, where each group's value is itself a dictionary:
36 | 
37 |         - If `method` is `'kaplan-meier'`, it contains keys `'times'`, `'fail_prob'`, and `'conf_int'`, which denote the event times, the Kaplan-Meier probability estimates, and confidence intervals on those estimates.
38 |         - If `method` is `'weibull'`, it contains keys `'shape'`, `'scale'`, and `'distribution'`, which denote the shape parameter, scale parameter, and corresponding fitted `stats.weibull_min` distribution.
39 |     """
40 | 
41 |     implemented_methods = ['kaplan-meier', 'weibull']
42 |     if method not in implemented_methods:
43 |         raise ValueError(f'method argument must be one of {implemented_methods}, got {method}')
44 | 
45 |     df = df.reset_index()
46 | 
47 |     group_by = col_dict['group_by']
48 |     time_to_fail = col_dict['time_to_fail']
49 |     was_observed = col_dict['was_observed']
50 | 
51 |     results = {}
52 | 
53 |     unique_group_by = df[group_by].unique()
54 |     for group in unique_group_by:
55 |         group_df = df[df[group_by] == group]
56 | 
57 |         if method == 'kaplan-meier':
58 |             # kaplan_meier_estimator returns event times, the estimated
59 |             # probabilities, and (with conf_type set) their confidence intervals
60 |             km_result = kaplan_meier_estimator(group_df[was_observed], group_df[time_to_fail], conf_type='log-log')
61 |             group_result = {'times': km_result[0], 'fail_prob': km_result[1], 'conf_int': km_result[2]}
62 | 
63 |         elif method == 'weibull':
64 |             # right-censored observations are those whose failure was not observed
65 |             uncensored_times = group_df[group_df[was_observed]][time_to_fail]
66 |             censored_times = group_df[~group_df[was_observed]][time_to_fail]
67 |             data = stats.CensoredData(uncensored=uncensored_times, right=censored_times)
68 |             shape, _, scale = stats.weibull_min.fit(data, floc=0)
69 |             group_result = {'shape': shape, 'scale': scale, 'distribution': stats.weibull_min(c=shape, scale=scale)}
70 | 
71 |         results[group] = group_result
72 | 
73 |     return results
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | # Required
2 | pandas
3 | numpy
4 | scipy
5 | scikit-learn
6 | scikit-survival
7 | datefinder
8 | matplotlib
9 | seaborn
10 | plotly
11 | gensim
12 | networkx
13 | pvlib
14 | pvanalytics
15 | timezonefinder
16 | pyDOE
17 | tensorflow
18 | tqdm
19 | 
20 | # Testing
21 | pytest
22 | 
23 | # Docs
24 | sphinx==7.2.6
25 | coverage==7.2.3
26 | ipykernel==6.22.0
27 | nbconvert==7.3.1
28 | nbformat==5.8.0
29 | nbsphinx==0.9.3
30 | nbsphinx-link==1.3.0
31 | sphinx-copybutton==0.5.2
32 | sphinxcontrib-bibtex==2.5.0
33 | sphinx_rtd_theme==1.3.0
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | import os
2 | import re
3 | 4 | try: 5 | from setuptools import setup, find_packages 6 | except ImportError: 7 | raise RuntimeError('setuptools is required') 8 | 9 | DESCRIPTION = ('pvops is a python library for the analysis of ' + 10 | 'field collected operational data for photovoltaic systems.') 11 | 12 | LONG_DESCRIPTION = """ 13 | pvops is a python package for PV operators & researchers. It is 14 | a collection of functions for working with text-based data 15 | from photovoltaic power systems. The library includes functions for 16 | processing text data as well as fusion of the text information with 17 | time series data for visualization of contextual details for data 18 | analysis. 19 | 20 | Documentation: https://pvops.readthedocs.io/en/latest/index.html 21 | 22 | Source code: https://github.com/sandialabs/pvOps 23 | 24 | """ 25 | 26 | DISTNAME = 'pvops' 27 | MAINTAINER = "Thushara Gunda" 28 | MAINTAINER_EMAIL = 'tgunda@sandia.gov' 29 | AUTHOR = 'pvOps Developers' 30 | LICENSE = 'BSD 3-Clause License' 31 | URL = 'https://github.com/sandialabs/pvops' 32 | 33 | TESTS_REQUIRE = [ 34 | 'pytest', 35 | ] 36 | 37 | INSTALL_REQUIRES = [ 38 | 'numpy', 39 | 'pandas', 40 | 'scipy', 41 | 'scikit-learn', 42 | 'scikit-survival', 43 | 'datefinder', 44 | 'matplotlib', 45 | 'seaborn', 46 | 'plotly', 47 | 'gensim', 48 | 'networkx', 49 | 'pvlib', 50 | 'pvanalytics', 51 | 'timezonefinder', 52 | 'tqdm', 53 | ] 54 | 55 | DOCS_REQUIRE = [ 56 | 'sphinx==7.2.6', 57 | 'coverage==7.2.3', 58 | 'ipykernel==6.22.0', 59 | 'nbconvert==7.3.1', 60 | 'nbformat==5.8.0', 61 | 'nbsphinx==0.9.3', 62 | 'nbsphinx-link==1.3.0', 63 | 'sphinx-copybutton==0.5.2', 64 | 'sphinxcontrib-bibtex==2.5.0', 65 | 'sphinx_rtd_theme==1.3.0', 66 | ] 67 | 68 | IV_REQUIRE = [ 69 | 'keras', 70 | 'tensorflow;python_version<"3.13"', 71 | 'pyDOE', 72 | ] 73 | 74 | EXTRAS_REQUIRE = { 75 | 'iv': IV_REQUIRE, 76 | 'test': TESTS_REQUIRE, 77 | 'doc': DOCS_REQUIRE 78 | } 79 | 80 | EXTRAS_REQUIRE['all'] = sorted(set(sum(EXTRAS_REQUIRE.values(), []))) 81 | 82 | SETUP_REQUIRES = ['setuptools_scm'] 83 | 84 | CLASSIFIERS = [ 85 | 'Development Status :: 2 - Pre-Alpha', 86 | 'Operating System :: OS Independent', 87 | 'Intended Audience :: Science/Research', 88 | 'Programming Language :: Python :: 3', 89 | 'Topic :: Scientific/Engineering' 90 | ] 91 | 92 | PACKAGES = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]) 93 | 94 | # get version from __init__.py 95 | file_dir = os.path.abspath(os.path.dirname(__file__)) 96 | with open(os.path.join(file_dir, 'pvops', '__init__.py')) as f: 97 | version_file = f.read() 98 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", 99 | version_file, re.M) 100 | if version_match: 101 | VERSION = version_match.group(1) 102 | else: 103 | raise RuntimeError("Unable to find version string.") 104 | 105 | setup( 106 | name=DISTNAME, 107 | use_scm_version=True, 108 | packages=PACKAGES, 109 | install_requires=INSTALL_REQUIRES, 110 | extras_require=EXTRAS_REQUIRE, 111 | tests_require=TESTS_REQUIRE, 112 | setup_requires=SETUP_REQUIRES, 113 | ext_modules=[], 114 | description=DESCRIPTION, 115 | long_description=LONG_DESCRIPTION, 116 | author=AUTHOR, 117 | maintainer=MAINTAINER, 118 | maintainer_email=MAINTAINER_EMAIL, 119 | license=LICENSE, 120 | classifiers=CLASSIFIERS, 121 | url=URL, 122 | version=VERSION 123 | ) 124 | -------------------------------------------------------------------------------- /tutorials/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/tutorials/__init__.py -------------------------------------------------------------------------------- /tutorials/assets/diode_param_extractor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/tutorials/assets/diode_param_extractor.png -------------------------------------------------------------------------------- /tutorials/example_data/example_metadata2.csv: -------------------------------------------------------------------------------- 1 | randid,DC_Size_kW,COD,latitude,longitude 2 | R23,2500,10/20/2013,-80,-35 3 | R27,475,10/21/2017,-81,-36 4 | -------------------------------------------------------------------------------- /tutorials/example_data/example_om_data.csv: -------------------------------------------------------------------------------- 1 | randid,Cause,ImpactLevel,CompletionDesc,CompletionActivity,Asset,Date_EventStart,Date_EventEnd 2 | 27,019 - Unplanned outage/derate. Hurricane Florence,Production Outage,hurricane florence outages/response. complete post-storm inspection form and upload to the work order. perform site inspection to assess any damage sustained from hurricane florence. site went offline around 1000 et on 14-sep. loss of ac voltage can be verified. update 16- sep: site came back online around 5pm 16-sep update 18-sep cb stuck at -74.21 amps. 019 - unplanned outage/derate. inspection complete. no damage. site operational.. techdispatched: yes,14 - Self Resolved,Facility,9/14/2018 10:00,9/18/2018 17:00 3 | 27,0000 - Unknown. ,Underperformance,hurricane response. perform site inspection to assess any damage sustained from hurricane 0000 - unknown. post hurricane inspection.. techdispatched: yes,09 - Inspection,Facility,10/12/2018 9:00,10/13/2018 17:00 4 | -------------------------------------------------------------------------------- /tutorials/example_data/example_om_data2.csv: -------------------------------------------------------------------------------- 1 | randid,Asset,date_start,date_end,WONumber,WOType,GeneralDesc 2 | ,Inverter,5/2/2018 12:00,5/17/2018 16:00,100,Corrective,"Inverter 1.1 Contactor 7, Inverter 1.2 Contactors 1 and 4 suspected DC production issues" 3 | R23,Facility,5/19/2018 15:44,5/19/2018 13:04,101,Preventive,Site offline due to grid disturbance 4 | R23,Facility,6/15/2018 6:46,6/15/2018 10:30,102,Corrective,Plant trip due to grid disturbance 5 | R23,Facility,6/18/2018 11:20,6/18/2018 14:03,103,Corrective,Site trip due to cause grid disturbance 6 | R23,Facility,7/21/2018 4:45,7/21/2018 13:15,104,Vegetation,Site tripped due to grid disturbance 7 | R23,Inverter,7/21/2018 13:16,7/21/2018 14:25,105,Corrective,Inverter failed to start following plant trip 8 | R23,Inverter,7/25/2018 14:20,7/25/2018 16:40,106,Corrective,inverter offline due to high ambient temp fault 9 | R23,Inverter,8/1/2018 11:45,,107,Corrective,Inverter major underperformance 10 | R23,Facility,8/2/2018 1:05,8/2/2018 9:28,108,Corrective,Site trip due to grid disturbance 11 | R27,Facility,9/14/2018 10:00,9/16/2018 16:00,1,corrective,hurricane florence outages/response. complete post-storm inspection form and upload to the work order. perform site inspection to assess any damage sustained from hurricane florence. site went offline around 1000 et on 14-sep. loss of ac voltage can be verified. 
update 16- sep: site came back online around 5pm 16-sep update 18-sep cb stuck at -74.21 amps. 019 - unplanned outage/derate. inspection complete. no damage. site operational.. techdispatched: yes 12 | R27,Facility,9/24/2018 10:00,9/16/2018 17:00,2,vegetation,Vegetation maintenance activities were performed 13 | R27,Other,9/19/2018 7:00,10/11/2018 20:00,3,corrective,hurricane response. perform site inspection to assess any damage sustained from hurricane 0000 - unknown. post hurricane inspection.. techdispatched: yes 14 | R27,Facility,10/13/2018 12:00,10/13/2018 17:00,4,preventive,Monthly visual inspection 15 | R27,other,10/14/2018 11:00,,5,preventive,Monthly visual inspection 16 | -------------------------------------------------------------------------------- /tutorials/example_data/mappings_cause.csv: -------------------------------------------------------------------------------- 1 | ,in,out_ 2 | 0,01 - Replace,Replacement 3 | 1,02 - Repair,Repair 4 | 2,03 - Adjust,Repair 5 | 3,03 - Modify,Repair 6 | 4,04 - Adjust,Repair 7 | 5,05 - Refit(Reset),Repair 8 | 6,06 - Check,"Troubleshooting ""Status""" 9 | 7,07 - Service,"Troubleshooting ""Status""" 10 | 8,08 - Test,"Troubleshooting ""Status""" 11 | 9,09 - Inspection,"Troubleshooting ""Status""" 12 | 10,09-Inspection,"Troubleshooting ""Status""" 13 | 11,10 - Overhaul,Repair 14 | 12,11 - Combination,Misc. 15 | 13,12 - Other,Misc. 16 | 14,13 - Remote Reset,Troubleshooting 17 | 15,14 - Self Resolved,None 18 | 16,15 - EPC Resolved,"Troubleshooting ""Status""" 19 | 17,Clear faults,"Troubleshooting ""Reset""" 20 | 18,Cleared faults through GUI,"Troubleshooting ""Reset""" 21 | 19,Cleared through GUI,"Troubleshooting ""Reset""" 22 | 20,Curtailment Lift,None 23 | 21,DEPCOM warranty service work,"Troubleshooting ""Status""" 24 | 22,Fault cleared manually,"Troubleshooting ""Reset""" 25 | 23,Faults cleared,"Troubleshooting ""Reset""" 26 | 24,Faults cleared through GUI with PC,"Troubleshooting ""Reset""" 27 | 25,Field Wiring Repair,Repair 28 | 26,Fuse Replacement,Replacement 29 | 27,"Ground fault was isolated at combiner box 01. All strings at CB 01 were disconnected and fuses were removed. Combiner box was placed in ""off"" position and locked out. DC disconnect for CB's 01 & 02 at inverter 1 was opened before power cycling the inv.",Repair 30 | 28,Hardware Adjustment,Repair 31 | 29,Hardware Replacement,Replacement 32 | 30,No Action Required,None 33 | 31,Not Fixed - Follow-up,Misc. 34 | 32,Other,Misc. 35 | 33,Other Site Work,Misc. 
36 | 34,Power Cycle,"Troubleshooting ""Status""" 37 | 35,Preventative Maintenance,"Troubleshooting ""Status""" 38 | 36,Problem Self-Resolved,None 39 | 37,Reclose,Troubleshooting 40 | 38,Remote Reset,Troubleshooting 41 | 39,Remote Troubleshooting,Troubleshooting 42 | 40,Repair work on combiner box was sub' out to electrical contractors Anderson and Wood,Repair 43 | 41,Replace/Repair,Replacement 44 | 42,Replacement,Replacement 45 | 43,Software Change/Update,Software 46 | 44,Software/Firmware Adjustment,Software 47 | 45,Software/Firmware Update,Software 48 | 46,Unknown,Missing 49 | -------------------------------------------------------------------------------- /tutorials/example_data/mappings_equipment.csv: -------------------------------------------------------------------------------- 1 | in,out_ 2 | combiner,combiner 3 | comb,combiner 4 | cb,combiner 5 | battery,battery 6 | bess,battery 7 | inverter,inverter 8 | invert,inverter 9 | inv,inverter 10 | met,met 11 | meter,meter 12 | module,module 13 | mod,module 14 | recloser,recloser 15 | reclose,recloser 16 | relay,relay 17 | substation,substation 18 | switchgear,switchgear 19 | switch,switchgear 20 | tracker,tracker 21 | transformer,transformer 22 | xfmr,transformer 23 | wiring,wiring 24 | wire,wiring 25 | wires,wiring -------------------------------------------------------------------------------- /tutorials/example_data/mappings_pv_terms.csv: -------------------------------------------------------------------------------- 1 | in,out_ 2 | comm,communication 3 | energy,energy 4 | kwh,energy 5 | mwh,energy 6 | grid,grid 7 | curtailment,grid 8 | curtail,grid 9 | poi,grid 10 | offline,outage 11 | solar,solar 12 | pv,solar 13 | photovoltaic,solar 14 | system,system 15 | site,system 16 | farm,system 17 | project,system 18 | sma,make_model 19 | cm,corrective_maintence 20 | pm,preventative_maintence -------------------------------------------------------------------------------- /tutorials/example_data/remappings_asset.csv: -------------------------------------------------------------------------------- 1 | in,out_ 2 | inverter,inverter 3 | recloser,recloser 4 | transformer,transformer 5 | switchgear,switchgear 6 | combiner,combiner 7 | substation,substation 8 | facility,facility 9 | energy meter,energy meter 10 | relay,relay 11 | met station,met station 12 | tracker,tracker 13 | module,module 14 | DC Disconnect,combiner 15 | Recombiner,combiner 16 | Feeder (Dip Pole/Array),wiring 17 | Ground-Mount PV System,module 18 | Weather Station,met station 19 | Pyranometer,met station 20 | Temperature sensor,met station 21 | Met station battery,met station 22 | Anemometer,met station 23 | Reference cell,met station 24 | Relative humidity sensor,met station 25 | Meter,energy meter 26 | Energy Storage/Battery,energy storage 27 | AC Combiner,combiner 28 | Battery (Solar + storage facilities),energy storage 29 | Block,transformer 30 | Central Inverter,inverter 31 | Circuit,wiring 32 | Combiner Box,combiner 33 | DAS System,facility 34 | DC Combiner,combiner 35 | Data logger,facility 36 | Disconnect switch,relay 37 | Inverter Module,inverter 38 | Inverter module,inverter 39 | Inverter/String Inverter,inverter 40 | Modules,module 41 | Other,other 42 | PCS Transformer,transformer 43 | POI/Medium Voltage,other 44 | Pad,transformer 45 | Plant,other 46 | Point of Interconnection,other 47 | Racking/Trackers,tracker 48 | Rooftop PV System,other 49 | Site,other 50 | String,other 51 | String Inverter,inverter 52 | Subarray,other 53 | Summary,other 54 | Tracker control 
unit,tracker 55 | Tracking System,tracker 56 | -------------------------------------------------------------------------------- /tutorials/example_data/remappings_response.csv: -------------------------------------------------------------------------------- 1 | in,out_ 2 | Remote Troubleshooting,Remote troubleshotting/Reset 3 | Remote Reset,Remote troubleshotting/Reset 4 | 13 - Remote Reset,Remote troubleshotting/Reset 5 | Power Cycle,Troubleshoot/Reset 6 | 07 - Service,Troubleshoot/Reset 7 | 09 - Inspection,Troubleshoot/Reset 8 | 09-Inspection,Troubleshoot/Reset 9 | 06 - Check,Troubleshoot/Reset 10 | 08 - Test,Troubleshoot/Reset 11 | Clear faults,Troubleshoot/Reset 12 | Faults cleared,Troubleshoot/Reset 13 | Fault cleared manually,Troubleshoot/Reset 14 | No Action Required,Self-Resolved 15 | Problem Self-Resolved,Self-Resolved 16 | 14 - Self Resolved,Self-Resolved 17 | Hardware Replacement,Replacement 18 | Replacement,Replacement 19 | 01 - Replace,Replacement 20 | Fuse Replacement,Replacement 21 | Replace/Repair,Replacement 22 | -------------------------------------------------------------------------------- /tutorials/tutorial_text_classify_regex_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Adding keyword labels to O&M data\n", 9 | "This notebook demonstrates the use of the `pvops.classify.get_attributes_from_keywords` module for adding asset labels based off O&M notes." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "from sklearn.metrics import accuracy_score\n", 20 | "\n", 21 | "from pvops.text import utils, preprocess\n", 22 | "from pvops.text.classify import get_attributes_from_keywords\n", 23 | "from pvops.text.visualize import visualize_classification_confusion_matrix" 24 | ] 25 | }, 26 | { 27 | "attachments": {}, 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Step 0: Get sample data, remap assets" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# pull in sample data and remap assets for ease of comparison\n", 41 | "\n", 42 | "om_df = pd.read_csv('example_data/example_ML_ticket_data.csv')\n", 43 | "col_dict = {\n", 44 | " \"data\" : \"CompletionDesc\",\n", 45 | " \"eventstart\" : \"Date_EventStart\",\n", 46 | " \"save_data_column\" : \"processed_data\",\n", 47 | " \"save_date_column\" : \"processed_date\",\n", 48 | " \"attribute_col\" : \"Asset\",\n", 49 | " \"predicted_col\" : \"Keyword_Asset\",\n", 50 | " \"remapping_col_from\": \"in\",\n", 51 | " \"remapping_col_to\": \"out_\"\n", 52 | "}\n", 53 | "\n", 54 | "# remap assets\n", 55 | "remapping_df = pd.read_csv('example_data/remappings_asset.csv')\n", 56 | "remapping_df['out_'] = remapping_df['out_'].replace({'met station': 'met',\n", 57 | " 'energy storage': 'battery',\n", 58 | " 'energy meter': 'meter'})\n", 59 | "om_df = utils.remap_attributes(om_df, remapping_df, col_dict, allow_missing_mappings=True)\n", 60 | "om_df.head()" 61 | ] 62 | }, 63 | { 64 | "attachments": {}, 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "# Step 1: Text preprocessing" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# 
preprocessing steps\n", 78 | "om_df[col_dict['attribute_col']] = om_df.apply(lambda row: row[col_dict['attribute_col']].lower(), axis=1)\n", 79 | "om_df = preprocess.preprocessor(om_df, lst_stopwords=[], col_dict=col_dict, print_info=False, extract_dates_only=False)\n", 80 | "\n", 81 | "DATA_COL = col_dict['data']\n", 82 | "om_df[DATA_COL] = om_df['processed_data']\n", 83 | "\n", 84 | "# replace terms\n", 85 | "equipment_df = pd.read_csv('~/pvOps/examples/example_data/mappings_equipment.csv')\n", 86 | "pv_terms_df = pd.read_csv('~/pvOps/examples/example_data/mappings_pv_terms.csv')\n", 87 | "pv_reference_df = pd.concat([equipment_df, pv_terms_df])\n", 88 | "om_df = utils.remap_words_in_text(om_df=om_df, remapping_df=pv_reference_df, remapping_col_dict=col_dict)\n", 89 | "\n", 90 | "om_df.head()" 91 | ] 92 | }, 93 | { 94 | "attachments": {}, 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "# Step 2: Search for keywords to use as labels" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# add asset labels from keyword reference dict\n", 108 | "om_df = get_attributes_from_keywords(om_df=om_df,\n", 109 | " col_dict=col_dict,\n", 110 | " reference_df=equipment_df)\n", 111 | "om_df.head()" 112 | ] 113 | }, 114 | { 115 | "attachments": {}, 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# Step 3: Metrics" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# get accuracy measures and count metrics\n", 129 | "PREDICT_COL = col_dict['predicted_col']\n", 130 | "LABEL_COL = col_dict['attribute_col']\n", 131 | "\n", 132 | "# entries with some keyword over interest, over all entries\n", 133 | "label_count = om_df[PREDICT_COL].count() / len(om_df)\n", 134 | "\n", 135 | "# replace 'Other' values with 'Unknown'\n", 136 | "om_df[LABEL_COL] = om_df[LABEL_COL].replace('other', 'unknown')\n", 137 | "# replace NaN values to use accuracy score\n", 138 | "om_df[[LABEL_COL, PREDICT_COL]] = om_df[[LABEL_COL, PREDICT_COL]].fillna('unknown')\n", 139 | "acc_score = accuracy_score(y_true=om_df[LABEL_COL], y_pred=om_df[PREDICT_COL])\n", 140 | "\n", 141 | "msg = f'{label_count:.2%} of entries had a keyword of interest, with {acc_score:.2%} accuracy.'\n", 142 | "print(msg)" 143 | ] 144 | }, 145 | { 146 | "attachments": {}, 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "# Step 4: Visualization" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# plot confusion matrix\n", 160 | "title = 'Confusion Matrix of Actual and Predicted Asset Labels'\n", 161 | "visualize_classification_confusion_matrix(om_df, col_dict, title)" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.7.5" 182 | }, 183 | "orig_nbformat": 4 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | 
--------------------------------------------------------------------------------