├── .coveragerc
├── .github
│   ├── pull_request_template.md
│   └── workflows
│       ├── builddocs.yml
│       ├── lint-and-test.yml
│       └── pythonpublish.yml
├── .gitignore
├── .readthedocs.yml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── _static
│   │   └── css
│   │       └── my_style.css
│   ├── assets
│   │   ├── diode_param_extractor.png
│   │   ├── pvops_emblem.svg
│   │   ├── pvops_full_logo.svg
│   │   ├── vis_attr_connect_example.svg
│   │   ├── vis_attr_timeseries_example.svg
│   │   ├── vis_cat_scatter_example.svg
│   │   ├── vis_cluster_entropy_example.svg
│   │   ├── vis_counts_example.svg
│   │   ├── vis_doc_clusters_example.svg
│   │   ├── vis_freq_plot_example.svg
│   │   └── vis_overlap_example.png
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── pages
│   │   ├── abbreviations.rst
│   │   ├── apidoc
│   │   │   ├── iv.rst
│   │   │   ├── text.rst
│   │   │   ├── text2time.rst
│   │   │   └── timeseries.rst
│   │   ├── contributing.rst
│   │   ├── development.rst
│   │   ├── installation.rst
│   │   ├── moduleguides
│   │   │   ├── iv.rst
│   │   │   ├── text.rst
│   │   │   ├── text2time.rst
│   │   │   └── timeseries.rst
│   │   ├── modules.rst
│   │   ├── references.rst
│   │   ├── releasenotes.rst
│   │   ├── releasenotes
│   │   │   ├── 0.1.7.rst
│   │   │   ├── 0.1.8.rst
│   │   │   ├── 0.1.9.rst
│   │   │   ├── 0.2.0.rst
│   │   │   ├── 0.3.0.rst
│   │   │   ├── 0.4.0.rst
│   │   │   ├── 0.5.0.rst
│   │   │   ├── 0.5.1.rst
│   │   │   ├── 0.5.2.rst
│   │   │   ├── 0.5.3.rst
│   │   │   ├── 0.6.0.rst
│   │   │   ├── 0.6.1.rst
│   │   │   ├── alpha.rst
│   │   │   └── beta.rst
│   │   ├── tutorials.rst
│   │   ├── tutorials
│   │   │   ├── assets
│   │   │   │   └── diode_param_extractor.png
│   │   │   ├── tutorial_AIT_timeseries.nblink
│   │   │   ├── tutorial_iv_classifier.nblink
│   │   │   ├── tutorial_iv_diode_extractor.nblink
│   │   │   ├── tutorial_iv_simulator.nblink
│   │   │   ├── tutorial_text2time_module.nblink
│   │   │   ├── tutorial_textmodule.nblink
│   │   │   ├── tutorial_timeseries.nblink
│   │   │   ├── tutorial_timeseries_sim.nblink
│   │   │   └── tutorial_timeseries_survival_analysis.nblink
│   │   └── userguide.rst
│   └── refs
│       └── pvops.bib
├── noxfile.py
├── pvops
│   ├── __init__.py
│   ├── iv
│   │   ├── __init__.py
│   │   ├── extractor.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   └── nn.py
│   │   ├── physics_utils.py
│   │   ├── preprocess.py
│   │   ├── simulator.py
│   │   ├── timeseries_simulator.py
│   │   └── utils.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── om_data_update_pick.pkl
│   │   ├── om_summ_pick.pkl
│   │   ├── prod_data_clean_iec_pick.pkl
│   │   ├── prod_data_quant_pick.pkl
│   │   ├── prod_summ_pick.pkl
│   │   ├── test_iv.py
│   │   ├── test_text.py
│   │   ├── test_text2time.py
│   │   └── test_timeseries.py
│   ├── text
│   │   ├── __init__.py
│   │   ├── classify.py
│   │   ├── defaults.py
│   │   ├── nlp_utils.py
│   │   ├── preprocess.py
│   │   ├── stopwords.txt
│   │   ├── utils.py
│   │   └── visualize.py
│   ├── text2time
│   │   ├── __init__.py
│   │   ├── preprocess.py
│   │   ├── utils.py
│   │   └── visualize.py
│   └── timeseries
│       ├── __init__.py
│       ├── models
│       │   ├── AIT.py
│       │   ├── __init__.py
│       │   ├── iec.py
│       │   ├── linear.py
│       │   └── survival.py
│       └── preprocess.py
├── requirements.txt
├── setup.py
└── tutorials
    ├── __init__.py
    ├── assets
    │   └── diode_param_extractor.png
    ├── example_data
    │   ├── example_ML_ticket_data.csv
    │   ├── example_metadata2.csv
    │   ├── example_om_data.csv
    │   ├── example_om_data2.csv
    │   ├── example_om_survival_analysis_data.csv
    │   ├── example_perf_data.csv
    │   ├── example_prod_data_cumE2.csv
    │   ├── example_prod_with_covariates.csv
    │   ├── mappings_cause.csv
    │   ├── mappings_equipment.csv
    │   ├── mappings_pv_terms.csv
    │   ├── remappings_asset.csv
    │   └── remappings_response.csv
    ├── text_class_example.py
    ├── tutorial_AIT_timeseries.ipynb
    ├── tutorial_iv_classifier.ipynb
    ├── tutorial_iv_diode_extractor.ipynb
    ├── tutorial_iv_simulator.ipynb
    ├── tutorial_text2time_module.ipynb
    ├── tutorial_text_classify_regex_example.ipynb
    ├── tutorial_textmodule.ipynb
    ├── tutorial_timeseries.ipynb
    ├── tutorial_timeseries_sim.ipynb
    └── tutorial_timeseries_survival_analysis.ipynb
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     pvops/tests/conftest.py
4 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Description
2 | *Thank you for your contribution! Please provide a brief description of the problem and the proposed solution or new feature (if not already fully described in a linked issue)*
3 |
4 |
5 |
6 | ## Motivation and Context
7 |
8 |
9 |
10 | ## How has this been tested?
11 |
12 |
13 |
14 |
15 | ## Screenshots (if appropriate):
16 |
17 | ## Types of changes
18 |
19 | - [ ] Bug fix (non-breaking change which fixes an issue)
20 | - [ ] New feature (non-breaking change which adds functionality)
21 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
22 |
23 | ## Checklist:
24 |
25 |
26 | - [ ] My code follows the code style of this project.
27 | - [ ] My change requires a change to the documentation.
28 | - [ ] I have updated the documentation accordingly.
29 |
30 |
31 |
--------------------------------------------------------------------------------
/.github/workflows/builddocs.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 | 
3 | name: docs build experiment
4 | 
5 | # Controls when the workflow will run
6 | on:
7 |   # Triggers the workflow on push events for the master and docstrings branches
8 |   push:
9 |     branches: [ master, docstrings ]
10 | 
11 |   # Allows you to run this workflow manually from the Actions tab
12 |   workflow_dispatch:
13 | 
14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
15 | jobs:
16 |   # This workflow contains a single job called "build"
17 |   build:
18 |     # The type of runner that the job will run on
19 |     runs-on: ubuntu-latest
20 | 
21 |     # Steps represent a sequence of tasks that will be executed as part of the job
22 |     steps:
23 |       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
24 |       - uses: actions/checkout@v2
25 |         with:
26 |           fetch-depth: 0
27 |       - name: Install pandoc
28 |         run: sudo apt-get update -y && sudo apt-get install -y pandoc
29 |       - uses: actions/setup-python@v2
30 |         with:
31 |           python-version: '3.11'
32 |       - name: Install dependencies
33 |         run: |
34 |           python -m pip install --upgrade pip
35 |           python -m pip install -r requirements.txt
36 |           pip install --upgrade coverage pytest
37 |       - name: Install package
38 |         run: |
39 |           python -m pip install -e .
40 |       - name: Build documentation
41 |         run: sphinx-build -b html docs/ docs/_build/html
42 |       - uses: actions/upload-artifact@v4
43 |         with:
44 |           name: html-docs
45 |           path: docs/_build/html
46 | 
--------------------------------------------------------------------------------
/.github/workflows/lint-and-test.yml:
--------------------------------------------------------------------------------
1 | name: lint and test
2 | 
3 | on: [push, pull_request]
4 | 
5 | jobs:
6 |   test:
7 |     runs-on: ${{ matrix.os }}
8 |     strategy:
9 |       matrix:
10 |         os: [ubuntu-latest, macos-latest, windows-latest]
11 |         python-version: ['3.8', '3.9', '3.10', '3.11']
12 |       fail-fast: false
13 |     steps:
14 |       - uses: actions/checkout@v2
15 |       - name: Set up Python ${{ matrix.python-version }}
16 |         uses: actions/setup-python@v4
17 |         with:
18 |           python-version: ${{ matrix.python-version }}
19 |       - name: Install pvops
20 |         run: |
21 |           python -m pip install --upgrade pip
22 |           pip install .[iv]
23 |       - name: Test with pytest
24 |         run: |
25 |           pip install pytest pytest-cov
26 |           pytest --cov=pvops --cov-config=.coveragerc --cov-report term-missing pvops
27 | 
28 |   lint:
29 |     runs-on: ubuntu-latest
30 |     strategy:
31 |       matrix:
32 |         python-version: ['3.8', '3.9', '3.10', '3.11']
33 |     steps:
34 |       - uses: actions/checkout@v2
35 |       - name: Set up Python ${{ matrix.python-version }}
36 |         uses: actions/setup-python@v4
37 |         with:
38 |           python-version: ${{ matrix.python-version }}
39 |       - name: Install flake8
40 |         run: |
41 |           python -m pip install --upgrade pip
42 |           pip install flake8
43 |       - name: Lint with flake8
44 |         run: |
45 |           flake8 . --count --statistics --show-source --ignore=E402,E203,E266,E501,W503,F403,F401,W291,E302,W391,W292,F405,E722,W504,E121,E125,E712
46 | 
--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
1 | name: Upload to PyPI
2 | on:
3 |   release:
4 |     types: [published]
5 | 
6 | jobs:
7 |   deploy:
8 |     runs-on: ubuntu-latest
9 |     steps:
10 |       - uses: actions/checkout@v2
11 |       - name: Set up Python
12 |         uses: actions/setup-python@v2
13 |         with:
14 |           python-version: '3.x'
15 |       - name: Install dependencies
16 |         run: |
17 |           python -m pip install --upgrade pip
18 |           pip install setuptools wheel twine
19 |       - name: Build and publish
20 |         env:
21 |           TWINE_USERNAME: __token__
22 |           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
23 |         run: |
24 |           python setup.py sdist bdist_wheel
25 |           twine upload dist/*
26 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | __pycache__/
163 | pvops/text/__pycache__/
164 | pvops/text2time/__pycache__/
165 | examples/*.npy
166 | docs/_build/
167 | .pytest_cache/
168 | *.py[cod]
169 | examples/analysis/
170 | *.ipynb_checkpoints/
171 | *~
172 | pvops/.vscode/*
173 | pvops/text2time/.vscode
174 | .vscode/*
175 | .vscode/settings.json
176 | .coverage
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | build:
4 |   os: ubuntu-22.04
5 |   tools:
6 |     python: "3.11"
7 | 
8 | sphinx:
9 |   configuration: docs/conf.py
10 | 
11 | formats: all
12 | 
13 | python:
14 |   install:
15 |     - method: pip
16 |       path: .
17 |       extra_requirements:
18 |         - doc
19 |         - iv
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation
6 | in our community a harassment-free experience for everyone, regardless
7 | of age, body size, visible or invisible disability, ethnicity, sex
8 | characteristics, gender identity and expression, level of experience,
9 | education, socio-economic status, nationality, personal appearance,
10 | race, religion, or sexual identity and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open,
13 | welcoming, diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for
18 | our community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our
24 | mistakes, and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or
33 | political attacks
34 | * Public or private harassment
35 | * Publishing others' private information, such as a physical or email
36 | address, without their explicit permission
37 | * Other conduct which could reasonably be considered inappropriate in
38 | a professional setting
39 |
40 | ## Enforcement Responsibilities
41 |
42 | Community leaders are responsible for clarifying and enforcing our
43 | standards of acceptable behavior and will take appropriate and fair
44 | corrective action in response to any behavior that they deem
45 | inappropriate, threatening, offensive, or harmful.
46 |
47 | Community leaders have the right and responsibility to remove, edit,
48 | or reject comments, commits, code, wiki edits, issues, and other
49 | contributions that are not aligned to this Code of Conduct, and will
50 | communicate reasons for moderation decisions when appropriate.
51 |
52 | ## Scope
53 |
54 | This Code of Conduct applies within all community spaces, and also
55 | applies when an individual is officially representing the community in
56 | public spaces. Examples of representing our community include using an
57 | official e-mail address, posting via an official social media account,
58 | or acting as an appointed representative at an online or offline
59 | event.
60 |
61 | ## Enforcement
62 |
63 | Instances of abusive, harassing, or otherwise unacceptable behavior
64 | may be reported to the community leaders responsible for enforcement
65 | at cwhanse@sandia.gov or wfvinin@sandia.gov. All complaints will be
66 | reviewed and investigated promptly and fairly.
67 |
68 | All community leaders are obligated to respect the privacy and
69 | security of the reporter of any incident.
70 |
71 | ## Enforcement Guidelines
72 |
73 | Community leaders will follow these Community Impact Guidelines in
74 | determining the consequences for any action they deem in violation of
75 | this Code of Conduct:
76 |
77 | ### 1. Correction
78 |
79 | **Community Impact**: Use of inappropriate language or other behavior
80 | deemed unprofessional or unwelcome in the community.
81 |
82 | **Consequence**: A private, written warning from community leaders,
83 | providing clarity around the nature of the violation and an
84 | explanation of why the behavior was inappropriate. A public apology
85 | may be requested.
86 |
87 | ### 2. Warning
88 |
89 | **Community Impact**: A violation through a single incident or series
90 | of actions.
91 |
92 | **Consequence**: A warning with consequences for continued
93 | behavior. No interaction with the people involved, including
94 | unsolicited interaction with those enforcing the Code of Conduct, for
95 | a specified period of time. This includes avoiding interactions in
96 | community spaces as well as external channels like social
97 | media. Violating these terms may lead to a temporary or permanent
98 | ban.
99 |
100 | ### 3. Temporary Ban
101 |
102 | **Community Impact**: A serious violation of community standards,
103 | including sustained inappropriate behavior.
104 |
105 | **Consequence**: A temporary ban from any sort of interaction or
106 | public communication with the community for a specified period of
107 | time. No public or private interaction with the people involved,
108 | including unsolicited interaction with those enforcing the Code of
109 | Conduct, is allowed during this period. Violating these terms may
110 | lead to a permanent ban.
111 |
112 | ### 4. Permanent Ban
113 |
114 | **Community Impact**: Demonstrating a pattern of violation of
115 | community standards, including sustained inappropriate behavior,
116 | harassment of an individual, or aggression toward or disparagement of
117 | classes of individuals.
118 |
119 | **Consequence**: A permanent ban from any sort of public interaction
120 | within the community.
121 |
122 | ## Attribution
123 |
124 | This Code of Conduct is adapted from the [Contributor
125 | Covenant][homepage], version 2.0, available at
126 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
127 |
128 | Community Impact Guidelines were inspired by [Mozilla's code of
129 | conduct enforcement ladder](https://github.com/mozilla/diversity).
130 |
131 | [homepage]: https://www.contributor-covenant.org
132 |
133 | For answers to common questions about this code of conduct, see the
134 | FAQ at https://www.contributor-covenant.org/faq. Translations are
135 | available at https://www.contributor-covenant.org/translations.
136 |
137 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
4 | Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains
5 | certain rights in this software.
6 |
7 | Redistribution and use in source and binary forms, with or without modification,
8 | are permitted provided that the following conditions are met:
9 |
10 | Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | Redistributions in binary form must reproduce the above copyright notice, this
14 | list of conditions and the following disclaimer in the documentation and/or
15 | other materials provided with the distribution.
16 |
17 | Neither the name of the {organization} nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://badge.fury.io/gh/sandialabs%2FpvOps)
4 | [](https://github.com/sandialabs/pvOps/blob/master/LICENSE)
5 | [](https://github.com/sandialabs/pvOps/actions)
6 | [](https://joss.theoj.org/papers/6c3554c98b1771125613cff94241847c)
7 |
8 | pvops contains a series of functions to facilitate fusion of text-based data with time series production data collected at photovoltaic sites. The package also contains example datasets and tutorials to help demonstrate how the functions can be used.
9 |
10 | Installation
11 | =============
12 | pvops can be installed using `pip`. See more information at [readthedocs](https://pvops.readthedocs.io/en/latest/).
13 |
14 | Tutorials
15 | =========
16 | To get started with pvops, we recommend working through the [tutorials](https://pvops.readthedocs.io/en/latest/pages/tutorials.html).
17 |
18 |
19 | Package Layout and Documentation
20 | ==============
21 |
22 | The package is delineated into the following directories.
23 | ```
24 | ├───docs : Documentation directory
25 | |
26 | ├───tutorials : Contains tutorials of functionality
27 | │   └─── example_data : └─── Example data
28 | |
29 | └───pvops : Source function library
30 |     ├───tests : ├─── Library stability tests
31 |     ├───text : ├─── Text processing functions
32 |     ├───text2time : ├─── Text2Timeseries functions
33 |     ├───timeseries : ├─── Timeseries functions
34 |     └───iv : └─── Current-voltage functions
35 | ```
36 |
37 | More information about these modules is available at [readthedocs](https://pvops.readthedocs.io/en/latest/).
38 |
39 | Citing
40 | ======
41 |
42 | If using this package, please cite our [JOSS paper](https://joss.theoj.org/papers/10.21105/joss.05755#) using the following:
43 |
44 | **Citation:**
45 |
46 | ```
47 | Bonney et al., (2023). pvOps: a Python package for empirical analysis of photovoltaic field data.
48 | Journal of Open Source Software, 8(91), 5755, https://doi.org/10.21105/joss.05755
49 | ```
50 |
51 | **BibTex:**
52 |
53 | ```
54 | @article{Bonney2023,
55 | doi = {10.21105/joss.05755},
56 | url = {https://doi.org/10.21105/joss.05755},
57 | year = {2023},
58 | publisher = {The Open Journal},
59 | volume = {8},
60 | number = {91},
61 | pages = {5755},
62 | author = {Kirk L. Bonney and Thushara Gunda and Michael W. Hopwood and Hector Mendoza and Nicole D. Jackson},
63 | title = {pvOps: a Python package for empirical analysis of photovoltaic field data},
64 | journal = {Journal of Open Source Software} }
65 | ```
66 |
67 | Contributing
68 | ============
69 |
70 | The long-term success of pvops requires community support. Please see the [Contributing page](https://pvops.readthedocs.io/en/latest/pages/contributing.html) for more on how you can contribute.
71 |
72 | [](https://badges.pufler.dev)
73 |
74 | Logo Credit: [Daniel Rubinstein](http://www.danielrubinstein.com/)
75 |
76 | Copyright and License
77 | =======
78 |
79 | pvops is copyright through National Technology & Engineering Solutions of Sandia (NTESS). The software is distributed under the Revised BSD License. See the LICENSE file for more information.
80 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/_static/css/my_style.css:
--------------------------------------------------------------------------------
1 | @import url("theme.css");
2 |
3 | .wy-nav-content {
4 | max-width: 1000px !important;
5 | }
--------------------------------------------------------------------------------
/docs/assets/diode_param_extractor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/docs/assets/diode_param_extractor.png
--------------------------------------------------------------------------------
/docs/assets/vis_overlap_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/docs/assets/vis_overlap_example.png
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | from pvops import __version__
16 |
17 | sys.path.insert(0, os.path.abspath("../"))
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = u"pvops"
22 | copyright = u"2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS)"
23 | author = u"pvOps Developers"
24 | version = __version__
25 | release = __version__
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 |
34 | language = 'en'
35 |
36 | extensions = [
37 | "sphinx.ext.autodoc",
38 | # pull in documentation from docstrings in a semi-automatic way.
39 | "nbsphinx",
40 | # nbsphinx is a Sphinx extension that provides a source parser
41 | # for *.ipynb files
42 | "nbsphinx_link",
43 | # A sphinx extension for including notebook files from outside
44 | # the sphinx source root.
45 | "sphinx_copybutton",
46 | # adds copy button to code blocks
47 | "sphinx.ext.coverage",
48 | # `make coverage` summarizes what has docstrings
49 | 'sphinx.ext.doctest',
50 | # allows for testing of code snippets
51 | 'sphinx.ext.viewcode',
52 | # add links to highlighted source code
53 | 'sphinx.ext.napoleon',
54 | # add parsing for google/numpy style docs
55 | 'sphinxcontrib.bibtex',
56 | # for bibtex referencing
57 | ]
58 |
59 |
60 | coverage_show_missing_items = True
61 | napoleon_numpy_docstring = True # use numpy style
62 | napoleon_google_docstring = False # not google style
63 | napoleon_use_rtype = False # option for return section formatting
64 | numpydoc_show_class_members = True
65 | numpydoc_show_inherited_class_members = False
66 | numpydoc_class_members_toctree = False
67 | napoleon_use_ivar = True # option for attribute section formatting
68 | napoleon_use_param = False # option for parameter section formatting
69 | viewcode_import = True # tries to find the source files
70 | bibtex_bibfiles = ['refs/pvops.bib']
71 |
72 | # Add any paths that contain templates here, relative to this directory.
73 | templates_path = ["_templates"]
74 |
75 | # List of patterns, relative to source directory, that match files and
76 | # directories to ignore when looking for source files.
77 | # This pattern also affects html_static_path and html_extra_path.
78 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
79 |
80 |
81 | # -- Options for HTML output -------------------------------------------------
82 |
83 | # The theme to use for HTML and HTML Help pages. See the documentation for
84 | # a list of builtin themes.
85 | #
86 | html_theme = "sphinx_rtd_theme"
87 |
88 | # Add any paths that contain custom static files (such as style sheets) here,
89 | # relative to this directory. They are copied after the builtin static files,
90 | # so a file named "default.css" will overwrite the builtin "default.css".
91 | html_static_path = ["_static"]
92 | html_style = 'css/my_style.css'
93 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. _index:
2 |
3 | .. image:: assets/pvops_full_logo.svg
4 |    :width: 400
5 |
6 | Overview
7 | ============
8 | pvops is a Python package for PV operators & researchers.
9 | It consists of a set of documented functions for supporting operations
10 | research of photovoltaic (PV) energy systems.
11 | The library leverages advances in machine learning, natural
12 | language processing, and visualization
13 | tools to extract and visualize actionable information from common
14 | PV data including Operations & Maintenance (O&M) text data, timeseries
15 | production data, and current-voltage (IV) curves.
16 |
17 | .. list-table:: Module Overview
18 |    :widths: 25 25 50
19 |    :header-rows: 1
20 | 
21 |    * - Module
22 |      - Type of data
23 |      - Highlights of functions
24 |    * - text
25 |      - O&M records
26 |      - - fill data gaps in dates and categorical records
27 |        - visualize word clusters and patterns over time
28 |    * - timeseries
29 |      - Production data
30 |      - - estimate expected energy with multiple models
31 |        - evaluate inverter clipping
32 |        - survival analysis for O&M records
33 |    * - text2time
34 |      - O&M records and production data
35 |      - - analyze overlaps between O&M and production (timeseries) records
36 |        - visualize overlaps between O&M records and production data
37 |    * - iv
38 |      - IV records
39 |      - - simulate IV curves with physical faults
40 |        - extract diode parameters from IV curves
41 |        - classify faults using IV curves
42 |
43 | Statement of Need
44 | =================
45 |
46 | Continued interest in PV deployment across the world has resulted in increased awareness of needs associated
47 | with managing reliability and performance of these systems during operation. Current open-source packages for
48 | PV analysis focus on theoretical evaluations of solar power simulations (e.g., `pvlib`; :cite:p:`holmgren2018pvlib`),
49 | specific use cases of empirical evaluations (e.g., `RdTools`; :cite:p:`deceglie2018rdtools` and `Pecos`; :cite:p:`klise2016performance`
50 | for degradation analysis), or analysis of electroluminescence images (e.g., `PVimage`; :cite:p:`pierce2020identifying`). However,
51 | a general package that can support data-driven, exploratory evaluations of diverse field collected information is currently lacking.
52 | To address this gap, we present `pvOps`, an open-source, Python package that can be used by researchers and industry
53 | analysts alike to evaluate different types of data routinely collected during PV field operations.
54 |
55 | PV data collected in the field varies greatly in structure (i.e., timeseries and text records) and quality
56 | (i.e., completeness and consistency). The data available for analysis is frequently semi-structured.
57 | Furthermore, the level of detail collected between different owners/operators might vary.
58 | For example, some may capture a general start and end time for an associated event whereas others might include
59 | additional time details for different resolution activities. This diversity in data types and structures often
60 | leads to data being under-utilized due to the amount of manual processing required. To address these issues,
61 | `pvOps` provides a suite of data processing, cleaning, and visualization methods to leverage insights across a
62 | broad range of data types, including operations and maintenance records, production timeseries, and IV curves.
63 | The functions within `pvOps` enable users to better parse available data to understand patterns in outages and production losses.
64 |
65 |
66 | .. toctree::
67 |    :maxdepth: 1
68 |    :caption: Available resources:
69 | 
70 |    Overview <self>
71 |    pages/userguide
72 |    pages/tutorials
73 |    pages/modules
74 |    pages/development
75 |    pages/contributing
76 |    pages/releasenotes
77 |    pages/references
78 |
79 |
80 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/pages/abbreviations.rst:
--------------------------------------------------------------------------------
1 | Abbreviations/Terminology
2 | ====================================
3 | * AIT: Additive Interaction Model described in :cite:p:`app12041872`
4 | * CEC: California Energy Commission
5 | * WS: wind speed
6 | * Varr: Voltage array
7 | * T: Average cell temperature
8 | * Rsh_mult: Multiplier usually less than 1 to simulate a drop in RSH
9 | * Rs_mult: Multiplier usually less than 1 to simulate a drop in RS
10 | * Io_mult: Multiplier usually less than 1 to simulate a drop in IO
11 | * Il_mult: Multiplier usually less than 1 to simulate a drop in IL
12 | * nnsvth_mult: Multiplier usually less than 1 to simulate a drop in NNSVTH
13 | * E: Irradiance
14 | * Tc: Cell temp
15 | * gt: (G - Irradiation and T - temperature)
16 | * v_rbd: Reverse bias diode voltage
17 | * v_oc: Open circuit voltage
--------------------------------------------------------------------------------
/docs/pages/apidoc/iv.rst:
--------------------------------------------------------------------------------
1 | iv module
2 | ==========
3 | 
4 | iv.extractor module
5 | -------------------
6 | 
7 | .. automodule:: pvops.iv.extractor
8 |    :members:
9 |    :undoc-members:
10 |    :show-inheritance:
11 | 
12 | iv.physics_utils module
13 | -----------------------
14 | 
15 | .. automodule:: pvops.iv.physics_utils
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 
20 | iv.preprocess module
21 | --------------------
22 | 
23 | .. automodule:: pvops.iv.preprocess
24 |    :members:
25 |    :undoc-members:
26 |    :show-inheritance:
27 | 
28 | iv.simulator module
29 | -------------------
30 | 
31 | .. automodule:: pvops.iv.simulator
32 |    :members:
33 |    :undoc-members:
34 |    :show-inheritance:
35 | 
36 | iv.utils module
37 | ---------------
38 | 
39 | .. automodule:: pvops.iv.utils
40 |    :members:
41 |    :undoc-members:
42 |    :show-inheritance:
43 | 
44 | iv.timeseries_simulator module
45 | -----------------------------------
46 | 
47 | .. automodule:: pvops.iv.timeseries_simulator
48 |    :members:
49 |    :undoc-members:
50 |    :show-inheritance:
51 | 
52 | iv.models.nn module
53 | -------------------
54 | 
55 | .. automodule:: pvops.iv.models.nn
56 |    :members:
57 |    :undoc-members:
58 |    :show-inheritance:
--------------------------------------------------------------------------------
/docs/pages/apidoc/text.rst:
--------------------------------------------------------------------------------
1 | text module
2 | ============
3 | 
4 | text.classify module
5 | ------------------------------------
6 | 
7 | .. automodule:: pvops.text.classify
8 |    :members:
9 |    :undoc-members:
10 |    :show-inheritance:
11 | 
12 | text.defaults module
13 | ------------------------------------
14 | 
15 | .. automodule:: pvops.text.defaults
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 
20 | 
21 | text.nlp_utils module
22 | ------------------------------------
23 | 
24 | .. automodule:: pvops.text.nlp_utils
25 |    :members:
26 |    :undoc-members:
27 |    :show-inheritance:
28 | 
29 | text.preprocess module
30 | ------------------------------------
31 | 
32 | .. automodule:: pvops.text.preprocess
33 |    :members:
34 |    :undoc-members:
35 |    :show-inheritance:
36 | 
37 | text.utils module
38 | ------------------------------------
39 | 
40 | .. automodule:: pvops.text.utils
41 |    :members:
42 |    :undoc-members:
43 |    :show-inheritance:
44 | 
45 | text.visualize module
46 | ------------------------------------
47 | 
48 | .. automodule:: pvops.text.visualize
49 |    :members:
50 |    :undoc-members:
51 |    :show-inheritance:
52 |
--------------------------------------------------------------------------------
/docs/pages/apidoc/text2time.rst:
--------------------------------------------------------------------------------
1 | text2time module
2 | =================
3 | 
4 | text2time.preprocess module
5 | ---------------------------
6 | 
7 | .. automodule:: pvops.text2time.preprocess
8 |    :members:
9 |    :undoc-members:
10 |    :show-inheritance:
11 | 
12 | text2time.utils module
13 | ----------------------
14 | 
15 | .. automodule:: pvops.text2time.utils
16 |    :members:
17 |    :undoc-members:
18 |    :show-inheritance:
19 | 
20 | text2time.visualize module
21 | --------------------------
22 | 
23 | .. automodule:: pvops.text2time.visualize
24 |    :members:
25 |    :undoc-members:
26 |    :show-inheritance:
27 |
28 |
--------------------------------------------------------------------------------
/docs/pages/apidoc/timeseries.rst:
--------------------------------------------------------------------------------
1 | timeseries module
2 | ==================
3 | 
4 | timeseries.preprocess module
5 | ----------------------------
6 | 
7 | .. automodule:: pvops.timeseries.preprocess
8 |    :members:
9 |    :undoc-members:
10 |    :show-inheritance:
11 | 
12 | .. _timeseries models:
13 | 
14 | timeseries models
15 | -----------------
16 | 
17 | timeseries.models.linear module
18 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
19 | 
20 | .. automodule:: pvops.timeseries.models.linear
21 |    :members:
22 |    :undoc-members:
23 |    :show-inheritance:
24 | 
25 | timeseries.models.AIT module
26 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
27 | 
28 | .. automodule:: pvops.timeseries.models.AIT
29 |    :members:
30 |    :undoc-members:
31 |    :show-inheritance:
32 | 
33 | timeseries.models.iec module
34 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
35 | 
36 | .. automodule:: pvops.timeseries.models.iec
37 |    :members:
38 |    :undoc-members:
39 |    :show-inheritance:
40 | 
41 | timeseries.models.survival module
42 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43 | 
44 | .. automodule:: pvops.timeseries.models.survival
45 |    :members:
46 |    :undoc-members:
47 |    :show-inheritance:
--------------------------------------------------------------------------------
/docs/pages/contributing.rst:
--------------------------------------------------------------------------------
1 | .. _contributing:
2 |
3 | Contributing
4 | ============
5 |
6 | Thank you for wanting to contribute to this library! We will try to make this
7 | an easy process for you. It is recommended that you read
8 | the :ref:`development` page so that you can lint
9 | and test before submitting code.
10 | Checking that your PR passes the required testing and linting procedures will speed up
11 | the acceptance of your PR.
12 |
13 | Issues and bug reporting
14 | ------------------------
15 |
16 | To report issues or bugs please create a new issue on
17 | the `pvops issues page <https://github.com/sandialabs/pvOps/issues>`_.
18 | Before submitting your bug report, please perform a cursory search
19 | to see if the problem has been already reported. If it has been reported,
20 | and the issue is still open, add a comment to the existing issue instead of opening a new issue.
21 |
22 | Guidelines for effective bug reporting
23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 |
25 | - Use a clear descriptive title for the issue.
26 |
27 | - Describe the steps to reproduce the problem,
28 | the behavior you observed after following the steps, and the expected behavior.
29 |
30 | - If possible, provide a simple example of the bug using pvOps example data.
31 |
32 | - When relevant, provide information on your computing environment
33 | (operating system, python version, pvOps version or commit).
34 |
35 | - For runtime errors, provide a function call stack.
36 |
37 | Contributing code
38 | -----------------
39 |
40 | Software developers, within the core development team and external collaborators,
41 | are expected to follow standard practices to document and test new code.
42 | Software developers interested in contributing to the project are encouraged
43 | to create a Fork of the project and submit a Pull Request (PR) using GitHub.
44 | Pull requests will be reviewed by the core development team.
45 | Create a PR, or help with other open PRs in the library,
46 | by referencing the `pvops PR page <https://github.com/sandialabs/pvOps/pulls>`_.
47 |
48 | Guidelines for preparing and submitting pull-requests
49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
50 |
51 | - Use a clear descriptive title for your pull-requests
52 |
53 | - Describe if your submission is a bugfix, documentation update, or a feature
54 | enhancement. Provide a concise description of your proposed changes.
55 |
56 | - Provide references to open issues, if applicable, to provide the necessary
57 | context to understand your pull request
58 |
59 | - Make sure that your pull-request merges cleanly with the `master` branch of
60 | pvOps. When working on a feature, always create your feature branch off of
61 | the latest `master` commit
62 |
63 | - Ensure that appropriate documentation and tests accompany any added features.
64 |
--------------------------------------------------------------------------------
/docs/pages/development.rst:
--------------------------------------------------------------------------------
1 | .. _development:
2 |
3 | Developing pvOps
4 | =====================
5 |
6 | Installation
7 | ------------
8 |
9 | To maintain a local installation, developers should use the following commands::
10 |
11 |     git clone https://github.com/sandialabs/pvOps.git
12 |     cd pvops
13 |     pip install -e .
14 |
15 | Testing
16 | -------
17 | To test locally, run::
18 |
19 |     pytest pvops
20 |
21 | at the root of the repository. Note that this requires the installation
22 | of pytest.
23 |
24 | Linting
25 | -------
26 |
27 | pvOps uses flake8 to maintain code standards. To lint locally using
28 | the same filters required by the pvOps CI/CD pipeline, run the following
29 | command at the root of the repository::
30 |
31 |     flake8 . --count --statistics --show-source --ignore=E402,E203,E266,E501,W503,F403,F401,W291,E302,W391,W292,F405,E722,W504,E121,E125,E712
32 |
33 | Note that this requires the installation of flake8.
34 |
35 | Documentation
36 | ------------------
37 |
38 | Building docs
39 | ^^^^^^^^^^^^^^^
40 |
41 | To build docs locally, navigate to ``pvops/docs`` and run::
42 |
43 |     make html
44 |
45 | After building, the static html files can be found in ``_build/html``.
46 |
47 | Docstrings
48 | ^^^^^^^^^^^
49 |
50 | The pvOps documentation adheres to NumPy style docstrings. Not only does this
51 | help to keep a consistent style, but it is also necessary for the API documentation
52 | to be parsed and displayed correctly. For an example of what this should look like::
53 |
54 |     def func(arg1, arg2):
55 |         """Summary line.
56 | 
57 |         Extended description of function.
58 | 
59 |         Parameters
60 |         ----------
61 |         arg1 : int
62 |             Description of arg1
63 |         arg2 : str
64 |             Description of arg2
65 | 
66 |         Returns
67 |         -------
68 |         bool
69 |             Description of return value
70 | 
71 |         """
72 |         return True
73 |
74 | Additional examples can be found in the
75 | `napoleon documentation `_.
76 |
77 | Extending Documentation
78 | ^^^^^^^^^^^^^^^^^^^^^^^
79 |
80 | When adding new functionality to the repository, it is important
81 | to check that it is being properly documented in the API documentation.
82 | Most of this is automatic. For example, if a function is added to
83 | ``pvops.text.visualize`` with a proper docstring, there is no more work to do.
84 | However, when new files are created they must be added to the appropriate page
85 | in ``docs/pages/apidoc`` so that the automatic documentation recognizes it (see the example below).
86 |
87 | New pages should be placed into ``docs/pages``, and linked to in
88 | ``index.rst``, or another page. It is recommended to use absolute paths
89 | (starting from the root of the documentation) when linking anything.
90 |
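91 | As an example of the apidoc registration described above, a
92 | hypothetical new file ``pvops/text/new_module.py`` would be
93 | registered in ``docs/pages/apidoc/text.rst`` with an entry that
94 | mirrors the existing ones::
95 | 
96 |     text.new_module module
97 |     ------------------------------------
98 | 
99 |     .. automodule:: pvops.text.new_module
100 |        :members:
101 |        :undoc-members:
102 |        :show-inheritance: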
--------------------------------------------------------------------------------
/docs/pages/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | =============
3 |
4 | pvops is tested on Python versions 3.8, 3.9, 3.10, and 3.11 and depends on a variety of
5 | packages.
6 |
7 | The latest release of pvops is accessible via PyPI using the following
8 | command line prompt::
9 |
10 |     $ pip install pvops
11 |
12 | Alternatively, the package can be installed from the GitHub source::
13 |
14 |     $ git clone https://github.com/sandialabs/pvOps.git
15 |     $ cd pvops
16 |     $ pip install .
17 |
18 | NLTK data
19 | ----------
20 |
21 | Functions in the text package rely on the "punkt_tab" dataset from the nltk package.
22 | After proper installation of pvops, run the commands::
23 |
24 |     >>> import nltk
25 |     >>> nltk.download('punkt_tab')
26 |     >>> nltk.download('stopwords')
27 |
28 | Those operating under a proxy may have difficulty with this installation.
29 | This `stack exchange post `_
30 | may help.
31 |
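32 | As a sketch of one such workaround (the proxy address below is a
33 | placeholder), nltk can also be pointed at the proxy directly before
34 | downloading::
35 | 
36 |     >>> import nltk
37 |     >>> nltk.set_proxy('http://proxy.example.com:3128')
38 |     >>> nltk.download('punkt_tab')
39 |     >>> nltk.download('stopwords')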
--------------------------------------------------------------------------------
/docs/pages/moduleguides/iv.rst:
--------------------------------------------------------------------------------
1 | IV Guide
2 | ===============
3 |
4 | Module Overview
5 | ----------------
6 |
7 | These functions focus on current-voltage (IV) curve simulation and
8 | classification.
9 |
10 | .. note::
11 |    To use the capabilities in this module, pvOps must be installed with the ``iv`` option:
12 |    ``pip install pvops[iv]``.
13 |
14 |
15 | Tutorials that exemplify usage can be found at:
16 | - `tutorial_iv_classifier.ipynb `_.
17 | - `tutorial_iv_diode_extractor.ipynb `_.
18 | - `tutorial_iv_simulator.ipynb `_.
19 |
20 | extractor
21 | ^^^^^^^^^^^^^^^^^^^^^
22 |
23 | * :py:mod:`~pvops.iv.extractor` primarily features the
24 | :py:class:`~pvops.iv.extractor.BruteForceExtractor` class, which
25 | extracts diode parameters from IV curves (including outdoor-collected curves).
26 |
27 | physics_utils
28 | ^^^^^^^^^^^^^^^^^^^^^
29 |
30 | :py:mod:`~pvops.iv.physics_utils` contains methods which aid the IV
31 | Simulator's physics-based calculations and the preprocessing pipeline's
32 | correction calculations.
33 |
34 | * :py:func:`~pvops.iv.physics_utils.calculate_IVparams` calculates
35 | key parameters of an IV curve.
36 | * :py:func:`~pvops.iv.physics_utils.smooth_curve` smooths
37 | IV curve using a polyfit.
38 | * :py:func:`~pvops.iv.physics_utils.iv_cutoff` cuts off IV curve
39 | greater than a given voltage value.
40 | * :py:func:`~pvops.iv.physics_utils.intersection` computes
41 | the intersection between two curves.
42 | * :py:func:`~pvops.iv.physics_utils.T_to_tcell` calculates
43 | a cell temperature given ambient temperature via NREL weather-correction
44 | tools.
45 | * :py:func:`~pvops.iv.physics_utils.bypass` limits voltage
46 | to above a minimum value.
47 | * :py:func:`~pvops.iv.physics_utils.add_series` adds two
48 | IV curves in series.
49 | * :py:func:`~pvops.iv.physics_utils.voltage_pts`
50 | provides voltage points for an IV curve.
51 | * :py:func:`~pvops.iv.physics_utils.gt_correction` corrects IV
52 | trace using irradiance and temperature using one of three
53 | available options.
54 |
55 | preprocess
56 | ^^^^^^^^^^^^^^^^^^^^^
57 |
58 | :py:mod:`~pvops.iv.preprocess` contains the preprocessing function
59 | :py:func:`~pvops.iv.preprocess.preprocess`, which
60 | corrects a set of data according to irradiance and temperature and
61 | normalizes the curves so they are comparable.
62 |
63 | simulator
64 | ^^^^^^^^^^^^^^^^^^^^^
65 |
66 | :py:mod:`~pvops.iv.simulator` holds the IV
67 | :py:class:`~pvops.iv.simulator.Simulator` class, which can simulate
68 | current-voltage (IV) curves under different environmental and fault
69 | conditions. There is also a utility function
70 | :py:func:`~pvops.iv.simulator.create_df` for building an IV curve dataframe
71 | from a set of parameters.
72 |
73 | utils
74 | ^^^^^^^^^^^^^^^^^^^^^
75 |
76 | :py:mod:`~pvops.iv.utils` holds the utility function
77 | :py:func:`~pvops.iv.utils.get_CEC_params` which connects to the
78 | California Energy Commission (CEC)
79 | database hosted by pvLib for cell-level and module-level parameters.
80 |
81 | timeseries_simulator
82 | ^^^^^^^^^^^^^^^^^^^^^
83 |
84 | :py:mod:`~pvops.iv.timeseries_simulator` contains
85 | :py:class:`~pvops.iv.timeseries_simulator.IVTimeseriesGenerator`,
86 | a subclass of the IV Simulator,
87 | which allows users to specify time-based failure degradation
88 | patterns. The class
89 | :py:class:`~pvops.iv.timeseries_simulator.TimeseriesFailure`
90 | is used to define the time-based failures.
91 |
92 | Example Code
93 | --------------
94 |
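95 | Below is a minimal sketch of simulating curves for a fault condition.
96 | The method names follow ``tutorial_iv_simulator.ipynb``; exact
97 | signatures may differ, so treat this as illustrative rather than
98 | definitive::
99 | 
100 |     from pvops.iv.simulator import Simulator
101 | 
102 |     sim = Simulator()
103 | 
104 |     # a module-wide ("complete") condition at reduced irradiance,
105 |     # where 'E' is irradiance (see the abbreviations page)
106 |     condition = {'identifier': 'low_E', 'E': 400}
107 |     sim.add_preset_conditions('complete', condition)
108 | 
109 |     # simulate the IV curves and collect them into a dataframe
110 |     sim.simulate()
111 |     df = sim.sims_to_df(focus=['module'], cutoff=True)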
--------------------------------------------------------------------------------
/docs/pages/moduleguides/text.rst:
--------------------------------------------------------------------------------
1 | Text Guide
2 | ============
3 |
4 | Module Overview
5 | ----------------
6 |
7 | This module aims to support the consistent extraction of key features
8 | in O&M data:
9 |
10 | * timestamp information
11 | * characteristic categorical information
12 | * a concise synopsis of the issue for context
13 |
14 | Implemented functions include those for filling in data gaps (text.preprocess submodule),
15 | machine learning analyses to fill in gaps in categorical information and to
16 | generate concise summary strings (text.classify submodule), functions
17 | to prepare data for natural language processing (text.nlp_utils submodule),
18 | and a visualization suite (text.visualize submodule).
19 |
20 | An example implementation of all capabilities can be found in
21 | `text_class_example.py `_
22 | for specifics, and `tutorial_textmodule.ipynb `_ for basics.
23 |
24 | Text pre-processing
25 | ^^^^^^^^^^^^^^^^^^^^^
26 |
27 | :py:mod:`~pvops.text.preprocess`
28 |
29 | These functions process the O&M data into concise, machine learning-ready documents.
30 | Additionally, there are options to extract dates from the text.
31 |
32 | * :py:func:`~pvops.text.preprocess.preprocessor` acts as a wrapper function,
33 | utilizing the other preprocessing functions, which prepares the data for machine learning.
34 |
35 | * See ``text_class_example.prep_data_for_ML`` for an example.
36 |
37 | * :py:func:`~pvops.text.preprocess.preprocessor` should be used with the keyword argument
38 | `extract_dates_only = True` if the primary interest is date extraction
39 | instead continuing to use the data for machine learning.
40 |
41 | * See ``text_class_example.extract_dates`` module for an example.
42 |
43 |
44 | Text classification
45 | ^^^^^^^^^^^^^^^^^^^^^
46 |
47 | :py:mod:`~pvops.text.classify`
48 |
49 | These functions process the O&M data to make an inference on the specified event descriptor.
50 |
51 | * :py:func:`~pvops.text.classify.classification_deployer` is used to conduct supervised
52 | or unsupervised classification of text documents.
53 | This function conducts a grid search across the passed classifiers and hyperparameters.
54 |
55 | * The :py:func:`~pvops.text.defaults.supervised_classifier_defs` and
56 | :py:func:`~pvops.text.defaults.unsupervised_classifier_defs`
57 | functions return default values for conducting the grid search.
58 |
59 | * See ``text_class_example.classify_supervised`` or ``text_class_example.classify_unsupervised``
60 | modules for an example.
61 |
62 | * Once the model is built and selected, classification (for supervised ML)
63 | or clustering (for unsupervised ML) analysis can be conducted on the best model returned from the pipeline object.
64 |
65 | * See ``text_class_example.predict_best_model`` module for an example.
66 |
67 |
68 | Utils
69 | ^^^^^^^^^^^^^^^^^^^^^
70 |
71 | :py:mod:`~pvops.text.utils`
72 |
73 | These helper functions focus on performing exploratory or secondary processing activities for the O&M data.
74 |
75 | * :py:func:`~pvops.text.utils.remap_attributes` is used to reorganize an attribute column into a new set of labels.
76 |
77 | NLP Utils
78 | ^^^^^^^^^^^^
79 |
80 | :py:mod:`~pvops.text.nlp_utils`
81 |
82 | These helper functions focus on processing in preparation for NLP activities.
83 |
84 | * :py:func:`~pvops.text.nlp_utils.summarize_text_data` prints summarized contents of the O&M data.
85 | * :py:class:`~pvops.text.nlp_utils.Doc2VecModel` performs a gensim Doc2Vec
86 | transformation of the input documents to create embedded representations of the documents.
87 | * :py:class:`~pvops.text.nlp_utils.DataDensifier` is a data structure transformer which converts sparse data to dense data.
88 | * :py:func:`~pvops.text.nlp_utils.create_stopwords` concatenates a list of stopwords using both words grabbed from nltk and user-specified words
89 |
90 |
91 | Visualizations
92 | ^^^^^^^^^^^^^^^^^^^^^
93 | These functions create visualizations to gain a better understanding of your documents.
94 |
95 | * :py:func:`~pvops.text.visualize.visualize_attribute_connectivity` visualizes the connectivity of two attributes.
96 |
97 | .. image:: ../../assets/vis_attr_connect_example.svg
98 |    :width: 600
99 |
100 | * :py:func:`~pvops.text.visualize.visualize_attribute_timeseries` evaluates the density of an attribute over time.
101 |
102 | .. image:: ../../assets/vis_attr_timeseries_example.svg
103 |    :width: 600
104 |
105 | * :py:func:`~pvops.text.visualize.visualize_cluster_entropy` observes the performance of different text embeddings.
106 |
107 | .. image:: ../../assets/vis_cluster_entropy_example.svg
108 |    :width: 600
109 |
110 | * :py:func:`~pvops.text.visualize.visualize_document_clusters` visualizes popular words in clusters after a cluster analysis is run.
111 |
112 | .. image:: ../../assets/vis_doc_clusters_example.svg
113 | :width: 600
114 |
115 | * :py:func:`~pvops.text.visualize.visualize_word_frequency_plot` visualizes word frequencies in the associated attribute column of O&M data.
116 |
117 | .. image:: ../../assets/vis_freq_plot_example.svg
118 | :width: 600
119 |
120 |
121 | .. Example Code
122 | .. --------------
--------------------------------------------------------------------------------
/docs/pages/moduleguides/text2time.rst:
--------------------------------------------------------------------------------
1 | Text2Time Guide
2 | ================
3 |
4 | Module Overview
5 | ----------------
6 |
7 | Aligning production data with O&M tickets is not a trivial task since
8 | the intersection of dates and the identification of anomalies depend on the
9 | nuances within the two datasets. This set of functions facilitates this
10 | data fusion. Key features include:
11 |
12 | * conducting quality checks and controls on data.
13 | * identification of overlapping periods between O&M and production data.
14 | * generation of baseline values for production loss estimations.
15 | * calculation of losses from production anomalies for specific time periods.
16 |
17 | An example of usage can be found in
18 | `tutorial_text2time_module.ipynb `_.
19 |
20 |
21 | The text2time package can be broken down into three main components:
22 | `data pre-processing`, `utils`, and `visualizations`.
23 |
24 | Data pre-processing
25 | ^^^^^^^^^^^^^^^^^^^^^
26 |
27 | :py:mod:`text2time.preprocess module `
28 |
29 | These functions pre-process user O&M and production data to prepare them for
30 | further analyses and visualizations.
31 |
32 | * :py:func:`~pvops.text2time.preprocess.om_date_convert` and
33 | :py:func:`~pvops.text2time.preprocess.prod_date_convert`
34 | convert dates in string format to date-time objects in the O&M and
35 | production data respectively.
36 | * :py:func:`~pvops.text2time.preprocess.data_site_na`
37 | handles missing site IDs in the user data. This function can
38 | be used for both O&M and production data.
39 | * :py:func:`~pvops.text2time.preprocess.om_datelogic_check`
40 | detects and handles issues with the logic of the O&M date, specifically
41 | when the conclusion of an event occurs before it begins.
42 | * :py:func:`~pvops.text2time.preprocess.om_nadate_process` and
43 | :py:func:`~pvops.text2time.preprocess.prod_nadate_process`
44 | detect and handle any missing time-stamps in the O&M and
45 | production data respectively.
46 |
47 | Utils
48 | ^^^^^^^^^^^^^^^^^^^^^
49 |
50 | :py:mod:`text2time.utils module `
51 |
52 | These functions perform secondary calculations
53 | on the O&M and production data to aid in data analyses and visualizations.
54 |
55 | * :py:func:`~pvops.text2time.utils.iec_calc` calculates a
56 | comparison dataset for the production data based on irradiance
57 | as calculated by the IEC standard.
58 | * :py:func:`~pvops.text2time.utils.summarize_overlaps` summarizes
59 | the overlapping production and O&M data.
60 | * :py:func:`~pvops.text2time.utils.om_summary_stats` summarizes
61 | statistics (e.g., event duration and month of occurrence) of O&M data.
62 | * :py:func:`~pvops.text2time.utils.overlapping_data` trims the
63 | production and O&M data frames, retaining only the data where both
64 | datasets overlap in time.
65 | * :py:func:`~pvops.text2time.utils.prod_anomalies` detects and handles
66 | issues when production data is input in cumulative format and unexpected
67 | dips appear in the data (see the sketch after this list).
68 | * :py:func:`~pvops.text2time.utils.prod_quant` calculates a
69 | comparison between the actual production data and a baseline
70 | (e.g. from a model from :ref:`timeseries models`).
71 |
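72 | The dip anomaly that :py:func:`~pvops.text2time.utils.prod_anomalies`
73 | targets, illustrated with plain pandas: a cumulative energy channel should
74 | be non-decreasing, so negative first differences flag meter resets or bad
75 | readings.
76 |
77 | .. code-block:: python
78 |
79 | import pandas as pd
80 |
81 | cum_energy = pd.Series([100.0, 150.0, 210.0, 5.0, 60.0])  # dip at index 3
82 | dips = cum_energy.diff() < 0
83 | print(cum_energy[dips])
84 |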
72 | Visualizations
73 | ^^^^^^^^^^^^^^^^^^^^^
74 |
75 | :py:mod:`text2time.visualize module `
76 |
77 | These functions visualize the processed O&M and production data:
78 |
79 | * :py:func:`~pvops.text2time.visualize.visualize_categorical_scatter`
80 | generates categorical scatter plots of a chosen variable based on a specified
81 | category (e.g. site ID) for the O&M data.
82 |
83 | .. image:: ../../assets/vis_cat_scatter_example.svg
84 | :width: 600
85 |
86 | * :py:func:`~pvops.text2time.visualize.visualize_counts`
87 | generates a count plot of categories based on a chosen categorical variable
88 | column for the O&M data.
89 | If that variable is the user's site ID for every ticket, a plot of the total
90 | count of events per site can be generated.
91 |
92 | .. image:: ../../assets/vis_counts_example.svg
93 | :width: 600
94 |
95 | * :py:func:`~pvops.text2time.visualize.visualize_om_prod_overlap`
96 | creates a visualization that overlays the O&M data on top of the
97 | coinciding production data.
98 |
99 | .. image:: ../../assets/vis_overlap_example.png
100 | :width: 600
101 |
102 | Example Code
103 | --------------
104 |
105 | Load in O&M data and convert dates to Python datetime objects:
106 |
107 | .. doctest::
108 |
109 | >>> import pandas as pd
110 | >>> import os
111 | >>> from pvops.text2time import preprocess
112 |
113 | >>> example_OMpath = os.path.join('example_data', 'example_om_data2.csv')
114 | >>> om_data = pd.read_csv(example_OMpath, on_bad_lines='skip', engine='python')
115 | >>> om_col_dict = {
116 | ... 'siteid': 'randid',
117 | ... 'datestart': 'date_start',
118 | ... 'dateend': 'date_end',
119 | ... 'workID': 'WONumber',
120 | ... 'worktype': 'WOType',
121 | ... 'asset': 'Asset',
122 | ... 'eventdur': 'EventDur', #user's name choice for new column (Repair Duration)
123 | ... 'modatestart': 'MonthStart', #user's name choice for new column (Month when an event begins)
124 | ... 'agedatestart': 'AgeStart'} #user's name choice for new column (Age of system when event begins)
125 | >>> om_data_converted = preprocess.om_date_convert(om_data, om_col_dict)
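126 |
127 | Production data is converted analogously with
128 | :py:func:`~pvops.text2time.preprocess.prod_date_convert`. The column
129 | dictionary below follows the same pattern; the key and column names are
130 | illustrative assumptions, so match them to your own production data.
131 |
132 | .. code-block:: python
133 |
134 | example_prodpath = os.path.join('example_data', 'example_prod_data_cumE2.csv')
135 | prod_data = pd.read_csv(example_prodpath, on_bad_lines='skip', engine='python')
136 | prod_col_dict = {
137 | 'siteid': 'randid',      # illustrative key/column names
138 | 'timestamp': 'Date',
139 | 'energyprod': 'Energy'}
140 | prod_data_converted = preprocess.prod_date_convert(prod_data, prod_col_dict)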
--------------------------------------------------------------------------------
/docs/pages/moduleguides/timeseries.rst:
--------------------------------------------------------------------------------
1 | Timeseries Guide
2 | ==================
3 |
4 | Module Overview
5 | -----------------
6 |
7 | These functions provide processing and modelling capabilities for timeseries
8 | production data. Processing functions prepare data to train two
9 | types of expected energy models:
10 |
11 | * AIT: additive interaction trained model, see :cite:t:`app12041872`
12 | for more information.
13 | * Linear: a high flexibility linear regression model.
14 |
15 | Additionally, the ability to generate expected energy via IEC
16 | standards (IEC 61724-1) is implemented in the :py:mod:`~pvops.timeseries.models.iec`
17 | module.
18 |
19 | An example of usage can be found in
20 | `tutorial_timeseries.ipynb `_.
21 |
22 | Preprocess
23 | ^^^^^^^^^^^^^^^^^^^^^
24 | * :py:func:`pvops.timeseries.preprocess.prod_inverter_clipping_filter`
25 | filters out production periods with inverter clipping.
26 | The core method was adopted from `pvlib/pvanalytics`; a standalone illustration of clipping detection follows this list.
27 | * :py:func:`pvops.timeseries.preprocess.normalize_production_by_capacity`
28 | normalizes power by site capacity.
29 | * :py:func:`pvops.timeseries.preprocess.prod_irradiance_filter`
30 | filters rows of production data frame according to performance and data
31 | quality. NOTE: this method is currently in development.
32 | * :py:func:`pvops.timeseries.preprocess.establish_solar_loc`
33 | adds solar position data to production data using
34 | pvlib.
35 |
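36 | A standalone illustration of the clipping idea (not the pvops
37 | implementation): power that saturates near the inverter's AC limit for
38 | consecutive samples is treated as clipped.
39 |
40 | .. code-block:: python
41 |
42 | import pandas as pd
43 |
44 | ac_limit = 100.0  # kW; illustrative inverter rating
45 | power = pd.Series([62.0, 88.0, 99.6, 99.7, 99.6, 71.0])
46 | clipped = power > 0.99 * ac_limit  # simple threshold heuristic
47 | print(power[clipped])
48 |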
36 | Models
37 | ^^^^^^^^^^^^^^^^^^^^^
38 | * :py:func:`pvops.timeseries.models.linear.modeller` is a wrapper method
39 | used to model timeseries data using a linear model.
40 | This method gives multiple options for the
41 | learned model structure.
42 | * :py:func:`pvops.timeseries.models.AIT.AIT_calc` calculates expected energy
43 | using measured irradiance based on trained regression model from field data.
44 | * :py:func:`pvops.timeseries.models.iec.iec_calc` calculates expected energy using measured irradiance
45 | based on IEC calculations.
46 |
47 | Example Code
48 | --------------
49 |
50 | Load in data and run some processing functions:
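51 |
52 | Below is a minimal, pandas-only sketch of the loading step; the column
53 | name 'date' is an assumption, and the full workflow, including the pvops
54 | preprocessing and modelling calls, is in the timeseries tutorial notebook.
55 |
56 | .. code-block:: python
57 |
58 | import os
59 | import pandas as pd
60 |
61 | prod_path = os.path.join('example_data', 'example_prod_with_covariates.csv')
62 | prod_df = pd.read_csv(prod_path)
63 | prod_df['date'] = pd.to_datetime(prod_df['date'])  # column name assumed
64 | prod_df = prod_df.set_index('date').sort_index()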
--------------------------------------------------------------------------------
/docs/pages/modules.rst:
--------------------------------------------------------------------------------
1 | API Documentation
2 | ==================
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | apidoc/text
8 | apidoc/text2time
9 | apidoc/timeseries
10 | apidoc/iv
11 |
--------------------------------------------------------------------------------
/docs/pages/references.rst:
--------------------------------------------------------------------------------
1 | References
2 | ==========
3 |
4 | Citing Us
5 | ---------
6 |
7 | If using this package, please cite us using the following:
8 |
9 | .. code-block:: text
10 |
11 | Bonney et al., (2023). pvOps: a Python package for empirical analysis of photovoltaic field data.
12 | Journal of Open Source Software, 8(91), 5755, https://doi.org/10.21105/joss.05755
13 |
14 | In BibTex format:
15 |
16 | .. code-block:: text
17 |
18 | @article{Bonney2023,
19 | doi = {10.21105/joss.05755},
20 | url = {https://doi.org/10.21105/joss.05755},
21 | year = {2023},
22 | publisher = {The Open Journal},
23 | volume = {8},
24 | number = {91},
25 | pages = {5755},
26 | author = {Kirk L. Bonney and Thushara Gunda and Michael W. Hopwood and Hector Mendoza and Nicole D. Jackson},
27 | title = {pvOps: a Python package for empirical analysis of photovoltaic field data},
28 | journal = {Journal of Open Source Software} }
29 |
30 |
31 | We also utilize content from other packages. See the NOTICE/ directory on our GitHub!
32 |
33 | Additionally, some of our own content comes from published papers. See the following external references.
34 |
35 | External references
36 | -------------------
37 |
38 | .. bibliography::
39 | :all:
40 |
41 |
--------------------------------------------------------------------------------
/docs/pages/releasenotes.rst:
--------------------------------------------------------------------------------
1 | .. _whatsnew:
2 |
3 | What's New
4 | ==========
5 |
6 | These are new features and improvements of note in each release.
7 |
8 | .. include:: releasenotes/0.6.1.rst
9 |
10 | .. include:: releasenotes/0.6.0.rst
11 |
12 | .. include:: releasenotes/0.5.3.rst
13 |
14 | .. include:: releasenotes/0.5.2.rst
15 |
16 | .. include:: releasenotes/0.5.1.rst
17 |
18 | .. include:: releasenotes/0.5.0.rst
19 |
20 | .. include:: releasenotes/0.4.0.rst
21 |
22 | .. include:: releasenotes/0.3.0.rst
23 |
24 | .. include:: releasenotes/0.2.0.rst
25 |
26 | .. include:: releasenotes/0.1.9.rst
27 |
28 | .. include:: releasenotes/0.1.8.rst
29 |
30 | .. include:: releasenotes/0.1.7.rst
31 |
32 | .. include:: releasenotes/beta.rst
33 |
34 | .. include:: releasenotes/alpha.rst
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.1.7.rst:
--------------------------------------------------------------------------------
1 | 0.1.7 (September 20 2021)
2 | -------------------------
3 |
4 | Updated functions for data processing (text and timeseries) analysis. Also includes IV curve functions.
5 |
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.1.8.rst:
--------------------------------------------------------------------------------
1 | 0.1.8 (Jan 14 2022)
2 | -----------------------
3 |
4 | Includes a data-derived expected energy model trained using machine learning methods. An associated example is also within the documentation.
5 |
6 | Functionality
7 | ~~~~~~~~~~~~~
8 |
9 | * Added AIT model
10 |
11 | Other
12 | ~~~~~~~~~~~~~
13 | * Add citation.cff
14 |
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.1.9.rst:
--------------------------------------------------------------------------------
1 | 0.1.9 (November 21 2022)
2 | -------------------------
3 |
4 | Includes updated documentation and fixes for dependency issues
5 |
6 |
7 | Documentation
8 | ~~~~~~~~~~~~~
9 |
10 | * Docstrings polished across the package.
11 | * Resolved documentation build errors and warnings
12 |
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.2.0.rst:
--------------------------------------------------------------------------------
1 | 0.2.0 (August 9 2023)
2 | -----------------------
3 |
4 | This release incorporates new functions and addresses deprecated commands in some of the package dependencies.
5 |
6 | Documentation
7 | ~~~~~~~~~~~~~
8 |
9 | * Doc pages "makeover" in preparation for JOSS publication
10 | * Added additional context and detail to example notebooks.
11 | * Added module guides
12 | * Added contributing pages
13 |
14 | New Features
15 | ~~~~~~~~~~~~
16 |
17 | * Added `get_attributes_from_keywords` to text.classify
18 | * Added `get_keywords_of_interest` to text.preprocess
19 | * Added `remap_words_in_text` to text.visualize
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.3.0.rst:
--------------------------------------------------------------------------------
1 | 0.3.0 (November 9 2023)
2 | -----------------------
3 |
4 | This release incorporates new functions and addresses deprecated commands in some of the package dependencies.
5 |
6 | Functionality
7 | ~~~~~~~~~~~~~~
8 |
9 | * Updated visualize_attribute_connectivity to use bipartite graph layout (updated function).
10 |
11 | * IV related dependencies moved to an installation extra (install using `pip install pvops[iv]`).
12 |
13 | * Removed deprecated normalization parameters in ML pipeline (bug fix).
14 |
15 | * Updated code to fix deprecation/future warnings.
16 |
17 | Testing
18 | ~~~~~~~~~~~~~~
19 |
20 | * Added Python 3.11 to the test environment.
21 |
22 | Documentation
23 | ~~~~~~~~~~~~~~
24 |
25 | * Fix small typos in index.rst.
26 |
27 | * Renamed references to examples as tutorials for consistency.
28 |
29 | * Updated docs to refer to modules as modules, rather than packages.
30 |
31 | * Updated RTD config to install doc requirements using the package installation extra
32 |
33 | * Removed redundant boilerplate in development.rst
34 |
35 | * Update tested versions in documentation
36 |
37 | * Added links to tutorials where appropriate in the user guide.
38 |
39 | * Added a simplified version of the module overview table from the JOSS manuscript to the homepage of the documentation.
40 |
41 | * Added statement of need to homepage
42 |
43 | * Fixed image embed in tutorial
44 |
45 | * Added dates to what's new sections
46 |
47 | * Expanded patch notes to include recent tags.
48 |
49 | * Deleted WIP docs pages to remove "not included in any toctree" errors.
50 |
51 | * Added nbsphinx gallery view to tutorials page.
52 |
53 | * Added more content to abbreviations page.
54 |
55 | Tutorials
56 | ~~~~~~~~~~~~~~
57 |
58 | * Rename pvOps examples to tutorials for consistency throughout repository.
59 |
60 | * Linked to tutorials in README.
61 |
62 | * Added a description of data in timeseries tutorial.
63 |
64 | * Removed redundant plots in timeseries tutorial.
65 |
66 | Other
67 | ~~~~~~~~~~~~~~
68 |
69 | * Added copyright and license attributes to pvops.
70 |
71 | * Removed manifest.in (not needed).
72 |
73 | * Removed docs/init.py (not a module).
74 |
75 | * Chose more appropriate author/copyright in setup.py and conf.py.
76 |
77 | * Added version to pvops (pvops.__version__ now exists).
78 |
79 | * Removed external licenses (determined to be unnecessary by legal).
80 |
81 | * Renamed citation file and updated version number.
82 |
83 | * Added noxfile for dev task running.
84 |
85 | * Removed unused docker files
86 |
87 | * Add standard python files to gitignore
88 |
89 | * Removed redundant requirements files
90 |
91 | * Pinned documentation related requirements
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.4.0.rst:
--------------------------------------------------------------------------------
1 | 0.4.0 (October 25 2024)
2 | -----------------------
3 |
4 | This release primarily addresses deprecations and future warnings related to dependencies, including a significant security vulnerability.
5 |
6 | Documentation
7 | ~~~~~~~~~~~~~
8 |
9 | * Updated README and documentation to point to the JOSS publication.
10 |
11 | Tutorials
12 | ~~~~~~~~~~~~~~
13 |
14 | * Miscellaneous fixes relevant to text2time, time, timeseries AIT module tutorials.
15 |
16 | Other
17 | ~~~~~~~~~~~~~~
18 |
19 | * Now requiring nltk>=3.9.1 and switching punkt to punkt_tab. This addresses a security vulnerability in nltk.
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.5.0.rst:
--------------------------------------------------------------------------------
1 | 0.5.0 (February 19 2025)
2 | ------------------------
3 |
4 | This release adds a new tutorial demonstrating survival analysis on PV assets.
5 |
6 | Tutorials
7 | ~~~~~~~~~~~~~~
8 |
9 | * Added a new timeseries survival analysis tutorial demonstrating Kaplan-Meier estimators and Weibull distribution fits.
10 |
11 | * Added a new example dataset to go along with the new tutorial.
12 |
13 | Other
14 | ~~~~~~~~~~~~~~
15 |
16 | * Added scikit-survival as a new dependency. Used for Kaplan-Meier estimators in the new tutorial.
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.5.1.rst:
--------------------------------------------------------------------------------
1 | 0.5.1 (February 19 2025)
2 | ------------------------
3 |
4 | This release addresses a deprecation preventing the documentation from building.
5 |
6 | Other
7 | ~~~~~~~~~~~~~~
8 |
9 | * Updated actions/upload-artifact in builddocs from v3 to v4
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.5.2.rst:
--------------------------------------------------------------------------------
1 | 0.5.2 (February 21 2025)
2 | ------------------------
3 |
4 | This release updates the documentation to reflect changes starting at v0.4.0 and fixes dependency requirements.
5 |
6 | Other
7 | ~~~~~~~~~~~~~~
8 |
9 | * Updated release notes to include changes starting at v0.4.0.
10 |
11 | * Added new survival analysis notebook to the documentation.
12 |
13 | * In v0.5.0, scikit-survival was added to requirements.txt but not setup.py. That has been resolved.
14 |
15 | * Now requiring python<3.13 for tensorflow
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.5.3.rst:
--------------------------------------------------------------------------------
1 | 0.5.3 (March 5 2025)
2 | ------------------------
3 |
4 | This release takes the existing survival analysis tutorial and formalizes parts of it
5 | into functions within the timeseries module.
6 |
7 | Functionality
8 | ~~~~~~~~~~~~~~
9 |
10 | * Created a new function in `pvops.timeseries.preprocess` that identifies right-censored data.
11 |
12 | * Created a new model under `pvops.timeseries.models` to fit survival analysis functions, namely, Kaplan-Meier and Weibull.
13 |
14 | Tutorials
15 | ~~~~~~~~~~~~~~
16 |
17 | * Simplified the survival analysis tutorial now that the main functionality is incorporated into pvOps.
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.6.0.rst:
--------------------------------------------------------------------------------
1 | 0.6.0 (March 17 2025)
2 | ------------------------
3 |
4 | This release removes the `nltk` dependency and implements analogous functionality where needed in pvops.
5 |
6 | Functionality
7 | ~~~~~~~~~~~~~~
8 |
9 | * `pvops.text.preprocess.regex_tokenize` for tokenizing text documents (replaces instances of `nltk.tokenize.word_tokenize`)
10 |
11 | Other
12 | ~~~~~~
13 |
14 | * Includes a static version of the nltk English stopwords in `stopwords.txt` under `pvops.text`
15 |
16 | * `pvops.text.nltk_utils.create_stopwords` modified to pull from this new stopwords file (breaking change: removed language argument)
17 |
18 | * `pvops.text.visualize.visualize_word_frequency_plot` functionality implemented manually rather than through `nltk`; previous calls should still work
--------------------------------------------------------------------------------
/docs/pages/releasenotes/0.6.1.rst:
--------------------------------------------------------------------------------
1 | 0.6.1 (March 17 2025)
2 | -----------------------
3 |
4 | This release makes minor documentation updates.
5 |
6 | Documentation
7 | ~~~~~~~~~~~~~
8 |
9 | * The "Overview" page now includes the new survival analysis functionality added to the timeseries module.
10 |
11 | * The release date is corrected for version 0.6.0 on the "What's New" page.
--------------------------------------------------------------------------------
/docs/pages/releasenotes/alpha.rst:
--------------------------------------------------------------------------------
1 | Alpha
2 | -----------------------
3 |
4 | The original release of pvOps consists mostly of new features.
5 |
6 | New features
7 | ~~~~~~~~~~~~
8 |
9 | * `text` module added which conducts natural language processing on Operations & Maintenance (O&M) tickets, or other text documents.
10 | * `text2time` module investigates the relationship between the production timeseries data and the O&M tickets.
11 | * `timeseries` module conducts timeseries preprocessing and modeling.
12 | * `iv` module incorporates the ability to simulate current-voltage (IV) curves under different environmental, load, and failure conditions.
13 |
14 |
15 | Documentation
16 | ~~~~~~~~~~~~~
17 |
18 | * Built original website
19 | * Add whatsnew
20 | * Add jupyter notebook embeddings
21 |
22 | Testing
23 | ~~~~~~~
24 |
25 | * Built comprehensive tests with pytest
26 | * Connected tests to automated testing pipeline
--------------------------------------------------------------------------------
/docs/pages/releasenotes/beta.rst:
--------------------------------------------------------------------------------
1 | Beta
2 | -----------------------
3 |
4 | New features and bug fixes are predominant in the beta versions.
5 |
6 | New features
7 | ~~~~~~~~~~~~
8 |
9 | * IV trace classification framework built according to literature (PR #25)
10 | * Timeseries IV simulation for highly customizable degradation of system parameters (PR #28)
11 | * Leverage pvlib solarposition package to populate content per site (PR #32)
12 | * Add coefficient-level evaluations for linear models (PR #32)
13 | * Give user ability to input own test-train splits to linear modeller (PR #32)
14 | * Remap attributes function must retain the unaltered attributes (PR #32)
15 | * Interpolate O&M data onto production data where overlaps exist (PR #32)
16 |
17 | Bug fixes
18 | ~~~~~~~~~
19 |
20 | * Basic package fixes to README (PR #27) and documentation configuration (PR #24)
21 | * Fix IV simulator bug for edge case where two IV curves added have equal I_{sc} (PR #30)
22 | * Neural network configuration referencing in 1D CNN (PR #32)
23 |
24 | Docs
25 | ~~~~
26 |
27 | * Update how to reference pvOps (PR #33)
28 |
29 | Tests
30 | ~~~~~
31 | * Removed python 3.6 test support due to https://github.com/actions/setup-python/issues/162.
32 |
--------------------------------------------------------------------------------
/docs/pages/tutorials.rst:
--------------------------------------------------------------------------------
1 | pvOps Tutorials
2 | ===============
3 |
4 | Check out the tutorials below!
5 |
6 | .. nbgallery::
7 | :caption: Text & Text2Time tutorials:
8 |
9 | tutorials/tutorial_text2time_module
10 | tutorials/tutorial_textmodule
11 |
12 | .. nbgallery::
13 | :caption: Timeseries tutorials:
14 |
15 | tutorials/tutorial_timeseries
16 | tutorials/tutorial_AIT_timeseries
17 | tutorials/tutorial_timeseries_sim
18 | tutorials/tutorial_timeseries_survival_analysis
19 |
20 | .. nbgallery::
21 | :caption: IV tutorials:
22 |
23 | tutorials/tutorial_iv_simulator
24 | tutorials/tutorial_iv_classifier
25 | tutorials/tutorial_iv_diode_extractor
26 |
--------------------------------------------------------------------------------
/docs/pages/tutorials/assets/diode_param_extractor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/docs/pages/tutorials/assets/diode_param_extractor.png
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_AIT_timeseries.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_AIT_timeseries.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_iv_classifier.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_iv_classifier.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_iv_diode_extractor.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_iv_diode_extractor.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_iv_simulator.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_iv_simulator.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_text2time_module.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_text2time_module.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_textmodule.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_textmodule.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_timeseries.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_timeseries.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_timeseries_sim.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_timeseries_sim.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/tutorials/tutorial_timeseries_survival_analysis.nblink:
--------------------------------------------------------------------------------
1 | {
2 | "path": "../../../tutorials/tutorial_timeseries_survival_analysis.ipynb"
3 | }
--------------------------------------------------------------------------------
/docs/pages/userguide.rst:
--------------------------------------------------------------------------------
1 | User Guide
2 | ==========================
3 |
4 | .. toctree::
5 | :maxdepth: 1
6 | :caption: Getting Started
7 |
8 | installation
9 |
10 |
11 | .. toctree::
12 | :maxdepth: 1
13 | :caption: Module Guides
14 |
15 | moduleguides/text
16 | moduleguides/text2time
17 | moduleguides/timeseries
18 | moduleguides/iv
19 |
20 | .. toctree::
21 | :maxdepth: 1
22 | :caption: Abbreviations
23 |
24 | abbreviations
25 |
--------------------------------------------------------------------------------
/docs/refs/pvops.bib:
--------------------------------------------------------------------------------
1 | @Article{app12041872,
2 | AUTHOR = {Hopwood, Michael W. and Gunda, Thushara},
3 | TITLE = {Generation of Data-Driven Expected Energy Models for Photovoltaic Systems},
4 | JOURNAL = {Applied Sciences},
5 | VOLUME = {12},
6 | YEAR = {2022},
7 | NUMBER = {4},
8 | ARTICLE-NUMBER = {1872},
9 | URL = {https://www.mdpi.com/2076-3417/12/4/1872},
10 | ISSN = {2076-3417},
11 | ABSTRACT = {Although unique expected energy models can be generated for a given photovoltaic (PV) site, a standardized model is also needed to facilitate performance comparisons across fleets. Current standardized expected energy models for PV work well with sparse data, but they have demonstrated significant over-estimations, which impacts accurate diagnoses of field operations and maintenance issues. This research addresses this issue by using machine learning to develop a data-driven expected energy model that can more accurately generate inferences for energy production of PV systems. Irradiance and system capacity information was used from 172 sites across the United States to train a series of models using Lasso linear regression. The trained models generally perform better than the commonly used expected energy model from international standard (IEC 61724-1), with the two highest performing models ranging in model complexity from a third-order polynomial with 10 parameters (Radj2 = 0.994) to a simpler, second-order polynomial with 4 parameters (Radj2=0.993), the latter of which is subject to further evaluation. Subsequently, the trained models provide a more robust basis for identifying potential energy anomalies for operations and maintenance activities as well as informing planning-related financial assessments. We conclude with directions for future research, such as using splines to improve model continuity and better capture systems with low (≤1000 kW DC) capacity.},
12 | DOI = {10.3390/app12041872}
13 | }
14 |
15 | @INPROCEEDINGS{9518439,
16 | author={Mendoza, Hector and Hopwood, Michael and Gunda, Thushara},
17 | booktitle={2021 IEEE 48th Photovoltaic Specialists Conference (PVSC)},
18 | title={pvOps: Improving Operational Assessments through Data Fusion},
19 | year={2021},
20 | volume={},
21 | number={},
22 | pages={0112-0119},
23 | doi={10.1109/PVSC43889.2021.9518439}
24 | }
25 |
26 | @Article{en15145085,
27 | AUTHOR = {Hopwood, Michael W. and Stein, Joshua S. and Braid, Jennifer L. and Seigneur, Hubert P.},
28 | TITLE = {Physics-Based Method for Generating Fully Synthetic IV Curve Training Datasets for Machine Learning Classification of PV Failures},
29 | JOURNAL = {Energies},
30 | VOLUME = {15},
31 | YEAR = {2022},
32 | NUMBER = {14},
33 | ARTICLE-NUMBER = {5085},
34 | URL = {https://www.mdpi.com/1996-1073/15/14/5085},
35 | ISSN = {1996-1073},
36 | ABSTRACT = {Classification machine learning models require high-quality labeled datasets for training. Among the most useful datasets for photovoltaic array fault detection and diagnosis are module or string current-voltage (IV) curves. Unfortunately, such datasets are rarely collected due to the cost of high fidelity monitoring, and the data that is available is generally not ideal, often consisting of unbalanced classes, noisy data due to environmental conditions, and few samples. In this paper, we propose an alternate approach that utilizes physics-based simulations of string-level IV curves as a fully synthetic training corpus that is independent of the test dataset. In our example, the training corpus consists of baseline (no fault), partial soiling, and cell crack system modes. The training corpus is used to train a 1D convolutional neural network (CNN) for failure classification. The approach is validated by comparing the model’s ability to classify failures detected on a real, measured IV curve testing corpus obtained from laboratory and field experiments. Results obtained using a fully synthetic training dataset achieve identical accuracy to those obtained with use of a measured training dataset. When evaluating the measured data’s test split, a 100% accuracy was found both when using simulations or measured data as the training corpus. When evaluating all of the measured data, a 96% accuracy was found when using a fully synthetic training dataset. The use of physics-based modeling results as a training corpus for failure detection and classification has many advantages for implementation as each PV system is configured differently, and it would be nearly impossible to train using labeled measured data.},
37 | DOI = {10.3390/en15145085}
38 | }
39 |
40 | @ARTICLE{9186596,
41 | author={Hopwood, Michael W. and Gunda, Thushara and Seigneur, Hubert and Walters, Joseph},
42 | journal={IEEE Access},
43 | title={Neural Network-Based Classification of String-Level IV Curves From Physically-Induced Failures of Photovoltaic Modules},
44 | year={2020},
45 | volume={8},
46 | number={},
47 | pages={161480-161487},
48 | doi={10.1109/ACCESS.2020.3021577}
49 | }
50 |
51 | @article{BISHOP198873,
52 | title = {Computer simulation of the effects of electrical mismatches in photovoltaic cell interconnection circuits},
53 | journal = {Solar Cells},
54 | volume = {25},
55 | number = {1},
56 | pages = {73-89},
57 | year = {1988},
58 | issn = {0379-6787},
59 | doi = {https://doi.org/10.1016/0379-6787(88)90059-2},
60 | url = {https://www.sciencedirect.com/science/article/pii/0379678788900592},
61 | author = {J.W. Bishop},
62 | abstract = {A Pascal program, PVNet, has been developed at the Commission of the European Communities Joint Research Centre, Ispra, to model the electrical behaviour of solar cell interconnection circuits. The program calculates three-quadrant solar cell current-voltage (I–V) curves using a lumped parameter equivalent circuit model, combines them to obtain the resultant I–V curve of any interconnection circuit, and calculates the operating point of each circuit element, set by user-defined operating conditions. The numerical values of the equivalent circuit parameters are generated by the program, and are varied so that the electrical parameters (short-circuit current, open-circuit voltage, fill factor) of calculated I–V curves show the same variations as those of measured crystalline silicon solar cell I–V curves. Equivalent circuit parameters can be changed by the user, making it possible to simulate the effects of electrical mismatches on the performance of an interconnection circuit. To illustrate the operation of the program, the electrical mechanisms leading to hot-spot heating in photovoltaic arrays are analysed. Three types of interconnection circuit are considered: a simple series string, a series-parallel block and a series connection of series-parallel blocks. The operation of parallel bypass diodes (used to limit hot-spot heating in series strings) and of series blocking diodes (used to prevent current imbalance in series-parallel circuits) are explained.}
63 | }
64 |
65 | @article{osti_1078057,
66 | title = {Weather-Corrected Performance Ratio},
67 | author = {Dierauf, T. and Growitz, A. and Kurtz, S. and Cruz, J. L. B. and Riley, E. and Hansen, C.},
68 | abstractNote = {Photovoltaic (PV) system performance depends on both the quality of the system and the weather. One simple way to communicate the system performance is to use the performance ratio (PR): the ratio of the electricity generated to the electricity that would have been generated if the plant consistently converted sunlight to electricity at the level expected from the DC nameplate rating. The annual system yield for flat-plate PV systems is estimated by the product of the annual insolation in the plane of the array, the nameplate rating of the system, and the PR, which provides an attractive way to estimate expected annual system yield. Unfortunately, the PR is, again, a function of both the PV system efficiency and the weather. If the PR is measured during the winter or during the summer, substantially different values may be obtained, making this metric insufficient to use as the basis for a performance guarantee when precise confidence intervals are required. This technical report defines a way to modify the PR calculation to neutralize biases that may be introduced by variations in the weather, while still reporting a PR that reflects the annual PR at that site given the project design and the project weather file. This resulting weather-corrected PR gives more consistent results throughout the year, enabling its use as a metric for performance guarantees while still retaining the familiarity this metric brings to the industry and the value of its use in predicting actual annual system yield. A testing protocol is also presented to illustrate the use of this new metric with the intent of providing a reference starting point for contractual content.},
69 | doi = {10.2172/1078057},
70 | url = {https://www.osti.gov/biblio/1078057},
71 | journal = {},
72 | number = {},
73 | volume = {},
74 | place = {United States},
75 | year = {2013},
76 | month = {4}
77 | }
78 |
79 | @techreport{deceglie2018rdtools,
80 | title={RdTools: an open source python library for PV degradation analysis},
81 | author={Deceglie, Michael G and Jordan, Dirk and Nag, Ambarish and Deline, Christopher A and Shinn, Adam},
82 | year={2018},
83 | institution={National Renewable Energy Lab.(NREL), Golden, CO (United States)}
84 | }
85 |
86 | @article{holmgren2018pvlib,
87 | title={pvlib python: A python package for modeling solar energy systems},
88 | author={Holmgren, William F and Hansen, Clifford W and Mikofski, Mark A},
89 | journal={Journal of Open Source Software},
90 | volume={3},
91 | number={29},
92 | pages={884},
93 | doi={10.21105/joss.00884},
94 | year={2018}
95 | }
96 |
97 | @inproceedings{pierce2020identifying,
98 | title={Identifying Degradation Modes of Photovoltaic Modules Using Unsupervised Machine Learning on Electroluminescense Images},
99 | author={Pierce, Benjamin G and Karimi, Ahmad Maroof and Liu, JiQi and French, Roger H and Braid, Jennifer L},
100 | booktitle={2020 47th IEEE Photovoltaic Specialists Conference (PVSC)},
101 | pages={1850--1855},
102 | year={2020},
103 | organization={IEEE},
104 | doi = {10.1109/PVSC45281.2020.9301021}
105 | }
106 |
107 | @techreport{klise2016performance,
108 | title={Performance Monitoring using Pecos (V. 0.1)},
109 | author={Klise, Katherine A and Stein, Joshua S},
110 | year={2016},
111 | institution={Sandia National Laboraties},
112 | doi = {10.2172/1734479}
113 | }
114 |
--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
1 | import nox
2 |
3 | @nox.session
4 | def tests(session):
5 | """Run tests."""
6 | session.install(".")
7 | session.install("pytest")
8 | session.run("pytest")
9 |
10 | @nox.session
11 | def lint(session):
12 | """Lint."""
13 | session.install("flake8", "flake8-import-order")  # --import-order-style is provided by the plugin
14 | session.run("flake8", "--import-order-style", "google")
15 |
16 | @nox.session
17 | def docs(session):
18 | """Generate documentation."""
19 | session.install(".[docs]")
20 | session.cd("docs/")
21 | session.run("make", "html", external=True)  # make lives outside the session venv
22 |
23 | @nox.session
24 | def serve(session):
25 | """Serve documentation. Port can be specified as a positional argument."""
26 | try:
27 | port = session.posargs[0]
28 | except IndexError:
29 | port = "8085"
30 | session.run("python", "-m", "http.server", "-b", "localhost", "-d", "docs/_build/html", port)
31 |
32 | @nox.session
33 | def check_style(session):
34 | """Check if code follows black style."""
35 | session.install("black")
36 | session.run("black", "--check", "pvops")  # package directory is pvops/, not src/
37 |
38 | @nox.session
39 | def enforce_style(session):
40 | """Apply black style to code base."""
41 | session.install("black")
42 | session.run("black", "pvops")  # package directory is pvops/, not src/
--------------------------------------------------------------------------------
/pvops/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from pvops import text
3 | from pvops import text2time
4 | from pvops import timeseries
5 | try:
6 | from pvops import iv
7 | except ModuleNotFoundError:
8 | # iv has optional extra dependencies; warn rather than fail on import.
9 | warnings.warn("pvops.iv not imported; install the extras with 'pip install pvops[iv]' to enable it.")
10 |
11 | __version__ = '0.6.1'
12 |
13 | __copyright__ = """Copyright 2023 National Technology & Engineering
14 | Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525
15 | with NTESS, the U.S. Government retains certain rights in this software."""
16 |
17 | __license__ = "BSD 3-Clause License"
18 |
--------------------------------------------------------------------------------
/pvops/iv/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | iv module
3 | """
4 | import pvops.iv.models
5 | import pvops.iv.extractor
6 | import pvops.iv.physics_utils
7 | import pvops.iv.preprocess
8 | import pvops.iv.simulator
9 | import pvops.iv.timeseries_simulator
10 | import pvops.iv.utils
--------------------------------------------------------------------------------
/pvops/iv/extractor.py:
--------------------------------------------------------------------------------
1 | """
2 | Derive the effective diode parameters from a set of input curves.
3 | """
4 |
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import scipy
8 | import sklearn
9 | from pvops.iv.simulator import Simulator
10 | import time
11 | from pvops.iv.physics_utils import iv_cutoff, T_to_tcell, \
12 | calculate_IVparams, smooth_curve
13 |
14 |
15 | class BruteForceExtractor():
16 | '''Process measured IV curves to extract diode parameters.
17 | Requires a set of curves to create Isc vs Irr and Voc vs Temp vs Isc(Irr)
18 |
19 | Parameters
20 | ----------
21 | input_df : DataFrame
22 | Contains IV curves with a datetime index
23 | current_col : string
24 | Indicates column where current values in IV curve are located;
25 | each cell is an array of current values in a single IV curve
26 | voltage_col : string
27 | Indicates column where voltage values in IV curve are located;
28 | each cell is an array of voltage values in a single IV curve
29 | irradiance_col : string
30 | Indicates column where irradiance values (W/m2) are located
31 | temperature_col : string
32 | Indicates column where temperature values (C) are located
33 | T_type : string
34 | Describe input temperature, either 'ambient' or 'module' or 'cell'
35 | '''
36 |
37 | def __init__(
38 | self,
39 | input_df,
40 | current_col,
41 | voltage_col,
42 | irradiance_col,
43 | temperature_col,
44 | T_type,
45 | windspeed_col=None,
46 | Simulator_mod_specs=None,
47 | Simulator_pristine_condition=None):
48 |
49 | self.Simulator_mod_specs = Simulator_mod_specs
50 | self.Simulator_pristine_condition = Simulator_pristine_condition
51 |
52 | self.tstamps = input_df.index.tolist()
53 | self.Is = input_df[current_col].tolist()
54 | self.Vs = input_df[voltage_col].tolist()
55 | self.Irrs = input_df[irradiance_col].tolist()
56 | self.Temps = input_df[temperature_col].tolist()
57 | self.T_type = T_type
58 | self.Tcs = []
59 |
60 | if self.T_type == 'ambient' and windspeed_col is None:
61 | raise Exception(
62 | "Wind speed must be specified if passing ambient temperature so that the cell temperature can be derived.")
63 |
64 | if windspeed_col is not None:
65 | self.WSs = input_df[windspeed_col].tolist()
66 | if self.T_type == 'ambient':
67 | for irr, temp, ws in zip(self.Irrs, self.Temps, self.WSs):
68 | Tc = T_to_tcell(irr, temp, ws, self.T_type)
69 | self.Tcs.append(Tc)
70 |
71 | if self.T_type == 'module':
72 | for irr, temp in zip(self.Irrs, self.Temps):
73 | Tc = T_to_tcell(irr, temp, [], self.T_type)
74 | self.Tcs.append(Tc)
75 |
76 | self.measured_info = []
77 | for i in range(len(self.Is)):
78 | Varray = self.Vs[i]
79 | Iarray = self.Is[i]
80 | Irr = self.Irrs[i]
81 | T = self.Temps[i]
82 | self.measured_info.append({"V": Varray, "I": Iarray, "E": Irr, "T": T})
83 |
84 | self.n_samples = len(input_df.index)
85 |
86 | self.params = {}
87 |
88 | def create_string_object(self, iph, io, rs, rsh, nnsvth):
89 | """Create a Simulator with the given five diode parameters applied as replacement_5params, add each measured condition, and simulate the module- or string-level IV curves."""
90 | kwargs = {}
91 | if self.Simulator_mod_specs is not None:
92 | kwargs.update({'mod_specs': self.Simulator_mod_specs})
93 | if self.Simulator_pristine_condition is not None:
94 | kwargs.update(
95 | {'pristine_condition': self.Simulator_pristine_condition})
96 | kwargs.update({'replacement_5params': {'I_L_ref': iph,
97 | 'I_o_ref': io,
98 | 'R_s': rs,
99 | 'R_sh_ref': rsh,
100 | 'a_ref': nnsvth}
101 | })
102 |
103 | sim = Simulator(**kwargs)
104 |
105 | # set new defaults
106 | for sample_i, sample in enumerate(self.measured_info):
107 |
108 | condition = {'identifier': f'case_{self.counter}_{sample_i}',
109 | 'E': sample['E'],
110 | 'Tc': sample['T']
111 | }
112 |
113 | sim.add_preset_conditions(
114 | 'complete', condition, save_name=f'mod_case_{self.counter}_{sample_i}')
115 |
116 | if isinstance(self.n_mods, int):
117 | if self.n_mods > 1:
118 | sim.build_strings({f'str_case_{self.counter}_{sample_i}': [
119 | f'mod_case_{self.counter}_{sample_i}'] * self.n_mods})
120 |
121 | elif self.n_mods != 1:
122 | raise Exception(
123 | f"Input a valid number of modules, n_mods. You inputted {self.n_mods}")
124 | # elif isinstance(self.n_mods, (tuple, list, np.ndarray)):
125 | # sim.build_strings({f'str_case_{self.counter}_{sample_i}': [
126 | # f'mod_case_{self.counter}_{sample_i}']*self.n_mods[0] + ['pristine'] * (self.n_mods[1]-self.n_mods[0])})
127 | else:
128 | raise ValueError(
129 | f"Expected n_mods to be an integer. Got: {type(self.n_mods)}")
130 |
131 | start_t = time.time()
132 | sim.simulate()
133 |
134 | if self.verbose >= 2:
135 | print(
136 | f'\tSimulations completed after {round(time.time()-start_t,2)} seconds')
137 |
138 | return sim
139 |
140 | def f_multiple_samples(self, params):
141 | """Objective function: total mean-squared error between the measured and simulated currents for the given five diode parameters."""
142 | iph, io, rs, rsh, nnsvth = params
143 |
144 | if self.user_func is None:
145 | sim = self.create_string_object(iph, io, rs, rsh, nnsvth)  # bound method; do not pass self explicitly
146 | else:
147 | sim = self.user_func(self, iph, io, rs, rsh, nnsvth)
148 |
149 | msse_tot = 0
150 |
151 | if self.verbose >= 2:
152 | perc_diff = 100 * \
153 | (np.array(params) - np.array(self.start_conds)) / \
154 | np.array(self.start_conds)
155 |
156 | meas_Iscs = []
157 | meas_Vocs = []
158 | meas_Pmps = []
159 | sim_Iscs = []
160 | sim_Vocs = []
161 | sim_Pmps = []
162 |
163 | for sample_i, sample in enumerate(self.measured_info):
164 |
165 | if self.n_mods > 1:
166 | Varr = sim.multilevel_ivdata['string'][f'str_case_{self.counter}_{sample_i}']['V'][0]
167 | Iarr = sim.multilevel_ivdata['string'][f'str_case_{self.counter}_{sample_i}']['I'][0]
168 | elif self.n_mods == 1:
169 | Varr = sim.multilevel_ivdata['module'][f'mod_case_{self.counter}_{sample_i}']['V'][0]
170 | Iarr = sim.multilevel_ivdata['module'][f'mod_case_{self.counter}_{sample_i}']['I'][0]
171 |
172 | # resample to same voltage domain as measured
173 | simI_interp = np.interp(sample['V'], Varr, Iarr)
174 |
175 | msse = sklearn.metrics.mean_squared_error(sample['I'], simI_interp)
176 | msse_tot += msse
177 |
178 | if self.verbose >= 2:
179 |
180 | Vco, Ico = iv_cutoff(Varr, Iarr, 0)
181 | sim_params = calculate_IVparams(Vco, Ico)
182 | meas_params = calculate_IVparams(sample['V'], sample['I'])
183 |
184 | meas_Iscs.append(meas_params['isc'])
185 | meas_Vocs.append(meas_params['voc'])
186 | meas_Pmps.append(meas_params['pmp'])
187 | sim_Iscs.append(sim_params['isc'])
188 | sim_Vocs.append(sim_params['voc'])
189 | sim_Pmps.append(sim_params['pmp'])
190 |
191 | if self.verbose >= 2:
192 |
193 | minpmps_m = min(min(meas_Pmps), min(sim_Pmps))
194 | maxpmps_m = max(max(meas_Pmps), max(sim_Pmps))
195 | plt.plot(meas_Pmps, sim_Pmps, 'go')
196 | plt.plot(list(range(int(minpmps_m - 10), int(maxpmps_m + 10 + 1))),
197 | list(range(int(minpmps_m - 10), int(maxpmps_m + 10 + 1))), 'b--')
198 | plt.title('Measured v. Simulated Pmpp')
199 | plt.xlabel('Measured (W)')
200 | plt.ylabel('Simulated (W)')
201 | plt.xlim(minpmps_m - 5, maxpmps_m + 5)
202 | plt.ylim(minpmps_m - 5, maxpmps_m + 5)
203 | plt.show()
204 |
205 | minvocs_m = min(min(meas_Vocs), min(sim_Vocs))
206 | maxvocs_m = max(max(meas_Vocs), max(sim_Vocs))
207 | plt.plot(meas_Vocs, sim_Vocs, 'ro')
208 | plt.plot(list(range(int(minvocs_m - 10), int(maxvocs_m + 10 + 1))),
209 | list(range(int(minvocs_m - 10), int(maxvocs_m + 10 + 1))), 'b--')
210 | plt.title('Measured v. Simulated Voc')
211 | plt.xlabel('Measured (V)')
212 | plt.ylabel('Simulated (V)')
213 | plt.xlim(minvocs_m - 5, maxvocs_m + 5)
214 | plt.ylim(minvocs_m - 5, maxvocs_m + 5)
215 | plt.show()
216 |
217 | miniscs_m = min(min(meas_Iscs), min(sim_Iscs))
218 | maxiscs_m = max(max(meas_Iscs), max(sim_Iscs))
219 | plt.plot(meas_Iscs, sim_Iscs, 'ko')
220 | plt.plot(list(range(int(miniscs_m - 0.5), int(maxiscs_m + 0.5 + 2))),
221 | list(range(int(miniscs_m - 0.5), int(maxiscs_m + 0.5 + 2))), 'b--')
222 | plt.title('Measured v. Simulated Isc')
223 | plt.xlabel('Measured (A)')
224 | plt.ylabel('Simulated (A)')
225 | plt.xlim(miniscs_m - 0.5, maxiscs_m + 0.5)
226 | plt.ylim(miniscs_m - 0.5, maxiscs_m + 0.5)
227 | plt.show()
228 |
229 | plt.plot(sample['V'], simI_interp, 'r', label='Simulated')
230 | plt.title("SIMULATED")
231 | plt.show()
232 |
233 | plt.plot(sample['V'], simI_interp, 'r', label='Simulated')
234 | plt.plot(sample['V'], sample['I'], 'k', label='Measured')
235 | plt.legend()
236 | plt.xlabel('Voltage (V)')
237 | plt.ylabel('Current (A)')
238 | plt.title(
239 | f'One example: case {self.counter} with % Diff.: {perc_diff}')
240 | plt.show()
241 |
242 | print('Params used in ^ iteration: ', params)
243 |
244 | self.counter += 1
245 | self.msses.append(msse_tot)
246 | return msse_tot
247 |
248 | def fit_params(self, cell_parameters, n_mods, bounds_func, user_func=None, verbose=0):
249 | """
250 | Fit diode parameters from a set of IV curves.
251 |
252 | Parameters
253 | ----------
254 | cell_parameters : dict
255 | Cell-level parameters, usually extracted from the CEC
256 | database, which will be used as the
257 | initial guesses in the optimization process.
258 | n_mods : int
259 | if int, defines the number of modules in a
260 | string (1 = simulate a single module)
261 | bounds_func : function
262 | Function to establish the bounded search space
263 | See below for an example:
264 |
265 | .. code-block:: python
266 |
267 | def bounds_func(iph,io,rs,rsh,nnsvth,perc_adjust=0.5):
268 | return ((iph - 0.5*iph*perc_adjust, iph + 2*iph*perc_adjust),
269 | (io - 40*io*perc_adjust, io + 40*io*perc_adjust),
270 | (rs - 20*rs*perc_adjust, rs + 20*rs*perc_adjust),
271 | (rsh - 150*rsh*perc_adjust, rsh + 150*rsh*perc_adjust),
272 | (nnsvth - 10*nnsvth*perc_adjust, nnsvth + 10*nnsvth*perc_adjust))
273 |
274 | user_func : function
275 | Optional, a function similar to `self.create_string_object`
276 | which has the following inputs:
277 | `self, iph, io, rs, rsh, nnsvth`. This can be used to
278 | extract unique failure parameterization.
279 | verbose : int
280 | if verbose >= 1, print information about fitting;
281 | if verbose >= 2, also plot information about each iteration
282 | """
283 |
284 | self.user_func = user_func
285 | self.verbose = verbose
286 | self.n_mods = n_mods
287 | self.g = 1000  # reference irradiance (W/m2)
288 | self.t = 25  # reference cell temperature (C)
289 |
290 | self.cell_parameters = cell_parameters
291 |
292 | self.counter = 0
293 | self.msses = []
294 |
295 | iph = cell_parameters['I_L_ref']
296 | io = cell_parameters['I_o_ref']
297 | rs = cell_parameters['R_s']
298 | rsh = cell_parameters['R_sh_ref']
299 | nnsvth = cell_parameters['a_ref']
300 |
301 | self.start_conds = (iph, io, rs, rsh, nnsvth)
302 |
303 | bounds = bounds_func(*self.start_conds)
304 |
305 | if self.verbose >= 1:
306 | print('Given 5params:', iph, io, rs, rsh, nnsvth)
307 | converged_solution = scipy.optimize.minimize(self.f_multiple_samples,
308 | (iph, io, rs, rsh, nnsvth),
309 | bounds=bounds,
310 | method='TNC')
311 |
312 | if self.verbose >= 1:
313 | print('bounds', bounds)
314 | print('initial: ', (iph, io, rs, rsh, nnsvth))
315 | print('solution: ', converged_solution)
316 |
317 | return converged_solution['x']
318 |
--------------------------------------------------------------------------------
/pvops/iv/models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | """
3 |
4 | from pvops.iv.models.nn import get_diff_array, feature_generation, \
5 | balance_df, plot_profiles, classify_curves, IVClassifier
--------------------------------------------------------------------------------
/pvops/iv/preprocess.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from pvops.iv.physics_utils import gt_correction
4 |
5 |
6 | def preprocess(input_df, resmpl_resolution, iv_col_dict, resmpl_cutoff=0.03,
7 | correct_gt=False, normalize_y=True, CECmodule_parameters=None,
8 | n_mods=None, gt_correct_option=3):
9 | """IV processing function which supports irradiance & temperature correction
10 |
11 | Parameters
12 | ----------
13 | input_df : DataFrame
14 | Contains the IV curves and the associated irradiance and temperature data
15 | resmpl_resolution : float
16 | Step size of the static (normalized) voltage domain used for resampling
17 | iv_col_dict : dict
18 | Maps standard keys ('current', 'voltage', 'power', 'mode', 'irradiance', 'temperature') to the column names in input_df
19 | resmpl_cutoff : float
20 | Lower cutoff of the normalized voltage domain
21 | correct_gt : bool
22 | If True, correct the curves for irradiance and temperature via gt_correction
23 | normalize_y : bool
24 | If True, normalize each curve's current by its maximum (short-circuit) value
25 | CECmodule_parameters : dict or None
26 | CEC module parameters; required when correct_gt is True
27 | n_mods : int
28 | Number of modules; required when correct_gt is True
29 | gt_correct_option : int
30 | Correction option passed to gt_correction
22 |
23 | Returns
24 | -------
25 | df : DataFrame
26 | """
27 |
28 | current_col = iv_col_dict["current"]
29 | voltage_col = iv_col_dict["voltage"]
30 | power_col = iv_col_dict["power"]
31 | failure_mode_col = iv_col_dict["mode"]
32 | irradiance_col = iv_col_dict["irradiance"]
33 | temperature_col = iv_col_dict["temperature"]
34 |
35 | # Correct for irradiance and temperature
36 | if correct_gt:
37 | Vs, Is = [], []
38 | for ind, row in input_df.iterrows():
39 | if CECmodule_parameters is None or n_mods is None:
40 | raise ValueError(
41 | "You must specify CECmodule_parameters and n_mods if you want to correct the IV curves for irradiance and temperature.")
42 | Vt, It = gt_correction(row[voltage_col], row[current_col], row[irradiance_col], row[temperature_col],
43 | cecparams=CECmodule_parameters, n_units=n_mods, option=gt_correct_option)
44 | Vs.append(Vt)
45 | Is.append(It)
46 | else:
47 | Is = input_df[current_col].tolist()
48 | Vs = input_df[voltage_col].tolist()
49 |
50 | v_interps = np.arange(
51 | resmpl_cutoff, 1, resmpl_resolution)
52 | v_interps = np.append(v_interps, 1.0)
53 |
54 | procVs = []
55 | procIs = []
56 | # Resample IV curve to static voltage domain
57 | for iii in range(len(Vs)):
58 | Voc = max(Vs[iii])
59 | Vnorm = Vs[iii] / Voc
60 | procVs.append(v_interps)
61 | interpolated_I = np.interp(v_interps, Vnorm, Is[iii])
62 |
63 | if normalize_y:
64 | isc_iter = interpolated_I.max()
65 | procIs.append(interpolated_I / isc_iter)
66 |
67 | else:
68 | procIs.append(interpolated_I)
69 |
70 | df = pd.DataFrame()
71 | df[failure_mode_col] = input_df[failure_mode_col]
72 |
73 | procIs = np.array(procIs)
74 | procVs = np.array(procVs)
75 | procPs = procIs * procVs
76 |
77 | df[current_col] = list(procIs)
78 | df[voltage_col] = list(procVs)
79 | df[power_col] = list(procPs)
80 | df[irradiance_col] = input_df[irradiance_col].tolist()
81 | df[temperature_col] = input_df[temperature_col].tolist()
82 |
83 | return df
84 |
--------------------------------------------------------------------------------
/pvops/iv/timeseries_simulator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from datetime import timedelta
4 | from pvops.iv.simulator import Simulator
5 |
6 |
7 | class IVTimeseriesGenerator(Simulator):
8 |
9 | def __init__(self, **iv_sim_kwargs):
10 | """Simulate a PV System across time.
11 |
12 | Parameters
13 | ----------
14 | iv_sim_kwargs :
15 | Optional, `simulator.Simulator` inputs
16 | """
17 | super().__init__(**iv_sim_kwargs)
18 |
19 | def generate(self, env_df, failures, iv_col_dict,
20 | identifier_col, plot_trends=False):
21 | """Simulate a PV system
22 |
23 | Parameters
24 | ----------
25 | env_df : dataframe
26 | DataFrame containing irradiance ("E") and temperature ("T") columns
27 | failures : list
28 | List of timeseries_simulator.TimeseriesFailure objects
29 | iv_col_dict : dict
30 | Maps 'irradiance' and 'temperature' to the column names in env_df
31 | identifier_col : str
32 | Column of env_df used to identify each simulated condition
33 | plot_trends : bool
34 | If True, plot the failure trends as they are interpolated
35 | """
30 |
31 | self.specs_df = env_df[[
32 | identifier_col, iv_col_dict["irradiance"],
33 | iv_col_dict["temperature"]]].copy()
34 | for failure in failures:
35 | # Weigh all failure definitions together
36 | self.specs_df = failure.add_interpolation(
37 | self.specs_df, plot_trends)
38 |
39 | self.timeseries_condition_dicts = self._structure_Simulator_inputs(
40 | self.specs_df, iv_col_dict, identifier_col)
41 | return self.timeseries_condition_dicts
42 |
43 | def add_time_conditions(self, preset_mod_mapping, nmods=12):
44 | for condition_dict in self.timeseries_condition_dicts:
45 | self.add_preset_conditions(preset_mod_mapping, condition_dict,
46 | save_name=f"mod_{condition_dict['identifier']}")
47 | self.build_strings({f"str_{condition_dict['identifier']}":
48 | [f"mod_{condition_dict['identifier']}"] * nmods})
49 |
50 | def _structure_Simulator_inputs(self, specs_df,
51 | iv_col_dict, identifier_col):
52 | keys = []
53 | savekeys = []
54 | spec_df_cols = specs_df.columns
55 | for key in ['identifier'] + self.acceptible_keys:
56 | if key == 'identifier':
57 | savekey = identifier_col
58 | elif key == 'E':
59 | savekey = iv_col_dict['irradiance']
60 | elif key == 'Tc':
61 | savekey = iv_col_dict['temperature']
62 | else:
63 | savekey = key
64 | if savekey in spec_df_cols:
65 | keys.append(key)
66 | savekeys.append(savekey)
67 |
68 | return [dict(zip(keys, vals))
69 | for vals in specs_df[savekeys].values]
70 |
71 |
72 | class TimeseriesFailure:
73 | def __init__(self):
74 |         """Define a failure in terms of the affected diode
75 |         parameters and specify how the failure evolves over
76 |         time (i.e., how quickly it intensifies, how quickly it is
77 |         detected, and how quickly it is fixed).
78 |         """
79 | self.longterm_fcn_dict = {}
80 | self.annual_fcn_dict = {}
81 | self.daily_fcn_dict = {}
82 |
83 | def trend(self, longterm_fcn_dict=None,
84 | annual_fcn_dict=None,
85 | daily_fcn_dict=None,
86 | **kwargs):
87 | """Define a failure's trend across intraday (trending
88 | with time of day) and longterm timeframes.
89 |
90 | Parameters
91 | ----------
92 | longterm_fcn_dict : dict
93 | A dictionary where keys are the diode-multipliers in IVSimulator
94 | ('Rsh_mult', 'Rs_mult', 'Io_mult', 'Il_mult', 'nnsvth_mult') and
95 | values are either a function or a string. If a function, the
96 | function should be a mathematical operation as a `function of the
97 | number of float years since operation start`, a value on domain
98 | [0,inf), and outputs the chosen diode-multiplier's values across
99 | this timeseries. If a string, must use a pre-defined definition:
100 |
101 | * 'degrade' : degrade over time at specified rate.
102 | Specify rate by passing a definition for
103 | `degradation_rate`
104 |
105 | For example,
106 |
107 | .. code-block:: python
108 |
109 | # 2 Ways of Doing Same Thing
110 |
111 | # Method 1
112 | longterm_fcn_dict = {
113 | 'Rs_mult': lambda x : 1.005 * x
114 | }
115 |                 f = TimeseriesFailure()
116 | f.trend(longterm_fcn_dict)
117 |
118 | # Method 2
119 | longterm_fcn_dict = {
120 | 'Rs_mult': 'degrade'
121 | }
122 |                 f = TimeseriesFailure()
123 | f.trend(longterm_fcn_dict,
124 | degradation_rate=1.005)
125 |
126 | annual_fcn_dict : dict
127 | A dictionary where keys are the diode-multipliers in IVSimulator
128 | ('Rsh_mult', 'Rs_mult', 'Io_mult', 'Il_mult', 'nnsvth_mult') and
129 | values are either a function or a string. If a function, the
130 | function should be a mathematical operation as a `function of the
131 | percentage through this year`, a value on domain [0,1], and outputs
132 |             the chosen diode-multiplier's values across this timeseries. If a
133 |             string, must use a pre-defined definition (none currently implemented).
134 |
135 |         daily_fcn_dict : dict
136 | A dictionary where keys are the diode-multipliers in IVSimulator
137 | ('Rsh_mult', 'Rs_mult', 'Io_mult', 'Il_mult', 'nnsvth_mult') and
138 | values are either a function or a string. If a function, the
139 | function should be a mathematical operation as a `function of the
140 | percentage through this day`, a value on domain [0,1], and outputs
141 |             the chosen diode-multiplier's values across this timeseries. If a
142 |             string, must use a pre-defined definition (none currently implemented).
143 |         """
144 |
145 |         if longterm_fcn_dict is not None:
146 |             self.longterm_fcn_dict = longterm_fcn_dict
147 |
148 |             for param, fcn in longterm_fcn_dict.items():
149 |                 if isinstance(fcn, str):
150 |                     self._predefined_trend(param, longterm_fcn=fcn, **kwargs)
151 |
152 |         if annual_fcn_dict is not None:
153 |             self.annual_fcn_dict = annual_fcn_dict
154 |
155 |             for param, fcn in annual_fcn_dict.items():
156 |                 if isinstance(fcn, str):
157 |                     self._predefined_trend(param, annual_fcn=fcn, **kwargs)
158 |
159 |         if daily_fcn_dict is not None:
160 |             self.daily_fcn_dict = daily_fcn_dict
161 |
162 |             for param, fcn in daily_fcn_dict.items():
163 |                 if isinstance(fcn, str):
164 |                     self._predefined_trend(param, daily_fcn=fcn, **kwargs)
165 |
166 | def _predefined_trend(self, param, longterm_fcn='degrade',
167 | annual_fcn='', daily_fcn='uniform',
168 | **kwargs):
169 |
170 | if longterm_fcn == 'degrade':
171 |             try:
172 |                 degr_rate = kwargs['degradation_rate']
173 |             except KeyError:
174 |                 raise KeyError("TimeseriesFailure.trend requires a "
175 |                                "passed parameter `degradation_rate` "
176 |                                "if using the `degrade` longterm_fcn definition.")
177 | self.longterm_fcn_dict[param] = lambda x: degr_rate * x
178 |
179 | def _combine(self, arr, specs_df, param):
180 | if param not in specs_df.columns:
181 | specs_df[param] = np.ones(len(specs_df))
182 |
183 | if param in ["Rsh_mult", "Io_mult", "Il_mult"]:
184 | specs_df[param] -= arr
185 |
186 | elif param in ["Rs_mult", "nnsvth_mult"]:
187 | specs_df[param] += arr
188 |
189 | def add_interpolation(self, specs_df, plot_trends=False):
190 | """Add failure properties to specs_df
191 | """
192 |
193 | # Degradation since start
194 | float_years = np.array(
195 | (specs_df.index - specs_df.index[0]) / timedelta(days=365.25))
196 | for param, fcn in self.longterm_fcn_dict.items():
197 | vals = fcn(float_years)
198 | self._combine(vals, specs_df, param)
199 | if plot_trends:
200 | plt.plot(specs_df.index, vals, 'o--', alpha=0.8, label=param)
201 | if plot_trends:
202 | if len(self.longterm_fcn_dict.keys()):
203 | plt.legend()
204 | plt.title("Longterm")
205 | plt.show()
206 |
207 | # Degradation cyclic per year
208 | pct_of_year = np.array(specs_df.index.dayofyear) / 365
209 | for param, fcn in self.annual_fcn_dict.items():
210 | vals = fcn(pct_of_year)
211 | self._combine(vals, specs_df, param)
212 | if plot_trends:
213 | plt.plot(specs_df.index, vals, 'o--', alpha=0.8, label=param)
214 | if plot_trends:
215 | if len(self.annual_fcn_dict.keys()):
216 | plt.legend()
217 | plt.title("Annual")
218 | plt.show()
219 |
220 | # Degradation per day
221 | pct_of_day = np.array(specs_df.index.hour) / 24
222 | for param, fcn in self.daily_fcn_dict.items():
223 | vals = fcn(pct_of_day)
224 | self._combine(vals, specs_df, param)
225 | if plot_trends:
226 | plt.plot(specs_df.index, vals, 'o--', alpha=0.8, label=param)
227 | if plot_trends:
228 |             if len(self.daily_fcn_dict.keys()):
229 | plt.legend()
230 | plt.title("Daily")
231 | plt.show()
232 |
233 | return specs_df
234 |
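A condensed usage sketch of the two classes above (the hourly data and column names are fabricated for illustration, and the Simulator defaults are assumed; ``test_timeseries_simulator`` in ``pvops/tests/test_iv.py`` exercises the same flow on real covariate data):

.. code-block:: python

    import numpy as np
    import pandas as pd
    from pvops.iv.timeseries_simulator import (IVTimeseriesGenerator,
                                               TimeseriesFailure)

    env_df = pd.DataFrame({'E': np.full(24, 800.0), 'T': np.full(24, 25.0)},
                          index=pd.date_range('2021-01-01', periods=24,
                                              freq='h'))
    env_df['identifier'] = env_df.index.strftime("%Y-%m-%d %H:%M:%S")

    # Series resistance multiplier grows at the specified rate per year
    failure = TimeseriesFailure()
    failure.trend(longterm_fcn_dict={'Rs_mult': 'degrade'},
                  degradation_rate=1.005)

    iv_col_dict = {'irradiance': 'E', 'temperature': 'T'}
    gen = IVTimeseriesGenerator()
    condition_dicts = gen.generate(env_df, [failure], iv_col_dict,
                                   'identifier')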
--------------------------------------------------------------------------------
/pvops/iv/utils.py:
--------------------------------------------------------------------------------
1 | import pvlib
2 | import copy
3 |
4 |
5 | def get_CEC_params(name, mod_spec):
6 | '''Query module-level parameters from CEC database and
7 | derive cell-level parameters.
8 |
9 |     Uses ``pvlib.pvsystem.retrieve_sam('CECMod')`` to query the database.
10 |
11 | Parameters
12 | ----------
13 | name : string
14 | Representing module name in CEC database
15 |
16 |     mod_spec : dict
17 |         Dictionary providing 'ncols' and 'nsubstrings'
18 |
19 | Returns
20 | -------
21 | module_parameters (dict), cell_parameters (dict)
22 | '''
23 |
24 | moddb = pvlib.pvsystem.retrieve_sam('CECMod')
25 | module_parameters = moddb[name].to_dict()
26 |
27 | # add reverse bias parameters
28 | module_parameters['breakdown_factor'] = 1.e-4
29 | module_parameters['breakdown_voltage'] = -30. # -5.5
30 | module_parameters['breakdown_exp'] = 3.28
31 | module_parameters['ncols'] = mod_spec['ncols']
32 | module_parameters['nsubstrings'] = mod_spec['nsubstrings']
33 | module_parameters['ncells_substring'] = module_parameters['N_s'] / \
34 | mod_spec['nsubstrings']
35 | module_parameters['nrows'] = module_parameters['N_s'] / \
36 | module_parameters['ncols']
37 | # module_parameters['R_sh_ref'] *= rsh_premultiply # What should this value be? Dynamic.
38 | # TODO: Adjust Io smaller
39 |
40 | # set up cell-level parameters
41 | cell_parameters = copy.copy(module_parameters)
42 | cell_parameters['a_ref'] = module_parameters['a_ref'] / \
43 | module_parameters['N_s']
44 | cell_parameters['R_sh_ref'] = module_parameters['R_sh_ref'] / \
45 | module_parameters['N_s']
46 | cell_parameters['R_s'] = module_parameters['R_s'] / \
47 | module_parameters['N_s']
48 | return module_parameters, cell_parameters
49 |
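A brief usage sketch (the module name is illustrative and must match a key in the CEC database; the geometry values are assumptions about the module layout):

.. code-block:: python

    from pvops.iv.utils import get_CEC_params

    module_params, cell_params = get_CEC_params(
        'Canadian_Solar_Inc__CS5P_220M',        # example CEC database key
        mod_spec={'ncols': 6, 'nsubstrings': 3})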
--------------------------------------------------------------------------------
/pvops/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/__init__.py
--------------------------------------------------------------------------------
/pvops/tests/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/conftest.py
--------------------------------------------------------------------------------
/pvops/tests/om_data_update_pick.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/om_data_update_pick.pkl
--------------------------------------------------------------------------------
/pvops/tests/om_summ_pick.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/om_summ_pick.pkl
--------------------------------------------------------------------------------
/pvops/tests/prod_data_clean_iec_pick.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/prod_data_clean_iec_pick.pkl
--------------------------------------------------------------------------------
/pvops/tests/prod_data_quant_pick.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/prod_data_quant_pick.pkl
--------------------------------------------------------------------------------
/pvops/tests/prod_summ_pick.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/pvops/tests/prod_summ_pick.pkl
--------------------------------------------------------------------------------
/pvops/tests/test_iv.py:
--------------------------------------------------------------------------------
1 | import random
2 | import os
3 | import pandas as pd
4 | import numpy as np
5 | import pvops.iv.timeseries_simulator
6 | import pvops.iv.preprocess
7 | import pvops.iv.simulator
8 | from pvops.iv.models import nn
9 |
10 | datadir = os.path.join('tutorials', 'example_data')
11 | example_prodpath = os.path.join(
12 | datadir, 'example_prod_with_covariates.csv')
13 |
14 |
15 | def test_simulation():
16 | random.seed(0)
17 |
18 | sim = pvops.iv.simulator.Simulator()
19 |
20 | # test adding presets
21 | heavy_shading = {'identifier': 'heavy_shade',
22 | 'E': 400,
23 | 'Tc': 20}
24 | light_shading = {'identifier': 'light_shade',
25 | 'E': 800}
26 | sim.add_preset_conditions('landscape', heavy_shading, rows_aff=2)
27 | sim.add_preset_conditions('portrait', heavy_shading, cols_aff=2)
28 | sim.add_preset_conditions('pole', heavy_shading,
29 | light_shading=light_shading,
30 | width=2, pos=None)
31 |
32 | # test adding manuals
33 | # Using 2D list (aka, multiple conditions as input)
34 | modcells = {'another_example': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 | 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
37 | 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
38 | 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
40 | [1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 | 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
43 | 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
44 | 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
45 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
46 | }
47 | condition_dict = {0: {},
48 | 1: {'identifier': 'heavy_shade',
49 | 'E': 405,
50 | }
51 | }
52 | sim.add_manual_conditions(modcells, condition_dict)
53 |
54 | # test generate many samples
55 | N = 2
56 | dicts = {'E': {'mean': 400,
57 | 'std': 500,
58 | 'low': 200,
59 | 'upp': 600
60 | },
61 | 'Tc': {'mean': 30,
62 | 'std': 10,
63 | }
64 | }
65 | sim.generate_many_samples('heavy_shade', N, dicts)
66 | dicts = {'E': {'mean': 800,
67 | 'std': 500,
68 | 'low': 600,
69 | 'upp': 1000
70 | }
71 | }
72 | sim.generate_many_samples('light_shade', N, distributions=dicts)
73 |
74 | # test building strings
75 | sim.build_strings({'pole_bottom_mods': ['pristine', 'pristine', 'pristine',
76 | 'pristine', 'pristine', 'pristine',
77 | 'pole_2width', 'pole_2width',
78 | 'pole_2width', 'pole_2width',
79 | 'pole_2width', 'pole_2width'],
80 | 'portrait_2cols_3bottom_mods': ['pristine', 'pristine',
81 | 'pristine', 'pristine',
82 | 'pristine', 'pristine',
83 | 'pristine', 'pristine',
84 | 'pristine',
85 | 'portrait_2cols',
86 | 'portrait_2cols',
87 | 'portrait_2cols']})
88 |
89 | # test simulating
90 | sim.simulate()
91 |
92 | df = sim.sims_to_df(focus=['string', 'module'], cutoff=True)
93 |
94 | n_str_samples = 16
95 | n_mod_samples = 29
96 |
97 | assert len(df[df['level'] == 'string']) == n_str_samples
98 | assert len(df[df['level'] == 'module']) == n_mod_samples
99 |
100 |
101 | def test_classification():
102 |
103 | sim = pvops.iv.simulator.Simulator()
104 |
105 | condition = {'identifier': 'shade', 'Il_mult': 0.6}
106 | sim.add_preset_conditions('complete', condition,
107 | save_name='Complete_shading')
108 | dicts = {'Il_mult': {'mean': 0.6,
109 | 'std': 0.7,
110 | 'low': 0.33,
111 | 'upp': 0.95,
112 | }
113 | }
114 | sim.generate_many_samples('shade', 100, dicts)
115 |
116 | sim.build_strings({'Pristine array': ['pristine'] * 12,
117 | 'Partial Soiling (1M)': ['pristine'] * 11 +
118 | ['Complete_shading'] * 1,
119 | 'Partial Soiling (6M)': ['pristine'] * 6 +
120 | ['Complete_shading'] * 6
121 | }
122 | )
123 |
124 | sim.simulate()
125 | df = sim.sims_to_df(focus=['string'], cutoff=True)
126 |
127 | iv_col_dict = {
128 | "mode": "mode",
129 | "current": "current", # Populated in simulator
130 | "voltage": "voltage", # Populated in simulator
131 | "irradiance": "E", # Populated in simulator
132 | "temperature": "T", # Populated in simulator
133 | "power": "power", # Populated in preprocess
134 | "derivative": "derivative", # Populated in feature_generation
135 | "current_diff": "current_diff", # Populated in feature_generation
136 | }
137 |
138 | # Irradiance & Temperature correction, and normalize axes
139 | prep_df = pvops.iv.preprocess.preprocess(df, 0.05, iv_col_dict,
140 | resmpl_cutoff=0.03, correct_gt=True,
141 | normalize_y=False,
142 | CECmodule_parameters=sim.module_parameters,
143 | n_mods=12, gt_correct_option=3)
144 | # Shuffle
145 | bigdf = prep_df.sample(frac=1).reset_index(drop=True)
146 | bigdf.dropna(inplace=True)
147 |
148 | feat_df = nn.feature_generation(bigdf, iv_col_dict)
149 |
150 | nn_config = {
151 | # NN parameters
152 | "model_choice": "1DCNN",
153 | "params": ['current', 'power', 'derivative', 'current_diff'],
154 | "dropout_pct": 0.5,
155 | "verbose": 1,
156 | # Training parameters
157 | "train_size": 0.8,
158 | "shuffle_split": True,
159 | "balance_tactic": 'truncate',
160 | "n_CV_splits": 2,
161 | "batch_size": 10,
162 | "max_epochs": 100,
163 | # LSTM parameters
164 | "use_attention_lstm": False,
165 | "units": 50,
166 | # 1DCNN parameters
167 | "nfilters": 64,
168 | "kernel_size": 12,
169 | }
170 |
171 | iv_col_dict = {'mode': 'mode'}
172 | model, _, _ = nn.classify_curves(feat_df, iv_col_dict, nn_config)
173 |
174 |     # The classifier must exceed this accuracy on the held-out split
175 |     benchmark_accuracy = 0.9
176 |
177 |     assert model.test_accuracy > benchmark_accuracy
178 |
179 |
180 | def test_timeseries_simulator():
181 |
182 | env_df = pd.read_csv(example_prodpath)
183 | env_df.index = pd.to_datetime(env_df["date"])
184 | env_df = env_df.sort_index()
185 |
186 |     # Only simulate where irradiance > 600
187 | env_df = env_df[env_df['irrad_poa_Wm2'] > 600]
188 | # Two sites have data here so we choose one
189 | env_df = env_df[env_df['randid'] == 'R10']
190 | # Remove any NaN environmental specifications
191 | env_df = env_df.dropna(subset=['irrad_poa_Wm2', 'temp_amb_C'])
192 |
193 | # Reduce number of simulations for test
194 | env_df = env_df.iloc[0:100]
195 |
196 | failureA = pvops.iv.timeseries_simulator.TimeseriesFailure()
197 | longterm_fcn_dict = {
198 | 'Rs_mult': "degrade"
199 | }
200 | annual_fcn_dict = {
201 | 'Rs_mult': lambda x: (0.3 * np.sin(np.pi * x))
202 | }
203 |
204 | failureA.trend(longterm_fcn_dict=longterm_fcn_dict,
205 | annual_fcn_dict=annual_fcn_dict,
206 | degradation_rate=1.005)
207 |
208 | iv_col_dict = {'irradiance': 'irrad_poa_Wm2',
209 | 'temperature': 'temp_amb_C'
210 | }
211 |
212 | env_df['identifier'] = env_df.index.strftime("%Y-%m-%d %H:%M:%S")
213 |
214 | time_simulator = pvops.iv.timeseries_simulator.IVTimeseriesGenerator()
215 | time_simulator.generate(
216 | env_df, [failureA], iv_col_dict, 'identifier', plot_trends=False)
217 |
218 | time_simulator.add_time_conditions('complete', nmods=12)
219 | time_simulator.simulate()
220 |
221 | sims_df = time_simulator.sims_to_df(focus=['string'], cutoff=True)
222 |
223 | assert len(sims_df) == 100
224 |
--------------------------------------------------------------------------------
/pvops/tests/test_text.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from pvops.text import visualize, preprocess, nlp_utils
5 |
6 | import pandas as pd
7 | import numpy as np
8 | import datetime
9 | import matplotlib.pyplot  # also makes matplotlib.figure accessible
10 |
11 | def test_text_remove_nondate_nums():
12 | example = r"This is a test example https://www.google.com 10% #10 101 1-1-1 a-e4 13-1010 10.1 123456789 123/12 executed on 2/4/2020"
13 | answer = r" this is test example executed on 2/4/2020 "
14 | assert preprocess.text_remove_nondate_nums(example) == answer
15 |
16 |
17 | def test_text_remove_numbers_stopwords():
18 | example = r"This is a test example 10% #10 101 1-1-1 13-1010 10.1 123456789 123/12 executed on 2/4/2020"
19 | answer = r"This test example executed"
20 |
21 | stopwords = nlp_utils.create_stopwords()
22 | assert preprocess.text_remove_numbers_stopwords(example, stopwords) == answer
23 |
24 |
25 | def test_get_dates():
26 | df = pd.DataFrame(
27 | [
28 | {
29 | "Date": "2020/01/23 12:34:56",
30 | "Document": "Find this date 2020/01/23 12:34:56",
31 | },
32 | {
33 | "Date": np.nan,
34 | "Document": "Find this date March 5 2021 and April 7 2022",
35 | },
36 | ]
37 | )
38 |
39 | answer = [datetime.datetime.strptime(
40 | "2020/01/23 12:34:56", "%Y/%m/%d %H:%M:%S")]
41 | assert answer == preprocess.get_dates(
42 | df["Document"].iloc[0], df, 0, {
43 | "data": "Document", "eventstart": "Date"}, False
44 | )
45 |
46 | answer = [
47 | datetime.datetime.strptime("2021/03/05 00:00:00", "%Y/%m/%d %H:%M:%S"),
48 | datetime.datetime.strptime("2022/04/07 00:00:00", "%Y/%m/%d %H:%M:%S"),
49 | ]
50 | assert answer == preprocess.get_dates(
51 | df["Document"].iloc[1], df, 1, {
52 | "data": "Document", "eventstart": "Date"}, False
53 | )
54 |
55 |
56 | def test_visualize_attribute_timeseries():
57 |
58 | dates = pd.Series(
59 | [
60 | "2020/01/23 12:34:56",
61 | "2020/01/24 12:34:56",
62 | "2020/01/25 12:34:56",
63 | ]
64 | )
65 |
66 | dates = pd.to_datetime(dates).tolist()
67 |
68 | df = pd.DataFrame(
69 | {"labels": ["A word", "B word", "C word"], "date": dates})
70 |
71 | fig = visualize.visualize_attribute_timeseries(
72 | df, {"label": "labels", "date": "date"}, date_structure="%Y-%m-%d"
73 | )
74 | assert isinstance(fig, matplotlib.figure.Figure)
75 |
76 |
77 | def test_visualize_word_frequency_plot():
78 | documents = ["A word", "B word", "C word"]
79 | words = " ".join(documents)
80 | tokenized_words = preprocess.regex_tokenize(words)
81 |
82 | result = visualize.visualize_word_frequency_plot(tokenized_words)
83 |
84 | assert isinstance(result[0], matplotlib.pyplot.Figure)
85 | assert isinstance(result[1], dict)
86 |
87 |
88 | def test_visualize_attribute_connectivity():
89 | Attribute1 = ["A", "B", "C", "C"]
90 | Attribute2 = ["X", "X", "Y", "Z"]
91 |
92 | df = pd.DataFrame({"Attr1": Attribute1, "Attr2": Attribute2})
93 |
94 | om_col_dict = {"attribute1_col": "Attr1", "attribute2_col": "Attr2"}
95 |
96 | fig, G = visualize.visualize_attribute_connectivity(
97 | df,
98 | om_col_dict,
99 | figsize=(10, 8),
100 | edge_width_scalar=2,
101 | graph_aargs={
102 | "with_labels": True,
103 | "font_weight": "bold",
104 | },
105 | )
106 |
107 | assert isinstance(fig, matplotlib.pyplot.Figure)
108 | assert list(G.edges()) == [("A", "X"), ("B", "X"), ("C", "Y"), ("C", "Z")]
109 |
110 | matplotlib.pyplot.close()
111 |
112 |
113 | def test_summarize_text_data():
114 |
115 | df = pd.DataFrame(
116 | [
117 | {
118 | "Date": "2020/01/23 12:34:56",
119 | "Document": "Find this date 2020/01/23 12:34:56",
120 | },
121 | {
122 | "Date": np.nan,
123 | "Document": "Find this date March 5 2021 and April 7 2022",
124 | },
125 | ]
126 | )
127 |
128 | answer = {
129 | "n_samples": 2,
130 | "n_nan_docs": 0,
131 | "n_words_doc_average": 7.50,
132 | "n_unique_words": 12,
133 | "n_total_words": 15.00,
134 | }
135 |
136 | info = nlp_utils.summarize_text_data(df, "Document")
137 |
138 | assert answer == info
139 |
--------------------------------------------------------------------------------
/pvops/tests/test_timeseries.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pandas as pd
4 | import numpy as np
5 | from pvops.timeseries.models import linear
6 | from pvops.timeseries import preprocess as tprep
7 | from pvops.text2time import preprocess as t2tprep
8 |
9 | # Define csv paths
10 | datadir = os.path.join('tutorials', 'example_data')
11 | example_OMpath = os.path.join(datadir, 'example_om_data2.csv')
12 | example_prodpath = os.path.join(datadir, 'example_perf_data.csv')
13 | example_metapath = os.path.join(datadir, 'example_metadata2.csv')
14 | example_prod2path = os.path.join(datadir, 'example_prod_with_covariates.csv')
15 |
16 | # Assigning dictionaries to connect pvops variables with User's column names
17 | # Format for dictionaries is {pvops variable: user-specific column names}
18 | prod_col_dict = {'siteid': 'randid',
19 | 'timestamp': 'Date',
20 | 'powerprod': 'AC_POWER',
21 | 'energyprod': 'Energy',
22 | 'irradiance': 'POAirradiance',
23 | 'baseline': 'IEC_pstep',
24 | 'dcsize': 'dcsize',
25 | 'compared': 'Compared',
26 | 'energy_pstep': 'Energy_pstep',
27 | 'clearsky_irr': 'clearsky_irr'
28 | }
29 |
30 | om_col_dict = {'siteid': 'randid',
31 | 'datestart': 'date_start',
32 | 'dateend': 'date_end',
33 | 'workID': 'WONumber',
34 | 'worktype': 'WOType',
35 | 'asset': 'Asset',
36 | 'eventdur': 'EventDur',
37 | 'modatestart': 'MonthStart',
38 | 'agedatestart': 'AgeStart'}
39 |
40 | metad_col_dict = {'siteid': 'randid',
41 | 'dcsize': 'DC_Size_kW',
42 | 'COD': 'COD',
43 | 'latitude': 'latitude',
44 | 'longitude': 'longitude'}
45 |
46 |
47 | def test_prod_irradiance_filter():
48 |
49 | prod_df = pd.read_csv(example_prodpath)
50 | meta_df = pd.read_csv(example_metapath)
51 |
52 | prod_df = t2tprep.prod_date_convert(prod_df, prod_col_dict)
53 | prod_df.index = prod_df[prod_col_dict['timestamp']]
54 | prod_df['randid'] = 'R27'
55 |
56 |     # Data is missing in the middle of this example, so only
57 |     # the first set of rows is passed
58 | prod_df = prod_df.iloc[0:200]
59 |
60 | prod_df_out, mask_series = tprep.prod_irradiance_filter(prod_df,
61 | prod_col_dict,
62 | meta_df,
63 | metad_col_dict)
64 |
65 | true_detection_irradiance = [0, 44]
66 | assert sum(mask_series) in true_detection_irradiance
67 |
68 |
69 | def test_prod_inverter_clipping_filter():
70 |
71 | prod_df = pd.read_csv(example_prodpath)
72 | meta_df = pd.read_csv(example_metapath)
73 |
74 | prod_df = t2tprep.prod_date_convert(prod_df, prod_col_dict)
75 | prod_df.index = prod_df[prod_col_dict['timestamp']]
76 | prod_df['randid'] = 'R27'
77 |
78 |     # Data is missing in the middle of this example, so only
79 |     # the first set of rows is passed
80 | prod_df = prod_df.iloc[0:200]
81 |
82 | geometric = tprep.prod_inverter_clipping_filter(prod_df,
83 | prod_col_dict,
84 | meta_df, metad_col_dict,
85 | model='geometric')
86 |
87 | threshold = tprep.prod_inverter_clipping_filter(prod_df,
88 | prod_col_dict,
89 | meta_df, metad_col_dict,
90 | model='threshold')
91 |
92 | levels = tprep.prod_inverter_clipping_filter(prod_df,
93 | prod_col_dict,
94 | meta_df, metad_col_dict,
95 | model='levels')
96 |
97 | true_detection_geometric = 0
98 | true_detection_threshold = 0
99 | true_detection_levels = 183
100 |
101 | assert sum(geometric['mask']) == true_detection_geometric
102 | assert sum(threshold['mask']) == true_detection_threshold
103 | assert sum(levels['mask']) == true_detection_levels
104 |
105 |
106 | def test_linear_model():
107 | prod_df = pd.read_csv(example_prod2path)
108 |
109 | # Format for dictionaries is {pvops variable: user-specific column names}
110 | prod_col_dict = {'siteid': 'randid',
111 | 'timestamp': 'date',
112 | 'powerprod': 'generated_kW',
113 | 'irradiance': 'irrad_poa_Wm2',
114 | 'temperature': 'temp_amb_C',
115 | 'baseline': 'IEC_pstep',
116 | 'dcsize': 'dcsize',
117 | 'compared': 'Compared',
118 | 'energy_pstep': 'Energy_pstep'}
119 |
120 | prod_data_converted = t2tprep.prod_date_convert(prod_df, prod_col_dict)
121 | prod_data_datena_d, _ = t2tprep.prod_nadate_process(
122 | prod_data_converted, prod_col_dict, pnadrop=True)
123 |
124 | prod_data_datena_d.index = prod_data_datena_d[prod_col_dict['timestamp']]
125 |
126 | model_prod_data = prod_data_datena_d.dropna(subset=[
127 | 'irrad_poa_Wm2', 'temp_amb_C', 'wind_speed_ms'] +
128 | [prod_col_dict['powerprod']
129 | ])
130 | model_prod_data = model_prod_data[model_prod_data['randid'] == 'R15']
131 |
132 | model, train_df, test_df = linear.modeller(prod_col_dict,
133 | kernel_type='default',
134 | time_weighted='month',
135 | X_parameters=[
136 | 'irrad_poa_Wm2',
137 | 'temp_amb_C'],
138 | prod_df=model_prod_data,
139 | test_split=0.05,
140 | degree=3,
141 | verbose=0)
142 |
143 | name = list(model.estimators.keys())[0]
144 |
145 | benchmark_r2 = 0.99
146 | benchmark_mse = 420000
147 |
148 |     evaluation = model.estimators[name]['test_eval']
149 |
150 |     assert evaluation['r2'] > benchmark_r2
151 |     assert evaluation['mse'] < benchmark_mse
152 |
153 |
154 | def test_establish_solar_loc():
155 | prod_df = pd.read_csv(example_prod2path)
156 | meta_df = pd.read_csv(example_metapath)
157 | # Test-specific changes
158 | meta_df['randid'] = ["R10", "R15"]
159 | meta_df.index = meta_df['randid']
160 | # Format for dictionaries is {pvops variable: user-specific column names}
161 | prod_col_dict = {'siteid': 'randid',
162 | 'timestamp': 'date',
163 | 'powerprod': 'generated_kW',
164 | 'irradiance': 'irrad_poa_Wm2',
165 | 'temperature': 'temp_amb_C',
166 | 'baseline': 'IEC_pstep',
167 | 'dcsize': 'dcsize',
168 | 'compared': 'Compared',
169 | 'energy_pstep': 'Energy_pstep'}
170 | prod_data_converted = t2tprep.prod_date_convert(prod_df, prod_col_dict)
171 | prod_data_datena_d, _ = t2tprep.prod_nadate_process(
172 | prod_data_converted, prod_col_dict, pnadrop=True)
173 |
174 | prod_data_datena_d.index = pd.to_datetime(prod_data_datena_d[prod_col_dict['timestamp']])
175 |
176 | prod_with_solar_pos = tprep.establish_solar_loc(prod_data_datena_d,
177 | prod_col_dict,
178 | meta_df,
179 | metad_col_dict)
180 |
181 | positional_columns = ['apparent_zenith',
182 | 'zenith',
183 | 'apparent_elevation',
184 | 'elevation',
185 | 'azimuth',
186 | 'equation_of_time']
187 |     expected = [142.081554, 142.081554, -52.081554,
188 |                 -52.081554, 140.635657, -3.925820]
189 |     rounded_expected = [round(a, 2) for a in expected]
190 |
191 |     actual = prod_with_solar_pos.iloc[0][positional_columns].values
192 |     rounded_actual = [round(a, 2) for a in actual]
193 |
194 |     assert len(rounded_expected) == len(rounded_actual)
195 |     assert all([a == b for a, b in zip(rounded_expected,
196 |                                        rounded_actual)])
197 |
--------------------------------------------------------------------------------
/pvops/text/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | text module
3 | """
4 |
5 | import pvops.text.classify
6 | import pvops.text.defaults
7 | import pvops.text.nlp_utils
8 | import pvops.text.preprocess
9 | import pvops.text.utils
10 | import pvops.text.visualize
--------------------------------------------------------------------------------
/pvops/text/classify.py:
--------------------------------------------------------------------------------
1 | # Classifiers
2 | from sklearn.pipeline import Pipeline
3 | from sklearn.model_selection import GridSearchCV
4 |
5 | from scipy.sparse import issparse
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import copy
10 |
11 | from pvops.text.preprocess import get_keywords_of_interest
12 |
13 | def classification_deployer(
14 | X,
15 | y,
16 | n_splits,
17 | classifiers,
18 | search_space,
19 | pipeline_steps,
20 | scoring,
21 | greater_is_better=True,
22 | verbose=3,
23 | ):
24 |     """The classification deployer builds a classifier evaluator with a built-in
25 |     hyperparameter grid-search protocol. The output of this function is a data frame
26 |     showing the performance of each classifier under each hyperparameter configuration.
27 |
28 |     To see an example of this method's application, see ``tutorials/text_class_example.py``
29 |
30 | Parameters
31 | ----------
32 | X : list of str
33 | List of documents (str). The documents will be passed through the pipeline_steps, where they will be transformed into vectors.
34 | y : list
35 | List of labels corresponding with the documents in X
36 | n_splits : int
37 | Integer defining the number of splits in the cross validation split during training
38 | classifiers : dict
39 | Dictionary with key as classifier identifier (str) and value as classifier instance following sklearn's
40 |         base model convention (see `sklearn_docs`_).
41 |
42 |         .. _sklearn_docs: https://scikit-learn.org/stable/modules/generated/sklearn.base.is_classifier.html
43 | .. code-block:: python
44 |
45 | classifiers = {
46 | 'LinearSVC' : LinearSVC(),
47 | 'AdaBoostClassifier' : AdaBoostClassifier(),
48 | 'RidgeClassifier' : RidgeClassifier()
49 | }
50 |
51 |         See ``pvops/text/defaults.py`` for this package's defaults.
52 | search_space : dict
53 | Dictionary with classifier identifiers, as used in ``classifiers``, mapped to its hyperparameters.
54 |
55 | .. code-block:: python
56 |
57 | search_space = {
58 | 'LinearSVC' : {
59 | 'clf__C' : [1e-2,1e-1],
60 | 'clf__max_iter':[800,1000],
61 | },
62 | 'AdaBoostClassifier' : {
63 | 'clf__n_estimators' : [50,100],
64 | 'clf__learning_rate':[1.,0.9,0.8],
65 | 'clf__algorithm' : ['SAMME.R']
66 | },
67 | 'RidgeClassifier' : {
68 | 'clf__alpha' : [0.,1e-3,1.],
69 | 'clf__normalize' : [False,True]
70 | }
71 | }
72 |
73 |         See ``pvops/text/defaults.py`` for this package's defaults.
74 | pipeline_steps : list of tuples
75 | Define embedding and machine learning pipeline. The last tuple must be ``('clf', None)`` so that the output
76 | of the pipeline is a prediction.
77 | For supervised classifiers using a TFIDF embedding, one could specify
78 |
79 | .. code-block:: python
80 |
81 | pipeline_steps = [('tfidf', TfidfVectorizer()),
82 | ('clf', None)]
83 |
84 | For unsupervised clusterers using a TFIDF embedding, one could specify
85 |
86 | .. code-block:: python
87 |
88 | pipeline_steps = [('tfidf', TfidfVectorizer()),
89 | ('to_dense', DataDensifier.DataDensifier()),
90 | ('clf', None)]
91 |
92 |         A densifier is required by some clusterers, which fail if sparse data is passed.
93 |     scoring : callable
94 |         Callable scorer object that returns a scalar score (i.e., a statistic summarizing
95 |         predictions relative to observations), created via ``sklearn.metrics.make_scorer``.
96 | For supervised classifiers, one could specify
97 |
98 | .. code-block:: python
99 |
100 | scoring = make_scorer(f1_score, average = 'weighted', pos_label = None)
101 |
102 | For unsupervised classifiers, one could specify
103 |
104 | .. code-block:: python
105 |
106 | scoring = make_scorer(homogeneity_score)
107 |
108 | greater_is_better : bool
109 |         Whether the scoring parameter is better when greater (e.g., accuracy) or not.
110 |
111 | verbose : int
112 |         Controls the verbosity of printed output. If greater than 1, a print out is shown when a new "best classifier"
113 |         is found while iterating. Additionally, the verbosity during the grid search follows sklearn's definitions;
114 |         the frequency of the messages increases with the verbosity level.
115 |
116 | Returns
117 | -------
118 | DataFrame
119 | Summarization of results from all of the classifiers
120 | """
121 |
122 | rows = []
123 |
124 | if issparse(X):
125 | print("Converting passed data to dense array...")
126 | X = X.toarray()
127 |
128 | # get position of 'clf' in pipeline_steps
129 | idx_clf_pipeline = [i for i, it in enumerate(
130 | pipeline_steps) if it[0] == "clf"][0]
131 |
132 | best_gs_instance = None
133 | if greater_is_better:
134 | best_model_score = 0.0
135 | else:
136 | best_model_score = np.inf
137 | for iter_idx, key in enumerate(classifiers.keys()):
138 | clas = classifiers[key]
139 | space = search_space[key]
140 |
141 | iter_pipeline_steps = copy.deepcopy(pipeline_steps)
142 | iter_pipeline_steps[idx_clf_pipeline] = ("clf", clas)
143 | pipe = Pipeline(iter_pipeline_steps)
144 |
145 | gs_clf = GridSearchCV(
146 | pipe,
147 | space,
148 | scoring=scoring,
149 | cv=n_splits,
150 | n_jobs=-1,
151 | return_train_score=True,
152 | verbose=verbose,
153 | )
154 | gs_clf.fit(X, y)
155 | params = gs_clf.cv_results_["params"]
156 | scores = []
157 | for i in range(n_splits):
158 | r1 = gs_clf.cv_results_[f"split{i}_test_score"]
159 | scores.append(r1.reshape(len(params), 1))
160 |
161 | r2 = gs_clf.cv_results_["mean_fit_time"]
162 |
163 | all_scores = np.hstack(scores)
164 | for param, score, time in zip(params, all_scores, r2):
165 | param["mean_fit_time"] = time
166 | d = {
167 | "estimator" : key,
168 | "min_score" : min(score),
169 | "max_score" : max(score),
170 | "mean_score" : np.mean(score),
171 | "std_score" : np.std(score),
172 | }
173 | rows.append((pd.Series({**param, **d})))
174 |
175 | if greater_is_better:
176 | replacement_logic = gs_clf.best_score_ > best_model_score
177 | else:
178 | replacement_logic = gs_clf.best_score_ < best_model_score
179 |
180 | if replacement_logic:
181 | if verbose > 1:
182 | print(
183 | "Better score ({:.3f}) found on classifier: {}".format(
184 | gs_clf.best_score_, key
185 | )
186 | )
187 | best_model_score = gs_clf.best_score_
188 | best_gs_instance = gs_clf
189 |
190 | return pd.concat(rows, axis=1).T, best_gs_instance.best_estimator_
191 |
192 | def get_attributes_from_keywords(om_df, col_dict, reference_df, reference_col_dict):
193 | """Find keywords of interest in specified column of dataframe, return as new column value.
194 |
195 | If keywords of interest given in a reference dataframe are in the specified column of the
196 | dataframe, return the keyword category, or categories.
197 | For example, if the string 'inverter' is in the list of text, return ['inverter'].
198 |
199 | Parameters
200 | ----------
201 | om_df : pd.DataFrame
202 | Dataframe to search for keywords of interest, must include text_col.
203 | col_dict : dict of {str : str}
204 | A dictionary that contains the column names needed:
205 |
206 | - data : string, should be assigned to associated column which stores the tokenized text logs
207 | - predicted_col : string, will be used to create keyword search label column
208 | reference_df : DataFrame
209 |         Holds columns that define the reference dictionary to search for keywords of interest.
210 |         Note: This function can currently only handle single words, no n-gram functionality.
211 | reference_col_dict : dict of {str : str}
212 | A dictionary that contains the column names that describes how
213 | referencing is going to be done
214 |
215 | - reference_col_from : string, should be assigned to
216 | associated column name in reference_df that are possible input reference values
217 | Example: pd.Series(['inverter', 'invert', 'inv'])
218 | - reference_col_to : string, should be assigned to
219 | associated column name in reference_df that are the output reference values
220 | of interest
221 | Example: pd.Series(['inverter', 'inverter', 'inverter'])
222 |
223 | Returns
224 | -------
225 |     om_df: pd.DataFrame
226 |         Input df with the ``predicted_col`` added, where each found keyword is its own row, which may
227 |         result in duplicate rows if more than one keyword of interest was found in the text column.
228 | """
229 | om_df[col_dict['predicted_col']] = om_df[col_dict['data']].apply(get_keywords_of_interest,
230 | reference_df=reference_df,
231 | reference_col_dict=reference_col_dict)
232 |
233 | # each multi-category now in its own row, some logs have multiple equipment issues
234 | multiple_keywords_df = om_df[om_df[col_dict['predicted_col']].str.len() > 1]
235 | om_df = om_df.explode(col_dict['predicted_col'])
236 |
237 | msg = f'{len(multiple_keywords_df)} entries had multiple keywords of interest. Reference: {multiple_keywords_df.index} in original dataframe.'
238 | print(msg)
239 |
240 | return om_df
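A compact end-to-end sketch of ``classification_deployer`` (the documents, labels, and hyperparameter grids are fabricated for illustration; any sklearn classifiers may be substituted):

.. code-block:: python

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import RidgeClassifier
    from sklearn.metrics import make_scorer, f1_score

    from pvops.text.classify import classification_deployer

    X = ["inverter tripped offline", "cleaned soiled modules",
         "replaced failed fuse", "inverter fault cleared"] * 5
    y = ["inverter", "soiling", "bos", "inverter"] * 5

    results_df, best_estimator = classification_deployer(
        X, y, n_splits=2,
        classifiers={'LinearSVC': LinearSVC(),
                     'RidgeClassifier': RidgeClassifier()},
        search_space={'LinearSVC': {'clf__C': [1e-2, 1e-1]},
                      'RidgeClassifier': {'clf__alpha': [0.0, 1.0]}},
        pipeline_steps=[('tfidf', TfidfVectorizer()), ('clf', None)],
        scoring=make_scorer(f1_score, average='weighted'),
    )

``results_df`` tabulates each configuration's cross-validation scores and mean fit time, while ``best_estimator`` is the refit pipeline that achieved the best score.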
--------------------------------------------------------------------------------
/pvops/text/nlp_utils.py:
--------------------------------------------------------------------------------
1 | from sklearn.base import BaseEstimator
2 | from gensim.models.doc2vec import TaggedDocument, Doc2Vec
3 | import scipy
4 | import numpy as np
5 | from importlib import resources
6 | from gensim.models import Word2Vec
7 |
8 | from pvops.text import preprocess
9 |
10 |
11 | class Doc2VecModel(BaseEstimator):
12 | """Performs a gensim Doc2Vec transformation of the input documents to create
13 | embedded representations of the documents. See gensim's
14 | Doc2Vec model for information regarding the hyperparameters.
15 | """
16 |
17 | def __init__(
18 | self,
19 | vector_size=100,
20 | dm_mean=None,
21 | dm=1,
22 | dbow_words=0,
23 | dm_concat=0,
24 | dm_tag_count=1,
25 | dv=None,
26 | dv_mapfile=None,
27 | comment=None,
28 | trim_rule=None,
29 | callbacks=(),
30 | window=5,
31 | epochs=10,
32 | ):
33 | self.d2v_model = None
34 | self.vector_size = vector_size
35 | self.dm_mean = dm_mean
36 | self.dm = dm
37 | self.dbow_words = dbow_words
38 | self.dm_concat = dm_concat
39 | self.dm_tag_count = dm_tag_count
40 | self.dv = dv
41 | self.dv_mapfile = dv_mapfile
42 | self.comment = comment
43 | self.trim_rule = trim_rule
44 | self.callbacks = callbacks
45 | self.window = window
46 | self.epochs = epochs
47 |
48 | def fit(self, raw_documents, y=None):
49 | """Fits the Doc2Vec model."""
50 | # Initialize model
51 | self.d2v_model = Doc2Vec(
52 | vector_size=self.vector_size,
53 | dm_mean=self.dm_mean,
54 | dm=self.dm,
55 | dbow_words=self.dbow_words,
56 | dm_concat=self.dm_concat,
57 | dm_tag_count=self.dm_tag_count,
58 | dv=self.dv,
59 | dv_mapfile=self.dv_mapfile,
60 | comment=self.comment,
61 | trim_rule=self.trim_rule,
62 | window=self.window,
63 | epochs=self.epochs,
64 | )
65 | # Tag docs
66 | tagged_documents = [
67 | TaggedDocument(words=preprocess.regex_tokenize(_d.lower()), tags=[str(i)])
68 | for i, _d in enumerate(raw_documents)
69 | ]
70 | # Build vocabulary
71 | self.d2v_model.build_vocab(tagged_documents)
72 | # Train model
73 | self.d2v_model.train(
74 | tagged_documents,
75 | total_examples=len(tagged_documents),
76 | epochs=self.d2v_model.epochs,
77 | )
78 | return self
79 |
80 | def transform(self, raw_documents):
81 | """Transforms the documents into Doc2Vec vectors."""
82 | X = []
83 | for doc in raw_documents:
84 | X.append(self.d2v_model.infer_vector(preprocess.regex_tokenize(doc)))
85 | return X
86 |
87 | def fit_transform(self, raw_documents, y=None):
88 | """Utilizes the ``fit()`` and ``transform()`` methods in this class."""
89 | self.fit(raw_documents)
90 | return self.transform(raw_documents)
91 |
92 |
93 | class DataDensifier(BaseEstimator):
94 | """A data structure transformer which converts sparse data to dense data.
95 | This process is usually incorporated in this library when doing unsupervised machine learning.
96 | This class is built specifically to work inside a sklearn pipeline.
97 | Therefore, it uses the default ``transform``, ``fit``, ``fit_transform`` method structure.
98 | """
99 |
100 | def transform(self, X, y=None):
101 | """Return a dense array if the input array is sparse.
102 |
103 | Parameters
104 | ----------
105 | X : array
106 | Input data of numerical values. For this package, these values could
107 | represent embedded representations of documents.
108 |
109 | Returns
110 | -------
111 | dense array
112 | """
113 | if scipy.sparse.issparse(X):
114 | return X.toarray()
115 | else:
116 | return X.copy()
117 |
118 | def fit(self, X, y=None):
119 | """Placeholder method to conform to the sklearn class structure.
120 |
121 | Parameters
122 | ----------
123 | X : array
124 | Input data
125 | y : Not utilized.
126 |
127 | Returns
128 | -------
129 | DataDensifier object
130 | """
131 | return self
132 |
133 | def fit_transform(self, X, y=None):
134 | """Performs same action as ``DataDensifier.transform()``,
135 | which returns a dense array when the input is sparse.
136 |
137 | Parameters
138 | ----------
139 | X : array
140 | Input data
141 | y : Not utilized.
142 |
143 | Returns
144 | -------
145 | dense array
146 | """
147 | return self.transform(X=X, y=y)
148 |
149 |
150 | def create_stopwords(lst_add_words=[], lst_keep_words=[]):
151 | """Concatenate a list of stopwords using both words grabbed from nltk and user-specified words.
152 |     The nltk stopwords are those that were current at the release of pvOps version 0.5.0 on
153 |     February 19th, 2025. See below for more on nltk.
154 |
155 | Bird, Steven, Edward Loper and Ewan Klein (2009), Natural Language Processing with Python. O'Reilly Media Inc.
156 |
157 | https://www.nltk.org/
158 |
159 | Parameters
160 | ----------
161 |     lst_add_words : list
162 |         List of words (e.g., "road" or "street") to add to the stopwords list. If these words are already included in the nltk list, a duplicate will not be added.
163 |     lst_keep_words : list
164 |         List of words (e.g., "before" or "until") to remove from the stopwords list. This is usually used to retain default stop words that are of interest in a PV context.
165 |
166 | Returns
167 | -------
168 | list
169 | List of alphabetized stopwords
170 | """
171 | lst_stopwords = set()
172 |
173 | with resources.open_text('pvops.text', 'stopwords.txt') as file:
174 | default_stopwords = file.read().split()
175 |
176 | lst_stopwords = lst_stopwords.union(default_stopwords)
177 | lst_stopwords = lst_stopwords.union(lst_add_words)
178 | lst_stopwords = list(set(lst_stopwords) - set(lst_keep_words))
179 | return sorted(list(set(lst_stopwords)))
180 |
181 |
182 | def summarize_text_data(om_df, colname):
183 | """Display information about a set of documents located in a dataframe, including
184 | the number of samples, average number of words, vocabulary size, and number of words
185 | in total.
186 |
187 | Parameters
188 | ----------
189 | om_df : DataFrame
190 | A pandas dataframe containing O&M data, which contains at least the colname of interest
191 | colname : str
192 | Column name of column with text
193 |
194 | Returns
195 | -------
196 | dict
197 | dictionary containing printed summary data
198 | """
199 | df = om_df.copy()
200 | text = df[colname].tolist()
201 |
202 | nonan_text = [x for x in text if (str(x) != "nan" and x is not None)]
203 |
204 | tokenized = [sentence.split() for sentence in nonan_text]
205 | avg_n_words = np.array([len(tokens) for tokens in tokenized]).mean()
206 | sum_n_words = np.array([len(tokens) for tokens in tokenized]).sum()
207 | model = Word2Vec(tokenized, min_count=1)
208 |
209 | # Total vocabulary
210 | vocab = model.wv
211 |
212 | # Bold title.
213 | print("\033[1m" + "DETAILS" + "\033[0m")
214 |
215 | info = {
216 | "n_samples": len(df),
217 | "n_nan_docs": len(df) - len(nonan_text),
218 | "n_words_doc_average": avg_n_words,
219 | "n_unique_words": len(vocab),
220 | "n_total_words": sum_n_words,
221 | }
222 |
223 | # Display information.
224 | print(f' {info["n_samples"]} samples')
225 | print(f' {info["n_nan_docs"]} invalid documents')
226 | print(" {:.2f} words per sample on average".format(
227 | info["n_words_doc_average"]))
228 | print(f' Number of unique words {info["n_unique_words"]}')
229 | print(" {:.2f} total words".format(info["n_total_words"]))
230 |
231 | return info
232 |
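A short sketch of the utilities above (documents are fabricated; because ``Doc2VecModel`` follows the sklearn estimator convention, it can also be dropped into a ``Pipeline``). The documents are repeated so every word clears gensim's default ``min_count``:

.. code-block:: python

    from pvops.text.nlp_utils import Doc2VecModel, create_stopwords

    # Add a custom stopword but keep "before", which matters for PV timelines
    stopwords = create_stopwords(lst_add_words=["site"],
                                 lst_keep_words=["before"])

    docs = ["inverter offline due to ground fault",
            "replaced blown fuse at combiner box"] * 5
    vectors = Doc2VecModel(vector_size=20, epochs=5).fit_transform(docs)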
--------------------------------------------------------------------------------
/pvops/text/stopwords.txt:
--------------------------------------------------------------------------------
1 | a
2 | about
3 | above
4 | after
5 | again
6 | against
7 | ain
8 | all
9 | am
10 | an
11 | and
12 | any
13 | are
14 | aren
15 | aren't
16 | as
17 | at
18 | be
19 | because
20 | been
21 | before
22 | being
23 | below
24 | between
25 | both
26 | but
27 | by
28 | can
29 | couldn
30 | couldn't
31 | d
32 | did
33 | didn
34 | didn't
35 | do
36 | does
37 | doesn
38 | doesn't
39 | doing
40 | don
41 | don't
42 | down
43 | during
44 | each
45 | few
46 | for
47 | from
48 | further
49 | had
50 | hadn
51 | hadn't
52 | has
53 | hasn
54 | hasn't
55 | have
56 | haven
57 | haven't
58 | having
59 | he
60 | her
61 | here
62 | hers
63 | herself
64 | him
65 | himself
66 | his
67 | how
68 | i
69 | if
70 | in
71 | into
72 | is
73 | isn
74 | isn't
75 | it
76 | it's
77 | its
78 | itself
79 | just
80 | ll
81 | m
82 | ma
83 | me
84 | mightn
85 | mightn't
86 | more
87 | most
88 | mustn
89 | mustn't
90 | my
91 | myself
92 | needn
93 | needn't
94 | no
95 | nor
96 | not
97 | now
98 | o
99 | of
100 | off
101 | on
102 | once
103 | only
104 | or
105 | other
106 | our
107 | ours
108 | ourselves
109 | out
110 | over
111 | own
112 | re
113 | s
114 | same
115 | shan
116 | shan't
117 | she
118 | she's
119 | should
120 | should've
121 | shouldn
122 | shouldn't
123 | so
124 | some
125 | such
126 | t
127 | than
128 | that
129 | that'll
130 | the
131 | their
132 | theirs
133 | them
134 | themselves
135 | then
136 | there
137 | these
138 | they
139 | this
140 | those
141 | through
142 | to
143 | too
144 | under
145 | until
146 | up
147 | ve
148 | very
149 | was
150 | wasn
151 | wasn't
152 | we
153 | were
154 | weren
155 | weren't
156 | what
157 | when
158 | where
159 | which
160 | while
161 | who
162 | whom
163 | why
164 | will
165 | with
166 | won
167 | won't
168 | wouldn
169 | wouldn't
170 | y
171 | you
172 | you'd
173 | you'll
174 | you're
175 | you've
176 | your
177 | yours
178 | yourself
179 | yourselves
180 |
--------------------------------------------------------------------------------
/pvops/text/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 |
5 | def remap_attributes(om_df, remapping_df, remapping_col_dict,
6 | allow_missing_mappings=False, print_info=False):
7 | """A utility function which remaps the attributes of om_df using columns
8 | within remapping_df.
9 |
10 | Parameters
11 | ----------
12 | om_df : DataFrame
13 | A pandas dataframe containing O&M data, which needs to be remapped.
14 | remapping_df : DataFrame
15 | Holds columns that define the remappings
16 | remapping_col_dict : dict of {str : str}
17 | A dictionary that contains the column names that describes how
18 | remapping is going to be done
19 |
20 | - attribute_col : string, should be assigned to associated
21 | column name in om_df which will be remapped
22 | - remapping_col_from : string, should be assigned
23 | to associated column name in remapping_df that matches
24 | original attribute of interest in om_df
25 | - remapping_col_to : string, should be assigned to
26 | associated column name in remapping_df that contains the
27 | final mapped entries
28 | allow_missing_mappings : bool
29 | If True, allow attributes without specified mappings to exist in
30 | the final dataframe.
31 | If False, only attributes specified in `remapping_df` will be in
32 | final dataframe.
33 | print_info : bool
34 | If True, print information about remapping.
35 |
36 | Returns
37 | -------
38 | DataFrame
39 | dataframe with remapped columns populated
40 | """
41 | df = om_df.copy()
42 | ATTRIBUTE_COL = remapping_col_dict["attribute_col"]
43 | REMAPPING_COL_FROM = remapping_col_dict["remapping_col_from"]
44 | REMAPPING_COL_TO = remapping_col_dict["remapping_col_to"]
45 |
46 |     # Lowercase the attribute column
47 | df[ATTRIBUTE_COL] = df[ATTRIBUTE_COL].str.lower()
48 |
49 | if print_info:
50 | print("Initial value counts:")
51 | print(df[ATTRIBUTE_COL].value_counts())
52 |
53 | remapping_df[REMAPPING_COL_FROM] = remapping_df[REMAPPING_COL_FROM].str.lower()
54 | remapping_df[REMAPPING_COL_TO] = remapping_df[REMAPPING_COL_TO].str.lower()
55 |
56 | if allow_missing_mappings:
57 |         # Find attributes present in the data but absent from the
58 |         # mapping; map these attributes to themselves so they are
59 |         # preserved in the output
60 |         unique_words_in_data = set(df[ATTRIBUTE_COL].tolist())
61 |         missing_mappings = list(unique_words_in_data
62 |                                 - set(remapping_df[REMAPPING_COL_FROM]))
63 | temp_remapping_df = pd.DataFrame()
64 | temp_remapping_df[REMAPPING_COL_FROM] = missing_mappings
65 | temp_remapping_df[REMAPPING_COL_TO] = missing_mappings
66 | remapping_df = pd.concat([remapping_df, temp_remapping_df])
67 |
68 | if print_info:
69 | print("All mappings:\n", remapping_df)
70 | renamer = dict(
71 | zip(remapping_df[REMAPPING_COL_FROM], remapping_df[REMAPPING_COL_TO])
72 | )
73 | df[ATTRIBUTE_COL] = df[ATTRIBUTE_COL].map(renamer)
74 |
75 | if print_info:
76 | print("Final attribute distribution:")
77 | print(df[ATTRIBUTE_COL].value_counts())
78 |
79 |         print(f"Number of nan definitions of {ATTRIBUTE_COL}: "
80 |               f"{sum(df[ATTRIBUTE_COL].isna())}")
81 |
82 | return df
83 |
84 | def remap_words_in_text(om_df, remapping_df, remapping_col_dict):
85 | """A utility function which remaps a text column of om_df using columns
86 | within remapping_df.
87 |
88 | Parameters
89 | ----------
90 | om_df : DataFrame
91 | A pandas dataframe containing O&M note data
92 | remapping_df : DataFrame
93 | Holds columns that define the remappings
94 | remapping_col_dict : dict of {str : str}
95 | A dictionary that contains the column names that describes how
96 | remapping is going to be done
97 |
98 | - data : string, should be assigned to associated
99 | column name in om_df which will have its text tokenized and remapped
100 | - remapping_col_from : string, should be assigned
101 | to associated column name in remapping_df that matches
102 | original attribute of interest in om_df
103 | - remapping_col_to : string, should be assigned to
104 | associated column name in remapping_df that contains the
105 | final mapped entries
106 |
107 | Returns
108 | -------
109 | DataFrame
110 | dataframe with remapped columns populated
111 | """
112 | df = om_df.copy()
113 | TEXT_COL = remapping_col_dict["data"]
114 | REMAPPING_COL_FROM = remapping_col_dict["remapping_col_from"]
115 | REMAPPING_COL_TO = remapping_col_dict["remapping_col_to"]
116 |
117 | # drop any values where input value is equal to output value
118 | remapping_df = remapping_df[remapping_df[REMAPPING_COL_FROM] != remapping_df[REMAPPING_COL_TO]]
119 |
120 |     # lowercase everything so the replacement is case-insensitive
121 | remapping_df[REMAPPING_COL_FROM] = remapping_df[REMAPPING_COL_FROM].str.lower()
122 | remapping_df[REMAPPING_COL_TO] = remapping_df[REMAPPING_COL_TO].str.lower()
123 | df[TEXT_COL] = df[TEXT_COL].str.lower()
124 |
125 | renamer = dict(
126 | zip(remapping_df[REMAPPING_COL_FROM], remapping_df[REMAPPING_COL_TO])
127 | )
128 |
129 | df[TEXT_COL] = df[TEXT_COL].replace(renamer, regex=True)
130 |
131 | return df
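A minimal sketch of ``remap_attributes`` (the column names and mappings are illustrative; ``tutorials/example_data/remappings_asset.csv`` holds a realistic mapping table):

.. code-block:: python

    import pandas as pd
    from pvops.text.utils import remap_attributes

    om_df = pd.DataFrame({"Asset": ["Inverter", "inv", "Combiner"]})
    remapping_df = pd.DataFrame(
        {"from": ["inverter", "inv", "combiner"],
         "to": ["inverter", "inverter", "combiner"]})
    remapping_col_dict = {"attribute_col": "Asset",
                          "remapping_col_from": "from",
                          "remapping_col_to": "to"}

    remapped = remap_attributes(om_df, remapping_df, remapping_col_dict)
    # remapped["Asset"] -> ["inverter", "inverter", "combiner"]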
--------------------------------------------------------------------------------
/pvops/text2time/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | text2time module
3 | """
4 |
5 | import pvops.text2time.preprocess
6 | import pvops.text2time.utils
7 | import pvops.text2time.visualize
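The preprocess functions in the module below are typically chained before merging O&M and production data. A minimal sketch with toy data (column names are illustrative):

.. code-block:: python

    import pandas as pd
    from pvops.text2time import preprocess

    om_df = pd.DataFrame({"randid": ["R27", None],
                          "date_start": ["2021-01-02", "2021-03-05"],
                          "date_end": ["2021-01-01", "2021-03-06"]})
    om_col_dict = {"siteid": "randid",
                   "datestart": "date_start",
                   "dateend": "date_end"}

    om_df, dropped = preprocess.data_site_na(om_df, om_col_dict)
    om_df = preprocess.om_date_convert(om_df, om_col_dict)
    om_df, swapped = preprocess.om_datelogic_check(om_df, om_col_dict,
                                                   om_dflag="swap")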
--------------------------------------------------------------------------------
/pvops/text2time/preprocess.py:
--------------------------------------------------------------------------------
1 | """
2 | These functions focus on pre-processing user O&M and production
3 | data so that the merged data can be analyzed and visualized
4 | """
5 | from datetime import datetime
6 | import pandas as pd
7 |
8 |
9 | def data_site_na(pom_df, df_col_dict):
10 | """
11 | Drops rows where site-ID is missing (NAN) within either production
12 | or O&M data.
13 |
14 | Parameters
15 | ----------
16 | pom_df : DataFrame
17 | A data frame corresponding to either the production or O&M
18 | data.
19 | df_col_dict : dict of {str : str}
20 |         A dictionary that contains the column names
21 |         associated with the input `pom_df` and
22 |         contains at least:
23 |
24 | - **siteid** (*string*), should be assigned to column name
25 | for user's site-ID
26 |
27 | Returns
28 | -------
29 | pom_df : DataFrame
30 | An updated version of the input data frame, where rows with
31 | site-IDs of NAN are dropped.
32 | addressed : DataFrame
33 | A data frame showing rows from the input that were removed
34 | by this function.
35 | """
36 |
37 | df_site = df_col_dict["siteid"]
38 |
39 | pom_df = pom_df.copy()
40 |
41 | namask = pom_df.loc[:, df_site].isna()
42 | addressed = pom_df.loc[namask]
43 |
44 | pom_df.dropna(subset=[df_site], inplace=True)
45 |
46 | return pom_df, addressed
47 |
48 |
49 | def om_date_convert(om_df, om_col_dict, toffset=0.0):
50 | """
51 | Converts dates from string format to date time object in O&M
52 | dataframe.
53 |
54 | Parameters
55 | ----------
56 | om_df : DataFrame
57 | A data frame corresponding to O&M data.
58 | om_col_dict : dict of {str : str}
59 | A dictionary that contains the column names associated with
60 | the O&M data, which consist of at least:
61 |
62 | - **datestart** (*string*), should be assigned to column
63 | name for O&M event start date in om_df
64 | - **dateend** (*string*), should be assigned to column name
65 | for O&M event end date in om_df
66 |
67 | toffset : float
68 | Value that specifies how many hours the O&M data should be
69 | shifted by in case time-stamps in production data and O&M data
70 | don't align as they should
71 |
72 | Returns
73 | -------
74 |     DataFrame
75 |         An updated version of the input dataframe, but with
76 |         time-stamps converted to time-zone-naive date-time
77 |         objects.
78 | """
79 |
80 | om_df = om_df.copy()
81 |
82 | om_date_s = om_col_dict["datestart"]
83 | om_date_e = om_col_dict["dateend"]
84 |
85 | # Converting date-data from string data to DateTime objects
86 | om_df[om_date_s] = pd.to_datetime(
87 | om_df[om_date_s]) + pd.Timedelta(hours=toffset)
88 | om_df[om_date_e] = pd.to_datetime(
89 | om_df[om_date_e]) + pd.Timedelta(hours=toffset)
90 |
91 | # localizing timestamp
92 | om_df[om_date_s] = om_df[om_date_s].dt.tz_localize(None)
93 | om_df[om_date_e] = om_df[om_date_e].dt.tz_localize(None)
94 |
95 | return om_df
96 |
97 |
98 | def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"):
99 | """
100 |     Addresses issues with O&M dates where the start
101 |     of an event is listed as occurring after its end. These rows are
102 |     either dropped or their dates are swapped, depending on the user's
103 |     preference.
104 |
105 | Parameters
106 | ----------
107 | om_df : DataFrame
108 | A data frame corresponding to O&M data.
109 | om_col_dict : dict of {str : str}
110 | A dictionary that contains the column names associated with
111 | the O&M data, which consist of at least:
112 |
113 | - **datestart** (*string*), should be assigned to column
114 | name for associated O&M event start date in om_df
115 | - **dateend** (*string*), should be assigned to column name
116 | for associated O&M event end date in om_df
117 |
118 | om_dflag : str
119 | A flag that specifies how to address rows where the start of
120 | an event occurs after its conclusion. A flag of 'drop' will
 121 |     drop those rows, and a flag of 'swap' will swap the two dates for
122 | that row.
123 |
124 | Returns
125 | -------
126 | om_df : DataFrame
127 | An updated version of the input dataframe, but with O&M data
128 | quality issues addressed to ensure the start of an event
129 | precedes the event end date.
130 | addressed : DataFrame
131 | A data frame showing rows from the input that were addressed
132 | by this function.
133 | """
134 |
135 | # assigning dictionary items to local variables for cleaner code
136 | om_date_s = om_col_dict["datestart"]
137 | om_date_e = om_col_dict["dateend"]
138 |
139 | om_df = om_df.copy()
140 |
 141 |     # addressing cases where Date_EventEnd occurs before Date_EventStart
142 | mask = om_df.loc[:, om_date_e] < om_df.loc[:, om_date_s]
143 | addressed = om_df.loc[mask]
144 | # swap dates for rows where End < Start
145 | if any(mask) and om_dflag == "swap":
 146 |         om_df.loc[mask, [om_date_s, om_date_e]] = om_df.loc[
 147 |             mask, [om_date_e, om_date_s]
 148 |         ].values
149 | # drop rows where End < Start
150 | elif any(mask) and om_dflag == "drop":
151 | om_df = om_df[~mask]
152 |
153 | return om_df, addressed
154 |
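# Example usage (a sketch, reusing the om_col_dict assumed above):
#
#     om_df, swapped = om_datelogic_check(om_df, om_col_dict, om_dflag="swap")
#     print(f"Addressed {len(swapped)} rows with end dates before start dates")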
155 |
156 | def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"):
157 | """
158 | Addresses issues with O&M dataframe where dates are missing
 159 |     (NAN). Two operations are performed: (1) rows are dropped
 160 |     where the start of an event is missing, and (2) rows where the
161 | conclusion of an event is NAN can either be dropped or marked
162 | with the time at which program is run, depending on the user's
163 | preference.
164 |
165 | Parameters
166 | ----------
167 | om_df : DataFrame
168 | A data frame corresponding to O&M data.
169 |
170 | om_col_dict : dict of {str : str}
171 | A dictionary that contains the column names associated with
172 | the O&M data, which consist of at least:
173 |
174 | - **datestart** (*string*), should be assigned to column
175 | name for user's O&M event start-date
176 | - **dateend** (*string*), should be assigned to column name
177 | for user's O&M event end-date
178 |
179 | om_dendflag : str
180 | A flag that specifies how to address rows where the conclusion
181 | of an event is missing (NAN). A flag of 'drop' will drop those
182 | rows, and a flag of 'today' will replace the NAN with the time
 183 |     at which the program is run. Any other value will raise a
 184 |     ValueError.
185 |
186 | Returns
187 | -------
188 | om_df : DataFrame
189 | An updated version of the input dataframe, but with no
190 | missing time-stamps in the O&M data.
191 |
192 | addressed : DataFrame
193 | A data frame showing rows from the input that were addressed
194 | by this function.
195 | """
196 |
197 | om_df = om_df.copy()
198 |
199 | # assigning dictionary items to local variables for cleaner code
200 | om_date_s = om_col_dict["datestart"]
201 | om_date_e = om_col_dict["dateend"]
202 |
203 | # Dropping rows where om_date_s has values of NA in om_df
204 | mask1 = om_df.loc[:, om_date_s].isna()
205 | om_df.dropna(
206 | subset=[om_date_s], inplace=True
 207 |     ) # drops rows with om_date_s of NA in om_df
208 |
209 | # Addressing rows with 'om_date_e' values of NA in om_df
210 | mask2 = om_df.loc[:, om_date_e].isna()
211 | mask = mask1 | mask2
212 | addressed = om_df.loc[mask]
213 |
214 | if om_dendflag == "drop":
215 | om_df.dropna(
216 | subset=[om_date_e], inplace=True
217 | ) # drops rows with om_date_e of NA in om_df
218 | elif om_dendflag == "today":
 219 |         om_df[om_date_e] = om_df[om_date_e].fillna(
 220 |             pd.to_datetime(str(datetime.now())[:20])
 221 |         ) # replacing NANs with the current date-time
222 | else:
 223 |         raise ValueError('Undefined om_dendflag')
224 |
225 | return om_df, addressed
226 |
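# Example usage (a sketch): mark still-open events as ending now instead of
# dropping them.
#
#     om_df, addressed = om_nadate_process(om_df, om_col_dict,
#                                          om_dendflag="today")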
227 |
228 | def prod_date_convert(prod_df, prod_col_dict, toffset=0.0):
229 | """Converts dates from string format to datetime format in
230 | production dataframe.
231 |
232 |
233 | Parameters
234 | ----------
235 | prod_df : DataFrame
236 | A data frame corresponding to production data.
237 |
238 | prod_col_dict : dict of {str : str}
239 | A dictionary that contains the column names associated with
240 | the production data, which consist of at least:
241 |
242 | - **timestamp** (*string*), should be assigned to user's
243 | time-stamp column name
244 |
245 | toffset : float
246 | Value that specifies how many hours the production data
247 | should be shifted by in case time-stamps in production data
248 | and O&M data don't align as they should.
249 |
250 | Returns
251 | -------
252 | DataFrame
253 | An updated version of the input dataframe, but with
 254 |         time-stamps converted to time-zone-naive date-time
 255 |         objects (any timezone information is removed).
256 | """
257 |
258 | # creating local dataframes to not modify originals
259 | prod_df = prod_df.copy()
260 |
261 | prod_ts = prod_col_dict["timestamp"]
262 |
263 | # Converting date-data from string data to DateTime objects
264 | prod_df[prod_ts] = pd.to_datetime(
265 | prod_df[prod_ts]) + pd.Timedelta(hours=toffset)
266 |
 267 |     # removing timezone information from timestamps
268 | prod_df[prod_ts] = prod_df[prod_ts].dt.tz_localize(None)
269 |
270 | return prod_df
271 |
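# Example usage (a sketch; the 'Date' timestamp column is an assumption):
#
#     prod_col_dict = {"timestamp": "Date"}
#     prod_df = prod_date_convert(prod_df, prod_col_dict)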
272 |
273 | def prod_nadate_process(prod_df, prod_col_dict, pnadrop=False):
274 | """
275 | Processes rows of production data frame for missing time-stamp
276 | info (NAN).
277 |
278 |
279 | Parameters
280 | ----------
281 | prod_df : DataFrame
282 | A data frame corresponding to production data.
283 |
 284 |     prod_col_dict : dict of {str : str}
285 | A dictionary that contains the column names associated with
286 | the production data, which consist of at least:
287 |
288 | - **timestamp** (*string*), should be assigned to
289 | associated time-stamp column name in prod_df
290 |
291 | pnadrop : bool
292 | Boolean flag that determines what to do with rows where
293 | time-stamp is missing. A value of `True` will drop these
294 | rows. Leaving the default value of `False` will identify
295 | rows with missing time-stamps for the user, but the function
296 | will output the same input data frame with no modifications.
297 |
298 | Returns
299 | -------
300 | prod_df : DataFrame
 301 |         The output data frame. If `pnadrop` is True, an updated
 302 |         version of the input data frame is output, but rows with
 303 |         missing time-stamps are removed. If the default value is
 304 |         maintained, the input data frame is output with no modifications.
305 |
306 | addressed : DataFrame
307 | A data frame showing rows from the input that were addressed
308 | or identified by this function.
309 | """
310 |
 311 |     # creating local dataframes to not modify originals
 312 |     prod_df = prod_df.copy()
315 |
316 | prod_ts = prod_col_dict["timestamp"]
317 |
318 | # Dropping rows
319 | mask = prod_df.loc[:, prod_ts].isna()
320 | addressed = prod_df[mask]
321 | if pnadrop:
322 | prod_df.dropna(subset=[prod_ts], inplace=True)
323 |
324 | return prod_df, addressed
325 |
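# Example usage (a sketch, reusing the prod_col_dict assumed above):
#
#     prod_df, flagged = prod_nadate_process(prod_df, prod_col_dict,
#                                            pnadrop=True)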
--------------------------------------------------------------------------------
/pvops/timeseries/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | timeseries module
3 | """
4 |
5 | import pvops.timeseries.preprocess
6 | import pvops.timeseries.models
--------------------------------------------------------------------------------
/pvops/timeseries/models/AIT.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import mean_squared_error, r2_score
4 |
5 |
6 | class Predictor:
7 | """
8 | Predictor class
9 | """
10 | def __init__(self):
11 | super(Predictor, self).__init__()
12 |
13 | def apply_additive_polynomial_model(self, model_terms, Xs):
14 | """Predict energy using a model derived by pvOps.
15 |
16 |         Parameters
17 |         ----------
18 |         model_terms : list of tuples
19 |             Contains the model coefficients and powers. For example,
20 |
21 |             .. code-block:: python
22 |
23 |                 [(0.29359785963294494, [1, 0]),
24 |                  (0.754806343190528, [0, 1]),
25 |                  (0.396833207207238, [1, 1]),
26 |                  (-0.0588375219110795, [0, 0])]
27 |
28 |         Xs : list of numpy arrays
29 |             Input features (e.g., standardized irradiance and
30 |             capacity), one array per feature, ordered to match the
31 |             powers in each term of `model_terms`.
32 |
33 |         Returns
34 |         -------
35 |         Array of predicted energy values
39 | """
40 | for idx, (coeff, powers) in enumerate(model_terms):
41 | for i, (x, n) in enumerate(zip(Xs, powers)):
42 | if i == 0:
43 | term = x**n
44 | else:
45 | term *= x**n
46 | if idx == 0:
47 | energy = coeff * term
48 | else:
49 | energy += coeff * term
50 | return energy
51 |
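    # A sketch of how apply_additive_polynomial_model expands its terms
    # (hypothetical inputs, not part of the pvOps API):
    #
    #     import numpy as np
    #     terms = [(0.5, [1, 0]), (0.25, [0, 1]), (0.1, [0, 0])]
    #     Xs = [np.array([1.0, 2.0]), np.array([0.0, 1.0])]
    #     Predictor().apply_additive_polynomial_model(terms, Xs)
    #     # each term is coeff * x1**p1 * x2**p2, summed:
    #     # 0.5*x1 + 0.25*x2 + 0.1 -> array([0.6, 1.35])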
52 |     def evaluate(self, real, pred):
53 | logrmse = np.log(np.sqrt(mean_squared_error(real, pred)))
54 | r2 = r2_score(real, pred)
55 | print(f"The fit has an R-squared of {r2} and a log RMSE of {logrmse}")
56 | return logrmse, r2
57 |
58 |
59 | class Processer:
60 | def __init__(self):
61 | super(Processer, self).__init__()
62 | self._col_scaled_prefix = 'stdscaled_'
63 |
64 | def check_data(self, data, prod_col_dict):
65 | self.do_eval = False
66 | if 'energyprod' in prod_col_dict:
67 | if prod_col_dict['energyprod'] in data.columns.tolist():
68 | self.do_eval = True
69 |
70 | if not self.do_eval:
71 | print("Because the power production data is not"
72 | " passed, the fit will not be evaluated."
73 | " Predictions will still be rendered.")
74 |
75 | def _apply_transform(self, data,
76 | scaler_info):
77 | data -= scaler_info["mean"]
78 | data /= scaler_info["scale"]
79 | return data
80 |
81 | def _apply_inverse_transform(self, data,
82 | scaler_info):
83 | data *= scaler_info["scale"]
84 | data += scaler_info["mean"]
85 | return data
86 |
87 | def _clean_columns(self, scaler, prod_df, prod_col_dict):
88 | for k, d in scaler.items():
89 | del prod_df[self._col_scaled_prefix + prod_col_dict[k]]
90 |
91 |
92 | # @dev: The 'AIT' class is one of many models that can inherit the
93 | # @dev: Processer and Predictor templates. When adding new models,
94 | # @dev: use the Processer and Predictor classes to hold general
95 | # @dev: functionality while having model-specific nuances in the
96 | # @dev: classes below. The above classes may be placed in a different
97 | # @dev: module if that seems fit.
98 | class AIT(Processer, Predictor):
99 | def __init__(self):
100 | super(AIT, self).__init__()
101 | self._load_params()
102 |
103 | def _load_params(self):
104 | self.scaler_highcap = {"irradiance": {"mean": 571.45952959,
105 | "scale": 324.19905495},
106 | "dcsize": {"mean": 14916.2339917,
107 | "scale": 20030.00088265},
108 | "energyprod": {"mean": 7449.15184666,
109 | "scale": 12054.52533771}
110 | }
111 | self.model_terms_highcap = [(0.29359785963294494, [1, 0]),
112 | (0.754806343190528, [0, 1]),
113 | (0.396833207207238, [1, 1]),
114 | (-0.0588375219110795, [0, 0])]
115 |
116 | self.scaler_lowcap = {"irradiance": {"mean": 413.53334101,
117 | "scale": 286.11031612},
118 | "dcsize": {"mean": 375.91883522,
119 | "scale": 234.15141671},
120 | "energyprod": {"mean": 119.00787546,
121 | "scale": 119.82927847}
122 | }
123 | self.model_terms_lowcap = [(0.6866363032474436, [1, 0]),
124 | (0.6473846301807609, [0, 1]),
125 | (0.41926724219597955, [1, 1]),
126 | (0.06624491753542901, [0, 0])]
127 |
128 |     def predict_subset(self, prod_df, scaler, model_terms, prod_col_dict):
129 |         prod_df = prod_df.copy()  # avoid mutating the caller's slice
130 |         self.check_data(prod_df, prod_col_dict)
130 |
131 |         # 1. Standardize the data using the same scales
132 | for k, d in scaler.items():
133 | data = prod_df[prod_col_dict[k]].copy()
134 | scaled_data = self._apply_transform(data, d)
135 | prod_df[self._col_scaled_prefix + prod_col_dict[k]] = scaled_data
136 |
137 | prod_irr = prod_col_dict["irradiance"]
138 | prod_dcsize = prod_col_dict["dcsize"]
139 |
140 | irr = prod_df[self._col_scaled_prefix + prod_irr].values
141 | capacity = prod_df[self._col_scaled_prefix + prod_dcsize].values
142 | Xs = [irr, capacity]
143 |
144 |         # 2. Predict energy
145 | predicted_energy = self.apply_additive_polynomial_model(model_terms,
146 | Xs)
147 |         # 3. Rescale predictions
148 | predicted_rescaled_energy = self._apply_inverse_transform(predicted_energy,
149 | scaler['energyprod'])
150 |
151 |         # 4. Evaluate
152 | if self.do_eval:
153 | self.evaluate(prod_df[prod_col_dict["energyprod"]].values,
154 | predicted_rescaled_energy)
155 | return predicted_rescaled_energy
156 |
157 | def predict(self, prod_df, prod_col_dict):
158 |
159 | # High-capacity systems
160 | high_cap_mask = prod_df[prod_col_dict['dcsize']] > 1000
161 | if sum(high_cap_mask) > 0:
162 | predicted = self.predict_subset(prod_df.loc[high_cap_mask, :],
163 | self.scaler_highcap,
164 | self.model_terms_highcap,
165 | prod_col_dict)
166 | prod_df.loc[high_cap_mask, prod_col_dict["baseline"]] = predicted
167 |
168 | # Low-capacity systems
169 | low_cap_mask = prod_df[prod_col_dict['dcsize']] <= 1000
170 | if sum(low_cap_mask) > 0:
171 | predicted = self.predict_subset(prod_df.loc[low_cap_mask, :],
172 | self.scaler_lowcap,
173 | self.model_terms_lowcap,
174 | prod_col_dict)
175 | prod_df.loc[low_cap_mask, prod_col_dict["baseline"]] = predicted
176 | return prod_df
177 |
178 |
179 | def AIT_calc(prod_df, prod_col_dict):
180 | """
181 |     Calculates expected energy using measured irradiance,
182 |     based on a regression model trained on field data.
183 |     Plane-of-array irradiance is recommended when using the
184 |     pre-trained AIT model.
184 |
185 | Parameters
186 | ----------
187 | prod_df : DataFrame
188 | A data frame corresponding to the production data
189 |
190 | prod_col_dict : dict of {str : str}
191 | A dictionary that contains the column names relevant
192 | for the production data
193 |
194 | - **irradiance** (*string*), should be assigned to
195 | irradiance column name in prod_df, where data
196 | should be in [W/m^2]
197 | - **dcsize**, (*string*), should be assigned to
198 | preferred column name for site capacity in prod_df
199 | - **energyprod**, (*string*), should be assigned to
200 | the column name holding the power or energy production.
201 | If this is passed, an evaluation will be provided.
202 | - **baseline**, (*string*), should be assigned to
203 | preferred column name to capture the calculations
204 | in prod_df
205 |
206 | Example
207 | -------
208 |
209 | .. code-block:: python
210 |
211 | production_col_dict = {'irradiance': 'irrad_poa_Wm2',
212 | 'ambient_temperature': 'temp_amb_C',
213 | 'dcsize': 'capacity_DC_kW',
214 | 'energyprod': 'energy_generated_kWh',
215 | 'baseline': 'predicted'
216 | }
217 | data = AIT_calc(data, production_col_dict)
218 |
219 |
220 | Returns
221 | -------
222 | DataFrame
223 | A data frame for production data with a new column,
224 | the predicted energy
225 | """
226 | prod_df = prod_df.copy()
227 |     # instantiate the pre-trained model and generate predictions
228 | model = AIT()
229 | prod_df = model.predict(prod_df, prod_col_dict)
230 | return prod_df
231 |
--------------------------------------------------------------------------------
/pvops/timeseries/models/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | timeseries models
3 | """
4 |
5 | import pvops.timeseries.models.AIT
6 | import pvops.timeseries.models.iec
7 | import pvops.timeseries.models.linear
8 | import pvops.timeseries.models.survival
--------------------------------------------------------------------------------
/pvops/timeseries/models/iec.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def iec_calc(prod_df, prod_col_dict, meta_df, meta_col_dict,
4 | gi_ref=1000.0):
5 | """Calculates expected energy using measured irradiance
6 | based on IEC calculations.
7 |
8 | Parameters
9 | ----------
10 | prod_df : DataFrame
11 | A data frame corresponding to the production data
12 | after having been processed by the perf_om_NA_qc
13 | and overlappingDFs functions. This data frame needs
14 | at least the columns specified in prod_col_dict.
15 |
16 | prod_col_dict : dict of {str : str}
17 | A dictionary that contains the column names relevant
18 | for the production data
19 |
20 | - **siteid** (*string*), should be assigned to
21 | site-ID column name in prod_df
22 | - **timestamp** (*string*), should be assigned to
23 | time-stamp column name in prod_df
24 | - **irradiance** (*string*), **plane-of-array**. Should be assigned to
25 | irradiance column name in prod_df, where data
26 | should be in [W/m^2].
27 | - **baseline** (*string*), should be assigned to
28 | preferred column name to capture IEC calculations
29 | in prod_df
30 | - **dcsize**, (*string*), should be assigned to
31 | preferred column name for site capacity in prod_df
32 |
33 | meta_df : DataFrame
34 | A data frame corresponding to site metadata.
 35 |         At the least, the columns in meta_col_dict must be
 36 |         present.
37 |
38 | meta_col_dict : dict of {str : str}
39 | A dictionary that contains the column names relevant
40 | for the meta-data
41 |
42 | - **siteid** (*string*), should be assigned to site-ID
43 | column name
44 | - **dcsize** (*string*), should be assigned to
45 | column name corresponding to site capacity, where
46 | data is in [kW]
47 |
48 | gi_ref : float
49 | reference plane of array irradiance in W/m^2 at
50 | which a site capacity is determined (default value
51 | is 1000 [W/m^2])
52 |
53 | Returns
54 | -------
55 | DataFrame
56 | A data frame for production data with a new column,
57 | iecE, which is the predicted energy calculated
58 | based on the IEC standard using measured irradiance
59 | data
60 |
61 | """
62 | # assigning dictionary items to local variables for cleaner code
63 | prod_site = prod_col_dict["siteid"]
64 | prod_ts = prod_col_dict["timestamp"]
65 | prod_irr = prod_col_dict["irradiance"]
66 | prod_iec = prod_col_dict["baseline"]
67 | prod_dcsize = prod_col_dict["dcsize"]
68 |
69 | meta_site = meta_col_dict["siteid"]
70 | meta_size = meta_col_dict["dcsize"]
71 |
72 | # creating local dataframes to not modify originals
73 | prod_df = prod_df.copy()
74 | meta_df = meta_df.copy()
75 |
76 | # setting index for metadata for alignment to production data
77 | meta_df = meta_df.set_index(meta_site)
78 |
 79 |     # Creating new column in production data corresponding to site size (in kW)
80 | prod_df[prod_dcsize] = prod_df.loc[:, prod_site].apply(
81 | lambda x: meta_df.loc[x, meta_size]
82 | )
83 |
84 | # iec calculation
85 |
86 | for sid in prod_df.loc[:, prod_site].unique():
87 | mask = prod_df.loc[:, prod_site] == sid
88 | tstep = prod_df.loc[mask, prod_ts].iloc[1] - \
89 | prod_df.loc[mask, prod_ts].iloc[0]
90 | tstep = tstep / np.timedelta64(
91 | 1, "h"
92 | ) # Converting the time-step to float (representing hours) to
93 | # arrive at kWh for the iecE calculation
94 |
95 | prod_df.loc[mask, prod_iec] = (
96 | prod_df.loc[mask, prod_dcsize]
97 | * prod_df.loc[mask, prod_irr]
98 | * tstep
99 | / gi_ref
100 | )
101 | prod_df.drop(columns=[prod_dcsize], inplace=True)
102 |
103 | return prod_df
104 |
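# The loop above implements the IEC-style expected energy,
#     E_expected = P_dc * (G_poa / G_ref) * tstep,
# with P_dc in kW, G_poa in W/m^2, and tstep in hours, yielding kWh.
#
# Example usage (a sketch; the column names mirror the docstring nicknames
# and the tutorial metadata, and are assumptions about the user's data):
#
#     prod_col_dict = {"siteid": "randid", "timestamp": "Date",
#                      "irradiance": "POA_irradiance", "baseline": "IEC_E",
#                      "dcsize": "dcsize"}
#     meta_col_dict = {"siteid": "randid", "dcsize": "DC_Size_kW"}
#     prod_df = iec_calc(prod_df, prod_col_dict, meta_df, meta_col_dict)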
--------------------------------------------------------------------------------
/pvops/timeseries/models/survival.py:
--------------------------------------------------------------------------------
1 | from scipy import stats
2 | from sksurv.nonparametric import kaplan_meier_estimator
3 |
4 | def fit_survival_function(df, col_dict, method):
5 | """
6 | Calculate the survival function for different groups in a DataFrame using specified methods.
7 |
8 | This function computes the survival function for each unique group in the input DataFrame
9 | based on the specified method. It supports the Kaplan-Meier estimator and Weibull distribution
10 | fitting for survival analysis. The Kaplan-Meier estimator is a non-parametric statistic,
11 | while the Weibull distribution is a parametric model.
12 |
13 | Parameters
14 | ----------
15 | df : pandas.DataFrame
16 | A DataFrame containing failure data with at least three columns specified in `col_dict`:
17 | one for grouping, one for the time to failure, and one indicating whether the failure was observed
18 |
19 | col_dict : dict of {str : str}
20 | A dictionary that contains the column names relevant for survival analysis
21 |
22 | - **group_by** (*string*), should be assigned to the column to group by
23 | - **time_to_fail** (*string*), should be assigned to the column containing the time until failure
24 | - **was_observed** (*string*), should be assigned to the column indicating whether the failure was observed
25 |
26 | method : str
27 | The method to use for calculating the survival function. Must be one of:
28 |
29 | - 'kaplan-meier': Uses the Kaplan-Meier estimator for survival analysis.
30 | - 'weibull': Fits a Weibull distribution to the data.
31 |
32 | Returns
33 | -------
34 | dict
35 |
36 | - If `method` is `'kaplan-meier'`, contains keys `'times'`, `'fail_prob'`, and `'conf_int'`, which denote the times, failure probabilities, and confidence intervals on the failure probabilities.
37 | - If `method` is `'weibull'`, contains keys `'shape'`, `'scale'`, and `'distribution'`, which denote the shape parameter, scale parameter, and corresponding fitted `stats.weibull_min` distribution.
38 | """
39 |
40 | implemented_methods = ['kaplan-meier', 'weibull']
41 | if method not in implemented_methods:
42 | raise ValueError(f'method argument must be one of {implemented_methods}, got {method}')
43 |
44 | df = df.reset_index()
45 |
46 | group_by = col_dict['group_by']
47 | time_to_fail = col_dict['time_to_fail']
48 | was_observed = col_dict['was_observed']
49 |
50 | results = {}
51 |
52 | unique_group_by = df[group_by].unique()
53 | for group in unique_group_by:
54 | group_df = df[df[group_by] == group]
55 |
56 | if method == 'kaplan-meier':
57 | km_result = kaplan_meier_estimator(group_df[was_observed], group_df[time_to_fail], conf_type='log-log')
58 | group_result = {'times': km_result[0], 'fail_prob': km_result[1], 'conf_int': km_result[2]}
59 |
60 | elif method == 'weibull':
61 | uncensored_times = group_df[group_df[was_observed]][time_to_fail]
62 | censored_times = group_df[~group_df[was_observed]][time_to_fail]
63 | data = stats.CensoredData(uncensored=uncensored_times, right=censored_times)
64 | shape, _, scale = stats.weibull_min.fit(data, floc=0)
65 | group_result = {'shape': shape, 'scale': scale, 'distribution': stats.weibull_min(c=shape, scale=scale)}
66 |
67 | results[group] = group_result
68 |
69 | return results
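
# Example usage (a sketch; the column names are assumptions about the
# user's failure data):
#
#     col_dict = {"group_by": "Asset",
#                 "time_to_fail": "days_to_failure",
#                 "was_observed": "failure_observed"}
#     results = fit_survival_function(df, col_dict, method="weibull")
#     # e.g., probability an asset in one (hypothetical) group survives
#     # past 100 days:
#     #     results["inverter"]["distribution"].sf(100)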
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Required
2 | pandas
3 | numpy
4 | scipy
5 | scikit-learn
6 | scikit-survival
7 | datefinder
8 | matplotlib
9 | seaborn
10 | plotly
11 | gensim
12 | networkx
13 | pvlib
14 | pvanalytics
15 | timezonefinder
16 | pyDOE
17 | tensorflow
18 | tqdm
19 |
20 | # Testing
21 | pytest
22 |
23 | # Docs
24 | sphinx==7.2.6
25 | coverage==7.2.3
26 | ipykernel==6.22.0
27 | nbconvert==7.3.1
28 | nbformat==5.8.0
29 | nbsphinx==0.9.3
30 | nbsphinx-link==1.3.0
31 | sphinx-copybutton==0.5.2
32 | sphinxcontrib-bibtex==2.5.0
33 | sphinx_rtd_theme==1.3.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 |
4 | try:
5 | from setuptools import setup, find_packages
6 | except ImportError:
7 | raise RuntimeError('setuptools is required')
8 |
9 | DESCRIPTION = ('pvops is a python library for the analysis of ' +
10 | 'field collected operational data for photovoltaic systems.')
11 |
12 | LONG_DESCRIPTION = """
13 | pvops is a python package for PV operators & researchers. It is
14 | a collection of functions for working with text-based data
15 | from photovoltaic power systems. The library includes functions for
16 | processing text data as well as fusion of the text information with
17 | time series data for visualization of contextual details for data
18 | analysis.
19 |
20 | Documentation: https://pvops.readthedocs.io/en/latest/index.html
21 |
22 | Source code: https://github.com/sandialabs/pvOps
23 |
24 | """
25 |
26 | DISTNAME = 'pvops'
27 | MAINTAINER = "Thushara Gunda"
28 | MAINTAINER_EMAIL = 'tgunda@sandia.gov'
29 | AUTHOR = 'pvOps Developers'
30 | LICENSE = 'BSD 3-Clause License'
31 | URL = 'https://github.com/sandialabs/pvops'
32 |
33 | TESTS_REQUIRE = [
34 | 'pytest',
35 | ]
36 |
37 | INSTALL_REQUIRES = [
38 | 'numpy',
39 | 'pandas',
40 | 'scipy',
41 | 'scikit-learn',
42 | 'scikit-survival',
43 | 'datefinder',
44 | 'matplotlib',
45 | 'seaborn',
46 | 'plotly',
47 | 'gensim',
48 | 'networkx',
49 | 'pvlib',
50 | 'pvanalytics',
51 | 'timezonefinder',
52 | 'tqdm',
53 | ]
54 |
55 | DOCS_REQUIRE = [
56 | 'sphinx==7.2.6',
57 | 'coverage==7.2.3',
58 | 'ipykernel==6.22.0',
59 | 'nbconvert==7.3.1',
60 | 'nbformat==5.8.0',
61 | 'nbsphinx==0.9.3',
62 | 'nbsphinx-link==1.3.0',
63 | 'sphinx-copybutton==0.5.2',
64 | 'sphinxcontrib-bibtex==2.5.0',
65 | 'sphinx_rtd_theme==1.3.0',
66 | ]
67 |
68 | IV_REQUIRE = [
69 | 'keras',
70 | 'tensorflow;python_version<"3.13"',
71 | 'pyDOE',
72 | ]
73 |
74 | EXTRAS_REQUIRE = {
75 | 'iv': IV_REQUIRE,
76 | 'test': TESTS_REQUIRE,
77 | 'doc': DOCS_REQUIRE
78 | }
79 |
80 | EXTRAS_REQUIRE['all'] = sorted(set(sum(EXTRAS_REQUIRE.values(), [])))
81 |
82 | SETUP_REQUIRES = ['setuptools_scm']
83 |
84 | CLASSIFIERS = [
85 | 'Development Status :: 2 - Pre-Alpha',
86 | 'Operating System :: OS Independent',
87 | 'Intended Audience :: Science/Research',
88 | 'Programming Language :: Python :: 3',
89 | 'Topic :: Scientific/Engineering'
90 | ]
91 |
92 | PACKAGES = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"])
93 |
94 | # get version from __init__.py
95 | file_dir = os.path.abspath(os.path.dirname(__file__))
96 | with open(os.path.join(file_dir, 'pvops', '__init__.py')) as f:
97 | version_file = f.read()
98 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
99 | version_file, re.M)
100 | if version_match:
101 | VERSION = version_match.group(1)
102 | else:
103 | raise RuntimeError("Unable to find version string.")
104 |
105 | setup(
106 | name=DISTNAME,
107 | use_scm_version=True,
108 | packages=PACKAGES,
109 | install_requires=INSTALL_REQUIRES,
110 | extras_require=EXTRAS_REQUIRE,
111 | tests_require=TESTS_REQUIRE,
112 | setup_requires=SETUP_REQUIRES,
113 | ext_modules=[],
114 | description=DESCRIPTION,
115 | long_description=LONG_DESCRIPTION,
116 | author=AUTHOR,
117 | maintainer=MAINTAINER,
118 | maintainer_email=MAINTAINER_EMAIL,
119 | license=LICENSE,
120 | classifiers=CLASSIFIERS,
121 | url=URL,
122 | version=VERSION
123 | )
124 |
--------------------------------------------------------------------------------
/tutorials/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/tutorials/__init__.py
--------------------------------------------------------------------------------
/tutorials/assets/diode_param_extractor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sandialabs/pvOps/d6248e77fa063c94f5b5a0736b05b2bbe51c6be4/tutorials/assets/diode_param_extractor.png
--------------------------------------------------------------------------------
/tutorials/example_data/example_metadata2.csv:
--------------------------------------------------------------------------------
1 | randid,DC_Size_kW,COD,latitude,longitude
2 | R23,2500,10/20/2013,-80,-35
3 | R27,475,10/21/2017,-81,-36
4 |
--------------------------------------------------------------------------------
/tutorials/example_data/example_om_data.csv:
--------------------------------------------------------------------------------
1 | randid,Cause,ImpactLevel,CompletionDesc,CompletionActivity,Asset,Date_EventStart,Date_EventEnd
2 | 27,019 - Unplanned outage/derate. Hurricane Florence,Production Outage,hurricane florence outages/response. complete post-storm inspection form and upload to the work order. perform site inspection to assess any damage sustained from hurricane florence. site went offline around 1000 et on 14-sep. loss of ac voltage can be verified. update 16- sep: site came back online around 5pm 16-sep update 18-sep cb stuck at -74.21 amps. 019 - unplanned outage/derate. inspection complete. no damage. site operational.. techdispatched: yes,14 - Self Resolved,Facility,9/14/2018 10:00,9/18/2018 17:00
3 | 27,0000 - Unknown. ,Underperformance,hurricane response. perform site inspection to assess any damage sustained from hurricane 0000 - unknown. post hurricane inspection.. techdispatched: yes,09 - Inspection,Facility,10/12/2018 9:00,10/13/2018 17:00
4 |
--------------------------------------------------------------------------------
/tutorials/example_data/example_om_data2.csv:
--------------------------------------------------------------------------------
1 | randid,Asset,date_start,date_end,WONumber,WOType,GeneralDesc
2 | ,Inverter,5/2/2018 12:00,5/17/2018 16:00,100,Corrective,"Inverter 1.1 Contactor 7, Inverter 1.2 Contactors 1 and 4 suspected DC production issues"
3 | R23,Facility,5/19/2018 15:44,5/19/2018 13:04,101,Preventive,Site offline due to grid disturbance
4 | R23,Facility,6/15/2018 6:46,6/15/2018 10:30,102,Corrective,Plant trip due to grid disturbance
5 | R23,Facility,6/18/2018 11:20,6/18/2018 14:03,103,Corrective,Site trip due to cause grid disturbance
6 | R23,Facility,7/21/2018 4:45,7/21/2018 13:15,104,Vegetation,Site tripped due to grid disturbance
7 | R23,Inverter,7/21/2018 13:16,7/21/2018 14:25,105,Corrective,Inverter failed to start following plant trip
8 | R23,Inverter,7/25/2018 14:20,7/25/2018 16:40,106,Corrective,inverter offline due to high ambient temp fault
9 | R23,Inverter,8/1/2018 11:45,,107,Corrective,Inverter major underperformance
10 | R23,Facility,8/2/2018 1:05,8/2/2018 9:28,108,Corrective,Site trip due to grid disturbance
11 | R27,Facility,9/14/2018 10:00,9/16/2018 16:00,1,corrective,hurricane florence outages/response. complete post-storm inspection form and upload to the work order. perform site inspection to assess any damage sustained from hurricane florence. site went offline around 1000 et on 14-sep. loss of ac voltage can be verified. update 16- sep: site came back online around 5pm 16-sep update 18-sep cb stuck at -74.21 amps. 019 - unplanned outage/derate. inspection complete. no damage. site operational.. techdispatched: yes
12 | R27,Facility,9/24/2018 10:00,9/16/2018 17:00,2,vegetation,Vegetation maintenance activities were performed
13 | R27,Other,9/19/2018 7:00,10/11/2018 20:00,3,corrective,hurricane response. perform site inspection to assess any damage sustained from hurricane 0000 - unknown. post hurricane inspection.. techdispatched: yes
14 | R27,Facility,10/13/2018 12:00,10/13/2018 17:00,4,preventive,Monthly visual inspection
15 | R27,other,10/14/2018 11:00,,5,preventive,Monthly visual inspection
16 |
--------------------------------------------------------------------------------
/tutorials/example_data/mappings_cause.csv:
--------------------------------------------------------------------------------
1 | ,in,out_
2 | 0,01 - Replace,Replacement
3 | 1,02 - Repair,Repair
4 | 2,03 - Adjust,Repair
5 | 3,03 - Modify,Repair
6 | 4,04 - Adjust,Repair
7 | 5,05 - Refit(Reset),Repair
8 | 6,06 - Check,"Troubleshooting ""Status"""
9 | 7,07 - Service,"Troubleshooting ""Status"""
10 | 8,08 - Test,"Troubleshooting ""Status"""
11 | 9,09 - Inspection,"Troubleshooting ""Status"""
12 | 10,09-Inspection,"Troubleshooting ""Status"""
13 | 11,10 - Overhaul,Repair
14 | 12,11 - Combination,Misc.
15 | 13,12 - Other,Misc.
16 | 14,13 - Remote Reset,Troubleshooting
17 | 15,14 - Self Resolved,None
18 | 16,15 - EPC Resolved,"Troubleshooting ""Status"""
19 | 17,Clear faults,"Troubleshooting ""Reset"""
20 | 18,Cleared faults through GUI,"Troubleshooting ""Reset"""
21 | 19,Cleared through GUI,"Troubleshooting ""Reset"""
22 | 20,Curtailment Lift,None
23 | 21,DEPCOM warranty service work,"Troubleshooting ""Status"""
24 | 22,Fault cleared manually,"Troubleshooting ""Reset"""
25 | 23,Faults cleared,"Troubleshooting ""Reset"""
26 | 24,Faults cleared through GUI with PC,"Troubleshooting ""Reset"""
27 | 25,Field Wiring Repair,Repair
28 | 26,Fuse Replacement,Replacement
29 | 27,"Ground fault was isolated at combiner box 01. All strings at CB 01 were disconnected and fuses were removed. Combiner box was placed in ""off"" position and locked out. DC disconnect for CB's 01 & 02 at inverter 1 was opened before power cycling the inv.",Repair
30 | 28,Hardware Adjustment,Repair
31 | 29,Hardware Replacement,Replacement
32 | 30,No Action Required,None
33 | 31,Not Fixed - Follow-up,Misc.
34 | 32,Other,Misc.
35 | 33,Other Site Work,Misc.
36 | 34,Power Cycle,"Troubleshooting ""Status"""
37 | 35,Preventative Maintenance,"Troubleshooting ""Status"""
38 | 36,Problem Self-Resolved,None
39 | 37,Reclose,Troubleshooting
40 | 38,Remote Reset,Troubleshooting
41 | 39,Remote Troubleshooting,Troubleshooting
42 | 40,Repair work on combiner box was sub' out to electrical contractors Anderson and Wood,Repair
43 | 41,Replace/Repair,Replacement
44 | 42,Replacement,Replacement
45 | 43,Software Change/Update,Software
46 | 44,Software/Firmware Adjustment,Software
47 | 45,Software/Firmware Update,Software
48 | 46,Unknown,Missing
49 |
--------------------------------------------------------------------------------
/tutorials/example_data/mappings_equipment.csv:
--------------------------------------------------------------------------------
1 | in,out_
2 | combiner,combiner
3 | comb,combiner
4 | cb,combiner
5 | battery,battery
6 | bess,battery
7 | inverter,inverter
8 | invert,inverter
9 | inv,inverter
10 | met,met
11 | meter,meter
12 | module,module
13 | mod,module
14 | recloser,recloser
15 | reclose,recloser
16 | relay,relay
17 | substation,substation
18 | switchgear,switchgear
19 | switch,switchgear
20 | tracker,tracker
21 | transformer,transformer
22 | xfmr,transformer
23 | wiring,wiring
24 | wire,wiring
25 | wires,wiring
--------------------------------------------------------------------------------
/tutorials/example_data/mappings_pv_terms.csv:
--------------------------------------------------------------------------------
1 | in,out_
2 | comm,communication
3 | energy,energy
4 | kwh,energy
5 | mwh,energy
6 | grid,grid
7 | curtailment,grid
8 | curtail,grid
9 | poi,grid
10 | offline,outage
11 | solar,solar
12 | pv,solar
13 | photovoltaic,solar
14 | system,system
15 | site,system
16 | farm,system
17 | project,system
18 | sma,make_model
19 | cm,corrective_maintenance
20 | pm,preventative_maintenance
--------------------------------------------------------------------------------
/tutorials/example_data/remappings_asset.csv:
--------------------------------------------------------------------------------
1 | in,out_
2 | inverter,inverter
3 | recloser,recloser
4 | transformer,transformer
5 | switchgear,switchgear
6 | combiner,combiner
7 | substation,substation
8 | facility,facility
9 | energy meter,energy meter
10 | relay,relay
11 | met station,met station
12 | tracker,tracker
13 | module,module
14 | DC Disconnect,combiner
15 | Recombiner,combiner
16 | Feeder (Dip Pole/Array),wiring
17 | Ground-Mount PV System,module
18 | Weather Station,met station
19 | Pyranometer,met station
20 | Temperature sensor,met station
21 | Met station battery,met station
22 | Anemometer,met station
23 | Reference cell,met station
24 | Relative humidity sensor,met station
25 | Meter,energy meter
26 | Energy Storage/Battery,energy storage
27 | AC Combiner,combiner
28 | Battery (Solar + storage facilities),energy storage
29 | Block,transformer
30 | Central Inverter,inverter
31 | Circuit,wiring
32 | Combiner Box,combiner
33 | DAS System,facility
34 | DC Combiner,combiner
35 | Data logger,facility
36 | Disconnect switch,relay
37 | Inverter Module,inverter
38 | Inverter module,inverter
39 | Inverter/String Inverter,inverter
40 | Modules,module
41 | Other,other
42 | PCS Transformer,transformer
43 | POI/Medium Voltage,other
44 | Pad,transformer
45 | Plant,other
46 | Point of Interconnection,other
47 | Racking/Trackers,tracker
48 | Rooftop PV System,other
49 | Site,other
50 | String,other
51 | String Inverter,inverter
52 | Subarray,other
53 | Summary,other
54 | Tracker control unit,tracker
55 | Tracking System,tracker
56 |
--------------------------------------------------------------------------------
/tutorials/example_data/remappings_response.csv:
--------------------------------------------------------------------------------
1 | in,out_
2 | Remote Troubleshooting,Remote troubleshooting/Reset
3 | Remote Reset,Remote troubleshooting/Reset
4 | 13 - Remote Reset,Remote troubleshooting/Reset
5 | Power Cycle,Troubleshoot/Reset
6 | 07 - Service,Troubleshoot/Reset
7 | 09 - Inspection,Troubleshoot/Reset
8 | 09-Inspection,Troubleshoot/Reset
9 | 06 - Check,Troubleshoot/Reset
10 | 08 - Test,Troubleshoot/Reset
11 | Clear faults,Troubleshoot/Reset
12 | Faults cleared,Troubleshoot/Reset
13 | Fault cleared manually,Troubleshoot/Reset
14 | No Action Required,Self-Resolved
15 | Problem Self-Resolved,Self-Resolved
16 | 14 - Self Resolved,Self-Resolved
17 | Hardware Replacement,Replacement
18 | Replacement,Replacement
19 | 01 - Replace,Replacement
20 | Fuse Replacement,Replacement
21 | Replace/Repair,Replacement
22 |
--------------------------------------------------------------------------------
/tutorials/tutorial_text_classify_regex_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Adding keyword labels to O&M data\n",
9 |     "This notebook demonstrates the use of the `pvops.text.classify.get_attributes_from_keywords` function for adding asset labels based on O&M notes."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import pandas as pd\n",
19 | "from sklearn.metrics import accuracy_score\n",
20 | "\n",
21 | "from pvops.text import utils, preprocess\n",
22 | "from pvops.text.classify import get_attributes_from_keywords\n",
23 | "from pvops.text.visualize import visualize_classification_confusion_matrix"
24 | ]
25 | },
26 | {
27 | "attachments": {},
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "# Step 0: Get sample data, remap assets"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "# pull in sample data and remap assets for ease of comparison\n",
41 | "\n",
42 | "om_df = pd.read_csv('example_data/example_ML_ticket_data.csv')\n",
43 | "col_dict = {\n",
44 | " \"data\" : \"CompletionDesc\",\n",
45 | " \"eventstart\" : \"Date_EventStart\",\n",
46 | " \"save_data_column\" : \"processed_data\",\n",
47 | " \"save_date_column\" : \"processed_date\",\n",
48 | " \"attribute_col\" : \"Asset\",\n",
49 | " \"predicted_col\" : \"Keyword_Asset\",\n",
50 | " \"remapping_col_from\": \"in\",\n",
51 | " \"remapping_col_to\": \"out_\"\n",
52 | "}\n",
53 | "\n",
54 | "# remap assets\n",
55 | "remapping_df = pd.read_csv('example_data/remappings_asset.csv')\n",
56 | "remapping_df['out_'] = remapping_df['out_'].replace({'met station': 'met',\n",
57 | " 'energy storage': 'battery',\n",
58 | " 'energy meter': 'meter'})\n",
59 | "om_df = utils.remap_attributes(om_df, remapping_df, col_dict, allow_missing_mappings=True)\n",
60 | "om_df.head()"
61 | ]
62 | },
63 | {
64 | "attachments": {},
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "# Step 1: Text preprocessing"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "# preprocessing steps\n",
78 | "om_df[col_dict['attribute_col']] = om_df.apply(lambda row: row[col_dict['attribute_col']].lower(), axis=1)\n",
79 | "om_df = preprocess.preprocessor(om_df, lst_stopwords=[], col_dict=col_dict, print_info=False, extract_dates_only=False)\n",
80 | "\n",
81 | "DATA_COL = col_dict['data']\n",
82 | "om_df[DATA_COL] = om_df['processed_data']\n",
83 | "\n",
84 | "# replace terms\n",
85 |     "equipment_df = pd.read_csv('example_data/mappings_equipment.csv')\n",
86 |     "pv_terms_df = pd.read_csv('example_data/mappings_pv_terms.csv')\n",
87 | "pv_reference_df = pd.concat([equipment_df, pv_terms_df])\n",
88 | "om_df = utils.remap_words_in_text(om_df=om_df, remapping_df=pv_reference_df, remapping_col_dict=col_dict)\n",
89 | "\n",
90 | "om_df.head()"
91 | ]
92 | },
93 | {
94 | "attachments": {},
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "# Step 2: Search for keywords to use as labels"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {},
105 | "outputs": [],
106 | "source": [
107 | "# add asset labels from keyword reference dict\n",
108 | "om_df = get_attributes_from_keywords(om_df=om_df,\n",
109 | " col_dict=col_dict,\n",
110 | " reference_df=equipment_df)\n",
111 | "om_df.head()"
112 | ]
113 | },
114 | {
115 | "attachments": {},
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "# Step 3: Metrics"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "# get accuracy measures and count metrics\n",
129 | "PREDICT_COL = col_dict['predicted_col']\n",
130 | "LABEL_COL = col_dict['attribute_col']\n",
131 | "\n",
132 |     "# entries with some keyword of interest, over all entries\n",
133 | "label_count = om_df[PREDICT_COL].count() / len(om_df)\n",
134 | "\n",
135 |     "# replace 'other' values with 'unknown'\n",
136 | "om_df[LABEL_COL] = om_df[LABEL_COL].replace('other', 'unknown')\n",
137 | "# replace NaN values to use accuracy score\n",
138 | "om_df[[LABEL_COL, PREDICT_COL]] = om_df[[LABEL_COL, PREDICT_COL]].fillna('unknown')\n",
139 | "acc_score = accuracy_score(y_true=om_df[LABEL_COL], y_pred=om_df[PREDICT_COL])\n",
140 | "\n",
141 | "msg = f'{label_count:.2%} of entries had a keyword of interest, with {acc_score:.2%} accuracy.'\n",
142 | "print(msg)"
143 | ]
144 | },
145 | {
146 | "attachments": {},
147 | "cell_type": "markdown",
148 | "metadata": {},
149 | "source": [
150 | "# Step 4: Visualization"
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": null,
156 | "metadata": {},
157 | "outputs": [],
158 | "source": [
159 | "# plot confusion matrix\n",
160 | "title = 'Confusion Matrix of Actual and Predicted Asset Labels'\n",
161 | "visualize_classification_confusion_matrix(om_df, col_dict, title)"
162 | ]
163 | }
164 | ],
165 | "metadata": {
166 | "kernelspec": {
167 | "display_name": "Python 3",
168 | "language": "python",
169 | "name": "python3"
170 | },
171 | "language_info": {
172 | "codemirror_mode": {
173 | "name": "ipython",
174 | "version": 3
175 | },
176 | "file_extension": ".py",
177 | "mimetype": "text/x-python",
178 | "name": "python",
179 | "nbconvert_exporter": "python",
180 | "pygments_lexer": "ipython3",
181 | "version": "3.7.5"
182 | },
183 | "orig_nbformat": 4
184 | },
185 | "nbformat": 4,
186 | "nbformat_minor": 2
187 | }
188 |
--------------------------------------------------------------------------------