├── .gitattributes ├── .github └── workflows │ └── build_and_test.yml ├── .gitignore ├── .readthedocs.yaml ├── CHANGES.txt ├── CITATION.cff ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── _static │ ├── concept.png │ ├── forest_mask_bolivia.png │ ├── iqr_results_bolivia.png │ └── logo.png ├── api_reference.rst ├── classes.rst ├── conf.py ├── fit.rst ├── gallery │ ├── README.rst │ ├── dontplot_bolivia_stac.py │ ├── plot_parallel_computing.py │ └── plot_simulation_data.py ├── index.rst ├── make.bat └── quickstart.rst ├── nrt ├── __init__.py ├── fit_methods.py ├── log.py ├── monitor │ ├── __init__.py │ ├── ccdc.py │ ├── cusum.py │ ├── ewma.py │ ├── iqr.py │ └── mosum.py ├── outliers.py ├── stats.py ├── utils.py └── utils_efp.py ├── pyproject.toml ├── pytest.ini └── tests ├── integration_tests ├── __init__.py ├── conftest.py ├── test_ccdc.py └── test_monitor.py └── unit_tests ├── conftest.py ├── data ├── RLM_X.csv ├── RLM_y.csv ├── X.csv ├── cusum_result.csv ├── dates.csv ├── efp_result.csv ├── mosum_result.csv ├── roc_history_result.csv ├── rr_result.csv └── y.csv ├── test_fit_methods.py ├── test_outliers.py ├── test_stats.py └── test_utils_efp.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.nc filter=lfs diff=lfs merge=lfs -text 2 | *.tif filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.github/workflows/build_and_test.yml: -------------------------------------------------------------------------------- 1 | name: Build, Test, and Deploy nrt 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | build-and-test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: ["3.9", "3.10", "3.11", "3.12"] 13 | 14 | steps: 15 | - name: checkout repo 16 | uses: actions/checkout@v4 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | 
python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies and package 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install pytest wheel 25 | pip install . 26 | - name: Test with pytest 27 | run: | 28 | pytest 29 | 30 | # Deploy tagged commits that pass the test step 31 | deploy: 32 | needs: build-and-test 33 | runs-on: ubuntu-latest 34 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') 35 | steps: 36 | - name: Checkout repository 37 | uses: actions/checkout@v4 38 | 39 | - name: Set up Python 3.11 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: '3.11' 43 | 44 | - name: Install pypi-publish requirements 45 | run: | 46 | python -m pip install --upgrade pip twine build 47 | 48 | - name: Build package 49 | run: | 50 | python -m build 51 | 52 | - name: Publish package to PyPI 53 | uses: pypa/gh-action-pypi-publish@v1.9.0 54 | with: 55 | password: ${{ secrets.PYPI_API_TOKEN }} 56 | user: __token__ 57 | 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | *.pyo 4 | 5 | # Distribution / packaging 6 | *.egg-info/ 7 | dist/ 8 | build/ 9 | 10 | # Sphinx doc 11 | _build/ 12 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-20.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # If using Sphinx, optionally build your docs in additional formats 
such as PDF 19 | # formats: 20 | # - pdf 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - method: pip 26 | path: . 27 | extra_requirements: 28 | - docs 29 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | Changes 2 | ======= 3 | 4 | 0.3.0 (2024-08-29) 5 | ------------------ 6 | - nrt.data now lives in its own namespace package (nrt-data). Subpackage removed, 7 | and namespace enabling in multiple __init__.py files 8 | - A few edits (e.g. pkg-data and MANIFEST.in) as a consequence of removing 9 | nrt.data subpackage 10 | - Transitioned from setup.py to pyproject.toml 11 | - Package version now maintained in pyproject.toml 12 | - Adapted integration fixture to new test data access method (+ band names slightly changed 13 | B4 is now B04, etc) 14 | - Version number now managed via git tags using setuptools-scm 15 | - Removed all LFS hacks in .readthedocs, github actions, etc. No longer needed 16 | given that the package itself no longer contains data 17 | 18 | 19 | 0.2.1 (2024-07-15) 20 | ------------------ 21 | 22 | - Various improvements to documentation and package metadata suggested by JOSS 23 | review (new example using PC archive and STAC catalogue, Contributing guidelines, 24 | explicit citation information, etc) 25 | - Small documentation improvement for CCDC monitoring 26 | - New update_mask argument and monitoring class attribute to allow monitoring to 27 | continue regardless of boundary crossing 28 | - Exclude version 0.59 of numba in requirements due to known bug in parallel accelerator. 
29 | See https://github.com/numba/numba/issues/9490 30 | 31 | 0.2.0 (2024-01-15) 32 | ------------------ 33 | 34 | - np.linalg.inv replaced by the more recommended np.linalg.solve in many places 35 | - Integration of numba parallel accelerator in most fitting functions (new argument 36 | to control number of threads in the .fit method of BaseNrt class) 37 | - Possibility to pass kwargs to functions of data module that load xarray.Datasets 38 | objects (particularly useful to specify chunking and get a dask based object) 39 | - New example in gallery on parallel fitting 40 | - Add a minimum python version requirement (>=3.9). 3.8 no longer tested 41 | - New set of functions to generate synthetic data (single time-series and DataArrays) 42 | added to the data module 43 | - Gallery section added to the documentation, with one example on use of synthetic data 44 | for nrt simulation and computation of performance metrics 45 | 46 | 0.1.0 (2022-04-27) 47 | ------------------ 48 | 49 | - The report() method can now receive a list of layers as argument to select the 50 | attributes of the monitoring instance to be retrieved and written to disk. Backward 51 | compatibility of the .report() method is not affected but backward compatibility 52 | of the private ._report() method, used to prepare the array is broken 53 | 54 | 0.0.5 (2022-03-21) 55 | ------------------ 56 | 57 | - First pypi release 58 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | authors: 3 | - family-names: Dutrieux 4 | given-names: Loïc 5 | orcid: "https://orcid.org/0000-0002-5058-2526" 6 | - family-names: Viehweger 7 | given-names: Jonas 8 | orcid: "https://orcid.org/0000-0002-1610-4600" 9 | doi: 10.5281/zenodo.12799278 10 | message: If you use nrt in your research or project, please cite our article in the 11 | Journal of Open Source Software. 
12 | preferred-citation: 13 | authors: 14 | - family-names: Dutrieux 15 | given-names: Loïc 16 | orcid: "https://orcid.org/0000-0002-5058-2526" 17 | - family-names: Viehweger 18 | given-names: Jonas 19 | orcid: "https://orcid.org/0000-0002-1610-4600" 20 | date-published: 2024-08-15 21 | doi: 10.21105/joss.06815 22 | issn: 2475-9066 23 | issue: 100 24 | journal: Journal of Open Source Software 25 | publisher: 26 | name: Open Journals 27 | start: 6815 28 | title: "nrt: operational monitoring of satellite image time-series in 29 | Python" 30 | type: article 31 | url: "https://joss.theoj.org/papers/10.21105/joss.06815" 32 | volume: 9 33 | title: "nrt: operational monitoring of satellite image time-series in 34 | Python" 35 | 36 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to nrt 2 | 3 | Thanks for taking the time to contribute to nrt! 🎉 4 | 5 | ## Rights 6 | 7 | The EUPL v2 license (see LICENSE) applies to all contributions. 8 | 9 | ## How Can I Contribute? 10 | 11 | ### Reporting Bugs 12 | 13 | If you find a bug, please open an issue and include as much detail as possible. Include steps to reproduce the issue and any relevant logs or screenshots. 14 | 15 | ### Suggesting Enhancements 16 | 17 | Enhancement suggestions are welcome! If you have an idea to improve nrt or its documentation, please open an issue and describe your idea in detail. If possible, provide examples of how the enhancement would be used. 18 | 19 | ### Code Contributions 20 | 21 | For any contribution to the code base or the documentation, use the pull request mechanism. 22 | 1. Fork the repository: Click the 'Fork' button on the upper right corner of the repository page. 23 | 2. Apply changes to your fork. 24 | 3. Open a pull request on github 25 | 26 | 27 | Your contribution will be reviewed and discussed as part of the pull request. 
If approved, it will then be merged 28 | into the main branch of the repository and included in the following release. 29 | 30 | 31 | ### Testing 32 | 33 | We use `pytest` for unit tests. 34 | 35 | - Unit tests are written using the `pytest` framework. 36 | - Tests are automatically run using GitHub CI with every push and pull request. 37 | - You can run tests locally by simply calling `pytest` in the root directory of the project. 38 | 39 | 40 | ### Releasing a new version 41 | 42 | Package version is set via git tags thanks to [setuptools-scm](https://setuptools-scm.readthedocs.io/en/latest/). A new release 43 | is made for every tagged commit pushed to github and that passes unit tests. 44 | Example git tag command: `git tag -a v0.3.0 -m "version 0.3.0"` 45 | 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | EUROPEAN UNION PUBLIC LICENCE v. 1.2 2 | EUPL © the European Union 2007, 2016 3 | 4 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined 5 | below) which is provided under the terms of this Licence. Any use of the Work, 6 | other than as authorised under this Licence is prohibited (to the extent such 7 | use is covered by a right of the copyright holder of the Work). 8 | 9 | The Work is provided under the terms of this Licence when the Licensor (as 10 | defined below) has placed the following notice immediately following the 11 | copyright notice for the Work: 12 | 13 | Licensed under the EUPL 14 | 15 | or has expressed by any other means his willingness to license under the EUPL. 16 | 17 | 1. Definitions 18 | 19 | In this Licence, the following terms have the following meaning: 20 | 21 | - ‘The Licence’: this Licence. 
22 | 23 | - ‘The Original Work’: the work or software distributed or communicated by the 24 | Licensor under this Licence, available as Source Code and also as Executable 25 | Code as the case may be. 26 | 27 | - ‘Derivative Works’: the works or software that could be created by the 28 | Licensee, based upon the Original Work or modifications thereof. This Licence 29 | does not define the extent of modification or dependence on the Original Work 30 | required in order to classify a work as a Derivative Work; this extent is 31 | determined by copyright law applicable in the country mentioned in Article 15. 32 | 33 | - ‘The Work’: the Original Work or its Derivative Works. 34 | 35 | - ‘The Source Code’: the human-readable form of the Work which is the most 36 | convenient for people to study and modify. 37 | 38 | - ‘The Executable Code’: any code which has generally been compiled and which is 39 | meant to be interpreted by a computer as a program. 40 | 41 | - ‘The Licensor’: the natural or legal person that distributes or communicates 42 | the Work under the Licence. 43 | 44 | - ‘Contributor(s)’: any natural or legal person who modifies the Work under the 45 | Licence, or otherwise contributes to the creation of a Derivative Work. 46 | 47 | - ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of 48 | the Work under the terms of the Licence. 49 | 50 | - ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, 51 | renting, distributing, communicating, transmitting, or otherwise making 52 | available, online or offline, copies of the Work or providing access to its 53 | essential functionalities at the disposal of any other natural or legal 54 | person. 55 | 56 | 2. 
Scope of the rights granted by the Licence 57 | 58 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, 59 | sublicensable licence to do the following, for the duration of copyright vested 60 | in the Original Work: 61 | 62 | - use the Work in any circumstance and for all usage, 63 | - reproduce the Work, 64 | - modify the Work, and make Derivative Works based upon the Work, 65 | - communicate to the public, including the right to make available or display 66 | the Work or copies thereof to the public and perform publicly, as the case may 67 | be, the Work, 68 | - distribute the Work or copies thereof, 69 | - lend and rent the Work or copies thereof, 70 | - sublicense rights in the Work or copies thereof. 71 | 72 | Those rights can be exercised on any media, supports and formats, whether now 73 | known or later invented, as far as the applicable law permits so. 74 | 75 | In the countries where moral rights apply, the Licensor waives his right to 76 | exercise his moral right to the extent allowed by law in order to make effective 77 | the licence of the economic rights here above listed. 78 | 79 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to 80 | any patents held by the Licensor, to the extent necessary to make use of the 81 | rights granted on the Work under this Licence. 82 | 83 | 3. Communication of the Source Code 84 | 85 | The Licensor may provide the Work either in its Source Code form, or as 86 | Executable Code. If the Work is provided as Executable Code, the Licensor 87 | provides in addition a machine-readable copy of the Source Code of the Work 88 | along with each copy of the Work that the Licensor distributes or indicates, in 89 | a notice following the copyright notice attached to the Work, a repository where 90 | the Source Code is easily and freely accessible for as long as the Licensor 91 | continues to distribute or communicate the Work. 92 | 93 | 4. 
Limitations on copyright 94 | 95 | Nothing in this Licence is intended to deprive the Licensee of the benefits from 96 | any exception or limitation to the exclusive rights of the rights owners in the 97 | Work, of the exhaustion of those rights or of other applicable limitations 98 | thereto. 99 | 100 | 5. Obligations of the Licensee 101 | 102 | The grant of the rights mentioned above is subject to some restrictions and 103 | obligations imposed on the Licensee. Those obligations are the following: 104 | 105 | Attribution right: The Licensee shall keep intact all copyright, patent or 106 | trademarks notices and all notices that refer to the Licence and to the 107 | disclaimer of warranties. The Licensee must include a copy of such notices and a 108 | copy of the Licence with every copy of the Work he/she distributes or 109 | communicates. The Licensee must cause any Derivative Work to carry prominent 110 | notices stating that the Work has been modified and the date of modification. 111 | 112 | Copyleft clause: If the Licensee distributes or communicates copies of the 113 | Original Works or Derivative Works, this Distribution or Communication will be 114 | done under the terms of this Licence or of a later version of this Licence 115 | unless the Original Work is expressly distributed only under this version of the 116 | Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee 117 | (becoming Licensor) cannot offer or impose any additional terms or conditions on 118 | the Work or Derivative Work that alter or restrict the terms of the Licence. 119 | 120 | Compatibility clause: If the Licensee Distributes or Communicates Derivative 121 | Works or copies thereof based upon both the Work and another work licensed under 122 | a Compatible Licence, this Distribution or Communication can be done under the 123 | terms of this Compatible Licence. 
For the sake of this clause, ‘Compatible 124 | Licence’ refers to the licences listed in the appendix attached to this Licence. 125 | Should the Licensee's obligations under the Compatible Licence conflict with 126 | his/her obligations under this Licence, the obligations of the Compatible 127 | Licence shall prevail. 128 | 129 | Provision of Source Code: When distributing or communicating copies of the Work, 130 | the Licensee will provide a machine-readable copy of the Source Code or indicate 131 | a repository where this Source will be easily and freely available for as long 132 | as the Licensee continues to distribute or communicate the Work. 133 | 134 | Legal Protection: This Licence does not grant permission to use the trade names, 135 | trademarks, service marks, or names of the Licensor, except as required for 136 | reasonable and customary use in describing the origin of the Work and 137 | reproducing the content of the copyright notice. 138 | 139 | 6. Chain of Authorship 140 | 141 | The original Licensor warrants that the copyright in the Original Work granted 142 | hereunder is owned by him/her or licensed to him/her and that he/she has the 143 | power and authority to grant the Licence. 144 | 145 | Each Contributor warrants that the copyright in the modifications he/she brings 146 | to the Work are owned by him/her or licensed to him/her and that he/she has the 147 | power and authority to grant the Licence. 148 | 149 | Each time You accept the Licence, the original Licensor and subsequent 150 | Contributors grant You a licence to their contributions to the Work, under the 151 | terms of this Licence. 152 | 153 | 7. Disclaimer of Warranty 154 | 155 | The Work is a work in progress, which is continuously improved by numerous 156 | Contributors. It is not a finished work and may therefore contain defects or 157 | ‘bugs’ inherent to this type of development. 
158 | 159 | For the above reason, the Work is provided under the Licence on an ‘as is’ basis 160 | and without warranties of any kind concerning the Work, including without 161 | limitation merchantability, fitness for a particular purpose, absence of defects 162 | or errors, accuracy, non-infringement of intellectual property rights other than 163 | copyright as stated in Article 6 of this Licence. 164 | 165 | This disclaimer of warranty is an essential part of the Licence and a condition 166 | for the grant of any rights to the Work. 167 | 168 | 8. Disclaimer of Liability 169 | 170 | Except in the cases of wilful misconduct or damages directly caused to natural 171 | persons, the Licensor will in no event be liable for any direct or indirect, 172 | material or moral, damages of any kind, arising out of the Licence or of the use 173 | of the Work, including without limitation, damages for loss of goodwill, work 174 | stoppage, computer failure or malfunction, loss of data or any commercial 175 | damage, even if the Licensor has been advised of the possibility of such damage. 176 | However, the Licensor will be liable under statutory product liability laws as 177 | far such laws apply to the Work. 178 | 179 | 9. Additional agreements 180 | 181 | While distributing the Work, You may choose to conclude an additional agreement, 182 | defining obligations or services consistent with this Licence. However, if 183 | accepting obligations, You may act only on your own behalf and on your sole 184 | responsibility, not on behalf of the original Licensor or any other Contributor, 185 | and only if You agree to indemnify, defend, and hold each Contributor harmless 186 | for any liability incurred by, or claims asserted against such Contributor by 187 | the fact You have accepted any warranty or additional liability. 188 | 189 | 10. 
Acceptance of the Licence 190 | 191 | The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ 192 | placed under the bottom of a window displaying the text of this Licence or by 193 | affirming consent in any other similar way, in accordance with the rules of 194 | applicable law. Clicking on that icon indicates your clear and irrevocable 195 | acceptance of this Licence and all of its terms and conditions. 196 | 197 | Similarly, you irrevocably accept this Licence and all of its terms and 198 | conditions by exercising any rights granted to You by Article 2 of this Licence, 199 | such as the use of the Work, the creation by You of a Derivative Work or the 200 | Distribution or Communication by You of the Work or copies thereof. 201 | 202 | 11. Information to the public 203 | 204 | In case of any Distribution or Communication of the Work by means of electronic 205 | communication by You (for example, by offering to download the Work from a 206 | remote location) the distribution channel or media (for example, a website) must 207 | at least provide to the public the information requested by the applicable law 208 | regarding the Licensor, the Licence and the way it may be accessible, concluded, 209 | stored and reproduced by the Licensee. 210 | 211 | 12. Termination of the Licence 212 | 213 | The Licence and the rights granted hereunder will terminate automatically upon 214 | any breach by the Licensee of the terms of the Licence. 215 | 216 | Such a termination will not terminate the licences of any person who has 217 | received the Work from the Licensee under the Licence, provided such persons 218 | remain in full compliance with the Licence. 219 | 220 | 13. Miscellaneous 221 | 222 | Without prejudice of Article 9 above, the Licence represents the complete 223 | agreement between the Parties as to the Work. 
224 | 225 | If any provision of the Licence is invalid or unenforceable under applicable 226 | law, this will not affect the validity or enforceability of the Licence as a 227 | whole. Such provision will be construed or reformed so as necessary to make it 228 | valid and enforceable. 229 | 230 | The European Commission may publish other linguistic versions or new versions of 231 | this Licence or updated versions of the Appendix, so far this is required and 232 | reasonable, without reducing the scope of the rights granted by the Licence. New 233 | versions of the Licence will be published with a unique version number. 234 | 235 | All linguistic versions of this Licence, approved by the European Commission, 236 | have identical value. Parties can take advantage of the linguistic version of 237 | their choice. 238 | 239 | 14. Jurisdiction 240 | 241 | Without prejudice to specific agreement between parties, 242 | 243 | - any litigation resulting from the interpretation of this License, arising 244 | between the European Union institutions, bodies, offices or agencies, as a 245 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court 246 | of Justice of the European Union, as laid down in article 272 of the Treaty on 247 | the Functioning of the European Union, 248 | 249 | - any litigation arising between other parties and resulting from the 250 | interpretation of this License, will be subject to the exclusive jurisdiction 251 | of the competent court where the Licensor resides or conducts its primary 252 | business. 253 | 254 | 15. Applicable Law 255 | 256 | Without prejudice to specific agreement between parties, 257 | 258 | - this Licence shall be governed by the law of the European Union Member State 259 | where the Licensor has his seat, resides or has his registered office, 260 | 261 | - this licence shall be governed by Belgian law if the Licensor has no seat, 262 | residence or registered office inside a European Union Member State. 
263 | 264 | Appendix 265 | 266 | ‘Compatible Licences’ according to Article 5 EUPL are: 267 | 268 | - GNU General Public License (GPL) v. 2, v. 3 269 | - GNU Affero General Public License (AGPL) v. 3 270 | - Open Software License (OSL) v. 2.1, v. 3.0 271 | - Eclipse Public License (EPL) v. 1.0 272 | - CeCILL v. 2.0, v. 2.1 273 | - Mozilla Public Licence (MPL) v. 2 274 | - GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 275 | - Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for 276 | works other than software 277 | - European Union Public Licence (EUPL) v. 1.1, v. 1.2 278 | - Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong 279 | Reciprocity (LiLiQ-R+). 280 | 281 | The European Commission may update this Appendix to later versions of the above 282 | licences without producing a new version of the EUPL, as long as they provide 283 | the rights granted in Article 2 of this Licence and protect the covered Source 284 | Code from exclusive appropriation. 285 | 286 | All other changes or additions to this Appendix require the production of a new 287 | EUPL version. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.rst setup.py 2 | include tests*/*.py 3 | include integration_test/*.py 4 | recursive-include docs *.rst 5 | recursive-include docs *.png 6 | exclude MANIFEST.in 7 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | *** 2 | nrt 3 | *** 4 | 5 | *Python package for near real time detection of change in spatio-temporal datasets* 6 | 7 | .. image:: https://badge.fury.io/py/nrt.svg 8 | :target: https://badge.fury.io/py/nrt 9 | 10 | .. 
image:: https://readthedocs.org/projects/nrt/badge/?version=latest 11 | :target: https://nrt.readthedocs.io/en/latest/?badge=latest 12 | :alt: Documentation Status 13 | 14 | .. image:: https://github.com/ec-jrc/nrt/actions/workflows/build_and_test.yml/badge.svg 15 | :target: https://github.com/ec-jrc/nrt/actions/workflows/build_and_test.yml 16 | :alt: Build status 17 | 18 | .. image:: https://joss.theoj.org/papers/10.21105/joss.06815/status.svg 19 | :target: https://doi.org/10.21105/joss.06815 20 | 21 | 22 | ``nrt`` provides a standardized interface for Near Real Time monitoring of disturbances on satellite image time-series. 23 | The package is optimized for fast computation and suitable for operational deployment at scale. 24 | A typical operational use case of such a package would be a system constantly receiving new satellite based acquisitions and generating alerts when an anomaly is detected. 25 | Five monitoring frameworks from scientific literature on change detection are implemented and exposed via a common API. 26 | All five monitoring frameworks share a common general approach which consists in modelling the "normal" behavior of the variable through time by fitting a linear model on a user defined stable history period and monitoring until a "break" is detected. 27 | Monitoring starts right after the stable history period, and for each new incoming observation the observed value is compared to the predicted "normal" behavior. 28 | When observations and predictions diverge, a "break" is detected. 29 | A confirmed "break" typically requires several successive diverging observations, this sensitivity or rapid detection capacity depending on many variables such as the algorithm, its fitting and monitoring parameters, the noise level of the history period or the magnitude of the divergence. 
30 | The five monitoring frameworks implemented are: 31 | 32 | - Exponentially Weighted Moving Average (EWMA_) (Brooks et al., 2013) 33 | - Cumulative Sum of Residual (CuSum_) (Verbesselt et al., 2012; Zeileis et al., 2005). CuSum is one of the monitoring options of the ``bfastmonitor`` function available in the R package bfast_. 34 | - Moving Sum of Residuals (MoSum_) (Verbesselt et al., 2012; Zeileis et al., 2005). MoSum is one of the monitoring options of the ``bfastmonitor`` function available in the R package bfast_. 35 | - Continuous Change Detection and Classification of land cover (CCDC_, CMFDA_) (Zhu et al., 2012, 2014) - Partial implementation only of the original published method. 36 | - InterQuantile Range (IQR) - Simple, unpublished outlier identification strategy described on stackexchange_. 37 | 38 | 39 | Parts of this package are derived from Chris Holden's pybreakpoints_ and yatsm_ packages. Please see the copyright statements in the respective modules. 40 | 41 | .. _EWMA: https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6573358 42 | .. _CMFDA: https://www.sciencedirect.com/science/article/pii/S0034425712000387 43 | .. _CCDC: https://www.sciencedirect.com/science/article/pii/S0034425714000248#bbb0350 44 | .. _CuSum: https://www.sciencedirect.com/science/article/pii/S0034425712001150 45 | .. _MoSum: https://www.sciencedirect.com/science/article/pii/S0034425712001150 46 | .. _stackexchange: https://stats.stackexchange.com/a/1153 47 | .. _bfast: https://bfast.r-forge.r-project.org/ 48 | .. _pybreakpoints: https://github.com/ceholden/pybreakpoints 49 | .. _yatsm: https://github.com/ceholden/yatsm 50 | 51 | 52 | 53 | Documentation 54 | ============= 55 | 56 | Learn more about nrt in its official documentation at https://nrt.readthedocs.io/en/latest/ 57 | 58 | 59 | Installation 60 | ============ 61 | 62 | .. 
code-block:: bash 63 | 64 | pip install nrt 65 | 66 | 67 | The main dependencies, which should be automatically resolved by ``pip``, are: 68 | 69 | - `numpy <https://numpy.org/>`_ 70 | - `scipy <https://scipy.org/>`_ 71 | - `xarray <https://xarray.dev/>`_ 72 | - `numba <https://numba.pydata.org/>`_ 73 | - `rasterio <https://rasterio.readthedocs.io/>`_ 74 | - `netCDF4 <https://unidata.github.io/netcdf4-python/>`_ 75 | 76 | 77 | Example usage 78 | ============= 79 | 80 | The snippet below presents a near real time monitoring simulation. The input data is split in stable history and monitoring period; the monitoring class is instantiated (EWMA algorithm), a simple harmonic model is fitted on the history period, and new acquisitions are passed to the monitor method one at a time. Note that in a real operational scenario where new observations come at a less frequent interval (e.g. every 5 or 8 days which correspond to the revisit frequency of sentinel 2 and Landsat constellations respectively), the monitoring state can be saved on disk and reloaded when required. 81 | 82 | .. code-block:: python 83 | 84 | import datetime 85 | 86 | from nrt.monitor.ewma import EWMA 87 | from nrt import data 88 | 89 | # Forest/non-forest mask 90 | mask = (data.romania_forest_cover_percentage() > 30).astype('int') 91 | 92 | # NDVI training and monitoring periods 93 | s2_cube = data.romania_20m() 94 | s2_cube['ndvi'] = (s2_cube.B8A - s2_cube.B04) / (s2_cube.B8A + s2_cube.B04) 95 | s2_cube = s2_cube.where(s2_cube.SCL.isin([4,5,7])) 96 | ndvi_history = s2_cube.ndvi.sel(time=slice('2015-01-01', '2018-12-31')) 97 | ndvi_monitoring = s2_cube.ndvi.sel(time=slice('2019-01-01', '2021-12-31')) 98 | 99 | # Instantiate monitoring class and fit stable history 100 | EwmaMonitor = EWMA(trend=False, mask=mask) 101 | EwmaMonitor.fit(dataarray=ndvi_history) 102 | 103 | # Monitor new observations 104 | for array, date in zip(ndvi_monitoring.values, 105 | ndvi_monitoring.time.values.astype('M8[s]').astype(datetime.datetime)): 106 | EwmaMonitor.monitor(array=array, date=date) 107 | 108 | # At any time a monitoring report can be produced with EwmaMonitor.report(filename) 
109 | # and state of the monitoring instance can be saved as netcdf with 110 | # EwmaMonitor.to_netcdf(filename) 111 | 112 | 113 | Contributing 114 | ============ 115 | 116 | Any type of contribution is welcome. Please see the contributing guidelines at `CONTRIBUTING.md `_. 117 | 118 | 119 | Citing nrt 120 | ========== 121 | 122 | If you use nrt in your research or project, please consider citing it using the following BibTeX entry. 123 | 124 | .. code-block:: bibtex 125 | 126 | @article{dutrieux2024nrt, 127 | year = {2024}, 128 | publisher = {The Open Journal}, 129 | volume = {9}, 130 | number = {100}, 131 | pages = {6815}, 132 | author = {Lo\"{i}c Dutrieux and Jonas Viehweger}, 133 | title = {nrt: operational monitoring of satellite image time-series in Python}, 134 | journal = {Journal of Open Source Software}, 135 | doi = {10.21105/joss.06815}, 136 | } 137 | 138 | 139 | About the authors 140 | ================= 141 | 142 | Loïc Dutrieux works as a remote sensing researcher at the Joint Research Center (JRC) in Ispra, Italy. His work focuses on forest disturbance mapping and characterization from satellite image time-series. 143 | 144 | Jonas Viehweger is a young researcher with an MSc in remote sensing from the University of Marburg, Germany. He developed a large part of the nrt package during his traineeship period at the Joint Research Center (JRC) in Ispra, Italy. 145 | 146 | Chris Holden implemented many time-series change detection algorithms in Python during his PhD at Boston University. 147 | 148 | 149 | References 150 | ========== 151 | 152 | Brooks, E.B., Wynne, R.H., Thomas, V.A., Blinn, C.E. and Coulston, J.W., 2013. On-the-fly massively multitemporal change detection using statistical quality control charts and Landsat data. IEEE Transactions on Geoscience and Remote Sensing, 52(6), pp.3316-3332. 153 | https://doi.org/10.1109/TGRS.2013.2272545 154 | 155 | Verbesselt, J., Zeileis, A. and Herold, M., 2012.
Near real-time disturbance detection using satellite image time series. Remote Sensing of Environment, 123, pp.98-108. 156 | https://doi.org/10.1016/j.rse.2012.02.022 157 | 158 | Zeileis, A., Leisch, F., Kleiber, C. and Hornik, K., 2005. Monitoring structural change in dynamic econometric models. Journal of Applied Econometrics, 20(1), pp.99-121. 159 | https://doi.org/10.1002/jae.776 160 | 161 | Zhu, Z., Woodcock, C.E. and Olofsson, P., 2012. Continuous monitoring of forest disturbance using all available Landsat imagery. Remote sensing of environment, 122, pp.75-91. 162 | https://doi.org/10.1016/j.rse.2011.10.030 163 | 164 | Zhu, Z. and Woodcock, C.E., 2014. Continuous change detection and classification of land cover using all available Landsat data. Remote sensing of Environment, 144, pp.152-171. 165 | https://doi.org/10.1016/j.rse.2014.01.011 166 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/concept.png -------------------------------------------------------------------------------- /docs/_static/forest_mask_bolivia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/forest_mask_bolivia.png -------------------------------------------------------------------------------- /docs/_static/iqr_results_bolivia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/iqr_results_bolivia.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/api_reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | nrt.data package 5 | ---------------- 6 | 7 | .. automodule:: nrt.data 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | nrt.monitor package 13 | ------------------- 14 | 15 | .. automodule:: nrt.monitor 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | nrt.monitor.ccdc module 21 | ^^^^^^^^^^^^^^^^^^^^^^^ 22 | 23 | .. 
automodule:: nrt.monitor.ccdc 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | nrt.monitor.cusum module 29 | ^^^^^^^^^^^^^^^^^^^^^^^^ 30 | 31 | .. automodule:: nrt.monitor.cusum 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | nrt.monitor.ewma module 37 | ^^^^^^^^^^^^^^^^^^^^^^^ 38 | 39 | .. automodule:: nrt.monitor.ewma 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | nrt.monitor.iqr module 45 | ^^^^^^^^^^^^^^^^^^^^^^ 46 | 47 | .. automodule:: nrt.monitor.iqr 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | nrt.monitor.mosum module 53 | ^^^^^^^^^^^^^^^^^^^^^^^^ 54 | 55 | .. automodule:: nrt.monitor.mosum 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | nrt.fit\_methods module 61 | ----------------------- 62 | 63 | .. automodule:: nrt.fit_methods 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | nrt.log module 69 | -------------- 70 | 71 | .. automodule:: nrt.log 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | 76 | nrt.outliers module 77 | ------------------- 78 | 79 | .. automodule:: nrt.outliers 80 | :members: 81 | :undoc-members: 82 | :show-inheritance: 83 | 84 | nrt.stats module 85 | ---------------- 86 | 87 | .. automodule:: nrt.stats 88 | :members: 89 | :undoc-members: 90 | :show-inheritance: 91 | 92 | nrt.utils module 93 | ---------------- 94 | 95 | .. automodule:: nrt.utils 96 | :members: 97 | :undoc-members: 98 | :show-inheritance: 99 | 100 | nrt.utils\_efp module 101 | --------------------- 102 | 103 | .. automodule:: nrt.utils_efp 104 | :members: 105 | :undoc-members: 106 | :show-inheritance: 107 | 108 | Module contents 109 | --------------- 110 | 111 | .. 
automodule:: nrt 112 | :members: 113 | :undoc-members: 114 | :show-inheritance: 115 | :noindex: 116 | 117 | -------------------------------------------------------------------------------- /docs/classes.rst: -------------------------------------------------------------------------------- 1 | .. _classes: 2 | 3 | Monitoring Classes 4 | ****************** 5 | 6 | This chapter is supposed to give a quick overview over differences 7 | between the different implemented algorithms. 8 | 9 | Conceptual basis 10 | ================ 11 | 12 | Conceptually, near real-time monitoring using time series analysis is based on the temporal 13 | signature of forests. Due to seasonal differences in leaf area, chlorophyll and other biophysical or 14 | biochemical attributes, vegetation dynamics can be visible in the spectral response of forests. As 15 | an example, healthy forests exhibit a high reflectivity in the Near Infrared (NIR) because of 16 | scattering in that wavelength caused by the structure and water content of the leaves. 17 | The number of leaves and thus of scattering in the NIR is highest in summer and 18 | spring and lowest during winter. This seasonal pattern can be modelled and used to detect 19 | disturbances. 20 | 21 | .. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN 22 | 23 | .. figure:: _static/concept.png 24 | 25 | |copy| Copyright European Union, 2022; Jonas Viehweger 26 | 27 | All implemented algorithms are based on this concept. They first fit a model to the stable forest, 28 | then monitor for unusual values compared to that model. How exactly this monitoring happens is one of 29 | the main differences between the algorithms. 30 | 31 | 32 | EWMA 33 | ==== 34 | 35 | EWMA is short for exponentially weighted moving average and follows an algorithm as described by 36 | `Brooks et al. (2013) `_. This algorithm is based on 37 | quality control charts, namely Shewhart and EWMA quality control charts. 38 | 39 | Instantiation 40 | ------------- 41 | 42 | 43 | .. 
code-block:: python 44 | 45 | from nrt.monitor.ewma import EWMA 46 | 47 | nrt_class = EWMA(trend=True, harmonic_order=2, mask=None, 48 | sensitivity=2, lambda_=0.3, threshold_outlier=2) 49 | 50 | This shows the parameters specific to the EWMA class in the second row during instantiating. 51 | In particular this is ``sensitivity``, ``lambda_`` and ``threshold_outlier``. 52 | 53 | Let's first talk about ``lambda_``. Lambda (0<λ<=1) is used as the exponent for the 54 | exponentially weighted moving average and basically controls how much influence the historic data has on the average. 55 | 56 | So for a time series where :math:`x_t` is the value at time period t, 57 | the EWMA value :math:`s` at time t is given as: 58 | 59 | .. math:: 60 | 61 | s_t = \lambda \cdot x_t + (1-\lambda) \cdot s_{t-1} 62 | 63 | First the value at time t is weighted by λ and then added to the previous EWMA value, 64 | which got weighted by the inverse of λ. That means, that for small λ the impact 65 | of single values on the average are low. So if the time series is very noisy, low values for lambda around 66 | 0.05 to 0.25 are recommended. This ensures that for example a single cloud which wasn't masked 67 | doesn't have a long lasting impact on the EWMA value. 68 | 69 | The parameter ``sensitivity`` is used to calculate the process boundary (also called control limit) 70 | which signals a disturbance when crossed. 71 | The boundary is calculated as follows: 72 | 73 | .. math:: 74 | 75 | CL = L\cdot\sigma\sqrt{(\frac{\lambda}{2-\lambda})} 76 | 77 | with CL as Control Limits, L as the sensitivity and :math:`\sigma` as the standard deviation of 78 | the population. Basically the lower L is, the higher the sensitivity since the boundary will be lower. 79 | This is a very simplified formula since a few expectations are made. For a more detailed look at the formula, see 80 | the `Wikipedia page `_. 
81 | 82 | Lastly ``threshold_outlier`` provides a way to reduce noise of the time series while monitoring. 83 | It discards all residuals during monitoring which are larger than the standard 84 | deviation of the residuals during fitting multiplied by ``threshold_outlier``. This means that no disturbances which exhibit 85 | consistently higher residuals than :math:`threshold \cdot \sigma`` will signal, but it also means that most clouds 86 | and cloud shadows which aren't caught by masking will get handled during monitoring. 87 | 88 | Fitting 89 | ------------- 90 | 91 | By default EWMA is fit using OLS combined with outlier screening using Shewhart control charts. 92 | For more details see :ref:`fitting`. 93 | 94 | 95 | 96 | CCDC 97 | ==== 98 | 99 | CCDC is short for Continuous Change Detection and Classification and is described in `Zhu & Woodcock (2014) `_. 100 | The implementation in this package is not a strict implementation of the algorithm. It was also not validated against 101 | the original implementation. 102 | 103 | There are a few main differences. In contrast to Zhu & Woodcock (2014), multivariate analysis is not available in the nrt package. 104 | Furthermore, due to the structure of the nrt package, the automatic re-fitting after a disturbance which is described in the 105 | original implementation is not available. 106 | Lastly, the focus of this package is the detection of breaks and not their classification, so this part of the original algorithm is also omitted. 107 | 108 | Instantiation 109 | ------------- 110 | 111 | .. code-block:: python 112 | 113 | from nrt.monitor.ccdc import CCDC 114 | 115 | nrt_class = CCDC(trend=True, harmonic_order=2, mask=None, 116 | sensitivity=3, boundary=3) 117 | 118 | During instantiation, the two parameters ``sensitivity`` and ``boundary`` 119 | influence how sensitive the monitoring with CCDC will be. 
120 | 121 | The parameter ``sensitivity`` in this case influences how high the threshold is after which 122 | an observation will get flagged as a possible disturbance. This threshold also 123 | depends on the residual mean square error (RMSE) which is calculated during fitting. 124 | With CCDC everything which is higher than :math:`sensitivity \cdot RMSE` is flagged as a possible 125 | disturbance. 126 | 127 | The boundary value then specifies how many consecutive observations need to be 128 | above the threshold to confirm a disturbance. 129 | 130 | So with the default values, during monitoring 3 consecutive observations need to be 131 | 3 times higher than the RMSE to confirm a break. 132 | 133 | 134 | Fitting 135 | ------------- 136 | By default CCDC is fit using a stable fitting method called ``CCDC-stable``, combined 137 | with outlier screening which is based on a robust iteratively reweighted least squares fit. 138 | 139 | For more details see :ref:`fitting`. 140 | 141 | 142 | CuSum and MoSum 143 | =============== 144 | 145 | Monitoring with cumulative sums (CuSum) and moving sums (MoSum) is based 146 | on `Verbesselt et al. (2012) `_ and more particularly 147 | the `bfast `_ and `strucchange `_ R packages. 148 | 149 | Both algorithms have the same underlying principle. The assumption is that if a model was fitted on a time-series of a stable forest, 150 | the residuals will have a mean of 0. So summing all residuals up, the value should stay close to zero. If however a disturbance happens, 151 | the residuals will consistently be higher or lower than zero, thus gradually moving the sum of residuals away from 0. 152 | 153 | The major difference between the two algorithms is that CuSum always takes the cumulative sum of the entire time-series, while 154 | MoSum only takes the sum of a moving window with a certain size. 155 | 156 | 157 | Instantiation 158 | ------------- 159 | 160 | CuSum 161 | ^^^^^^ 162 | 163 | ..
code-block:: python 164 | 165 | from nrt.monitor.cusum import CuSum 166 | 167 | nrt_class = CuSum(trend=True, harmonic_order=2, mask=None, 168 | sensitivity=0.05) 169 | 170 | The parameter ``sensitivity`` in the case of CuSum and MoSum is equivalent to the significance level of the disturbance event. 171 | It basically signifies how likely it was that the threshold was crossed randomly and not caused by a structural change 172 | in the time-series. 173 | 174 | So in this case lower values decrease the sensitivity of the monitoring to structural changes. 175 | 176 | MoSum 177 | ^^^^^^ 178 | 179 | .. code-block:: python 180 | 181 | from nrt.monitor.mosum import MoSum 182 | 183 | nrt_class = MoSum(trend=True, harmonic_order=2, mask=None, 184 | sensitivity=0.05, h=0.25) 185 | 186 | The only additional parameter in MoSum is ``h``, which sets the moving window size relative to the 187 | total number of observations which were used during fitting. So if during fitting 40 observations 188 | were used, with ``h=0.25`` the window size during monitoring will be 10 observations. 189 | 190 | .. note:: 191 | Since the process boundary during monitoring is pre-computed only for select values of ``sensitivity`` and ``h``, 192 | only 0.25, 0.5 and 1 are available for ``h`` and ``sensitivity`` has to be between 0.001 and 0.05 193 | 194 | 195 | Fitting 196 | ------------- 197 | 198 | By default CuSum and MoSum use a reverse ordered cumulative sum (ROC) to fit a stable period. 199 | 200 | For more details see :ref:`fitting`. 201 | 202 | 203 | IQR 204 | === 205 | 206 | IQR is an unpublished experimental monitoring algorithm based on the interquartile range of residuals. 207 | 208 | 209 | Instantiation 210 | ------------- 211 | 212 | .. code-block:: python 213 | 214 | from nrt.monitor.iqr import IQR 215 | 216 | nrt_class = IQR(trend=False, harmonic_order=3, mask=None, 217 | sensitivity=1.5, boundary=3) 218 | 219 | The flagging of residuals works similarly to CCDC.
220 | 221 | The parameter ``sensitivity`` in this case influences how high the threshold is after which 222 | an observation will get flagged as a possible disturbance. This threshold also 223 | depends on the IQR as well as the 25th and 75th percentile which are calculated during fitting. 224 | With this monitor everything which is higher than 225 | :math:`q75 + sensitivity \cdot IQR` or lower than :math:`q25 - sensitivity \cdot IQR` 226 | is flagged as a possible disturbance. 227 | 228 | The boundary value then specifies, how many consecutive observations need to be 229 | above the threshold to confirm a disturbance. 230 | 231 | 232 | Fitting 233 | ------------- 234 | 235 | By default IQR is using an OLS fit. 236 | 237 | For more details see :ref:`fitting` 238 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | 13 | import os 14 | import sys 15 | import warnings 16 | from importlib.metadata import version as version_ 17 | 18 | from numba import NumbaWarning 19 | import nrt 20 | 21 | 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = 'nrt' 26 | copyright = 'European Union, 2022, Loic Dutrieux & Jonas Viehweger' 27 | author = 'Loic Dutrieux, Jonas Viehweger' 28 | 29 | # The full version, including alpha/beta/rc tags 30 | release = version_('nrt') 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx_rtd_theme', 42 | 'sphinx_gallery.gen_gallery', 43 | 'sphinx.ext.mathjax' 44 | ] 45 | 46 | # Gallery configuration 47 | sphinx_gallery_conf = { 48 | 'filename_pattern': '/plot_', 49 | 'examples_dirs': 'gallery', # path to your example scripts 50 | 'gallery_dirs': 'auto_examples', # path to where to save gallery generated output 51 | } 52 | 53 | # Avoid displaying some common warnings in gallery examples 54 | warnings.filterwarnings('ignore', category=NumbaWarning) 55 | warnings.filterwarnings('ignore', category=RuntimeWarning) 56 | 57 | # Add any paths that contain templates here, relative to this directory. 58 | templates_path = ['_templates'] 59 | 60 | # List of patterns, relative to source directory, that match files and 61 | # directories to ignore when looking for source files. 62 | # This pattern also affects html_static_path and html_extra_path. 63 | exclude_patterns = ['gallery/README.rst'] 64 | 65 | 66 | # -- Options for HTML output ------------------------------------------------- 67 | 68 | # The theme to use for HTML and HTML Help pages. See the documentation for 69 | # a list of builtin themes. 
70 | # 71 | html_theme = 'sphinx_rtd_theme' 72 | 73 | # Add any paths that contain custom static files (such as style sheets) here, 74 | # relative to this directory. They are copied after the builtin static files, 75 | # so a file named "default.css" will overwrite the builtin "default.css". 76 | html_static_path = ['_static'] 77 | html_logo = "_static/logo.png" 78 | html_theme_options = { 79 | 'logo_only': True, 80 | 'display_version': False, 81 | 'style_nav_header_background': "#f8efc8" 82 | } 83 | -------------------------------------------------------------------------------- /docs/fit.rst: -------------------------------------------------------------------------------- 1 | .. _fitting: 2 | 3 | Fitting & Outlier Screening 4 | *************************** 5 | 6 | Fitting is achieved by calling ``.fit()`` on an instantiated monitoring class. 7 | 8 | In general the default arguments for each monitoring class correspond to the fitting 9 | which was used in the corresponding publication. However since the 10 | classes are not bound to the fit that was used in the publication it is entirely possible 11 | to use any combination of fitting arguments with any monitoring class. 12 | 13 | Fitting works by passing an ``xarray.DataArray`` and specifying a fitting method. 14 | Optionally a method to screen outliers in the time-series can also be passed 15 | to the fit call. 16 | 17 | Screen outliers 18 | =============== 19 | 20 | Outlier screening happens before the fitting and is designed to remove unwanted outliers 21 | in the time-series. When using optical satellite data those outliers are mostly unwanted 22 | clouds, cloud shadows and snow. 23 | 24 | Shewhart 25 | ^^^^^^^^ 26 | 27 | .. code-block:: python 28 | 29 | nrt_class.fit(dataarray, screen_outliers='Shewhart', L=5) 30 | 31 | This outlier screening is using Shewhart control charts to remove outliers. 32 | The optional parameter ``L`` defines how sensitive the outlier screening is. 
33 | 34 | With this method, first an OLS fit is carried out using the arguments passed during 35 | instantiation. Then the standard deviation :math:`\sigma` of residuals is computed and all observations with 36 | residuals larger than :math:`L\cdot\sigma` are screened out. 37 | 38 | 39 | CCDC-RIRLS 40 | ^^^^^^^^^^ 41 | 42 | While Shewhart outlier screening could work for any type of time series, the default outlier screening 43 | used by CCDC is tailored for optical satellite time series to mask out clouds and 44 | cloud shadows. 45 | 46 | .. code-block:: python 47 | 48 | nrt_class.fit(dataarray, screen_outliers='CCDC-RIRLS', 49 | green=xr_green, swir=xr_swir, scaling_factor=10000) 50 | 51 | This screening uses hard-coded thresholds of the short-wave infrared (SWIR) and green bands 52 | to detect clouds and cloud shadows. For this, reflectance values of the green and 53 | SWIR bands need to be passed as ``xarray.DataArrays``. Originally the bands 2 (0.52-0.60 µm) and 5 (1.55-1.75 µm) 54 | of the Landsat 5 Thematic Mapper were used. 55 | 56 | If other sensors like Sentinel 2 are used, which supply data with a scaling factor, the optional parameter 57 | ``scaling_factor`` needs to be set appropriately to bring the values to a 0-1 range. 58 | 59 | To screen out clouds, CCDC-RIRLS uses a Robust Iteratively Reweighted Least Squares fit to reduce the influence 60 | of outliers on the fit. See the chapter about RIRLS for more details. 61 | 62 | Do note that the RIRLS fit is quite computationally intensive. 63 | 64 | 65 | Fitting 66 | ======= 67 | 68 | In general when trying to fit a temporal signature it is advisable to fit it on a stable part 69 | of the time-series which doesn't include structural changes. 70 | For this there are two fitting methods (ROC and CCDC-stable) available that aim to achieve a fit on a stable 71 | part of the time-series. 
72 | The other two fitting methods (OLS, RIRLS) always fit a model on the entire history period, so if 73 | a lot of disturbances happened during the history period, the fitting results with these 74 | two methods might deliver worse results. Especially OLS however is much less computationally expensive than 75 | ROC and CCDC-stable. 76 | 77 | OLS 78 | ^^^^ 79 | 80 | .. code-block:: python 81 | 82 | nrt_class.fit(dataarray, method='OLS') 83 | 84 | This carries out an ordinary least squares fit. All other available fitting methods in this package 85 | are at some point based on this fit. 86 | 87 | RIRLS 88 | ^^^^^^ 89 | 90 | .. code-block:: python 91 | 92 | nrt_class.fit(dataarray, method='RIRLS', maxiter=50) 93 | 94 | The Robust Iteratively Reweighted Least Squares fit isn't the default for any nrt monitoring class, it's 95 | main purpose is in the outlier screening method CCDC-RIRLS. 96 | 97 | By iteratively reweighting each observation in the time-series, a fit is reached which is less influenced by 98 | outliers in the time-series. 99 | 100 | This process can take a lot of iterations and thus can become very computationally expensive. The maximum number 101 | of iterations can be controlled by setting ``maxiter``. There are also many more possible parameters to modify. 102 | For a complete list see the api documentation for ``RIRLS``. 103 | 104 | ROC 105 | ^^^^ 106 | 107 | .. code-block:: python 108 | 109 | nrt_class.fit(dataarray, method='ROC', alpha=0.05) 110 | 111 | Reverse Ordered Cumulative Sums (ROC) works by applying the same type of monitoring logic as in CuSum to the fitting. 112 | In particular this means, that the fitting period is gradually increased backwards in time starting from the 113 | end of the entire history period (so in reverse order). The period is increased as long as the 114 | cumulative sum of residuals is within a certain threshold which depends on ``alpha``. 
115 | 116 | As soon as the threshold is crossed, it is likely that there was a structural break in the history period and thus 117 | the rest of the time series before the threshold was crossed will not be used for fitting the model. 118 | 119 | ``alpha`` is the significance of the detected structural break. So the lower ``alpha`` the lower the sensitivity 120 | for breaks in the time-series. 121 | 122 | 123 | CCDC-stable 124 | ^^^^^^^^^^^^ 125 | 126 | .. code-block:: python 127 | 128 | nrt_class.fit(dataarray, method='CCDC-stable', threshold=3) 129 | 130 | With CCDC-stable, models are first fit using an OLS regression. 131 | Those models are then checked for stability. 132 | 133 | Stability is given if: 134 | 135 | 1. slope / RMSE < threshold 136 | 2. first observation / RMSE < threshold 137 | 3. last observation / RMSE < threshold 138 | 139 | 140 | Since the slope of the model is one of the test conditions, it is required for ``trend`` to be ``True`` 141 | during instantiation of the monitoring class. 142 | 143 | If a model is not stable, the two oldest 144 | acquisitions are removed, a model is fit using this shorter 145 | time-series and again checked for stability. This process continues until the model is stable 146 | or until not enough observations are left, at which point the time-series will get marked as 147 | unstable and not be fit. 148 | 149 | .. note:: 150 | This process is slightly different to the one described in Zhu & Woodcock 2014, 151 | since with the nrt package no new observations can be added during fitting. 
-------------------------------------------------------------------------------- /docs/gallery/README.rst: -------------------------------------------------------------------------------- 1 | Examples gallery 2 | ================ 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/gallery/plot_parallel_computing.py: -------------------------------------------------------------------------------- 1 | r""" 2 | Parallel model fitting 3 | ====================== 4 | The most computationally expensive part of a typical nrt workflow is the fitting 5 | of a harmonic model over the stable history period. Starting with version ``0.2.0``, 6 | ``nrt`` uses multithreading to further speed up the already fast model fitting. 7 | This example illustrates how multithreading can be enabled and adjusted to your use case. 8 | """ 9 | 10 | ############################################################## 11 | # Configure multithreading options of linear algebra library 12 | # ----------------------------------------------------------- 13 | # 14 | # Most of the low level computation/numerical optimization occurring during model 15 | # fitting with nrt relies on a linear algebra library. These libraries often implement 16 | # low level methods with built-in multi-threading. ``nrt`` implements multi-threading 17 | # thanks to ``numba`` on a different, higher level. 18 | # To prevent nested parallelism that would result in over-subscription and potentially 19 | # reduce performance, it is recommended to disable the built-in multi-threading 20 | # of the linear algebra library being used. 21 | # Depending on how ``numpy`` was installed, it will rely on one of the three linear 22 | # algebra libraries which are OpenBLAS, MKL or BLIS. At the time of writing this 23 | # tutorial, PyPI wheels (obtained when installing ``numpy`` using pip) are shipped 24 | # with OpenBLAS, while a conda installation from the default channel will come with 25 | # MKL.
All three libraries use an environmental variable to control threading 26 | # (``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS`` and ``BLIS_NUM_THREADS``); in the 27 | # present example, we set them all to ``'1'`` directly from within python. 28 | # Although knowing which library is used on your system would allow you to remove 29 | # the unnecessary configuration lines, it is not entirely necessary. 30 | import os 31 | # Note that 1 is a string, not an integer 32 | os.environ['MKL_NUM_THREADS'] = '1' 33 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 34 | os.environ['BLIS_NUM_THREADS'] = '1' 35 | 36 | ############################################################## 37 | # Create benchmark data 38 | # --------------------- 39 | # 40 | # Using the synthetic data generation functionalities of the package, we can create 41 | # an xarray DataArray for benchmark. Note that in order to keep the compilation time 42 | # of this tutorial manageable we limit the size of that object to 200 by 200 pixels. 43 | # While this is significantly smaller than e.g. 
a Sentinel2 MGRS tile, it is sufficient 44 | # to illustrate differences in fitting time among various fitting strategies 45 | import xarray as xr 46 | import numpy as np 47 | from nrt import data 48 | 49 | # Create synthetic ndvi data cube 50 | dates = np.arange('2018-01-01', '2020-12-31', dtype='datetime64[W]') 51 | params_ds = data.make_cube_parameters(shape=(200,200), unstable_proportion=0) 52 | cube = data.make_cube(dates=dates, params_ds=params_ds) 53 | # We also create a very small cube for running each fitting method once before 54 | # the benchmark, ensuring compilation of the jitted functions and fair comparison 55 | cube_sub = cube.isel(indexers={'x': slice(1,5), 'y': slice(1,5)}) 56 | 57 | 58 | ############################################################## 59 | # Benchmark fitting time of all methods 60 | # ------------------------------------- 61 | # 62 | # Note that we are only interested in fitting time and therefore use a single 63 | # class instance for the benchmark. The time required for any subsequent .monitor() 64 | # call is usually negligible and as a consequence not included in this benchmark. 65 | # We use here ``CuSum`` but any of the monitoring classes could be used and 66 | # would produce the same results. 
67 | import time 68 | import itertools 69 | from collections import defaultdict 70 | from nrt.monitor.cusum import CuSum 71 | import matplotlib.pyplot as plt 72 | 73 | # Benchmark parameters 74 | benchmark_dict = defaultdict(dict) 75 | monitor = CuSum() 76 | methods = ['OLS', 'RIRLS', 'CCDC-stable', 'ROC'] 77 | threads = range(1,3) 78 | 79 | # Make sure all numba jitted function are compiled 80 | monitor_ = CuSum() 81 | [monitor_.fit(cube_sub, method=method) for method in methods] 82 | 83 | # Benchmark loop 84 | for method, n_threads in itertools.product(methods, threads): 85 | t0 = time.time() 86 | monitor.fit(cube, n_threads=n_threads, method=method) 87 | t1 = time.time() 88 | benchmark_dict[method][n_threads] = t1 - t0 89 | 90 | # Visualize the results 91 | index = np.arange(len(methods)) 92 | for idx, n in enumerate(threads): 93 | values = [benchmark_dict[method][n] for method in methods] 94 | plt.bar(index + idx * 0.2, values, 0.2, label='%d thread(s)' % n) 95 | 96 | plt.xlabel('Fitting method') 97 | plt.ylabel('Time (seconds)') 98 | plt.title('Fitting time') 99 | plt.xticks(index + 0.2, methods) 100 | plt.legend() 101 | plt.tight_layout() 102 | plt.show() 103 | 104 | ############################################################## 105 | # From the results above we notice large differences in fitting time among fitting 106 | # methods. Unsurprisingly, OLS is the fastest, which is expected given that all 107 | # other methods use OLS complemented with some additional, sometimes iterative 108 | # refitting, etc... All methods but ``ROC`` for which parallel fitting hasn't been 109 | # implemented, benefit from using multiple threads. 110 | # Note that a multithreading benefit can only be observed as long as the number 111 | # threads is lower than the computing resources available. 
The machine used for 112 | # compiling this tutorial is not meant for heavy computation and obviously has limited 113 | # resources as shown by the cpu_count below 114 | import multiprocessing 115 | print(multiprocessing.cpu_count()) 116 | 117 | 118 | ############################################################## 119 | # Further considerations 120 | # ---------------------- 121 | # 122 | # A deployment at scale may involve several levels of parallelization. The multi-threaded 123 | # example illustrated above is made possible thanks to the numba parallel accelerator. 124 | # However, it is also very common to handle the earlier steps of data loading and 125 | # data pre-processing with ``dask.distributed``, which facilitates lazy and distributed 126 | # computation. There is no direct integration between the two parallelism mechanisms 127 | # and while calling ``.fit()`` on a lazy distributed dask array is possible, the lazy 128 | # evaluation cannot be preserved and all the input data need to be evaluated and 129 | # loaded in memory 130 | from nrt import data 131 | 132 | # Lazy load test data using dask 133 | cube = data.romania_10m(chunks={'x': 20, 'y': 20}) 134 | vi_cube = (cube.B08 - cube.B04) / (cube.B08 + cube.B04) 135 | print(vi_cube) 136 | monitor = CuSum() 137 | monitor.fit(vi_cube, method='OLS', n_threads=2) 138 | print(type(monitor.beta)) 139 | -------------------------------------------------------------------------------- /docs/gallery/plot_simulation_data.py: -------------------------------------------------------------------------------- 1 | r""" 2 | Synthetic disturbance data 3 | ========================== 4 | This example illustrates the simulation of a near real time monitoring scenario on synthetic data. 5 | The EWMA approach instantiated from ``nrt.monitor.ewma import EWMA`` is used for monitoring and detection 6 | of the artificially generated breakpoints and the experiment is concluded by a simple accuracy assessment. 
7 | """ 8 | 9 | ############################################################# 10 | # Synthetic data generation 11 | # ------------------------- 12 | # 13 | # The simulate module of the nrt-data package contains functionalities to create synthetic 14 | # data with controlled parameters such as position of structural change, phenological 15 | # amplitude, noise level, etc 16 | # One such example can be visualized using the ``make_ts`` function, which 17 | # creates a single time-series. 18 | import random 19 | 20 | import numpy as np 21 | from nrt.data import simulate 22 | import matplotlib.pyplot as plt 23 | import matplotlib.dates as mdates 24 | 25 | dates = np.arange('2018-01-01', '2022-06-15', dtype='datetime64[W]') 26 | fig, axes = plt.subplots(3,3, constrained_layout=True) 27 | for row, amplitude in zip(axes, [0.1, 0.2, 0.3]): 28 | for ax, noise in zip(row, [0.02, 0.05, 0.1]): 29 | break_idx = random.randint(30,100) 30 | ts = simulate.make_ts(dates=dates, 31 | break_idx=break_idx, 32 | sigma_noise=noise, 33 | amplitude=amplitude) 34 | ax.plot(dates, ts) 35 | ax.axvline(x=dates[break_idx], color='magenta') 36 | ax.set_ylim(-0.1,1.1) 37 | ax.set_title('Amplitude: %.1f\nsigma noise: %.2f' % (amplitude, noise), 38 | fontsize=11) 39 | ax.xaxis.set_major_locator(mdates.YearLocator()) 40 | ax.xaxis.set_major_formatter(mdates.DateFormatter("\n%Y")) 41 | ax.tick_params( axis='x', which='both', bottom=False, top=False, 42 | labelbottom=False) 43 | fig.supxlabel('Time') 44 | fig.supylabel('NDVI') 45 | plt.show() 46 | 47 | ################################################################# 48 | # The spatial counterpart of ``make_ts`` is ``make_cube`` and its main argument 49 | # is an ``xarray.Dataset`` of simulation parameters that can be generated 50 | # with the ``make_cube_parameters`` function. 51 | # The data cube generated is a standard univariate ``xarray.DataArray`` with 52 | # ``x``, ``y`` and ``time`` dimensions. 
Each pixel in the spatial dimensions contains 53 | # a time-series of simulated values with varying levels of noise, seasonality, outliers 54 | # and in some cases a structural break point 55 | 56 | params_ds = simulate.make_cube_parameters(shape=(50,50), 57 | n_outliers_interval=(0,5), 58 | n_nan_interval=(0,7), 59 | break_idx_interval=(105,dates.size - 20)) 60 | # Convert break_idx to dates 61 | print('Early breakpoint: %s' % dates[105]) 62 | print('Late breakpoint: %s' % dates[dates.size - 20]) 63 | cube = simulate.make_cube(dates=dates, params_ds=params_ds) 64 | 65 | ################################################################# 66 | # In the ndvi datacube created, 50 percents of the pixels contain a break point 67 | # occuring between 2020-01-02 and 2022-01-20. The ``break_idx`` variable of 68 | # the ``params_ds`` ``Dataset`` informs on the presence or absence of a break point, 69 | # and its position. 70 | 71 | ################################################################### 72 | # Simulation of an NRT monitoring scenario 73 | # -------------------------------------------------- 74 | # For the simulating a near real time monitoring scenario, we consider all the pixels 75 | # of the datacube (no mask) and define the 2018-01-01 to 2019-12-31 period as the 76 | # stable history period and all subsequent dates as monitoring. We know from the 77 | # time-series simulation parameters that the stable history period is indeed free of breakpoints. 78 | # In a real life near real time monitoring use case, fitting and monitoring are 79 | # occuring separately; we therefore need to split the datacube created in two. 80 | # 81 | # After that instantiation of the ``EWMA`` class and stable history takes place. 82 | # The harmonic fit parameters for each pixel is stored in the instance 83 | # of the ``EWMA`` class 84 | # Note that in a real life scenario, several days may pass between fitting and the 85 | # next observation, or between consecutive observations. 
The fit parameters or 86 | # ongoing monitoring variables are then usually stored to disk in a NetCDF file. 87 | # See the ``to_netcdf()`` method for more details. 88 | # During monitoring each new observation needs to be passed to the monitor method 89 | # as a numpy array. Since we currently have these observations in an xarray DataArray 90 | # structure, we need to unpack each temporal slice as an (array, date) tuple 91 | 92 | import datetime 93 | 94 | from nrt.monitor.ewma import EWMA 95 | 96 | cube_history = cube.sel(time=slice('2018-01-01','2019-12-31')) 97 | cube_monitor = cube.sel(time=slice('2020-01-01', '2022-12-31')) 98 | 99 | # Monitoring class instantiation and fitting 100 | monitor = EWMA(trend=False, harmonic_order=1, lambda_=0.3, sensitivity=4, 101 | threshold_outlier=10) 102 | monitor.fit(dataarray=cube_history) 103 | 104 | # Monitor every date of the ``cube_monitor`` DataArray 105 | for array, date in zip(cube_monitor.values, 106 | cube_monitor.time.values.astype('M8[s]').astype(datetime.datetime)): 107 | monitor.monitor(array=array, date=date) 108 | 109 | 110 | ############################################################################ 111 | # Monitoring performances evaluation 112 | # ---------------------------------- 113 | # Assessing the performance of a time-series monitoring algorithm can be a complex 114 | # task that depends on the specific use case and what the user wants to emphasize. 115 | # A user valuing rapid detection will chose an assessment approach that puts extra 116 | # weight on the temporal aspect or penalize late detections, while if timeliness 117 | # is not a requirement, accuracy assessment will resemble standard spatial validation. 118 | # In the present example we work with a temporal threshold for which 6 months is the 119 | # default value. 
This approach to accuracy assessment implies that any breakpoint 120 | # occuring outside of the 6 months periods after the simulated breakpoint (considered ground thruth) 121 | # is considered comission error. Absence of detection during that same period would then be 122 | # an omission, detections during the period are true positives, and absence of detection 123 | # on stable time-series are true negatives. 124 | # Note that alternative accuracy assessment approaches exist; see for instance [1]_ who 125 | # chose to use ``PixelYears`` as their sampling units, or [2]_ who introduced the 126 | # concept of a time weighted F1 score, hence considerating simultaneously detection 127 | # speed and spatial accuracy in a single index. 128 | 129 | def accuracy(nrtInstance, params_ds, dates, delta=180): 130 | """Compute accuracy metrics (precision, recall) of a nrt simulation on synthetic data 131 | 132 | Args: 133 | nrtInstance: Instance of a nrt monitoring class used for monitoring 134 | params_ds: Time-series generation paramaters 135 | dates: Array of numpy.datetime64 dates used for synthetic time-series generation 136 | delta (int): Time delta in day after a reference break for a detected break 137 | to be considered True Positive. 
138 | """ 139 | detection_date = nrtInstance._report(layers=['detection_date'], dtype=np.uint16) 140 | dates_true = np.where(params_ds.break_idx != -1, 141 | dates[params_ds.break_idx.values], 142 | np.datetime64('NaT')) 143 | dates_true_bound = dates_true + np.timedelta64(delta) 144 | dates_pred = np.datetime64('1970-01-01') + np.timedelta64(1) * detection_date 145 | dates_pred[dates_pred == np.datetime64('1970-01-01')] = np.datetime64('NaT') 146 | # Computes arrays of TP, FP, FN (they should be mutually exclusive) 147 | TP = np.where(np.logical_and(dates_pred >= dates_true, dates_pred <= dates_true_bound), 148 | 1, 0) 149 | FP = np.where(np.logical_and(TP == 0, ~np.isnat(dates_pred)), 1, 0) 150 | FN = np.where(np.logical_and(np.isnat(dates_pred), ~np.isnat(dates_true)), 1, 0) 151 | precision = TP.sum() / (TP.sum() + FP.sum()) 152 | recall = TP.sum() / (TP.sum() + FN.sum()) 153 | return precision, recall 154 | 155 | print(accuracy(monitor, params_ds, dates)) 156 | 157 | #################################################################### 158 | # White noise sensitivity analysis 159 | # -------------------------------- 160 | # To go one step further we can assess and visualize how these accuracy measures 161 | # vary with the amount of noise present in the synthetic data. 162 | # For that we define a new function encompassing all the steps of data generation, 163 | # instantiation, fitting and monitoring 164 | # 165 | # The increase in recall at low noise levels is probably due to the extreme outliers 166 | # filtering feature of the EWMA monitoring process, OUtliers that exceed ``threshold_outlier`` 167 | # times the standard deviation of the fit residuals are considered extreme 168 | # outliers (often clouds or artifacts) in real images, and do not contribute to the monitoring 169 | # process. With such low noise levels, that threshold is easily reached and breaks missed. 

def make_cube_fit_and_monitor(dates, noise_level):
    """Generate a synthetic cube at a given noise level, then fit and monitor it

    Args:
        dates: Array of numpy.datetime64 dates used for time-series generation
        noise_level (float): Standard deviation of the white noise added to
            every simulated time-series (passed as a degenerate interval so
            that all pixels share the same noise level)

    Returns:
        tuple: (params_ds, monitor) where params_ds is the simulation
            parameters Dataset and monitor the EWMA instance after monitoring
    """
    params_ds = simulate.make_cube_parameters(shape=(20,20),
                                              n_outliers_interval=(4,5),
                                              n_nan_interval=(3,4),
                                              sigma_noise_interval=(noise_level, noise_level),
                                              break_idx_interval=(105,dates.size - 20))
    cube = simulate.make_cube(dates=dates, params_ds=params_ds)
    cube_history = cube.sel(time=slice('2018-01-01','2019-12-31'))
    cube_monitor = cube.sel(time=slice('2020-01-01', '2022-12-31'))
    # Monitoring class instantiation and fitting
    monitor = EWMA(trend=False, harmonic_order=1, lambda_=0.3, sensitivity=4,
                   threshold_outlier=10)
    monitor.fit(dataarray=cube_history)
    # Monitor every date of the ``cube_monitor`` DataArray
    for array, date in zip(cube_monitor.values,
                           cube_monitor.time.values.astype('M8[s]').astype(datetime.datetime)):
        monitor.monitor(array=array, date=date)
    return params_ds, monitor

noises = [0.02, 0.03, 0.05, 0.07, 0.09, 0.12, 0.15, 0.2]
prs = []
for noise in noises:
    params_ds, monitor = make_cube_fit_and_monitor(dates, noise)
    prs.append(accuracy(monitor, params_ds, dates))

precisions, recalls = zip(*prs)
plt.plot(noises, precisions, label='Precision')
plt.plot(noises, recalls, label='Recall')
plt.xlabel('Noise level')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()

###########################################################################
# References
# ----------
#
# .. [1] Bullock, E.L., Woodcock, C.E. and Holden, C.E., 2020. Improved
#        change monitoring using an ensemble of time series algorithms.
#        Remote Sensing of Environment, 238, p.111165.
#
# .. [2] Viehweger, J., 2021. Comparative Assessment of Near Real-Time Forest
#        Disturbance Detection Algorithms.
Master thesis, Philipps Universitat 214 | # Marburg 215 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. nrt documentation master file, created by 2 | sphinx-quickstart on Thu Apr 8 11:56:08 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to nrt's documentation! 7 | =============================== 8 | 9 | `nrt` is a Python package designed for near real-time detection of changes in spatio-temporal datasets, with a particular focus on monitoring forest disturbances from satellite image time-series. It offers a standardized API inspired by scikit-learn, ensuring seamless interoperability and comparison across various state-of-the-art monitoring algorithms. Optimized for rapid computation, `nrt` is suitable for operational deployment at scale. This package is an essential tool for researchers and practitioners aiming for timely and efficient monitoring, contributing to climate change mitigation, biodiversity conservation, and natural heritage preservation. Explore the official documentation to get started with installation, usage examples, and more. 10 | 11 | .. automodule:: nrt 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: Contents: 16 | 17 | quickstart 18 | classes 19 | fit 20 | api_reference 21 | auto_examples/index 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`modindex` 29 | * :ref:`search` 30 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ********** 3 | 4 | Introduction 5 | ============ 6 | 7 | The nrt package provides near real-time disturbance monitoring algorithms for spatio-temporal datasets. 8 | 9 | The following algorithms are implemented: 10 | 11 | - EWMA (Brooks et al., 2013) [1]_ 12 | - CUSUM (Verbesselt et al., 2012) [2]_ 13 | - MOSUM (Verbesselt et al., 2012) [2]_ 14 | - CCDC (Zhu et al., 2012; 2014) [3]_ [4]_ 15 | - IQR 16 | 17 | For more information on the basic function of each monitoring method see :ref:`classes`. 18 | 19 | Installation 20 | ============ 21 | 22 | Install the package from pypi using: 23 | 24 | .. code-block:: 25 | 26 | pip install nrt 27 | 28 | Workflow 29 | ======== 30 | 31 | In general a workflow with the `nrt` package can be divided into three parts: 32 | Instantiating, Fitting and Monitoring. 33 | 34 | Instantiating 35 | ------------- 36 | 37 | .. 
code-block:: python 38 | 39 | from nrt.monitor.ewma import EWMA 40 | from nrt import data 41 | 42 | # load example mask 43 | mask = (data.romania_forest_cover_percentage() > 20).astype('int') 44 | 45 | # Instantiate 46 | nrt_class = EWMA( 47 | mask=mask, 48 | trend=True, 49 | harmonic_order=3, 50 | sensitivity=2 51 | ) 52 | 53 | Here, the monitoring class ``EWMA`` is imported. Along with that a forest mask 54 | is constructed. This mask is optional but must have the same shape as the data array which is 55 | later supplied for fitting. 56 | 57 | All available monitoring classes accept the parameters ``mask``, ``trend``, ``harmonic_order`` 58 | and ``sensitivity``. Some monitoring classes also accept additional parameters. 59 | For more information see :ref:`classes`. 60 | 61 | .. note:: 62 | ``sensitivity`` takes different values depending on the monitoring class. 63 | For example larger ``sensitivity`` values mean higher sensitivity for ``EWMA``, 64 | but lower sensitivity for ``MoSum``. 65 | 66 | 67 | 68 | Fitting 69 | ------------- 70 | 71 | .. code-block:: python 72 | 73 | # load example xarray 74 | s2_cube = data.romania_20m() 75 | history = s2_cube.B03.sel(time=slice(None, '2019-01-01')) 76 | monitor = s2_cube.B03.sel(time=slice('2019-01-01', None)) 77 | 78 | # Fitting 79 | nrt_class.fit(dataarray=history) 80 | 81 | # Dump model 82 | nrt_class.to_netcdf('model.nc') 83 | 84 | In this example some sample data is created first, which is used for fitting and 85 | later for monitoring. 86 | ``.fit()`` has only one non optional parameter ``dataarray`` which expects an 87 | ``xarray.DataArray``. During fitting there are other optional parameters which can be set. 88 | See :ref:`fitting` for more detail. 89 | 90 | If the next observation is not immediately available, the model can be dumped 91 | to a NetCDF file which can then be loaded once the next observation is available. 92 | 93 | Monitoring 94 | ------------- 95 | 96 | .. 
code-block:: python

    # Load dumped model
    nrt_class = EWMA.from_netcdf('model.nc')

    # Monitor new observations
    for array, date in zip(monitor.values, monitor.time.values.astype('datetime64[s]').tolist()):
        nrt_class.monitor(array=array, date=date)

    # Report results
    nrt_class.report('report.tif')

If the model was dumped to a NetCDF it can be read from disk with ``from_netcdf()``.
Monitoring happens with ``.monitor()``. This only takes a numpy array and a date of class
``datetime.date``.

At any time during monitoring a report can be generated by calling
``.report()``. This report returns a GeoTIFF with two bands, one showing the status
of all pixels (e.g. not monitored, disturbed, etc.) and another band showing the date when
a disturbance was detected in days since 1970-01-01.

References
==========

.. [1] Brooks, E.B., Wynne, R.H., Thomas, V.A., Blinn, C.E. and Coulston, J.W., 2013.
   On-the-fly massively multitemporal change detection using statistical quality control charts and Landsat data.
   IEEE Transactions on Geoscience and Remote Sensing, 52(6), pp.3316-3332.
   https://doi.org/10.1109/TGRS.2013.2272545

.. [2] Verbesselt, J., Zeileis, A. and Herold, M., 2012.
   Near real-time disturbance detection using satellite image time series.
   Remote Sensing of Environment, 123, pp.98-108.
   https://doi.org/10.1016/j.rse.2012.02.022

.. [3] Zhu, Z., Woodcock, C.E. and Olofsson, P., 2012.
   Continuous monitoring of forest disturbance using all available Landsat imagery.
   Remote sensing of environment, 122, pp.75-91.
   https://doi.org/10.1016/j.rse.2011.10.030

.. [4] Zhu, Z. and Woodcock, C.E., 2014.
   Continuous change detection and classification of land cover using all available Landsat data.
   Remote sensing of Environment, 144, pp.152-171.
138 | https://doi.org/10.1016/j.rse.2014.01.011 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /nrt/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__('pkgutil').extend_path(__path__, __name__) 2 | 3 | from importlib.metadata import version 4 | 5 | try: 6 | __version__ = version("nrt") 7 | except Exception: 8 | # Local copy or not installed with setuptools. 9 | # Disable minimum version checks on downstream libraries. 10 | __version__ = "9999" 11 | -------------------------------------------------------------------------------- /nrt/fit_methods.py: -------------------------------------------------------------------------------- 1 | """Model fitting 2 | 3 | Functions defined in this module always use a 2D array containing the dependant 4 | variables (y) and return both coefficient (beta) and residuals matrices. 5 | These functions are meant to be called in ``nrt.BaseNrt._fit()``. 6 | 7 | The RIRLS fit is derived from Chris Holden's yatsm package. See the 8 | copyright statement below. 9 | """ 10 | # Copyright (C) 2022 European Union (Joint Research Centre) 11 | # 12 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 13 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 14 | # You may not use this work except in compliance with the Licence. 15 | # You may obtain a copy of the Licence at: 16 | # 17 | # https://joinup.ec.europa.eu/software/page/eupl 18 | # 19 | # Unless required by applicable law or agreed to in writing, software 20 | # distributed under the Licence is distributed on an "AS IS" basis, 21 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | # See the Licence for the specific language governing permissions and 23 | # limitations under the Licence. 
24 | 25 | ############################################################################### 26 | # The MIT License (MIT) 27 | # 28 | # Copyright (c) 2014 Chris Holden 29 | # 30 | # Permission is hereby granted, free of charge, to any person obtaining a copy 31 | # of this software and associated documentation files (the "Software"), to deal 32 | # in the Software without restriction, including without limitation the rights 33 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 34 | # copies of the Software, and to permit persons to whom the Software is 35 | # furnished to do so, subject to the following conditions: 36 | # 37 | # The above copyright notice and this permission notice shall be included in all 38 | # copies or substantial portions of the Software. 39 | # 40 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 41 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 42 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 43 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 44 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 45 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 46 | # SOFTWARE. 
47 | ############################################################################### 48 | 49 | import numpy as np 50 | import numba 51 | 52 | from nrt.log import logger 53 | from nrt import utils 54 | from nrt.utils_efp import history_roc 55 | from nrt.stats import nanlstsq, mad, bisquare 56 | 57 | 58 | def ols(X, y): 59 | """Fit simple OLS model 60 | 61 | Args: 62 | X ((M, N) np.ndarray): Matrix of independant variables 63 | y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables 64 | 65 | Returns: 66 | beta (numpy.ndarray): The array of regression estimators 67 | residuals (numpy.ndarray): The array of residuals 68 | """ 69 | beta = nanlstsq(X, y) 70 | residuals = np.dot(X, beta) - y 71 | return beta, residuals 72 | 73 | 74 | @utils.numba_kwargs 75 | @numba.jit(nopython=True, cache=True, parallel=True) 76 | def rirls(X, y, M=bisquare, tune=4.685, 77 | scale_est=mad, scale_constant=0.6745, 78 | update_scale=True, maxiter=50, tol=1e-8): 79 | """Robust Linear Model using Iterative Reweighted Least Squares (RIRLS) 80 | 81 | Perform robust fitting regression via iteratively reweighted least squares 82 | according to weight function and tuning parameter. 83 | Basically a clone from `statsmodels` that should be much faster. 84 | 85 | Note: 86 | For best performances of the multithreaded implementation, it is 87 | recommended to limit the number of threads used by MKL or OpenBLAS to 1. 88 | This avoids over-subscription, and improves performances. 
89 | By default the function will use all cores available; the number of cores 90 | used can be controled using the ``numba.set_num_threads`` function or 91 | by modifying the ``NUMBA_NUM_THREADS`` environment variable 92 | 93 | Args: 94 | X (np.ndarray): 2D (n_obs x n_features) design matrix 95 | y (np.ndarray): 1D independent variable 96 | tune (float): tuning constant for scale estimate 97 | maxiter (int, optional): maximum number of iterations (default: 50) 98 | tol (float, optional): convergence tolerance of estimate 99 | (default: 1e-8) 100 | scale_est (callable): estimate used to scale the weights 101 | (default: `mad` for median absolute deviation) 102 | scale_constant (float): normalization constant (default: 0.6745) 103 | update_scale (bool, optional): update scale estimate for weights 104 | across iterations (default: True) 105 | M (callable): function for scaling residuals 106 | tune (float): tuning constant for scale estimate 107 | 108 | Returns: 109 | tuple: beta-coefficients and residual vector 110 | """ 111 | beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64) 112 | resid = np.full_like(y, np.nan, dtype=np.float64) 113 | for idx in numba.prange(y.shape[1]): 114 | y_sub = y[:,idx] 115 | isna = np.isnan(y_sub) 116 | X_sub = X[~isna] 117 | y_sub = y_sub[~isna] 118 | beta_, resid_ = weighted_ols(X_sub, y_sub, np.ones_like(y_sub)) 119 | scale = scale_est(resid_, c=scale_constant) 120 | 121 | EPS = np.finfo(np.float32).eps 122 | if scale < EPS: 123 | beta[:,idx] = beta_ 124 | resid[~isna,idx] = resid_ 125 | continue 126 | 127 | iteration = 1 128 | converged = 0 129 | while not converged and iteration < maxiter: 130 | last_beta = beta_.copy() 131 | weights = M(resid_ / scale, c=tune) 132 | beta_, resid_ = weighted_ols(X_sub, y_sub, weights) 133 | if update_scale: 134 | scale = max(EPS,scale_est(resid_, c=scale_constant)) 135 | iteration += 1 136 | converged = not np.any(np.fabs(beta_ - last_beta > tol)) 137 | beta[:,idx] = beta_ 138 | 
resid[~isna,idx] = resid_ 139 | 140 | return beta, resid 141 | 142 | 143 | @numba.jit(nopython=True, cache=True) 144 | def weighted_ols(X, y, w): 145 | """Apply a weighted OLS fit to 1D data 146 | 147 | Args: 148 | X (np.ndarray): independent variables 149 | y (np.ndarray): dependent variable 150 | w (np.ndarray): observation weights 151 | 152 | Returns: 153 | tuple: coefficients and residual vector 154 | """ 155 | sw = np.sqrt(w) 156 | 157 | Xw = X * np.expand_dims(sw, -1) 158 | yw = y * sw 159 | 160 | beta,_,_,_ = np.linalg.lstsq(Xw, yw) 161 | 162 | resid = y - np.dot(X, beta) 163 | 164 | return beta, resid 165 | 166 | @utils.numba_kwargs 167 | @numba.jit(nopython=True, cache=True, parallel=True) 168 | def ccdc_stable_fit(X, y, dates, threshold=3): 169 | """Fitting stable regressions using an adapted CCDC method 170 | 171 | Models are first fit using OLS regression. Those models are then checked for 172 | stability. If a model is not stable, the two oldest 173 | acquisitions are removed, a model is fit using this shorter 174 | time-series and again checked for stability. This process continues as long 175 | as all of the following 3 conditions are met: 176 | 177 | 1. The timeseries is still unstable 178 | 2. There are enough cloud-free acquisitions left (threshold is 1.5x the 179 | number of parameters in the design matrix) 180 | 3. The time series includes data of more than half a year 181 | 182 | Stability depends on all these three conditions being true: 183 | 1. slope / RMSE < threshold 184 | 2. first observation / RMSE < threshold 185 | 3. last observation / RMSE < threshold 186 | 187 | Note: 188 | For best performances of the multithreaded implementation, it is 189 | recommended to limit the number of threads used by MKL or OpenBLAS to 1. 190 | This avoids over-subscription, and improves performances. 
191 | By default the function will use all cores available; the number of cores 192 | used can be controled using the ``numba.set_num_threads`` function or 193 | by modifying the ``NUMBA_NUM_THREADS`` environment variable 194 | 195 | Args: 196 | X ((M, N) np.ndarray): Matrix of independant variables 197 | y ((M, K) np.ndarray): Matrix of dependant variables 198 | dates ((M, ) np.ndarray): Corresponding dates to y in numpy datetime64 199 | threshold (float): Sensitivity of stability checking. Gets passed to 200 | ``is_stable_ccdc()`` 201 | Returns: 202 | beta (numpy.ndarray): The array of regression estimators 203 | residuals (numpy.ndarray): The array of residuals 204 | is_stable (numpy.ndarray): 1D Boolean array indicating stability 205 | start (numpy.ndarray): 1D integer array indicating day of fitting start 206 | as days since UNIX epoch. 207 | """ 208 | min_obs = int(X.shape[1] * 1.5) 209 | beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64) 210 | residuals = np.full_like(y, np.nan) 211 | stable = np.empty((y.shape[1])) 212 | fit_start = np.empty((y.shape[1])) 213 | for idx in numba.prange(y.shape[1]): 214 | y_sub = y[:, idx] 215 | isna = np.isnan(y_sub) 216 | X_sub = X[~isna] 217 | y_sub = y_sub[~isna] 218 | _dates = dates[~isna] 219 | is_stable = False 220 | 221 | # Run until minimum observations 222 | # or until stability is reached 223 | for jdx in range(len(y_sub), min_obs-1, -2): 224 | # Timeseries gets reduced by two elements 225 | # each iteration 226 | y_ = y_sub[-jdx:] 227 | X_ = X_sub[-jdx:] 228 | beta_sub = np.linalg.solve(np.dot(X_.T, X_), np.dot(X_.T, y_)) 229 | resid_sub = np.dot(X_, beta_sub) - y_ 230 | 231 | # Check for stability 232 | rmse = np.sqrt(np.mean(resid_sub ** 2)) 233 | slope = np.fabs(beta_sub[1]) / rmse < threshold 234 | first = np.fabs(resid_sub[0]) / rmse < threshold 235 | last = np.fabs(resid_sub[-1]) / rmse < threshold 236 | 237 | # Break if stability is reached 238 | is_stable = slope & first & last 239 | if is_stable: 
240 | break 241 | # Also break if less than half a year of data remain 242 | last_date = _dates[-1] 243 | first_date = _dates[-jdx] 244 | if last_date - first_date < 183: 245 | break 246 | 247 | beta[:,idx] = beta_sub 248 | residuals[-jdx:,idx] = resid_sub 249 | stable[idx] = is_stable 250 | fit_start[idx] = _dates[-jdx] 251 | return beta, residuals, stable.astype(np.bool_), fit_start 252 | 253 | 254 | @utils.numba_kwargs 255 | @numba.jit(nopython=True, cache=True, parallel=False) 256 | def roc_stable_fit(X, y, dates, alpha=0.05, crit=0.9478982340418134): 257 | """Fitting stable regressions using Reverse Ordered Cumulative Sums 258 | 259 | Calculates OLS coefficients, residuals and a stability mask based on 260 | a stable history period which is provided by ``history_roc()``. 261 | 262 | The pixel will get marked as unstable if: 263 | 1. The stable period is shorter than half a year OR 264 | 2. There are fewer observations than the number of coefficients in X 265 | 266 | The implementation roughly corresponds to the fit of bfastmonitor 267 | with the history option set to 'ROC'. 268 | 269 | Args: 270 | X ((M, N) np.ndarray): Matrix of independent variables 271 | y ((M, K) np.ndarray): Matrix of dependent variables 272 | dates ((M, ) np.ndarray): Corresponding dates to y in days since epoch 273 | (int) 274 | alpha (float): Significance level for the boundary 275 | (probability of type I error) 276 | crit (float): Critical value corresponding to the chosen alpha. Can be 277 | calculated with ``_cusum_rec_test_crit``. 278 | Default is the value for alpha=0.05 279 | 280 | Returns: 281 | beta (numpy.ndarray): The array of regression estimators 282 | residuals (numpy.ndarray): The array of residuals 283 | is_stable (numpy.ndarray): 1D Boolean array indicating stability 284 | start (numpy.ndarray): 1D integer array indicating day of fitting start 285 | as days since UNIX epoch.
286 | """ 287 | is_stable = np.ones(y.shape[1], dtype=np.bool_) 288 | fit_start = np.zeros_like(is_stable, dtype=np.uint16) 289 | beta = np.full((X.shape[1], y.shape[1]), np.nan, dtype=np.float64) 290 | nreg = X.shape[1] 291 | for idx in numba.prange(y.shape[1]): 292 | # subset and remove nan 293 | is_nan = np.isnan(y[:, idx]) 294 | _y = y[~is_nan, idx] 295 | _X = X[~is_nan, :] 296 | 297 | # get the index where the stable period starts 298 | stable_idx = history_roc(_X, _y, alpha=alpha, crit=crit) 299 | 300 | # If there are not enough observations available in the stable period 301 | # set stability to False and continue 302 | if len(_y) - stable_idx < nreg + 1: 303 | is_stable[idx] = False 304 | continue 305 | 306 | # Check if there is at least half a year (183 days) of data available 307 | # If not, set stability to False and continue 308 | _dates = dates[~is_nan] 309 | last_date = _dates[-1] 310 | first_date = _dates[stable_idx] 311 | if last_date - first_date < 183: 312 | is_stable[idx] = False 313 | continue 314 | 315 | # Subset and fit 316 | X_stable = _X[stable_idx:] 317 | y_stable = _y[stable_idx:] 318 | beta[:, idx] = np.linalg.solve(np.dot(X_stable.T, X_stable), 319 | np.dot(X_stable.T, y_stable)) 320 | fit_start[idx] = _dates[stable_idx] 321 | 322 | residuals = np.dot(X, beta) - y 323 | return beta, residuals, is_stable, fit_start 324 | -------------------------------------------------------------------------------- /nrt/log.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 15 | 16 | import logging 17 | 18 | _FORMAT = '%(asctime)s:%(levelname)s:%(lineno)s:%(module)s.%(funcName)s:%(message)s' 19 | _formatter = logging.Formatter(_FORMAT, '%H:%M:%S') 20 | _handler = logging.StreamHandler() 21 | _handler.setFormatter(_formatter) 22 | 23 | logger = logging.getLogger('nrt') 24 | logger.addHandler(_handler) 25 | logger.setLevel(logging.INFO) 26 | -------------------------------------------------------------------------------- /nrt/monitor/ccdc.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 
15 | 16 | import numpy as np 17 | import xarray as xr 18 | 19 | from nrt.monitor import BaseNrt 20 | 21 | 22 | class CCDC(BaseNrt): 23 | """Monitoring using CCDC-like implementation 24 | 25 | Implementation loosely following method described in Zhu & Woodcock 2014. 26 | 27 | Zhu, Zhe, and Curtis E. Woodcock. 2014. “Continuous Change Detection and 28 | Classification of Land Cover Using All Available Landsat Data.” Remote 29 | Sensing of Environment 144 (March): 152–71. 30 | https://doi.org/10.1016/j.rse.2014.01.011. 31 | 32 | Attributes: 33 | mask (numpy.ndarray): A 2D numpy array containing pixels that should 34 | be monitored (1) and not (0). The mask may be updated following 35 | history period stability check, and after a call to monitor 36 | following a confirmed break. Values are as follow. 37 | ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history', 38 | 3: 'Confirmed break - no longer monitored'}`` 39 | trend (bool): Indicate whether stable period fit is performed with 40 | trend or not 41 | harmonic_order (int): The harmonic order of the time-series regression 42 | beta (np.ndarray): 3D array containing the model coefficients 43 | x (numpy.ndarray): array of x coordinates 44 | y (numpy.ndarray): array of y coordinates 45 | sensitivity (float): sensitivity of the monitoring. Lower numbers are 46 | high sensitivity. Value can't be zero. 47 | boundary (int): Number of consecutive observations identified as outliers 48 | to signal as disturbance 49 | rmse (np.ndarray): 2D float array indicating RMSE for each pixel 50 | detection_date (numpy.ndarray): 2D array signalling detection date of 51 | disturbances in days since 1970-01-01 52 | 53 | Args: 54 | mask (numpy.ndarray): A 2D numpy array containing pixels that should be 55 | monitored marked as ``1`` and pixels that should be excluded (marked 56 | as ``0``). Typically a stable forest mask when doing forest disturbance 57 | monitoring. 
If no mask is supplied all pixels are considered and 58 | a mask is created following the ``fit()`` call 59 | trend (bool): Indicate whether stable period fit is performed with 60 | trend or not 61 | harmonic_order (int): The harmonic order of the time-series regression 62 | sensitivity (float): sensitivity of the monitoring. Lower numbers are 63 | high sensitivity. Value can't be zero. 64 | boundary (int): Number of consecutive observations identified as outliers 65 | to signal as disturbance 66 | **kwargs: Used to set internal attributes when initializing with 67 | ``.from_netcdf()`` 68 | """ 69 | def __init__(self, trend=True, harmonic_order=2, sensitivity=3, 70 | mask=None, boundary=3, **kwargs): 71 | super().__init__(mask=mask, 72 | trend=trend, 73 | harmonic_order=harmonic_order, 74 | boundary=boundary, 75 | **kwargs) 76 | self.sensitivity = sensitivity 77 | self.rmse = kwargs.get('rmse') 78 | self.monitoring_strategy = 'CCDC' 79 | 80 | def fit(self, dataarray, method='CCDC-stable', screen_outliers='CCDC_RIRLS', 81 | green=None, swir=None, scaling_factor=1, **kwargs): 82 | """Stable history model fitting 83 | 84 | If screen outliers is required, green and swir bands must be passed. 85 | 86 | The stability check will use the same sensitivity as is later used for 87 | detecting changes (default: 3*RMSE) 88 | 89 | Args: 90 | dataarray (xr.DataArray): xarray Dataarray including the historic 91 | data to be fitted 92 | method (string): Regression to use. See ``_fit()`` for info. 93 | screen_outliers (string): Outlier screening to use. 94 | See ``_fit()`` for info. 95 | green (xr.DataArray): Green reflectance values to be used by 96 | ``screen_outliers``. 97 | swir (xr.DataArray): Short wave infrared (SWIR) reflectance values 98 | to be used by ``screen_outliers``. 99 | scaling_factor (int): Optional Scaling factor to be applied to 100 | ``green`` and ``swir``. 
When ``screen_outliers`` is ``'CCDC_RIRLS'`` 101 | (default for CCDC), the outlier screening algorithm expects green 102 | and swir reflectance values in the [0,1] range. EO data are often 103 | scaled and stored as integer, with a scaling factor to convert 104 | between scaled and actual reflectance values. As an example, if 105 | scaled reflectance values are in the [0,10000] range, set 106 | ``scaling_factor`` to ``10000``. 107 | **kwargs: to be passed to ``_fit`` 108 | 109 | Examples: 110 | >>> from nrt.monitor.ccdc import CCDC 111 | >>> from nrt import data 112 | 113 | >>> # Load and prepare test data 114 | >>> mask = (data.romania_forest_cover_percentage() > 30).astype('int') 115 | >>> s2_cube = data.romania_20m() 116 | 117 | >>> s2_cube['ndvi'] = (s2_cube.B8A - s2_cube.B04) / (s2_cube.B8A + s2_cube.B04) 118 | >>> s2_cube = s2_cube.where(s2_cube.SCL.isin([4,5,7])) 119 | >>> cube_history = s2_cube.sel(time=slice('2015-01-01', '2018-12-31')) 120 | 121 | >>> # Instantiate monitoring class and fit the model, including outliers screening 122 | >>> ccdcMonitor = CCDC(trend=True, mask=mask) 123 | >>> ccdcMonitor.fit(dataarray=cube_history.ndvi, 124 | ... green=cube_history.B03, 125 | ... swir=cube_history.B11, 126 | ... 
scaling_factor=10000) 127 | """ 128 | self.set_xy(dataarray) 129 | X = self.build_design_matrix(dataarray, trend=self.trend, 130 | harmonic_order=self.harmonic_order) 131 | self.beta, residuals = self._fit(X, dataarray, 132 | method=method, 133 | screen_outliers=screen_outliers, 134 | green=green, swir=swir, 135 | scaling_factor=scaling_factor, 136 | **kwargs) 137 | self.rmse = np.sqrt(np.nanmean(residuals ** 2, axis=0)) 138 | 139 | def _update_process(self, residuals, is_valid): 140 | # TODO: Calculation is different for multivariate analysis 141 | # (mean of all bands has to be > sensitivity) 142 | with np.errstate(divide='ignore'): 143 | is_outlier = np.abs(residuals) / self.rmse > self.sensitivity 144 | # Update process 145 | if self.process is None: 146 | self.process = np.zeros_like(residuals, dtype=np.uint8) 147 | self.process = np.where(is_valid, 148 | self.process * is_outlier + is_outlier, 149 | self.process) 150 | -------------------------------------------------------------------------------- /nrt/monitor/cusum.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 
15 | 16 | import numpy as np 17 | import xarray as xr 18 | 19 | from nrt.monitor import BaseNrt 20 | from nrt.utils_efp import _cusum_ols_test_crit 21 | 22 | 23 | class CuSum(BaseNrt): 24 | """Monitoring using cumulative sums (CUSUM) of residuals 25 | 26 | Implementation following method as implemented in R package bFast. 27 | 28 | Attributes: 29 | mask (numpy.ndarray): A 2D numpy array containing pixels that should 30 | be monitored (1) and not (0). The mask may be updated following 31 | history period stability check, and after a call to monitor 32 | following a confirmed break. Values are as follow. 33 | ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history', 34 | 3: 'Confirmed break - no longer monitored'}`` 35 | trend (bool): Indicate whether stable period fit is performed with 36 | trend or not 37 | harmonic_order (int): The harmonic order of the time-series regression 38 | x (numpy.ndarray): array of x coordinates 39 | y (numpy.ndarray): array of y coordinates 40 | sensitivity (float): sensitivity of the monitoring. Lower numbers 41 | correspond to lower sensitivity. Equivalent to significance level 42 | 'alpha' with which the boundary is computed 43 | boundary (numpy.ndarray): process boundary for each time series. 44 | Calculated from alpha and length of time series. 45 | sigma (numpy.ndarray): Standard deviation for normalized residuals in 46 | history period 47 | histsize (numpy.ndarray): Number of non-nan observations in history 48 | period 49 | n (numpy.ndarray): Total number of non-nan observations in time-series 50 | critval (float): Critical test value corresponding to the chosen 51 | sensitivity 52 | detection_date (numpy.ndarray): 2D array signalling detection date of 53 | disturbances in days since 1970-01-01 54 | 55 | Args: 56 | mask (numpy.ndarray): A 2D numpy array containing pixels that should be 57 | monitored marked as ``1`` and pixels that should be excluded (marked 58 | as ``0``). 
Typically a stable forest mask when doing forest disturbance 59 | monitoring. If no mask is supplied all pixels are considered and 60 | a mask is created following the ``fit()`` call 61 | trend (bool): Indicate whether stable period fit is performed with 62 | trend or not 63 | harmonic_order (int): The harmonic order of the time-series regression 64 | sensitivity (float): sensitivity of the monitoring. Lower numbers 65 | correspond to lower sensitivity. Equivalent to significance level 66 | 'alpha' with which the boundary is computed 67 | **kwargs: Used to set internal attributes when initializing with 68 | ``.from_netcdf()`` 69 | """ 70 | def __init__(self, trend=True, harmonic_order=2, sensitivity=0.05, 71 | mask=None, **kwargs): 72 | super().__init__(mask=mask, 73 | trend=trend, 74 | harmonic_order=harmonic_order, 75 | **kwargs) 76 | self.sensitivity = sensitivity 77 | self.critval = _cusum_ols_test_crit(sensitivity) 78 | self.sigma = kwargs.get('sigma') 79 | self.histsize = kwargs.get('histsize') 80 | self.n = kwargs.get('n') 81 | self.monitoring_strategy = 'CUSUM' 82 | 83 | def fit(self, dataarray, method='ROC', alpha=0.05, **kwargs): 84 | """Stable history model fitting 85 | 86 | If method ``'ROC'`` is used for fitting, the argument ``alpha`` has 87 | to be passed. 88 | 89 | Args: 90 | dataarray (xr.DataArray): xarray Dataarray including the historic 91 | data to be fitted 92 | method (string): Regression to use. See ``_fit()`` for info. 93 | alpha (float): Significance level for ``'ROC'`` stable fit. 
94 | **kwargs: to be passed to ``_fit`` 95 | """ 96 | self.set_xy(dataarray) 97 | X = self.build_design_matrix(dataarray, trend=self.trend, 98 | harmonic_order=self.harmonic_order) 99 | self.beta, residuals = self._fit(X, dataarray, 100 | method=method, 101 | alpha=alpha, 102 | **kwargs) 103 | 104 | # histsize is necessary for normalization of residuals, 105 | # n is necessary for boundary calculation 106 | self.histsize = np.sum(~np.isnan(residuals), axis=0)\ 107 | .astype(np.uint16) 108 | self.n = self.histsize 109 | self.boundary = np.full_like(self.histsize, np.nan, dtype=np.float32) 110 | self.sigma = np.nanstd(residuals, axis=0, ddof=X.shape[1]) 111 | # calculate process and normalize it using sigma and histsize 112 | with np.errstate(divide='ignore', invalid='ignore'): 113 | residuals_ = residuals / (self.sigma*np.sqrt(self.histsize)) 114 | self.process = np.nancumsum(residuals_, axis=0)[-1] 115 | 116 | def _update_process(self, residuals, is_valid): 117 | with np.errstate(divide='ignore', invalid='ignore'): 118 | # calculate boundary 119 | self.n = self.n + is_valid 120 | x = self.n / self.histsize 121 | self.boundary = np.where(is_valid, 122 | np.sqrt(x * (x - 1) 123 | * (self.critval**2 124 | + np.log(x / (x - 1)))), 125 | self.boundary) 126 | # normalize residuals 127 | residuals_norm = residuals / (self.sigma*np.sqrt(self.histsize)) 128 | # Update process 129 | self.process = np.where(is_valid, 130 | self.process+residuals_norm, 131 | self.process) 132 | -------------------------------------------------------------------------------- /nrt/monitor/ewma.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 
6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 15 | 16 | import numpy as np 17 | 18 | from nrt.monitor import BaseNrt 19 | 20 | 21 | class EWMA(BaseNrt): 22 | """Monitoring using EWMA control chart 23 | 24 | Implementation following method described in Brooks et al. 2014. 25 | 26 | Args: 27 | mask (numpy.ndarray): A 2D numpy array containing pixels that should be 28 | monitored marked as ``1`` and pixels that should be excluded (marked 29 | as ``0``). Typically a stable forest mask when doing forest disturbance 30 | monitoring. If no mask is supplied all pixels are considered and 31 | a mask is created following the ``fit()`` call 32 | trend (bool): Indicate whether stable period fit is performed with 33 | trend or not 34 | harmonic_order (int): The harmonic order of the time-series regression 35 | lambda_ (float): Weight of previous observation in the monitoring process 36 | (memory). Valid range is [0,1], 1 corresponding to no memory and 0 to 37 | full memory 38 | sensitivity (float): Sensitivity parameter used in the computation of the 39 | monitoring boundaries. 
Lower values imply more sensitive monitoring 40 | threshold_outlier (float): Values bigger than threshold_outlier*sigma 41 | (extreme outliers) will get screened out during monitoring and will 42 | not contribute to updating the EWMA process value 43 | **kwargs: Used to set internal attributes when initializing with 44 | ``.from_netcdf()`` 45 | """ 46 | def __init__(self, trend=True, harmonic_order=2, sensitivity=2, mask=None, 47 | lambda_=0.3, threshold_outlier=10, **kwargs): 48 | super().__init__(mask=mask, 49 | trend=trend, 50 | harmonic_order=harmonic_order, 51 | **kwargs) 52 | self.lambda_ = lambda_ 53 | self.sensitivity = sensitivity 54 | self.threshold = threshold_outlier 55 | self.sigma = kwargs.get('sigma') 56 | self.monitoring_strategy = 'EWMA' 57 | 58 | def fit(self, dataarray, method='OLS', 59 | screen_outliers='Shewhart', L=5, **kwargs): 60 | """Stable history model fitting 61 | 62 | The preferred fitting method for this monitoring type is ``'OLS'`` with 63 | outlier screening ``'Shewhart'``. It requires a control limit parameter 64 | ``L``. 
See ``nrt.outliers.shewart`` for more details 65 | """ 66 | self.set_xy(dataarray) 67 | X = self.build_design_matrix(dataarray, trend=self.trend, 68 | harmonic_order=self.harmonic_order) 69 | beta, residuals = self._fit(X, dataarray=dataarray, method=method, 70 | screen_outliers=screen_outliers, L=L, 71 | **kwargs) 72 | self.beta = beta 73 | # get new standard deviation 74 | self.sigma = np.nanstd(residuals, axis=0) 75 | # calculate EWMA control limits and save them 76 | # since control limits quickly approach a limit they are assumed to be 77 | # stable after the training period and can thus be simplified 78 | self.boundary = self.sensitivity * self.sigma * np.sqrt(( 79 | self.lambda_ / (2 - self.lambda_))) 80 | # calculate the EWMA value for the end of the training period and save it 81 | self.process = self._init_process(residuals) 82 | # Mark everything as unstable that already crosses the boundary after 83 | # fitting 84 | self.mask[self.process > self.boundary] = 2 85 | 86 | def _detect_extreme_outliers(self, residuals, is_valid): 87 | is_eoutlier = np.abs(residuals) > self.threshold * self.sigma 88 | return np.logical_and(~is_eoutlier, is_valid) 89 | 90 | def _update_process(self, residuals, is_valid): 91 | """Update process value (EWMA in this case) with new acquisition 92 | 93 | Args: 94 | residuals (numpy.ndarray): 2 dimensional array corresponding to the 95 | residuals of a new acquisition 96 | is_valid (np.ndarray): A boolean 2D array indicating where process 97 | values should be updated 98 | 99 | Returns: 100 | numpy.ndarray: A 2 dimensional array containing the updated EWMA 101 | values 102 | """ 103 | # If the monitoring has not been initialized yet, raise an error 104 | if self.process is None: 105 | raise ValueError('Process has to be initialized before update') 106 | # Update ewma value for element of the input array that are not Nan 107 | process_new = self._update_ewma(array=residuals, ewma=self.process, 108 | lambda_=self.lambda_) 109 | 
self.process = np.where(is_valid, process_new, self.process) 110 | 111 | @staticmethod 112 | def _update_ewma(array, ewma, lambda_): 113 | ewma_new = np.where(np.isnan(array), 114 | ewma, 115 | (1 - lambda_) * ewma + lambda_ * array) 116 | return ewma_new 117 | 118 | def _init_process(self, array): 119 | """Initialize the ewma process value using the residuals of the fitted values 120 | 121 | Args: 122 | array (np.ndarray): 3 dimensional array of residuals. Usually the 123 | residuals from the model fitting 124 | 125 | Returns: 126 | numpy.ndarray: A 2 dimensional array corresponding to the last slice 127 | of the recursive ewma process updating 128 | """ 129 | ewma = np.zeros_like(array[0,:,:]) 130 | for slice_ in array: 131 | ewma = self._update_ewma(array=slice_, ewma=ewma, lambda_=self.lambda_) 132 | return ewma 133 | 134 | 135 | -------------------------------------------------------------------------------- /nrt/monitor/iqr.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 
15 | 16 | import numpy as np 17 | 18 | from nrt.monitor import BaseNrt 19 | from nrt.stats import nan_percentile_axis0 20 | 21 | 22 | class IQR(BaseNrt): 23 | """Online monitoring of disturbances based on interquartile range 24 | 25 | Reference: 26 | https://stats.stackexchange.com/a/1153 27 | 28 | Attributes: 29 | mask (numpy.ndarray): A 2D numpy array containing pixels that should 30 | be monitored (1) and not (0). The mask may be updated following 31 | history period stability check, and after a call to monitor 32 | following a confirmed break. Values are as follow. 33 | ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history', 34 | 3: 'Confirmed break - no longer monitored'}`` 35 | trend (bool): Indicate whether stable period fit is performed with 36 | trend or not 37 | harmonic_order (int): The harmonic order of the time-series regression 38 | beta (np.ndarray): 3D array containing the model coefficients 39 | x (numpy.ndarray): array of x coordinates 40 | y (numpy.ndarray): array of y coordinates 41 | sensitivity (float): sensitivity of the monitoring. Lower numbers are 42 | high sensitivity. Value can't be zero. 43 | boundary (int): Number of consecutive observations identified as outliers 44 | to signal as disturbance 45 | q25 (numpy.ndarray): 25th percentile of residuals 46 | q75 (numpy.ndarray): 75th percentile of residuals 47 | detection_date (numpy.ndarray): 2D array signalling detection date of 48 | disturbances in days since 1970-01-01 49 | 50 | Args: 51 | mask (numpy.ndarray): A 2D numpy array containing pixels that should be 52 | monitored marked as ``1`` and pixels that should be excluded (marked 53 | as ``0``). Typically a stable forest mask when doing forest disturbance 54 | monitoring. 
If no mask is supplied all pixels are considered and 55 | a mask is created following the ``fit()`` call 56 | trend (bool): Indicate whether stable period fit is performed with 57 | trend or not 58 | harmonic_order (int): The harmonic order of the time-series regression 59 | sensitivity (float): sensitivity of the monitoring. Lower numbers are 60 | high sensitivity. Value can't be zero. 61 | boundary (int): Number of consecutive observations identified as outliers 62 | to signal as disturbance 63 | **kwargs: Used to set internal attributes when initializing with 64 | ``.from_netcdf()`` 65 | """ 66 | def __init__(self, trend=True, harmonic_order=3, sensitivity=1.5, mask=None, 67 | boundary=3, **kwargs): 68 | super().__init__(mask=mask, 69 | trend=trend, 70 | harmonic_order=harmonic_order, 71 | boundary=boundary, 72 | **kwargs) 73 | self.sensitivity = sensitivity 74 | self.q25 = kwargs.get('q25') 75 | self.q75 = kwargs.get('q75') 76 | self.monitoring_strategy = 'IQR' 77 | 78 | def fit(self, dataarray, method='OLS', **kwargs): 79 | self.set_xy(dataarray) 80 | X = self.build_design_matrix(dataarray, trend=self.trend, 81 | harmonic_order=self.harmonic_order) 82 | beta, residuals = self._fit(X, dataarray=dataarray, method=method, 83 | **kwargs) 84 | self.beta = beta 85 | q75, q25 = nan_percentile_axis0(residuals, np.array([75 ,25])) 86 | self.q25 = q25 87 | self.q75 = q75 88 | 89 | def _update_process(self, residuals, is_valid): 90 | # Compute upper and lower thresholds 91 | iqr = self.q75 - self.q25 92 | lower_limit = self.q25 - self.sensitivity * iqr 93 | upper_limit = self.q75 + self.sensitivity * iqr 94 | # compare residuals to thresholds 95 | is_outlier = np.logical_or(residuals > upper_limit, 96 | residuals < lower_limit) 97 | # Update self.process 98 | if self.process is None: 99 | self.process = np.zeros_like(residuals, dtype=np.uint8) 100 | self.process = np.where(is_valid, 101 | self.process * is_outlier + is_outlier, 102 | self.process) 103 | 
-------------------------------------------------------------------------------- /nrt/monitor/mosum.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 15 | 16 | import numpy as np 17 | import xarray as xr 18 | 19 | from nrt.monitor import BaseNrt 20 | from nrt.utils_efp import _mosum_ols_test_crit, _mosum_init_window 21 | 22 | 23 | class MoSum(BaseNrt): 24 | """Monitoring using moving sums (MOSUM) of residuals 25 | 26 | Implementation following method as implemented in R package bFast. 27 | 28 | Attributes: 29 | mask (numpy.ndarray): A 2D numpy array containing pixels that should 30 | be monitored (1) and not (0). The mask may be updated following 31 | history period stability check, and after a call to monitor 32 | following a confirmed break. Values are as follow. 
33 | ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history', 34 | 3: 'Confirmed break - no longer monitored'}`` 35 | trend (bool): Indicate whether stable period fit is performed with 36 | trend or not 37 | harmonic_order (int): The harmonic order of the time-series regression 38 | x (numpy.ndarray): array of x coordinates 39 | y (numpy.ndarray): array of y coordinates 40 | sensitivity (float): sensitivity of the monitoring. Lower numbers 41 | correspond to lower sensitivity. Equivalent to significance level 42 | 'alpha' with which the boundary is computed 43 | boundary (numpy.ndarray): process boundary for each time series. 44 | Calculated from alpha and length of time series. 45 | sigma (numpy.ndarray): Standard deviation for normalized residuals in 46 | history period 47 | histsize (numpy.ndarray): Number of non-nan observations in history 48 | period 49 | n (numpy.ndarray): Total number of non-nan observations in time-series 50 | critval (float): Critical test value corresponding to the chosen 51 | sensitivity 52 | h (float): Moving window size relative to length of the history period. 53 | Can be one of 0.25, 0.5 and 1 54 | winsize (numpy.ndarray): 2D array with absolute window size. Computed as 55 | h*histsize 56 | window (numpy.ndarray): 3D array containing the current values in the 57 | window 58 | detection_date (numpy.ndarray): 2D array signalling detection date of 59 | disturbances in days since 1970-01-01 60 | 61 | Args: 62 | mask (numpy.ndarray): A 2D numpy array containing pixels that should be 63 | monitored marked as ``1`` and pixels that should be excluded (marked 64 | as ``0``). Typically a stable forest mask when doing forest disturbance 65 | monitoring. 
    def __init__(self, trend=True, harmonic_order=2, sensitivity=0.05,
                 mask=None, h=0.25, **kwargs):
        """Instantiate a MOSUM monitor and pre-compute its critical value.

        The state arrays (``sigma``, ``histsize``, ``n``, ``winsize``,
        ``window``) are normally created by ``fit()``; they are read from
        ``kwargs`` here so that a fitted instance can be restored via
        ``.from_netcdf()``.
        """
        super().__init__(mask=mask,
                         trend=trend,
                         harmonic_order=harmonic_order,
                         **kwargs)
        self.sensitivity = sensitivity
        # Critical test value is fixed at instantiation; period=10 and the
        # 'max' functional are hard-coded here -- presumably the defaults of
        # the monitoring critical-value tables (TODO confirm against
        # _mosum_ols_test_crit's lookup table)
        self.critval = _mosum_ols_test_crit(sensitivity, h=h,
                                            period=10, functional='max')
        self.sigma = kwargs.get('sigma')
        self.histsize = kwargs.get('histsize')
        self.n = kwargs.get('n')
        self.h = h
        self.winsize = kwargs.get('winsize')
        self.window = kwargs.get('window')
        self.monitoring_strategy = 'MOSUM'

    def get_process(self):
        """Return the MOSUM process: per-pixel sum of the normalized
        residuals currently held in the moving window (NaNs ignored)."""
        return np.nansum(self.window, axis=0)

    def set_process(self, x):
        # Intentional no-op: the process is always derived from
        # ``self.window``; a setter is still needed so that generic
        # base-class code can assign ``process`` without raising
        # (NOTE(review): confirm against the base class behaviour)
        pass

    # ``process`` is computed on the fly from the window buffer
    process = property(get_process, set_process)

    def fit(self, dataarray, method='ROC', alpha=0.05, **kwargs):
        """Stable history model fitting

        If method ``'ROC'`` is used for fitting, the argument ``alpha`` has
        to be passed.

        Args:
            dataarray (xr.DataArray): xarray Dataarray including the historic
                data to be fitted
            method (string): Regression to use. See ``_fit()`` for info.
            alpha (float): Significance level for ``'ROC'`` stable fit.
            **kwargs: to be passed to ``_fit``
        """
        self.set_xy(dataarray)
        X = self.build_design_matrix(dataarray, trend=self.trend,
                                     harmonic_order=self.harmonic_order)
        self.beta, residuals = self._fit(X, dataarray,
                                         method=method,
                                         alpha=alpha,
                                         **kwargs)

        # histsize is necessary for normalization of residuals,
        # n is necessary for boundary calculation
        self.histsize = np.sum(~np.isnan(residuals), axis=0) \
            .astype(np.uint16)
        self.histsize[self.mask != 1] = 0
        # Absolute window size = relative size h * number of history samples
        self.winsize = np.floor(self.histsize * self.h).astype(np.int16)
        self.n = self.histsize
        self.boundary = np.full_like(self.histsize, np.nan, dtype=np.float32)
        # ddof equals the number of regressors, i.e. residual degrees of
        # freedom are histsize - n_regressors
        self.sigma = np.nanstd(residuals, axis=0, ddof=X.shape[1])
        # calculate normalized residuals; division warnings are silenced for
        # pixels with sigma == 0 or histsize == 0 (e.g. outside the mask)
        with np.errstate(divide='ignore', invalid='ignore'):
            residuals_ = residuals / (self.sigma * np.sqrt(self.histsize))
        # TODO self.window can be converted to property to allow for safe
        # application of scaling factor with getter and setter
        self.window = _mosum_init_window(residuals_, self.winsize)

    def _update_process(self, residuals, is_valid):
        """Update process
        (Isn't actually updating process directly, but is updating the values
        from which the process gets calculated)

        Args:
            residuals (np.ndarray): 2D array of residuals of the incoming
                observation
            is_valid (np.ndarray): 2D boolean array; True where the new
                observation is usable and monitoring is active
        """
        # get valid indices
        valid_idx = np.where(is_valid)

        # ``self.window`` is used as a per-pixel ring buffer:
        # (n - histsize) observations have been monitored so far, so
        # (n - histsize) mod winsize is the oldest slot, which the new
        # normalized residual overwrites
        with np.errstate(divide='ignore', invalid='ignore'):
            change_idx = np.mod(self.n-self.histsize, self.winsize)[valid_idx]
            residuals_norm = residuals / (self.sigma * np.sqrt(self.histsize))
        self.window[change_idx, valid_idx[0], valid_idx[1]] = residuals_norm[valid_idx]

        # calculate boundary: critval * sqrt(2 * log(n / histsize)),
        # where the log term is clamped to 1 (via ``out=log_out``) as long
        # as n/histsize <= e, i.e. the boundary never drops below
        # critval * sqrt(2)
        self.n = self.n + is_valid
        x = self.n / self.histsize
        log_out = np.ones_like(x)
        self.boundary = np.where(is_valid,
                                 self.critval * np.sqrt(
                                     2 * np.log(x, out=log_out,
                                                where=(x > np.exp(1)))),
                                 self.boundary)
import numpy as np

from nrt.fit_methods import rirls, ols
from nrt.log import logger


def shewhart(X, y, L=5, **kwargs):
    """Remove outliers using a Shewhart control chart

    As described in Brooks et al. 2014, following an initial OLS fit, outliers
    are identified using a shewhart control chart and removed.

    Note:
        ``y`` is modified in place: flagged observations are set to np.nan
        directly in the input array, which is also returned.

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables
        L (float): control limit used for outlier filtering. Must be a positive
            float. Lower values indicate stricter filtering. Residuals larger
            than L*sigma will get screened out
        **kwargs: not used

    Returns:
        y(np.ndarray): Dependant variables with outliers set to np.nan
    """
    # Only the residuals of the initial fit are needed for screening
    _, residuals_full = ols(X, y)
    # Shewhart chart to get rid of outliers (clouds etc)
    sigma = np.nanstd(residuals_full, axis=0)
    shewhart_mask = np.abs(residuals_full) > L * sigma
    y[shewhart_mask] = np.nan
    return y


def ccdc_rirls(X, y, green, swir, scaling_factor=1, **kwargs):
    """Screen for missed clouds and other outliers using green and SWIR band

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        green (np.ndarray): 2D array containing spectral values
        swir (np.ndarray): 2D array containing spectral values (~1.55-1.75um)
        scaling_factor (int): Scaling factor to bring green and swir values
            to reflectance values between 0 and 1
        **kwargs: passed on to ``rirls``

    Returns:
        np.ndarray: y with outliers set to np.nan
    """
    # 1. estimate time series model using rirls for green and swir
    # TODO could be sped up, since masking is the same for green and swir
    g_beta, g_residuals = rirls(X, green, **kwargs)
    s_beta, s_residuals = rirls(X, swir, **kwargs)
    # Update mask using thresholds: positive green residuals indicate clouds,
    # negative swir residuals indicate shadows (0.04 reflectance units)
    is_outlier = np.logical_or(g_residuals > 0.04*scaling_factor,
                               s_residuals < -0.04*scaling_factor)

    # Guard against division by zero when green contains no valid observation
    n_valid = np.count_nonzero(~np.isnan(green))
    removed = np.count_nonzero(is_outlier) / n_valid if n_valid else 0.0
    if removed > 0.5:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning('More than 50% of pixels have been removed as outliers. '
                       'Check if scaling_factor has been set correctly.')
    logger.debug('%.2f%% of (non nan) pixels removed.',
                 removed * 100)

    y[is_outlier] = np.nan
    return y
import numba
import numpy as np


@numba.jit(nopython=True, cache=True, parallel=True)
def nanlstsq(X, y):
    """Return the least-squares solution to a linear matrix equation

    Analog to ``numpy.linalg.lstsq`` for dependant variable containing ``Nan``

    Note:
        For best performances of the multithreaded implementation, it is
        recommended to limit the number of threads used by MKL or OpenBLAS to 1.
        This avoids over-subscription, and improves performances.
        By default the function will use all cores available; the number of cores
        used can be controled using the ``numba.set_num_threads`` function or
        by modifying the ``NUMBA_NUM_THREADS`` environment variable

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables (must not
            contain NaN; only NaNs in ``y`` are handled)
        y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables

    Examples:
        >>> import os
        >>> # Adjust linear algebra configuration (only one should be required
        >>> # depending on how numpy was installed/compiled)
        >>> os.environ['OPENBLAS_NUM_THREADS'] = '1'
        >>> os.environ['MKL_NUM_THREADS'] = '1'
        >>> import numpy as np
        >>> from sklearn.datasets import make_regression
        >>> from nrt.stats import nanlstsq
        >>> # Generate random data
        >>> n_targets = 1000
        >>> n_features = 2
        >>> X, y = make_regression(n_samples=200, n_features=n_features,
        ...                        n_targets=n_targets)
        >>> # Add random nan to y array
        >>> y.ravel()[np.random.choice(y.size, 5*n_targets, replace=False)] = np.nan
        >>> # Run the regression
        >>> beta = nanlstsq(X, y)
        >>> assert beta.shape == (n_features, n_targets)

    Returns:
        np.ndarray: Least-squares solution, ignoring ``Nan``
    """
    beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    # One independent regression per column of y, distributed over threads
    for idx in numba.prange(y.shape[1]):
        # subset y and X, dropping observations where y is NaN
        isna = np.isnan(y[:,idx])
        X_sub = X[~isna]
        y_sub = y[~isna,idx]
        # Solve the normal equations (X'X) beta = X'y; np.linalg.solve raises
        # LinAlgError when X_sub'X_sub is singular (e.g. an all-NaN column)
        beta[:, idx] = np.linalg.solve(np.dot(X_sub.T, X_sub), np.dot(X_sub.T, y_sub))
    return beta


@numba.jit(nopython=True, cache=True)
def mad(resid, c=0.6745):
    """Returns Median-Absolute-Deviation (MAD) for residuals

    Args:
        resid (np.ndarray): residuals
        c (float): scale factor to get to ~standard normal (default: 0.6745)
            (i.e. 1 / 0.75iCDF ~= 1.4826 = 1 / 0.6745)
    Returns:
        float: MAD 'robust' variance estimate

    Reference:
        http://en.wikipedia.org/wiki/Median_absolute_deviation
    """
    # Return median absolute deviation adjusted sigma
    return np.nanmedian(np.fabs(resid - np.nanmedian(resid))) / c

# Weight scaling methods
@numba.jit(nopython=True, cache=True)
def bisquare(resid, c=4.685):
    """Weight residuals using bisquare weight function

    Args:
        resid (np.ndarray): residuals to be weighted
        c (float): tuning constant for Tukey's Biweight (default: 4.685)

    Returns:
        weight (ndarray): weights for residuals

    Reference:
        http://statsmodels.sourceforge.net/stable/generated/statsmodels.robust.norms.TukeyBiweight.html
    """
    # Weight where abs(resid) < c; otherwise 0
    return (np.abs(resid) < c) * (1 - (resid / c) ** 2) ** 2


@numba.jit(nopython=True, cache=True)
def erfcc(x):
    """Complementary error function.

    Rational polynomial approximation of erfc (scalar input); used instead
    of scipy.special.erfc so the caller stays numba nopython-compatible.
    """
    z = np.abs(x)
    t = 1. / (1. + 0.5*z)
    r = t * np.exp(-z*z-1.26551223+t*(1.00002368+t*(.37409196+
        t*(.09678418+t*(-.18628806+t*(.27886807+
        t*(-1.13520398+t*(1.48851587+t*(-.82215223+
        t*.17087277)))))))))
    # erfc is computed for |x|; exploit erfc(-x) = 2 - erfc(x) for x < 0
    if x >= 0.:
        return r
    else:
        return 2. - r


@numba.jit(nopython=True, cache=True)
def ncdf(x):
    """Normal cumulative distribution function
    Source: Stackoverflow Unknown,
    https://stackoverflow.com/a/809402/12819237"""
    return 1. - 0.5*erfcc(x/(2**0.5))


@numba.jit(nopython=True, cache=True)
def nan_percentile_axis0(arr, percentiles):
    """Faster implementation of np.nanpercentile

    This implementation always takes the percentile along axis 0.
    Uses numba to speed up the calculation by more than 7x.

    Function is equivalent to np.nanpercentile(arr, percentiles, axis=0)

    Args:
        arr (np.ndarray): 2D array to calculate percentiles for
        percentiles (np.ndarray): 1D array of percentiles to calculate

    Returns:
        np.ndarray: Array with first dimension corresponding to values passed
            in percentiles

    """
    shape = arr.shape
    # Flatten all trailing dimensions so each column is one time series
    arr = arr.reshape((arr.shape[0], -1))
    out = np.empty((len(percentiles), arr.shape[1]))
    for i in range(arr.shape[1]):
        out[:,i] = np.nanpercentile(arr[:,i], percentiles)
    # Restore the original trailing dimensions
    shape = (out.shape[0], *shape[1:])
    return out.reshape(shape)
import datetime
import functools
import inspect
from math import pi

import pandas as pd
import numpy as np


def build_regressors(dates, trend=True, harmonic_order=3):
    """Build the design matrix (X) from a list or an array of datetimes

    Trend assumes temporal resolution no finer than daily
    Harmonics assume annual cycles

    Args:
        dates (pandas.DatetimeIndex): The dates to use for building regressors
        trend (bool): Whether to add a trend component
        harmonic_order (int): The order of the harmonic component

    Returns:
        numpy.ndarray: A design matrix
    """
    dates = dates.sort_values()
    # columns: intercept [+ trend] + (cos, sin) per harmonic order
    shape = (len(dates), 1 + trend + 2*harmonic_order)
    X = np.zeros(shape, dtype=float)
    # Add intercept (Is that actually required?)
    X[:,0] = 1
    if trend:
        # Days elapsed since Unix epoch. Note: the explicit date string is
        # used because pd.Timestamp(1970) means 1970 *nanoseconds* after
        # epoch, not the year 1970 (same .days result, but misleading)
        origin = pd.Timestamp('1970-01-01')
        X[:,1] = (dates - origin).days
    if harmonic_order:
        indices = range(1 + trend, 1 + trend + 2 * harmonic_order)
        # Array of decimal dates
        ddates = datetimeIndex_to_decimal_dates(dates)
        # Allocate array
        X_harmon = np.empty((len(dates), harmonic_order))
        for i in range(harmonic_order):
            X_harmon[:,i] = 2 * np.pi * ddates * (i + 1)
        X_harmon = np.concatenate([np.cos(X_harmon), np.sin(X_harmon)], 1)
        X[:, indices] = X_harmon
    return X


def dt_to_decimal(dt):
    """Helper to build a decimal date from a datetime object
    """
    year = dt.year
    begin = datetime.datetime(year, 1, 1)
    end = datetime.datetime(year, 12, 31)
    return year + (dt - begin)/(end - begin)


def datetimeIndex_to_decimal_dates(dates):
    """Convert a pandas datetime index to decimal dates"""
    years = dates.year
    first_year_day = pd.to_datetime({'year':years, 'day':1, 'month':1})
    last_year_day = pd.to_datetime({'year':years, 'day':31, 'month':12})
    ddates = years + (dates - first_year_day)/(last_year_day - first_year_day)
    return np.array(ddates, dtype=float)


def numba_kwargs(func):
    """
    Decorator which enables passing of kwargs to jitted functions by selecting
    only those kwargs that are available in the decorated functions signature
    """
    # Inspect the signature once at decoration time instead of once per
    # keyword argument on every call
    accepted = set(inspect.signature(func).parameters.keys())

    @functools.wraps(func)  # preserve name/docstring of the jitted function
    def wrapper(*args, **kwargs):
        # Only pass those kwargs that func takes
        # as positional or keyword arguments
        select_kwargs = {k: v for k, v in kwargs.items() if k in accepted}
        return func(*args, **select_kwargs)
    return wrapper
this module implement functionality necessary for 4 | CUSUM and MOSUM monitoring as implemented in the R packages strucchange and 5 | bFast. 6 | 7 | Portions of this module are derived from Chris Holden's pybreakpoints package. 8 | See the copyright statement below. 9 | """ 10 | # Copyright (C) 2022 European Union (Joint Research Centre) 11 | # 12 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 13 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 14 | # You may not use this work except in compliance with the Licence. 15 | # You may obtain a copy of the Licence at: 16 | # 17 | # https://joinup.ec.europa.eu/software/page/eupl 18 | # 19 | # Unless required by applicable law or agreed to in writing, software 20 | # distributed under the Licence is distributed on an "AS IS" basis, 21 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | # See the Licence for the specific language governing permissions and 23 | # limitations under the Licence. 24 | 25 | ############################################################################### 26 | # BSD 3-Clause License 27 | # 28 | # Copyright (c) 2018, Chris Holden 29 | # All rights reserved. 30 | # 31 | # Redistribution and use in source and binary forms, with or without 32 | # modification, are permitted provided that the following conditions are met: 33 | # 34 | # * Redistributions of source code must retain the above copyright notice, this 35 | # list of conditions and the following disclaimer. 36 | # 37 | # * Redistributions in binary form must reproduce the above copyright notice, 38 | # this list of conditions and the following disclaimer in the documentation 39 | # and/or other materials provided with the distribution. 40 | # 41 | # * Neither the name of the copyright holder nor the names of its 42 | # contributors may be used to endorse or promote products derived from this 43 | # software without specific prior written permission. 
import numpy as np
import numba
from scipy import optimize
from scipy.stats import norm

from nrt.stats import ncdf
from nrt import data


@numba.jit(nopython=True, cache=True)
def history_roc(X, y, alpha=0.05, crit=0.9478982340418134):
    """Reverse Ordered Rec-CUSUM check for stable periods

    Checks for stable periods by calculating recursive OLS-Residuals
    (see ``_recresid()``) on the reversed X and y matrices. If the cumulative
    sum of the residuals crosses a boundary, the index of y where this
    structural change occured is returned.

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, ) np.ndarray): Vector of dependant variables (single
            time series)
        alpha (float): Significance level for the boundary
            (probability of type I error)
        crit (float): Critical value corresponding to the chosen alpha. Can be
            calculated with ``_cusum_rec_test_crit``.
            Default is the value for alpha=0.05

    Returns:
        int: Index of structural change in y.
            ``0`` - y completely stable
            ``>0`` - y stable after this index
    """
    # Index, where instability in time-series is detected
    # 0: time-series completely stable
    # >0: stable after this index
    # The series is reversed so recent observations initialize the recursion
    process = _cusum_rec_efp(X[::-1], y[::-1])
    stat = _cusum_rec_sctest(process)
    # p-value of the test statistic under the Brownian motion null
    stat_pvalue = _brownian_motion_pvalue(stat, 1)
    if stat_pvalue < alpha:
        boundary = _cusum_rec_boundary(process, crit)
        # First boundary crossing in reversed time, mapped back to an index
        # of the original (unreversed) series
        return len(process) - np.where(np.abs(process) > boundary)[0].min()
    else:
        return 0


# REC-CUSUM
@numba.jit(nopython=True, cache=True)
def _brownian_motion_pvalue(x, k):
    """ Return pvalue for some given test statistic """
    # TODO: Make generic, add "type='Brownian Motion'"?
    # Small statistics use a linear approximation; larger ones the series
    # expansion of the boundary-crossing probability
    if x < 0.3:
        p = 1 - 0.1464 * x
    else:
        p = 2 * (1 -
                 ncdf(3 * x) +
                 np.exp(-4 * x ** 2) * (ncdf(x) + ncdf(5 * x) - 1) -
                 np.exp(-16 * x ** 2) * (1 - ncdf(x)))
    # Bonferroni-style correction for k independent tests
    return 1 - (1 - p) ** k


@numba.jit(nopython=True, cache=True)
def _cusum_rec_boundary(x, crit=0.9478982340418134):
    """ Equivalent to ``strucchange::boundary.efp``` for Rec-CUSUM

    The boundary is linear in time: crit at t=0, growing to 3*crit at t=n-1.

    Args:
        x (np.ndarray): Process values
        crit (float): Critical value as computed by _cusum_rec_test_crit.
            Default is the value for alpha=0.05
    """
    n = x.size
    bound = crit
    boundary = (bound + (2 * bound * np.arange(0, n) / (n - 1)))

    return boundary


def _cusum_rec_test_crit(alpha=0.05, **kwargs):
    """ Return critical test statistic value for some alpha

    Inverts ``_brownian_motion_pvalue`` numerically via root finding.
    """
    return optimize.brentq(lambda _x: _brownian_motion_pvalue(_x, 1) - alpha, 0, 20)


def _cusum_ols_test_crit(alpha):
    """ Return critical test statistic value for some alpha

    Minimizes the absolute deviation of the OLS-CUSUM crossing probability
    from alpha (golden section search on a unimodal objective).
    """
    return optimize.golden(lambda _x: np.abs(
        2 * (norm.cdf(_x) - _x * norm.pdf(_x)) + alpha - 2), brack=(0, 10))


def _mosum_ols_test_crit(alpha, h=0.5, period=10, functional='max'):
    """Returns critical test value

    Values are interpolated from the pre-computed critical value tables
    shipped with ``nrt.data`` (``mre_crit_table``).

    Args:
        alpha (float): Significance value (0-1)
        h (float): Relative window size. One of (0.25, 0.5, 1)
        period (int): Maximum monitoring period (2, 4, 6, 8, 10)
        functional (str): Functional type (either 'max' or 'range')

    Returns:
        (float) Critical test value for parameters

    Raises:
        ValueError: If ``alpha``, ``h`` or ``period`` are outside the range
            covered by the table.
    """
    if not 0.001 <= alpha <= 0.05:
        raise ValueError("'alpha' needs to be between [0.001,0.05]")
    crit_table = data.mre_crit_table()
    try:
        crit_values = crit_table[str(h)][str(period)][functional]
    except KeyError:
        raise ValueError("'h' needs to be in (0.25, 0.5, 1) and "
                         "'period' in (2, 4, 6, 8, 10).")
    sig_level = crit_table.get('sig_levels')
    # Linear interpolation between tabulated significance levels
    return np.interp(1 - alpha, sig_level, crit_values)


@numba.jit(nopython=True, cache=True)
def _mosum_init_window(residuals, winsize):
    """Initializes MOSUM moving window

    Args:
        residuals (np.ndarray): 3D array containing normalized residuals
        winsize (np.ndarray): 2D array containing the absolute window size for
            each time-series in residuals
    Returns:
        (np.ndarray) Array with length of winsize.max(). Contains as many of the
        last non nan values in the time series as specified by winsize. Padded
        with 0s where winsize is smaller than winsize.max().
    """
    x = winsize.max()
    res = np.zeros((x, residuals.shape[1], residuals.shape[2]))
    # Only pixels with a non-zero window (i.e. inside the mask) are filled
    for i, j in zip(*np.where(winsize > 0)):
        residuals_ = residuals[:, i, j]
        winsize_ = winsize[i, j]
        # Keep the winsize_ most recent non-NaN residuals
        residuals_ = residuals_[~np.isnan(residuals_)]
        res[:winsize_, i, j] = residuals_[-winsize_:]
    return res


@numba.jit(nopython=True, cache=True)
def _cusum_rec_efp(X, y):
    """ Equivalent to ``strucchange::efp`` for Rec-CUSUM """
    # Run "efp"
    n, k = X.shape
    # +1 on the number of regressors -- mirrors strucchange's behaviour
    # (TODO confirm rationale; presumably accounts for the variance estimate)
    k = k+1
    # Standardized recursive residuals; the first k are dropped
    w = _recresid(X, y, k)[k:]
    sigma = np.std(w)
    w = np.concatenate((np.array([0]), w))
    # Cumulative sum scaled to a standard Brownian motion
    return np.cumsum(w) / (sigma * (n - k) ** 0.5)


@numba.jit(nopython=True, cache=True)
def _cusum_rec_sctest(x):
    """ Equivalent to ``strucchange::sctest`` for Rec-CUSUM """
    x = x[1:]
    # Scale the process by the linear boundary before taking the maximum
    j = np.linspace(0, 1, x.size + 1)[1:]
    x = x * 1 / (1 + 2 * j)
    stat = np.abs(x).max()

    return stat


@numba.jit(nopython=True, cache=True)
def _recresid(X, y, span):
    """ Return standardized recursive residuals for y ~ X

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        span (int): Minimum number of observations for initial regression.

    Returns:
        (np.ndarray) containing recursive residuals standardized by
        prediction error variance

    Notes:
        For a matrix :math:`X_t` of :math:`T` total observations of :math:`n`
        variables, the :math:`t` th recursive residual is the forecast prediction
        error for :math:`y_t` using a regression fit on the first :math:`t - 1`
        observations. Recursive residuals are scaled and standardized so they are
        :math:`N(0, 1)` distributed.
        Using notation from Brown, Durbin, and Evans (1975) and Judge, et al
        (1985):
        .. math::
            w_r =
                \\frac{y_r - \\boldsymbol{x}_r^{\\prime}\\boldsymbol{b}_{r-1}}
                      {\\sqrt{(1 + \\boldsymbol{x}_r^{\\prime}
                       S_{r-1}\\boldsymbol{x}_r)}}
                =
                \\frac
                    {y_r - \\boldsymbol{x}_r^{\\prime}\\boldsymbol{b}_r}
                    {\\sqrt{1 - \\boldsymbol{x}_r^{\\prime}S_r\\boldsymbol{x}_r}}
            r = k + 1, \\ldots, T,
        where :math:`S_{r}` is the residual sum of squares after
        fitting the model on :math:`r` observations.
        A quick way of calculating :math:`\\boldsymbol{b}_r` and
        :math:`S_r` is using an update formula (Equations 4 and 5 in
        Brown, Durbin, and Evans; Equation 5.5.14 and 5.5.15 in Judge et al):
        .. math::
            \\boldsymbol{b}_r
                =
                b_{r-1} +
                \\frac
                    {S_{r-1}\\boldsymbol{x}_j
                     (y_r - \\boldsymbol{x}_r^{\\prime}\\boldsymbol{b}_{r-1})}
                    {1 + \\boldsymbol{x}_r^{\\prime}S_{r-1}x_r}
        .. math::
            S_r =
                S_{j-1} -
                \\frac{S_{j-1}\\boldsymbol{x}_r\\boldsymbol{x}_r^{\\prime}S_{j-1}}
                      {1 + \\boldsymbol{x}_r^{\\prime}S_{j-1}\\boldsymbol{x}_r}

    See Also:
        statsmodels.stats.diagnostic.recursive_olsresiduals
    """
    nobs, nvars = X.shape

    recresid_ = np.nan * np.zeros((nobs))
    recvar = np.nan * np.zeros((nobs))

    X0 = X[:span, :]
    y0 = y[:span]

    # Initial fit on the first `span` observations
    XTX_j = np.linalg.inv(np.dot(X0.T, X0))
    XTY = np.dot(X0.T, y0)
    beta = np.dot(XTX_j, XTY)

    yhat_j = np.dot(X[span - 1, :], beta)
    recresid_[span - 1] = y[span - 1] - yhat_j
    recvar[span - 1] = 1 + np.dot(X[span - 1, :],
                                  np.dot(XTX_j, X[span - 1, :]))
    for j in range(span, nobs):
        x_j = X[j:j+1, :]
        y_j = y[j]

        # Prediction with previous beta
        resid_j = y_j - np.dot(x_j, beta)

        # Update inverse of X'X without re-inverting (Sherman-Morrison style)
        XTXx_j = np.dot(XTX_j, x_j.T)
        f_t = 1 + np.dot(x_j, XTXx_j)
        XTX_j = XTX_j - np.dot(XTXx_j, XTXx_j.T) / f_t  # eqn 5.5.15

        beta = beta + (XTXx_j * resid_j / f_t).ravel()  # eqn 5.5.14
        recresid_[j] = resid_j.item()
        recvar[j] = f_t.item()

    # Standardize by the prediction error variance
    return recresid_ / np.sqrt(recvar)
= [ 20 | "Programming Language :: Python :: 3", 21 | "Programming Language :: Python :: 3.9", 22 | "Programming Language :: Python :: 3.10", 23 | "Programming Language :: Python :: 3.11", 24 | "Programming Language :: Python :: 3.12", 25 | "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)" 26 | ] 27 | requires-python = ">=3.9" 28 | dependencies = [ 29 | "numpy", 30 | "scipy", 31 | "xarray", 32 | "rasterio", 33 | "netCDF4", 34 | "numba!=0.59.*", 35 | "pandas", 36 | "affine", 37 | "nrt-data" 38 | ] 39 | 40 | [project.urls] 41 | "Homepage" = "https://github.com/ec-jrc/nrt.git" 42 | 43 | [project.optional-dependencies] 44 | tests = ["pytest"] 45 | docs = [ 46 | "sphinx==7.4.7", 47 | "dask", 48 | "sphinx_rtd_theme==2.0.0", 49 | "matplotlib==3.9.1", 50 | "sphinx-gallery==0.17.0" 51 | ] 52 | 53 | [tool.setuptools.packages.find] 54 | where = ["."] 55 | 56 | [tool.setuptools_scm] 57 | fallback_version = "9999" 58 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # pytest.ini 2 | [pytest] 3 | markers = 4 | ewma: All test relating to EWMA monitoring 5 | iqr: All test relating to IQR monitoring 6 | cusum: All test relating to CUSUM monitoring 7 | mosum: All test relating to MOSUM monitoring 8 | ccdc: All test relating to CCDC monitoring 9 | testpaths = 10 | tests 11 | -------------------------------------------------------------------------------- /tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /tests/integration_tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint 
import pkg_resources
import datetime

import pytest
import xarray as xr
import rasterio
import numpy as np
import pandas as pd

from nrt import data


@pytest.fixture
def history_dataarray():
    """History dataset over romania

    1 squared km over a forest in Romania.
    NDVI with cloud already filtered (appear as np.nan) in the arrays
    Two years of data (2015-01-01 to 2016-12-31) are selected as the
    stable history period
    """
    ds = data.romania_20m()
    # NDVI from the 20 m bands (B8A: NIR, B04: red)
    ds['ndvi'] = (ds.B8A - ds.B04) / (ds.B8A + ds.B04)
    # Keep only scene classification (SCL) classes 4, 5 and 7; everything
    # else (clouds, shadows, ...) becomes NaN -- NOTE(review): confirm the
    # intended class meanings against the Sentinel-2 SCL definition
    ds = ds.where(ds.SCL.isin([4,5,7]))
    history = ds.sel(time=slice(datetime.datetime(2015, 1, 1),
                                datetime.datetime(2016, 12, 31)))
    return history


@pytest.fixture
def ndvi_history(history_dataarray):
    """NDVI DataArray of the history period over Romania
    """
    return history_dataarray.ndvi


@pytest.fixture
def green_swir_history(history_dataarray):
    """Green (B03) and SWIR (B11) DataArrays of the history period
    """
    return history_dataarray.B03, history_dataarray.B11


@pytest.fixture
def ndvi_monitoring_numpy():
    """NDVI monitoring observations (2017-01-01 to 2021-01-15)

    Returns:
        tuple: (values, dates) where values is a numpy array of NDVI
        observations and dates the corresponding list of datetimes
    """
    ds = data.romania_20m()
    ds['ndvi'] = (ds.B8A - ds.B04) / (ds.B8A + ds.B04)
    ds = ds.where(ds.SCL.isin([4,5,7]))
    ndvi_monitoring = ds.ndvi.sel(time=slice(datetime.datetime(2017, 1, 1),
                                             datetime.datetime(2021, 1, 15)))
    values = ndvi_monitoring.values
    # Convert numpy datetime64 timestamps to datetime.datetime objects
    dates = ndvi_monitoring.time.values.astype('datetime64[s]').tolist()
    return values, dates


@pytest.fixture
def forest_mask():
    """Binary forest mask over romania (1 where cover > 30 %)
    """
    arr = data.romania_forest_cover_percentage()
    return (arr > 30).astype(np.int8)
6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 15 | import numpy as np 16 | 17 | from nrt.monitor.ccdc import CCDC 18 | 19 | # For now, because fixtures can't be parametrized and CCDC needs 20 | # green_swir_history, this is done seperately. The package pytest-lazy-fixture 21 | # could be used to also parametrize CCDC. 22 | 23 | 24 | def test_fit_monitor(ndvi_history, green_swir_history, ndvi_monitoring_numpy, 25 | forest_mask): 26 | green, swir = green_swir_history 27 | ccdc_monitor = CCDC(mask=forest_mask) 28 | ccdc_monitor.fit(dataarray=ndvi_history, green=green, swir=swir, 29 | scaling_factor=10000) 30 | assert ccdc_monitor.beta.shape[0] == 6 # 2*2 harmonics + intercept + trend 31 | for array, date in zip(*ndvi_monitoring_numpy): 32 | ccdc_monitor.monitor(array=array, date=date) 33 | ccdc_monitor._report(layers=['mask', 'detection_date'], 34 | dtype=np.int16) 35 | 36 | 37 | def test_netcdf(ndvi_history, green_swir_history, tmp_path): 38 | nc_path = tmp_path / 'ccdc.nc' 39 | green, swir = green_swir_history 40 | ccdc_monitor = CCDC() 41 | ccdc_monitor.fit(dataarray=ndvi_history, green=green, swir=swir, 42 | scaling_factor=10000) 43 | 44 | ccdc_monitor.to_netcdf(nc_path) 45 | ccdc_load = CCDC.from_netcdf(nc_path) 46 | assert ccdc_monitor == ccdc_load 47 | -------------------------------------------------------------------------------- /tests/integration_tests/test_monitor.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 
1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 15 | 16 | from pathlib import Path 17 | import pytest 18 | import numpy as np 19 | 20 | from nrt.monitor import iqr, ewma, cusum, mosum 21 | 22 | monitor_params = { 23 | 'EWMA': pytest.param(ewma.EWMA, {'trend': False, 'L': 5}, 5, 24 | marks=pytest.mark.ewma), 25 | 'IQR': pytest.param(iqr.IQR, {'trend': False, 'harmonic_order': 1}, 3, 26 | marks=pytest.mark.iqr), 27 | 'CUSUM': pytest.param(cusum.CuSum, {}, 6, 28 | marks=pytest.mark.cusum), 29 | 'MOSUM': pytest.param(mosum.MoSum, {}, 6, 30 | marks=pytest.mark.mosum) 31 | } 32 | 33 | @pytest.mark.parametrize('monitor_cls, kwargs, beta', monitor_params.values(), 34 | ids=monitor_params.keys()) 35 | def test_fit_monitor(monitor_cls, kwargs, beta, 36 | ndvi_history, ndvi_monitoring_numpy, forest_mask): 37 | monitor_ = monitor_cls(**kwargs, mask=forest_mask) 38 | monitor_.fit(dataarray=ndvi_history, **kwargs) 39 | assert monitor_.beta.shape[0] == beta 40 | for array, date in zip(*ndvi_monitoring_numpy): 41 | monitor_.monitor(array=array, date=date) 42 | monitor_._report(layers=['mask', 'detection_date'], 43 | dtype=np.int16) 44 | 45 | 46 | @pytest.mark.parametrize('monitor_cls, kwargs, beta', monitor_params.values(), 47 | ids=monitor_params.keys()) 48 | def test_netcdf(monitor_cls, kwargs, beta, ndvi_history, tmp_path): 49 | nc_path = tmp_path / 'monitor.nc' 50 | monitor_ = 
monitor_cls(**kwargs) 51 | monitor_.fit(dataarray=ndvi_history, **kwargs) 52 | 53 | monitor_.to_netcdf(nc_path) 54 | monitor_load = monitor_cls().from_netcdf(nc_path) 55 | assert monitor_ == monitor_load 56 | 57 | -------------------------------------------------------------------------------- /tests/unit_tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 European Union (Joint Research Centre) 2 | # 3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by 4 | # the European Commission - subsequent versions of the EUPL (the "Licence"); 5 | # You may not use this work except in compliance with the Licence. 6 | # You may obtain a copy of the Licence at: 7 | # 8 | # https://joinup.ec.europa.eu/software/page/eupl 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the Licence is distributed on an "AS IS" basis, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the Licence for the specific language governing permissions and 14 | # limitations under the Licence. 15 | 16 | from pathlib import Path 17 | import pytest 18 | import numpy as np 19 | 20 | here = Path(__file__).parent 21 | 22 | @pytest.fixture 23 | def X_y_clear(X_y_intercept_slope): 24 | # adds an array indicating 'clear' pixels as True and outliers as False 25 | X, y, intercept, slope = X_y_intercept_slope 26 | clear = np.ones_like(y).astype('bool') 27 | clear[9, 0] = False 28 | clear[0, 1] = False 29 | return X, y, clear 30 | 31 | 32 | @pytest.fixture 33 | def X_y_intercept_slope(request): 34 | np.random.seed(0) 35 | slope, intercept = 2., 5. 
36 | X = np.c_[np.ones(10), np.arange(10)] 37 | y = np.array([slope * X[:, 1] + intercept, 38 | slope * X[:, 1] + intercept]) 39 | # Add noise (X_y_clear depends on the same noise) 40 | y[0, 9] = 0 41 | y[1, 0] = 23 42 | return X, y.T, intercept, slope 43 | 44 | 45 | # fixture of 2D residuals with extreme values start and end 46 | # 1D slope with extreme value and corresponding results in stability 47 | @pytest.fixture 48 | def stability_ccdc(request): 49 | np.random.seed(0) 50 | a_len = 30 51 | # build an example, where one time series has a large last value, 52 | # one a large first value, one a large slope and one just random residuals 53 | residuals = (np.random.rand(a_len, 4) - 0.5)*2 54 | residuals[0, 0] = 100 55 | residuals[-1, 1] = 100 56 | 57 | ts = np.array([ 58 | np.ones(a_len), 59 | np.ones(a_len), 60 | np.arange(a_len)*20+5, # Large slope 61 | np.ones(a_len) 62 | ]).T 63 | # add a np.nan in there: 64 | ts[int(a_len/2),3] = np.nan 65 | 66 | X = np.array([np.ones(a_len), np.arange(a_len)]).T 67 | y = ts+residuals 68 | dates = np.linspace(1, 365, a_len) 69 | result = np.array([True, False, False, True]) 70 | return X, y, dates, result 71 | 72 | 73 | @pytest.fixture 74 | def X_y_dates_romania(request): 75 | # Imported as double, to match precision of R computation 76 | X = np.loadtxt(here / 'data' / 'X.csv', delimiter=',', dtype=np.float64) 77 | y = np.loadtxt(here / 'data' / 'y.csv', delimiter=',', dtype=np.float64) 78 | dates = np.genfromtxt(here / 'data' / 'dates.csv', delimiter=',') \ 79 | .astype("datetime64[ns]") 80 | 81 | return X, y, dates 82 | 83 | 84 | # results of calculating recursive residuals of X_y_dates_romania by 85 | # strucchange package in R 86 | # Recursive Residuals for entire matrix 87 | # Code: 88 | # res_2d < - apply(y, 2, function(column){ 89 | # non_nan < - which(is.finite(column)) 90 | # y_clear < - column[non_nan] 91 | # X_clear < - X[non_nan,] 92 | # rresid_na < - rep(NA, length(column)) 93 | # 94 | # rresid < - 
#   recresid(X_clear, y_clear)
#
#   rresid_na[non_nan[ncol(X_clear) + 1:length(rresid)]] <- rresid
#   return(rresid_na)
# })
@pytest.fixture
def strcchng_rr(request):
    # Reference recursive residuals computed in R (code above); 'NA'
    # entries load as np.nan.
    return np.genfromtxt(here / 'data' / 'rr_result.csv',
                         delimiter=',', dtype=np.float64, missing_values='NA')


# Results of calculating Rec-CUSUM efp process value with efp() from package
# strucchange
# X_df <- as.data.frame(X)
# X_df$y <- y[,1]
# # Remove nan
# X_df_clear <- X_df[!is.na(X_df$y), ]
#
# level <- 0.05
#
# n <- nrow(X_df_clear)
# data_rev <- X_df_clear[n:1, ]
# y_rcus <- efp(y ~ V1 + V2 + V3 + V4 + V5, data = data_rev,
#               type = "Rec-CUSUM")
# return(y_rcus$process)
@pytest.fixture
def strcchng_efp(request):
    # Reference Rec-CUSUM empirical fluctuation process from R strucchange.
    return np.genfromtxt(here / 'data' / 'efp_result.csv',
                         delimiter=',', dtype=np.float64, missing_values='NA')


# Results of monitoring with strucchange
# res_bound_proc <- apply(y, 2, function(column)
# {
#   # convert to dataframe
#   X_df <- as.data.frame(X)
#   X_df$y <- column
#   # Split in history and monitor
#   history <- X_df[1:100, ]
#   # Remove nan
#   history_clear <- history[!is.na(history$y), ]
#   monitor_clear <- X_df[!is.na(X_df$y), ]
#
#   history_efp <- efp(y ~ V2 + V3 + V4 + V5, data = history,
#                      type = "OLS-CUSUM")
#   history_mefp <- mefp(history_efp)
#   monitor_data <- monitor(history_mefp, data=monitor_clear)
#   plot(monitor_data)
#   return(c(monitor_process = as.numeric(tail(monitor_data$process, 1)),
#            boundary = history_mefp$border(nrow(monitor_clear)),
#            histsize = history_mefp$histsize,
#            sigma = history_efp$sigma))
# })
@pytest.fixture
def cusum_result(request):
    # Rows: monitor_process, boundary, histsize, sigma (one column per series).
    return np.loadtxt(here / 'data' / 'cusum_result.csv',
                      delimiter=',', dtype=np.float64)


# Same as cusum_result only with type="OLS-MOSUM"
@pytest.fixture
def mosum_result(request):
    return np.loadtxt(here / 'data' / 'mosum_result.csv',
                      delimiter=',', dtype=np.float64)

# Test data for robust fit.
#
# First time-series can become singular if accuracy isn't sufficient
@pytest.fixture
def X_y_RLM(request):
    X = np.loadtxt(here / 'data' / 'RLM_X.csv',
                   delimiter=',', dtype=np.float64)
    y = np.loadtxt(here / 'data' / 'RLM_y.csv',
                   delimiter=',', dtype=np.float64)
    return X, y

# Result of Robust Fit with statsmodels
#
# With X, y = X_y_RLM()
# import statsmodels as sm
#
# for idx in range(y.shape[1]):
#     y_sub = y[:, idx]
#     isna = np.isnan(y_sub)
#     X_sub = X[~isna]
#     endog = y_sub[~isna]
#     rlm_model = sm.RLM(endog, X_sub, M=sm.robust.norms.TukeyBiweight())
#     rlm_results = rlm_model.fit(update_scale=True)
#     beta[:,idx] = rlm_results.params
@pytest.fixture
def sm_RLM_result(request):
    # Reference coefficients; stored as strings to preserve full double
    # precision of the statsmodels computation.
    return np.array([['2.3757569983999076', '-51.621207292381314'],
                     ['1.5919053949452396e-05', '-0.00019788972214892546'],
                     ['4.960483948314601', '-73.95341088849317'],
                     ['4.0427485592574195', '-17.66452192456504'],
                     ['1.0676653146683237', '0.579422996703399'],
                     ['-0.7172424822211365', '-49.52111301879781'],
                     ['1.2701246101474761', '-38.324020145702654'],
                     ['1.1329168669944791', '-9.034638787625045']], dtype='
# NOTE(review): the source dump is truncated at the dtype string above -- the
# end of tests/unit_tests/conftest.py and the head of
# tests/unit_tests/test_utils_efp.py (its license header, imports, and the
# opening of test_history_roc with its R-code docstring) are missing from
# view. Content resumes below mid-docstring of test_history_roc.
                      boundary(y_rcus)[-1])) + 1
    } else {
      1
    }
    return(y_start)
    })
    """
    X, y, dates = X_y_dates_romania
    result = np.array([1, 8, 49, 62, 1], dtype='float32')
    stable_idx = np.zeros(y.shape[1])
    for idx in range(y.shape[1]):
        # subset and remove nan
        is_nan = np.isnan(y[:, idx])
        _y = y[~is_nan, idx]
        _X = X[~is_nan, :]

        # get the index where the stable period starts
        stable_idx[idx] = cs.history_roc(_X, _y)

    # Result from strucchange must be subtracted by 1, because R is 1 indexed
    np.testing.assert_allclose(stable_idx, result-1)


def test_efp(X_y_dates_romania, strcchng_efp):
    """Test efp against process value of
    strucchange::efp with type='Rec-CUSUM'"""
    X, y, dates = X_y_dates_romania

    is_nan = np.isnan(y[:, 0])
    _y = y[~is_nan, 0]
    _X = X[~is_nan, :]

    # Rows are reversed to match the reversed-data efp computation in the
    # R reference (data_rev above).
    process = cs._cusum_rec_efp(_X[::-1], _y[::-1])

    result = strcchng_efp

    # Relative high tolerance, due to floating point precision
    np.testing.assert_allclose(process[X.shape[1]+2:], result[X.shape[1]+2:],
                               rtol=1e-02)


@pytest.mark.parametrize("test_input,expected", [(0.01, 3.368214),
                                                 (0.05, 2.795483),
                                                 (0.1, 2.500278)])
def test_cusum_ols_test_crit(test_input, expected):
    # Critical values of the OLS-CUSUM test for alpha = 0.01 / 0.05 / 0.1
    assert cs._cusum_ols_test_crit(test_input) == pytest.approx(expected)


# Invalid argument combinations that _mosum_ols_test_crit must reject.
mosum_crit_params = {
    'h': (pytest.raises(ValueError), {'alpha': 0.05, 'h': 0.24}),
    'alpha': (pytest.raises(ValueError), {'alpha': 0.06}),
    'period': (pytest.raises(ValueError), {'alpha': 0.05, 'period': 11}),
}

@pytest.mark.parametrize('expected, test_input', mosum_crit_params.values(),
                         ids=mosum_crit_params.keys())
def
test_mosum_ols_test_crit(expected, test_input):
    """Test edge cases: out-of-table h / alpha / period raise ValueError."""
    with expected:
        assert cs._mosum_ols_test_crit(**test_input) is not None


def test_process_boundary_cusum(X_y_dates_romania, cusum_result):
    """CUSUM process/boundary/histsize/sigma match the strucchange reference."""
    X, y, dates = X_y_dates_romania
    # make y 6 long (duplicate the first series so it reshapes to 2x3)
    y = np.insert(y, 5, values=y[:,0], axis=1)
    y_3d = y.reshape((y.shape[0], 2, -1))
    data = xr.DataArray(y_3d, dims=["time", "x", "y"], coords={"time": dates})
    # First 100 observations as history, the rest as monitoring period
    fit = data[:100]
    monitor = data[100:]
    cusum_monitor = CuSum(trend=False)
    cusum_monitor.fit(dataarray=fit, method='OLS')
    for array, date in zip(monitor.values,
                           monitor.time.values.astype('datetime64[s]').tolist()):
        cusum_monitor.monitor(array=array, date=date)

    # The [:-1] slices drop the duplicated last pixel, which has no column
    # in the R reference results.
    # Process value
    np.testing.assert_allclose(cusum_result[0],
                               cusum_monitor.process.ravel()[:-1], rtol=1e-4)
    # Boundary value
    np.testing.assert_allclose(cusum_result[1],
                               cusum_monitor.boundary.ravel()[:-1])
    # Histsize
    np.testing.assert_allclose(cusum_result[2],
                               cusum_monitor.histsize.ravel()[:-1])
    # Sigma
    np.testing.assert_allclose(cusum_result[3],
                               cusum_monitor.sigma.ravel()[:-1], rtol=1e-6)


def test_process_boundary_mosum(X_y_dates_romania, mosum_result):
    """MOSUM process/boundary/histsize/sigma match the strucchange reference."""
    X, y, dates = X_y_dates_romania
    # make y 6 long (duplicate the first series so it reshapes to 2x3)
    y = np.insert(y, 5, values=y[:,0], axis=1)
    y_3d = y.reshape((y.shape[0], 2, -1))
    data = xr.DataArray(y_3d, dims=["time", "x", "y"], coords={"time": dates})
    fit = data[:100]
    monitor = data[100:]
    mosum_monitor = MoSum(trend=False)
    mosum_monitor.fit(dataarray=fit, method='OLS')
    for array, date in zip(monitor.values,
                           monitor.time.values.astype('datetime64[s]').tolist()):
        mosum_monitor.monitor(array=array, date=date)

    # Process value (third value has a break and so diverges a lot since
    # monitoring in bFast does not stop in case there is a break)
    np.testing.assert_allclose(np.delete(mosum_result[0], 2),
                               np.delete(mosum_monitor.process.ravel(), [2,-1]),
                               rtol=1e-4)
    # Boundary value
    np.testing.assert_allclose(mosum_result[1],
                               mosum_monitor.boundary.ravel()[:-1])
    # Histsize
    np.testing.assert_allclose(mosum_result[2],
                               mosum_monitor.histsize.ravel()[:-1])
    # Sigma
    np.testing.assert_allclose(mosum_result[3],
                               mosum_monitor.sigma.ravel()[:-1], rtol=1e-6)
--------------------------------------------------------------------------------