├── .gitattributes
├── .github
└── workflows
│ └── build_and_test.yml
├── .gitignore
├── .readthedocs.yaml
├── CHANGES.txt
├── CITATION.cff
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
├── Makefile
├── _static
│ ├── concept.png
│ ├── forest_mask_bolivia.png
│ ├── iqr_results_bolivia.png
│ └── logo.png
├── api_reference.rst
├── classes.rst
├── conf.py
├── fit.rst
├── gallery
│ ├── README.rst
│ ├── dontplot_bolivia_stac.py
│ ├── plot_parallel_computing.py
│ └── plot_simulation_data.py
├── index.rst
├── make.bat
└── quickstart.rst
├── nrt
├── __init__.py
├── fit_methods.py
├── log.py
├── monitor
│ ├── __init__.py
│ ├── ccdc.py
│ ├── cusum.py
│ ├── ewma.py
│ ├── iqr.py
│ └── mosum.py
├── outliers.py
├── stats.py
├── utils.py
└── utils_efp.py
├── pyproject.toml
├── pytest.ini
└── tests
├── integration_tests
├── __init__.py
├── conftest.py
├── test_ccdc.py
└── test_monitor.py
└── unit_tests
├── conftest.py
├── data
├── RLM_X.csv
├── RLM_y.csv
├── X.csv
├── cusum_result.csv
├── dates.csv
├── efp_result.csv
├── mosum_result.csv
├── roc_history_result.csv
├── rr_result.csv
└── y.csv
├── test_fit_methods.py
├── test_outliers.py
├── test_stats.py
└── test_utils_efp.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.nc filter=lfs diff=lfs merge=lfs -text
2 | *.tif filter=lfs diff=lfs merge=lfs -text
3 |
--------------------------------------------------------------------------------
/.github/workflows/build_and_test.yml:
--------------------------------------------------------------------------------
1 | name: Build, Test, and Deploy nrt
2 |
3 | on:
4 | push:
5 | pull_request:
6 |
7 | jobs:
8 | build-and-test:
9 | runs-on: ubuntu-latest
10 | strategy:
11 | matrix:
12 | python-version: ["3.9", "3.10", "3.11", "3.12"]
13 |
14 | steps:
15 | - name: checkout repo
16 | uses: actions/checkout@v4
17 | - name: Set up Python ${{ matrix.python-version }}
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 | - name: Install dependencies and package
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install pytest wheel
25 | pip install .
26 | - name: Test with pytest
27 | run: |
28 | pytest
29 |
30 | # Deploy tagged commits that pass the test step
31 | deploy:
32 | needs: build-and-test
33 | runs-on: ubuntu-latest
34 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
35 | steps:
36 | - name: Checkout repository
37 | uses: actions/checkout@v4
38 |
39 | - name: Set up Python 3.11
40 | uses: actions/setup-python@v5
41 | with:
42 | python-version: '3.11'
43 |
44 | - name: Install pypi-publish requirements
45 | run: |
46 | python -m pip install --upgrade pip twine build
47 |
48 | - name: Build package
49 | run: |
50 | python -m build
51 |
52 | - name: Publish package to PyPI
53 | uses: pypa/gh-action-pypi-publish@v1.9.0
54 | with:
55 | password: ${{ secrets.PYPI_API_TOKEN }}
56 | user: __token__
57 |
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.pyc
3 | *.pyo
4 |
5 | # Distribution / packaging
6 | *.egg-info/
7 | dist/
8 | build/
9 |
10 | # Sphinx doc
11 | _build/
12 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-20.04
11 | tools:
12 | python: "3.11"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 | configuration: docs/conf.py
17 |
18 | # If using Sphinx, optionally build your docs in additional formats such as PDF
19 | # formats:
20 | # - pdf
21 |
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 | install:
25 | - method: pip
26 | path: .
27 | extra_requirements:
28 | - docs
29 |
--------------------------------------------------------------------------------
/CHANGES.txt:
--------------------------------------------------------------------------------
1 | Changes
2 | =======
3 |
4 | 0.3.0 (2024-08-29)
5 | ------------------
6 | - nrt.data now lives in its own namespace package (nrt-data). Subpackage removed,
7 | and namespace enabling in multiple __init__.py files
8 | - A few edits (e.g. pkg-data and MANIFEST.in) as a consequence of removing
9 | nrt.data subpackage
10 | - Transitioned from setup.py to pyproject.toml
11 | - Package version now maintained in pyproject.toml
12 | - Adapted integration fixture to new test data access method (+ band names slightly changed
13 | B4 is now B04, etc)
14 | - Version number now managed via git tags using setuptools-scm
15 | - Removed all LFS hack in .readthedocs, github actions, etc. No longer needed
16 | given that the package itself no longer contain data
17 |
18 |
19 | 0.2.1 (2024-07-15)
20 | ------------------
21 |
22 | - Various improvements to documentation and package metadata suggested by JOSS
23 | review (new example using PC archive and STAC catalogue, Contributing guidelines,
24 | explicit citation information, etc)
25 | - Small documentation improvement for CCDC monitoring
26 | - New update_mask argument and monitoring class attribute to allow monitoring to
27 | continue regardless of boundary crossing
28 | - Exclude version 0.59 of numba in requirements due to known bug in parallel accelerator.
29 | See https://github.com/numba/numba/issues/9490
30 |
31 | 0.2.0 (2024-01-15)
32 | ------------------
33 |
34 | - np.linalg.inv replaced by the more recommended np.linalg.solve in many places
35 | - Integration of numba parallel accelerator in most fitting functions (new argument
36 | to control number of threads in the .fit method of BaseNrt class)
37 | - Possibility to pass kwargs to function of data module that load xarray.Datasets
38 | objects (particularly useful to specify chunking and get a dask based object)
39 | - New example in gallery on parallel fitting
40 | - Add a minimum python version requirement (>=3.9). 3.8 no longer tested
41 | - New set of functions to generate synthetic data (single time-series and DataArrays)
42 | added to the data module
43 | - Gallery section added to the documentation, with one example on use of synthetic data
44 | for nrt simulation and computation of performance metrics
45 |
46 | 0.1.0 (2022-04-27)
47 | ------------------
48 |
49 | - The report() method can now receive a list of layers as argument to select the
50 | attributes of the monitoring instance to be retrieved and written to disk. Backward
51 | compatibility of the .report() method is not affected but backward compatibility
52 | of the private ._report() method, used to prepare the array is broken
53 |
54 | 0.0.5 (2022-03-21)
55 | ------------------
56 |
57 | - First pypi release
58 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: "1.2.0"
2 | authors:
3 | - family-names: Dutrieux
4 | given-names: Loïc
5 | orcid: "https://orcid.org/0000-0002-5058-2526"
6 | - family-names: Viehweger
7 | given-names: Jonas
8 | orcid: "https://orcid.org/0000-0002-1610-4600"
9 | doi: 10.5281/zenodo.12799278
10 | message: If you use nrt in your research or project, please cite our article in the
11 | Journal of Open Source Software.
12 | preferred-citation:
13 | authors:
14 | - family-names: Dutrieux
15 | given-names: Loïc
16 | orcid: "https://orcid.org/0000-0002-5058-2526"
17 | - family-names: Viehweger
18 | given-names: Jonas
19 | orcid: "https://orcid.org/0000-0002-1610-4600"
20 | date-published: 2024-08-15
21 | doi: 10.21105/joss.06815
22 | issn: 2475-9066
23 | issue: 100
24 | journal: Journal of Open Source Software
25 | publisher:
26 | name: Open Journals
27 | start: 6815
28 | title: "nrt: operational monitoring of satellite image time-series in
29 | Python"
30 | type: article
31 | url: "https://joss.theoj.org/papers/10.21105/joss.06815"
32 | volume: 9
33 | title: "nrt: operational monitoring of satellite image time-series in
34 | Python"
35 |
36 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to nrt
2 |
3 | Thanks for taking the time to contribute to nrt! 🎉
4 |
5 | ## Rights
6 |
7 | The EUPL v2 license (see LICENSE) applies to all contributions.
8 |
9 | ## How Can I Contribute?
10 |
11 | ### Reporting Bugs
12 |
13 | If you find a bug, please open an issue and include as much detail as possible. Include steps to reproduce the issue and any relevant logs or screenshots.
14 |
15 | ### Suggesting Enhancements
16 |
17 | Enhancement suggestions are welcome! If you have an idea to improve nrt or its documentation, please open an issue and describe your idea in detail. If possible, provide examples of how the enhancement would be used.
18 |
19 | ### Code Contributions
20 |
21 | For any contribution to the code base or the documentation, use the pull request mechanism.
22 | 1. Fork the repository: Click the 'Fork' button on the upper right corner of the repository page.
23 | 2. Apply changes to your fork.
24 | 3. Open a pull request on github
25 |
26 |
27 | Your contribution will be reviewed and discussed as part of the pull request. If approved, it will then be merged
28 | into the main branch of the repository and included in the following release.
29 |
30 |
31 | ### Testing
32 |
33 | We use `pytest` for unit tests.
34 |
35 | - Unit tests are written using the `pytest` framework.
36 | - Tests are automatically run using GitHub CI with every push and pull request.
37 | - You can run tests locally by simply calling `pytest` in the root directory of the project.
38 |
39 |
40 | ### Releasing a new version
41 |
42 | Package version is set via git tags thanks to [setuptools-scm](https://setuptools-scm.readthedocs.io/en/latest/). A new release
43 | is made for every tagged commit pushed to github and that passes unit tests.
44 | Example git tag command: `git tag -a v0.3.0 -m "version 0.3.0"`
45 |
46 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | EUROPEAN UNION PUBLIC LICENCE v. 1.2
2 | EUPL © the European Union 2007, 2016
3 |
4 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined
5 | below) which is provided under the terms of this Licence. Any use of the Work,
6 | other than as authorised under this Licence is prohibited (to the extent such
7 | use is covered by a right of the copyright holder of the Work).
8 |
9 | The Work is provided under the terms of this Licence when the Licensor (as
10 | defined below) has placed the following notice immediately following the
11 | copyright notice for the Work:
12 |
13 | Licensed under the EUPL
14 |
15 | or has expressed by any other means his willingness to license under the EUPL.
16 |
17 | 1. Definitions
18 |
19 | In this Licence, the following terms have the following meaning:
20 |
21 | - ‘The Licence’: this Licence.
22 |
23 | - ‘The Original Work’: the work or software distributed or communicated by the
24 | Licensor under this Licence, available as Source Code and also as Executable
25 | Code as the case may be.
26 |
27 | - ‘Derivative Works’: the works or software that could be created by the
28 | Licensee, based upon the Original Work or modifications thereof. This Licence
29 | does not define the extent of modification or dependence on the Original Work
30 | required in order to classify a work as a Derivative Work; this extent is
31 | determined by copyright law applicable in the country mentioned in Article 15.
32 |
33 | - ‘The Work’: the Original Work or its Derivative Works.
34 |
35 | - ‘The Source Code’: the human-readable form of the Work which is the most
36 | convenient for people to study and modify.
37 |
38 | - ‘The Executable Code’: any code which has generally been compiled and which is
39 | meant to be interpreted by a computer as a program.
40 |
41 | - ‘The Licensor’: the natural or legal person that distributes or communicates
42 | the Work under the Licence.
43 |
44 | - ‘Contributor(s)’: any natural or legal person who modifies the Work under the
45 | Licence, or otherwise contributes to the creation of a Derivative Work.
46 |
47 | - ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of
48 | the Work under the terms of the Licence.
49 |
50 | - ‘Distribution’ or ‘Communication’: any act of selling, giving, lending,
51 | renting, distributing, communicating, transmitting, or otherwise making
52 | available, online or offline, copies of the Work or providing access to its
53 | essential functionalities at the disposal of any other natural or legal
54 | person.
55 |
56 | 2. Scope of the rights granted by the Licence
57 |
58 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive,
59 | sublicensable licence to do the following, for the duration of copyright vested
60 | in the Original Work:
61 |
62 | - use the Work in any circumstance and for all usage,
63 | - reproduce the Work,
64 | - modify the Work, and make Derivative Works based upon the Work,
65 | - communicate to the public, including the right to make available or display
66 | the Work or copies thereof to the public and perform publicly, as the case may
67 | be, the Work,
68 | - distribute the Work or copies thereof,
69 | - lend and rent the Work or copies thereof,
70 | - sublicense rights in the Work or copies thereof.
71 |
72 | Those rights can be exercised on any media, supports and formats, whether now
73 | known or later invented, as far as the applicable law permits so.
74 |
75 | In the countries where moral rights apply, the Licensor waives his right to
76 | exercise his moral right to the extent allowed by law in order to make effective
77 | the licence of the economic rights here above listed.
78 |
79 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to
80 | any patents held by the Licensor, to the extent necessary to make use of the
81 | rights granted on the Work under this Licence.
82 |
83 | 3. Communication of the Source Code
84 |
85 | The Licensor may provide the Work either in its Source Code form, or as
86 | Executable Code. If the Work is provided as Executable Code, the Licensor
87 | provides in addition a machine-readable copy of the Source Code of the Work
88 | along with each copy of the Work that the Licensor distributes or indicates, in
89 | a notice following the copyright notice attached to the Work, a repository where
90 | the Source Code is easily and freely accessible for as long as the Licensor
91 | continues to distribute or communicate the Work.
92 |
93 | 4. Limitations on copyright
94 |
95 | Nothing in this Licence is intended to deprive the Licensee of the benefits from
96 | any exception or limitation to the exclusive rights of the rights owners in the
97 | Work, of the exhaustion of those rights or of other applicable limitations
98 | thereto.
99 |
100 | 5. Obligations of the Licensee
101 |
102 | The grant of the rights mentioned above is subject to some restrictions and
103 | obligations imposed on the Licensee. Those obligations are the following:
104 |
105 | Attribution right: The Licensee shall keep intact all copyright, patent or
106 | trademarks notices and all notices that refer to the Licence and to the
107 | disclaimer of warranties. The Licensee must include a copy of such notices and a
108 | copy of the Licence with every copy of the Work he/she distributes or
109 | communicates. The Licensee must cause any Derivative Work to carry prominent
110 | notices stating that the Work has been modified and the date of modification.
111 |
112 | Copyleft clause: If the Licensee distributes or communicates copies of the
113 | Original Works or Derivative Works, this Distribution or Communication will be
114 | done under the terms of this Licence or of a later version of this Licence
115 | unless the Original Work is expressly distributed only under this version of the
116 | Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee
117 | (becoming Licensor) cannot offer or impose any additional terms or conditions on
118 | the Work or Derivative Work that alter or restrict the terms of the Licence.
119 |
120 | Compatibility clause: If the Licensee Distributes or Communicates Derivative
121 | Works or copies thereof based upon both the Work and another work licensed under
122 | a Compatible Licence, this Distribution or Communication can be done under the
123 | terms of this Compatible Licence. For the sake of this clause, ‘Compatible
124 | Licence’ refers to the licences listed in the appendix attached to this Licence.
125 | Should the Licensee's obligations under the Compatible Licence conflict with
126 | his/her obligations under this Licence, the obligations of the Compatible
127 | Licence shall prevail.
128 |
129 | Provision of Source Code: When distributing or communicating copies of the Work,
130 | the Licensee will provide a machine-readable copy of the Source Code or indicate
131 | a repository where this Source will be easily and freely available for as long
132 | as the Licensee continues to distribute or communicate the Work.
133 |
134 | Legal Protection: This Licence does not grant permission to use the trade names,
135 | trademarks, service marks, or names of the Licensor, except as required for
136 | reasonable and customary use in describing the origin of the Work and
137 | reproducing the content of the copyright notice.
138 |
139 | 6. Chain of Authorship
140 |
141 | The original Licensor warrants that the copyright in the Original Work granted
142 | hereunder is owned by him/her or licensed to him/her and that he/she has the
143 | power and authority to grant the Licence.
144 |
145 | Each Contributor warrants that the copyright in the modifications he/she brings
146 | to the Work are owned by him/her or licensed to him/her and that he/she has the
147 | power and authority to grant the Licence.
148 |
149 | Each time You accept the Licence, the original Licensor and subsequent
150 | Contributors grant You a licence to their contributions to the Work, under the
151 | terms of this Licence.
152 |
153 | 7. Disclaimer of Warranty
154 |
155 | The Work is a work in progress, which is continuously improved by numerous
156 | Contributors. It is not a finished work and may therefore contain defects or
157 | ‘bugs’ inherent to this type of development.
158 |
159 | For the above reason, the Work is provided under the Licence on an ‘as is’ basis
160 | and without warranties of any kind concerning the Work, including without
161 | limitation merchantability, fitness for a particular purpose, absence of defects
162 | or errors, accuracy, non-infringement of intellectual property rights other than
163 | copyright as stated in Article 6 of this Licence.
164 |
165 | This disclaimer of warranty is an essential part of the Licence and a condition
166 | for the grant of any rights to the Work.
167 |
168 | 8. Disclaimer of Liability
169 |
170 | Except in the cases of wilful misconduct or damages directly caused to natural
171 | persons, the Licensor will in no event be liable for any direct or indirect,
172 | material or moral, damages of any kind, arising out of the Licence or of the use
173 | of the Work, including without limitation, damages for loss of goodwill, work
174 | stoppage, computer failure or malfunction, loss of data or any commercial
175 | damage, even if the Licensor has been advised of the possibility of such damage.
176 | However, the Licensor will be liable under statutory product liability laws as
177 | far such laws apply to the Work.
178 |
179 | 9. Additional agreements
180 |
181 | While distributing the Work, You may choose to conclude an additional agreement,
182 | defining obligations or services consistent with this Licence. However, if
183 | accepting obligations, You may act only on your own behalf and on your sole
184 | responsibility, not on behalf of the original Licensor or any other Contributor,
185 | and only if You agree to indemnify, defend, and hold each Contributor harmless
186 | for any liability incurred by, or claims asserted against such Contributor by
187 | the fact You have accepted any warranty or additional liability.
188 |
189 | 10. Acceptance of the Licence
190 |
191 | The provisions of this Licence can be accepted by clicking on an icon ‘I agree’
192 | placed under the bottom of a window displaying the text of this Licence or by
193 | affirming consent in any other similar way, in accordance with the rules of
194 | applicable law. Clicking on that icon indicates your clear and irrevocable
195 | acceptance of this Licence and all of its terms and conditions.
196 |
197 | Similarly, you irrevocably accept this Licence and all of its terms and
198 | conditions by exercising any rights granted to You by Article 2 of this Licence,
199 | such as the use of the Work, the creation by You of a Derivative Work or the
200 | Distribution or Communication by You of the Work or copies thereof.
201 |
202 | 11. Information to the public
203 |
204 | In case of any Distribution or Communication of the Work by means of electronic
205 | communication by You (for example, by offering to download the Work from a
206 | remote location) the distribution channel or media (for example, a website) must
207 | at least provide to the public the information requested by the applicable law
208 | regarding the Licensor, the Licence and the way it may be accessible, concluded,
209 | stored and reproduced by the Licensee.
210 |
211 | 12. Termination of the Licence
212 |
213 | The Licence and the rights granted hereunder will terminate automatically upon
214 | any breach by the Licensee of the terms of the Licence.
215 |
216 | Such a termination will not terminate the licences of any person who has
217 | received the Work from the Licensee under the Licence, provided such persons
218 | remain in full compliance with the Licence.
219 |
220 | 13. Miscellaneous
221 |
222 | Without prejudice of Article 9 above, the Licence represents the complete
223 | agreement between the Parties as to the Work.
224 |
225 | If any provision of the Licence is invalid or unenforceable under applicable
226 | law, this will not affect the validity or enforceability of the Licence as a
227 | whole. Such provision will be construed or reformed so as necessary to make it
228 | valid and enforceable.
229 |
230 | The European Commission may publish other linguistic versions or new versions of
231 | this Licence or updated versions of the Appendix, so far this is required and
232 | reasonable, without reducing the scope of the rights granted by the Licence. New
233 | versions of the Licence will be published with a unique version number.
234 |
235 | All linguistic versions of this Licence, approved by the European Commission,
236 | have identical value. Parties can take advantage of the linguistic version of
237 | their choice.
238 |
239 | 14. Jurisdiction
240 |
241 | Without prejudice to specific agreement between parties,
242 |
243 | - any litigation resulting from the interpretation of this License, arising
244 | between the European Union institutions, bodies, offices or agencies, as a
245 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court
246 | of Justice of the European Union, as laid down in article 272 of the Treaty on
247 | the Functioning of the European Union,
248 |
249 | - any litigation arising between other parties and resulting from the
250 | interpretation of this License, will be subject to the exclusive jurisdiction
251 | of the competent court where the Licensor resides or conducts its primary
252 | business.
253 |
254 | 15. Applicable Law
255 |
256 | Without prejudice to specific agreement between parties,
257 |
258 | - this Licence shall be governed by the law of the European Union Member State
259 | where the Licensor has his seat, resides or has his registered office,
260 |
261 | - this licence shall be governed by Belgian law if the Licensor has no seat,
262 | residence or registered office inside a European Union Member State.
263 |
264 | Appendix
265 |
266 | ‘Compatible Licences’ according to Article 5 EUPL are:
267 |
268 | - GNU General Public License (GPL) v. 2, v. 3
269 | - GNU Affero General Public License (AGPL) v. 3
270 | - Open Software License (OSL) v. 2.1, v. 3.0
271 | - Eclipse Public License (EPL) v. 1.0
272 | - CeCILL v. 2.0, v. 2.1
273 | - Mozilla Public Licence (MPL) v. 2
274 | - GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3
275 | - Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for
276 | works other than software
277 | - European Union Public Licence (EUPL) v. 1.1, v. 1.2
278 | - Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong
279 | Reciprocity (LiLiQ-R+).
280 |
281 | The European Commission may update this Appendix to later versions of the above
282 | licences without producing a new version of the EUPL, as long as they provide
283 | the rights granted in Article 2 of this Licence and protect the covered Source
284 | Code from exclusive appropriation.
285 |
286 | All other changes or additions to this Appendix require the production of a new
287 | EUPL version.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE README.rst setup.py
2 | include tests*/*.py
3 | include integration_test/*.py
4 | recursive-include docs *.rst
5 | recursive-include docs *.png
6 | exclude MANIFEST.in
7 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ***
2 | nrt
3 | ***
4 |
5 | *Python package for near real time detection of change in spatio-temporal datasets*
6 |
7 | .. image:: https://badge.fury.io/py/nrt.svg
8 | :target: https://badge.fury.io/py/nrt
9 |
10 | .. image:: https://readthedocs.org/projects/nrt/badge/?version=latest
11 | :target: https://nrt.readthedocs.io/en/latest/?badge=latest
12 | :alt: Documentation Status
13 |
14 | .. image:: https://github.com/ec-jrc/nrt/actions/workflows/build_and_test.yml/badge.svg
15 | :target: https://github.com/ec-jrc/nrt/actions/workflows/build_and_test.yml
16 | :alt: Build status
17 |
18 | .. image:: https://joss.theoj.org/papers/10.21105/joss.06815/status.svg
19 | :target: https://doi.org/10.21105/joss.06815
20 |
21 |
22 | ``nrt`` provides a standardized interface for Near Real Time monitoring of disturbances on satellite image time-series.
23 | The package is optimized for fast computation and suitable for operational deployment at scale.
24 | A typical operational use case of such a package would be a system constantly receiving new satellite based acquisitions and generating alerts when an anomaly is detected.
25 | Five monitoring frameworks from scientific literature on change detection are implemented and exposed via a common API.
26 | All five monitoring frameworks share a common general approach which consists in modelling the "normal" behavior of the variable through time by fitting a linear model on a user defined stable history period and monitoring until a "break" is detected.
27 | Monitoring starts right after the stable history period, and for each new incoming observation the observed value is compared to the predicted "normal" behavior.
28 | When observations and predictions diverge, a "break" is detected.
29 | A confirmed "break" typically requires several successive diverging observations, this sensitivity or rapid detection capacity depending on many variables such as the algorithm, its fitting and monitoring parameters, the noise level of the history period or the magnitude of the divergence.
30 | The five monitoring frameworks implemented are:
31 |
32 | - Exponentially Weighted Moving Average (EWMA_) (Brooks et al., 2013)
33 | - Cumulative Sum of Residual (CuSum_) (Verbesselt et al., 2012; Zeileis et al., 2005). CuSum is one of the monitoring options of the ``bfastmonitor`` function available in the R package bfast_.
34 | - Moving Sum of Residuals (MoSum_) (Verbesselt et al., 2012; Zeileis et al., 2005). MoSum is one of the monitoring options of the ``bfastmonitor`` function available in the R package bfast_.
35 | - Continuous Change Detection and Classification of land cover (CCDC_, CMFDA_) (Zhu et al., 2012, 2014) - Partial implementation only of the original published method.
36 | - Interquartile Range (IQR) - Simple, unpublished outlier identification strategy described on stackexchange_.
37 |
38 |
39 | Parts of this package are derived from Chris Holden's pybreakpoints_ and yatsm_ packages. Please see the copyright statements in the respective modules.
40 |
41 | .. _EWMA: https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6573358
42 | .. _CMFDA: https://www.sciencedirect.com/science/article/pii/S0034425712000387
43 | .. _CCDC: https://www.sciencedirect.com/science/article/pii/S0034425714000248#bbb0350
44 | .. _CuSum: https://www.sciencedirect.com/science/article/pii/S0034425712001150
45 | .. _MoSum: https://www.sciencedirect.com/science/article/pii/S0034425712001150
46 | .. _stackexchange: https://stats.stackexchange.com/a/1153
47 | .. _bfast: https://bfast.r-forge.r-project.org/
48 | .. _pybreakpoints: https://github.com/ceholden/pybreakpoints
49 | .. _yatsm: https://github.com/ceholden/yatsm
50 |
51 |
52 |
53 | Documentation
54 | =============
55 |
56 | Learn more about nrt in its official documentation at https://nrt.readthedocs.io/en/latest/
57 |
58 |
59 | Installation
60 | ============
61 |
62 | .. code-block:: bash
63 |
64 | pip install nrt
65 |
66 |
67 | The main dependencies, which should be automatically resolved by ``pip``, are:
68 |
69 | - `numpy <https://numpy.org/>`_
70 | - `scipy <https://scipy.org/>`_
71 | - `xarray <https://xarray.dev/>`_
72 | - `numba <https://numba.pydata.org/>`_
73 | - `rasterio <https://rasterio.readthedocs.io/>`_
74 | - `netCDF4 <https://unidata.github.io/netcdf4-python/>`_
75 |
76 |
77 | Example usage
78 | =============
79 |
80 | The snippet below presents a near real time monitoring simulation. The input data is split in stable history and monitoring period; the monitoring class is instantiated (EWMA algorithm), a simple harmonic model is fitted on the history period, and new acquisitions are passed to the monitor method one at a time. Note that in a real operational scenario where new observations come at a less frequent interval (e.g. every 5 or 8 days which correspond to the revisit frequency of sentinel 2 and Landsat constellations respectively), the monitoring state can be saved on disk and reloaded when required.
81 |
82 | .. code-block:: python
83 |
84 | import datetime
85 |
86 | from nrt.monitor.ewma import EWMA
87 | from nrt import data
88 |
89 | # Forest/non-forest mask
90 | mask = (data.romania_forest_cover_percentage() > 30).astype('int')
91 |
92 | # NDVI training and monitoring periods
93 | s2_cube = data.romania_20m()
94 | s2_cube['ndvi'] = (s2_cube.B8A - s2_cube.B04) / (s2_cube.B8A + s2_cube.B04)
95 | s2_cube = s2_cube.where(s2_cube.SCL.isin([4,5,7]))
96 | ndvi_history = s2_cube.ndvi.sel(time=slice('2015-01-01', '2018-12-31'))
97 | ndvi_monitoring = s2_cube.ndvi.sel(time=slice('2019-01-01', '2021-12-31'))
98 |
99 | # Instantiate monitoring class and fit stable history
100 | EwmaMonitor = EWMA(trend=False, mask=mask)
101 | EwmaMonitor.fit(dataarray=ndvi_history)
102 |
103 | # Monitor new observations
104 | for array, date in zip(ndvi_monitoring.values,
105 | ndvi_monitoring.time.values.astype('M8[s]').astype(datetime.datetime)):
106 | EwmaMonitor.monitor(array=array, date=date)
107 |
108 | # At any time a monitoring report can be produced with EwmaMonitor.report(filename)
109 | # and state of the monitoring instance can be saved as netcdf with
110 | # EwmaMonitor.to_netcdf(filename)
111 |
112 |
113 | Contributing
114 | ============
115 |
116 | Any type of contribution is welcome. Please see the contributing guidelines at `CONTRIBUTING.md <https://github.com/ec-jrc/nrt/blob/main/CONTRIBUTING.md>`_.
117 |
118 |
119 | Citing nrt
120 | ==========
121 |
122 | If you use nrt in your research or project, please consider citing it using the following BibTeX entry.
123 |
124 | .. code-block:: bibtex
125 |
126 | @article{dutrieux2024nrt,
127 | year = {2024},
128 | publisher = {The Open Journal},
129 | volume = {9},
130 | number = {100},
131 | pages = {6815},
132 | author = {Lo\"{i}c Dutrieux and Jonas Viehweger},
133 | title = {nrt: operational monitoring of satellite image time-series in Python},
134 | journal = {Journal of Open Source Software},
135 | doi = {10.21105/joss.06815},
136 | }
137 |
138 |
139 | About the authors
140 | =================
141 |
142 | Loïc Dutrieux works as a remote sensing researcher at the Joint Research Center (JRC) in Ispra, Italy. His work focuses on forest disturbances mapping and characterization from satellite image time-series.
143 |
144 | Jonas Viehweger is a young researcher with a MSc in remote sensing from the University of Marburg, Germany. He developed a large part of the nrt package during his traineeship period at the Joint Research Center (JRC) in Ispra, Italy.
145 |
146 | Chris Holden implemented many time-series change detection algorithms in python during his PhD at Boston University.
147 |
148 |
149 | References
150 | ==========
151 |
152 | Brooks, E.B., Wynne, R.H., Thomas, V.A., Blinn, C.E. and Coulston, J.W., 2013. On-the-fly massively multitemporal change detection using statistical quality control charts and Landsat data. IEEE Transactions on Geoscience and Remote Sensing, 52(6), pp.3316-3332.
153 | https://doi.org/10.1109/TGRS.2013.2272545
154 |
155 | Verbesselt, J., Zeileis, A. and Herold, M., 2012. Near real-time disturbance detection using satellite image time series. Remote Sensing of Environment, 123, pp.98-108.
156 | https://doi.org/10.1016/j.rse.2012.02.022
157 |
158 | Zeileis, A., Leisch, F., Kleiber, C. and Hornik, K., 2005. Monitoring structural change in dynamic econometric models. Journal of Applied Econometrics, 20(1), pp.99-121.
159 | https://doi.org/10.1002/jae.776
160 |
161 | Zhu, Z., Woodcock, C.E. and Olofsson, P., 2012. Continuous monitoring of forest disturbance using all available Landsat imagery. Remote sensing of environment, 122, pp.75-91.
162 | https://doi.org/10.1016/j.rse.2011.10.030
163 |
164 | Zhu, Z. and Woodcock, C.E., 2014. Continuous change detection and classification of land cover using all available Landsat data. Remote sensing of Environment, 144, pp.152-171.
165 | https://doi.org/10.1016/j.rse.2014.01.011
166 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
# Running plain "make" therefore lists the available Sphinx build targets.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/_static/concept.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/concept.png
--------------------------------------------------------------------------------
/docs/_static/forest_mask_bolivia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/forest_mask_bolivia.png
--------------------------------------------------------------------------------
/docs/_static/iqr_results_bolivia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/iqr_results_bolivia.png
--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/docs/_static/logo.png
--------------------------------------------------------------------------------
/docs/api_reference.rst:
--------------------------------------------------------------------------------
1 | API Reference
2 | =============
3 |
4 | nrt.data package
5 | ----------------
6 |
7 | .. automodule:: nrt.data
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | nrt.monitor package
13 | -------------------
14 |
15 | .. automodule:: nrt.monitor
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
20 | nrt.monitor.ccdc module
21 | ^^^^^^^^^^^^^^^^^^^^^^^
22 |
23 | .. automodule:: nrt.monitor.ccdc
24 | :members:
25 | :undoc-members:
26 | :show-inheritance:
27 |
28 | nrt.monitor.cusum module
29 | ^^^^^^^^^^^^^^^^^^^^^^^^
30 |
31 | .. automodule:: nrt.monitor.cusum
32 | :members:
33 | :undoc-members:
34 | :show-inheritance:
35 |
36 | nrt.monitor.ewma module
37 | ^^^^^^^^^^^^^^^^^^^^^^^
38 |
39 | .. automodule:: nrt.monitor.ewma
40 | :members:
41 | :undoc-members:
42 | :show-inheritance:
43 |
44 | nrt.monitor.iqr module
45 | ^^^^^^^^^^^^^^^^^^^^^^
46 |
47 | .. automodule:: nrt.monitor.iqr
48 | :members:
49 | :undoc-members:
50 | :show-inheritance:
51 |
52 | nrt.monitor.mosum module
53 | ^^^^^^^^^^^^^^^^^^^^^^^^
54 |
55 | .. automodule:: nrt.monitor.mosum
56 | :members:
57 | :undoc-members:
58 | :show-inheritance:
59 |
60 | nrt.fit\_methods module
61 | -----------------------
62 |
63 | .. automodule:: nrt.fit_methods
64 | :members:
65 | :undoc-members:
66 | :show-inheritance:
67 |
68 | nrt.log module
69 | --------------
70 |
71 | .. automodule:: nrt.log
72 | :members:
73 | :undoc-members:
74 | :show-inheritance:
75 |
76 | nrt.outliers module
77 | -------------------
78 |
79 | .. automodule:: nrt.outliers
80 | :members:
81 | :undoc-members:
82 | :show-inheritance:
83 |
84 | nrt.stats module
85 | ----------------
86 |
87 | .. automodule:: nrt.stats
88 | :members:
89 | :undoc-members:
90 | :show-inheritance:
91 |
92 | nrt.utils module
93 | ----------------
94 |
95 | .. automodule:: nrt.utils
96 | :members:
97 | :undoc-members:
98 | :show-inheritance:
99 |
100 | nrt.utils\_efp module
101 | ---------------------
102 |
103 | .. automodule:: nrt.utils_efp
104 | :members:
105 | :undoc-members:
106 | :show-inheritance:
107 |
108 | Module contents
109 | ---------------
110 |
111 | .. automodule:: nrt
112 | :members:
113 | :undoc-members:
114 | :show-inheritance:
115 | :noindex:
116 |
117 |
--------------------------------------------------------------------------------
/docs/classes.rst:
--------------------------------------------------------------------------------
1 | .. _classes:
2 |
3 | Monitoring Classes
4 | ******************
5 |
6 | This chapter is supposed to give a quick overview over differences
7 | between the different implemented algorithms.
8 |
9 | Conceptual basis
10 | ================
11 |
12 | Conceptually, near real-time monitoring using time series analysis is based on the temporal
13 | signature of forests. Due to seasonal differences in leaf area, chlorophyll and other biophysical or
14 | biochemical attributes, vegetation dynamics can be visible in the spectral response of forests. As
15 | an example, healthy forests exhibit a high reflectivity in the Near Infrared (NIR) because of
16 | scattering in that wavelength caused by the structure and water content of the leaves.
17 | The number of leaves and thus of scattering in the NIR is highest in summer and
18 | spring and lowest during winter. This seasonal pattern can be modelled and used to detect
19 | disturbances.
20 |
21 | .. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN
22 |
23 | .. figure:: _static/concept.png
24 |
25 | |copy| Copyright European Union, 2022; Jonas Viehweger
26 |
27 | All implemented algorithms are based on this concept. They first fit a model to the stable forest,
28 | then monitor for unusual values compared to that model. How exactly this monitoring happens is one of
29 | the main differences between the algorithms.
30 |
31 |
32 | EWMA
33 | ====
34 |
35 | EWMA is short for exponentially weighted moving average and follows an algorithm as described by
36 | `Brooks et al. (2013) <https://doi.org/10.1109/TGRS.2013.2272545>`_. This algorithm is based on
37 | quality control charts, namely Shewhart and EWMA quality control charts.
38 |
39 | Instantiation
40 | -------------
41 |
42 |
43 | .. code-block:: python
44 |
45 | from nrt.monitor.ewma import EWMA
46 |
47 | nrt_class = EWMA(trend=True, harmonic_order=2, mask=None,
48 | sensitivity=2, lambda_=0.3, threshold_outlier=2)
49 |
50 | This shows the parameters specific to the EWMA class in the second row during instantiating.
51 | In particular this is ``sensitivity``, ``lambda_`` and ``threshold_outlier``.
52 |
53 | Let's first talk about ``lambda_``. Lambda (0<λ<=1) is used as the exponent for the
54 | exponentially weighted moving average and basically controls how much influence the historic data has on the average.
55 |
56 | So for a time series where :math:`x_t` is the value at time period t,
57 | the EWMA value :math:`s` at time t is given as:
58 |
59 | .. math::
60 |
61 | s_t = \lambda \cdot x_t + (1-\lambda) \cdot s_{t-1}
62 |
63 | First the value at time t is weighted by λ and then added to the previous EWMA value,
64 | which was weighted by the inverse of λ. That means that for small λ the impact
65 | of single values on the average is low. So if the time series is very noisy, low values for lambda around
66 | 0.05 to 0.25 are recommended. This ensures that for example a single cloud which wasn't masked
67 | doesn't have a long lasting impact on the EWMA value.
68 |
69 | The parameter ``sensitivity`` is used to calculate the process boundary (also called control limit)
70 | which signals a disturbance when crossed.
71 | The boundary is calculated as follows:
72 |
73 | .. math::
74 |
75 | CL = L\cdot\sigma\sqrt{(\frac{\lambda}{2-\lambda})}
76 |
77 | with CL as Control Limits, L as the sensitivity and :math:`\sigma` as the standard deviation of
78 | the population. Basically the lower L is, the higher the sensitivity since the boundary will be lower.
79 | This is a very simplified formula since a few expectations are made. For a more detailed look at the formula, see
80 | the `Wikipedia page <https://en.wikipedia.org/wiki/EWMA_chart>`_.
81 |
82 | Lastly ``threshold_outlier`` provides a way to reduce noise of the time series while monitoring.
83 | It discards all residuals during monitoring which are larger than the standard
84 | deviation of the residuals during fitting multiplied by ``threshold_outlier``. This means that no disturbances which exhibit
85 | consistently higher residuals than :math:`threshold \cdot \sigma` will signal, but it also means that most clouds
86 | and cloud shadows which aren't caught by masking will get handled during monitoring.
87 |
88 | Fitting
89 | -------------
90 |
91 | By default EWMA is fit using OLS combined with outlier screening using Shewhart control charts.
92 | For more details see :ref:`fitting`.
93 |
94 |
95 |
96 | CCDC
97 | ====
98 |
99 | CCDC is short for Continuous Change Detection and Classification and is described in `Zhu & Woodcock (2014) <https://doi.org/10.1016/j.rse.2014.01.011>`_.
100 | The implementation in this package is not a strict implementation of the algorithm. It was also not validated against
101 | the original implementation.
102 |
103 | There are a few main differences. In contrast to Zhu & Woodcock (2014), multivariate analysis is not available in the nrt package.
104 | Furthermore, due to the structure of the nrt package, the automatic re-fitting after a disturbance which is described in the
105 | original implementation is not available.
106 | Lastly, the focus of this package is the detection of breaks and not their classification, so this part of the original algorithm is also omitted.
107 |
108 | Instantiation
109 | -------------
110 |
111 | .. code-block:: python
112 |
113 | from nrt.monitor.ccdc import CCDC
114 |
115 | nrt_class = CCDC(trend=True, harmonic_order=2, mask=None,
116 | sensitivity=3, boundary=3)
117 |
118 | During instantiation, the two parameters ``sensitivity`` and ``boundary``
119 | influence how sensitive the monitoring with CCDC will be.
120 |
121 | The parameter ``sensitivity`` in this case influences how high the threshold is after which
122 | an observation will get flagged as a possible disturbance. This threshold also
123 | depends on the residual mean square error (RMSE) which is calculated during fitting.
124 | With CCDC everything which is higher than :math:`sensitivity \cdot RMSE` is flagged as a possible
125 | disturbance.
126 |
127 | The boundary value then specifies, how many consecutive observations need to be
128 | above the threshold to confirm a disturbance.
129 |
130 | So with the default values, during monitoring 3 consecutive observation need to be
131 | 3 times higher than the RMSE to confirm a break.
132 |
133 |
134 | Fitting
135 | -------------
136 | By default CCDC is fit using a stable fitting method called ``CCDC-stable``, combined
137 | with outlier screening which is based on a robust iteratively reweighted least squares fit.
138 |
139 | For more details see :ref:`fitting`.
140 |
141 |
142 | CuSum and MoSum
143 | ===============
144 |
145 | Monitoring with cumulative sums (CuSum) and moving sums (MoSum) is based
146 | on `Verbesselt et al. (2012) <https://doi.org/10.1016/j.rse.2012.02.022>`_ and more particularly
147 | the `bfast <https://cran.r-project.org/package=bfast>`_ and `strucchange <https://cran.r-project.org/package=strucchange>`_ R packages.
148 |
149 | Both algorithms have the same underlying principle. The assumption is, that if a model was fitted on a time-series of a stable forest,
150 | the residuals will have a mean of 0. So summing all residuals up, the value should stay close to zero. If however then a disturbance happens,
151 | the residuals will consistently be higher or lower than zero, thus gradually moving the sum of residuals away from 0.
152 |
153 | The major difference between the two algorithms is that CuSum always takes the cumulative sum of the entire time-series, while
154 | MoSum only takes the sum of a moving window with a certain size.
155 |
156 |
157 | Instantiation
158 | -------------
159 |
160 | CuSum
161 | ^^^^^^
162 |
163 | .. code-block:: python
164 |
165 | from nrt.monitor.cusum import CuSum
166 |
167 | nrt_class = CuSum(trend=True, harmonic_order=2, mask=None,
168 | sensitivity=0.05)
169 |
170 | The parameter ``sensitivity`` in the case of CuSum and MoSum is equivalent to the significance level of the disturbance event.
171 | It basically signifies how likely it was, that the threshold was crossed randomly and not caused by a structural change
172 | in the time-series.
173 |
174 | So in this case lower values decrease the sensitivity of the monitoring to structural changes.
175 |
176 | MoSum
177 | ^^^^^^
178 |
179 | .. code-block:: python
180 |
181 | from nrt.monitor.mosum import MoSum
182 |
183 | nrt_class = MoSum(trend=True, harmonic_order=2, mask=None,
184 | sensitivity=0.05, h=0.25)
185 |
186 | The only additional parameter in MoSum is ``h``, which sets the moving window size relative to
187 | the total number of observations which were used during fitting. So if during fitting 40 observations
188 | were used, with ``h=0.25`` the window size during monitoring will be 10 observations.
189 |
190 | .. note::
191 | Since the process boundary during monitoring is pre-computed only for select values of ``sensitivity`` and ``h``,
192 | only 0.25, 0.5 and 1 are available for ``h`` and ``sensitivity`` has to be between 0.001 and 0.05
193 |
194 |
195 | Fitting
196 | -------------
197 |
198 | By default CuSum and MoSum use a reverse ordered cumulative sum (ROC) to fit a stable period.
199 |
200 | For more details see :ref:`fitting`.
201 |
202 |
203 | IQR
204 | ===
205 |
206 | IQR is an unpublished experimental monitoring algorithm based on the interquartile range of residuals.
207 |
208 |
209 | Instantiation
210 | -------------
211 |
212 | .. code-block:: python
213 |
214 | from nrt.monitor.iqr import IQR
215 |
216 | nrt_class = IQR(trend=False, harmonic_order=3, mask=None,
217 | sensitivity=1.5, boundary=3)
218 |
219 | The flagging of residuals works similar to CCDC.
220 |
221 | The parameter ``sensitivity`` in this case influences how high the threshold is after which
222 | an observation will get flagged as a possible disturbance. This threshold also
223 | depends on the IQR as well as the 25th and 75th percentile which are calculated during fitting.
224 | With this monitor everything which is higher than
225 | :math:`q75 + sensitivity \cdot IQR` or lower than :math:`q25 - sensitivity \cdot IQR`
226 | is flagged as a possible disturbance.
227 |
228 | The boundary value then specifies, how many consecutive observations need to be
229 | above the threshold to confirm a disturbance.
230 |
231 |
232 | Fitting
233 | -------------
234 |
235 | By default IQR is using an OLS fit.
236 |
237 | For more details see :ref:`fitting`
238 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.

import os
import sys
import warnings
from importlib.metadata import version as version_

from numba import NumbaWarning
# nrt must be importable here so that sphinx.ext.autodoc can resolve the
# modules documented in the API reference pages.
import nrt



# -- Project information -----------------------------------------------------

project = 'nrt'
copyright = 'European Union, 2022, Loic Dutrieux & Jonas Viehweger'
author = 'Loic Dutrieux, Jonas Viehweger'

# The full version, including alpha/beta/rc tags.
# Read from the installed distribution metadata so the documented version
# always matches the installed package.
release = version_('nrt')


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx_rtd_theme',
    'sphinx_gallery.gen_gallery',
    'sphinx.ext.mathjax'
]

# Gallery configuration.
# Only scripts matching 'filename_pattern' are executed at build time
# (e.g. the 'dontplot_*' examples are rendered but not run).
sphinx_gallery_conf = {
    'filename_pattern': '/plot_',
    'examples_dirs': 'gallery',  # path to your example scripts
    'gallery_dirs': 'auto_examples',  # path to where to save gallery generated output
}

# Avoid displaying some common warnings in gallery examples
warnings.filterwarnings('ignore', category=NumbaWarning)
warnings.filterwarnings('ignore', category=RuntimeWarning)

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
# The gallery README is rendered by sphinx-gallery itself, hence excluded here.
exclude_patterns = ['gallery/README.rst']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_logo = "_static/logo.png"
html_theme_options = {
    'logo_only': True,
    'display_version': False,
    'style_nav_header_background': "#f8efc8"
}
--------------------------------------------------------------------------------
/docs/fit.rst:
--------------------------------------------------------------------------------
1 | .. _fitting:
2 |
3 | Fitting & Outlier Screening
4 | ***************************
5 |
6 | Fitting is achieved by calling ``.fit()`` on an instantiated monitoring class.
7 |
8 | In general the default arguments for each monitoring class correspond to the fitting
9 | which was used in the corresponding publication. However since the
10 | classes are not bound to the fit that was used in the publication it is entirely possible
11 | to use any combination of fitting arguments with any monitoring class.
12 |
13 | Fitting works by passing an ``xarray.DataArray`` and specifying a fitting method.
14 | Optionally a method to screen outliers in the time-series can also be passed
15 | to the fit call.
16 |
17 | Screen outliers
18 | ===============
19 |
20 | Outlier screening happens before the fitting and is designed to remove unwanted outliers
21 | in the time-series. When using optical satellite data those outliers are mostly unwanted
22 | clouds, cloud shadows and snow.
23 |
24 | Shewhart
25 | ^^^^^^^^
26 |
27 | .. code-block:: python
28 |
29 | nrt_class.fit(dataarray, screen_outliers='Shewhart', L=5)
30 |
31 | This outlier screening is using Shewhart control charts to remove outliers.
32 | The optional parameter ``L`` defines how sensitive the outlier screening is.
33 |
34 | With this method, first an OLS fit is carried out using the arguments passed during
35 | instantiation. Then the standard deviation :math:`\sigma` of residuals is computed and all observations with
36 | residuals larger than :math:`L\cdot\sigma` are screened out.
37 |
38 |
39 | CCDC-RIRLS
40 | ^^^^^^^^^^
41 |
42 | While Shewhart outlier screening could work for any type of time series, the default outlier screening
43 | used by CCDC is tailored for optical satellite time series to mask out clouds and
44 | cloud shadows.
45 |
46 | .. code-block:: python
47 |
48 | nrt_class.fit(dataarray, screen_outliers='CCDC-RIRLS',
49 | green=xr_green, swir=xr_swir, scaling_factor=10000)
50 |
51 | This screening uses hard-coded thresholds of the short-wave infrared (SWIR) and green bands
52 | to detect clouds and cloud shadows. For this, reflectance values of the green and
53 | SWIR bands need to be passed as ``xarray.DataArrays``. Originally the bands 2 (0.52-0.60 µm) and 5 (1.55-1.75 µm)
54 | of the Landsat 5 Thematic Mapper were used.
55 |
56 | If other sensors like Sentinel 2 are used, which supply data with a scaling factor, the optional parameter
57 | ``scaling_factor`` needs to be set appropriately to bring the values to a 0-1 range.
58 |
59 | To screen out clouds, CCDC-RIRLS uses a Robust Iteratively Reweighted Least Squares fit to reduce the influence
60 | of outliers on the fit. See the chapter about RIRLS for more details.
61 |
62 | Do note that the RIRLS fit is quite computationally intensive.
63 |
64 |
65 | Fitting
66 | =======
67 |
68 | In general when trying to fit a temporal signature it is advisable to fit it on a stable part
69 | of the time-series which doesn't include structural changes.
70 | For this there are two fitting methods (ROC and CCDC-stable) available that aim to achieve a fit on a stable
71 | part of the time-series.
72 | The other two fitting methods (OLS, RIRLS) always fit a model on the entire history period, so if
73 | a lot of disturbances happened during the history period, the fitting results with these
74 | two methods might deliver worse results. Especially OLS however is much less computationally expensive than
75 | ROC and CCDC-stable.
76 |
77 | OLS
78 | ^^^^
79 |
80 | .. code-block:: python
81 |
82 | nrt_class.fit(dataarray, method='OLS')
83 |
84 | This carries out an ordinary least squares fit. All other available fitting methods in this package
85 | are at some point based on this fit.
86 |
87 | RIRLS
88 | ^^^^^^
89 |
90 | .. code-block:: python
91 |
92 | nrt_class.fit(dataarray, method='RIRLS', maxiter=50)
93 |
94 | The Robust Iteratively Reweighted Least Squares fit isn't the default for any nrt monitoring class; its
95 | main purpose is in the outlier screening method CCDC-RIRLS.
96 |
97 | By iteratively reweighting each observation in the time-series, a fit is reached which is less influenced by
98 | outliers in the time-series.
99 |
100 | This process can take a lot of iterations and thus can become very computationally expensive. The maximum number
101 | of iterations can be controlled by setting ``maxiter``. There are also many more possible parameters to modify.
102 | For a complete list see the api documentation for ``RIRLS``.
103 |
104 | ROC
105 | ^^^^
106 |
107 | .. code-block:: python
108 |
109 | nrt_class.fit(dataarray, method='ROC', alpha=0.05)
110 |
111 | Reverse Ordered Cumulative Sums (ROC) works by applying the same type of monitoring logic as in CuSum to the fitting.
112 | In particular this means, that the fitting period is gradually increased backwards in time starting from the
113 | end of the entire history period (so in reverse order). The period is increased as long as the
114 | cumulative sum of residuals is within a certain threshold which depends on ``alpha``.
115 |
116 | As soon as the threshold is crossed, it is likely that there was a structural break in the history period and thus
117 | the rest of the time series before the threshold was crossed will not be used for fitting the model.
118 |
119 | ``alpha`` is the significance of the detected structural break. So the lower ``alpha`` the lower the sensitivity
120 | for breaks in the time-series.
121 |
122 |
123 | CCDC-stable
124 | ^^^^^^^^^^^^
125 |
126 | .. code-block:: python
127 |
128 | nrt_class.fit(dataarray, method='CCDC-stable', threshold=3)
129 |
130 | With CCDC-stable, models are first fit using an OLS regression.
131 | Those models are then checked for stability.
132 |
133 | Stability is given if:
134 |
135 | 1. slope / RMSE < threshold
136 | 2. first observation / RMSE < threshold
137 | 3. last observation / RMSE < threshold
138 |
139 |
140 | Since the slope of the model is one of the test conditions, it is required for ``trend`` to be ``True``
141 | during instantiation of the monitoring class.
142 |
143 | If a model is not stable, the two oldest
144 | acquisitions are removed, a model is fit using this shorter
145 | time-series and again checked for stability. This process continues until the model is stable
146 | or until not enough observations are left, at which point the time-series will get marked as
147 | unstable and not be fit.
148 |
149 | .. note::
150 | This process is slightly different to the one described in Zhu & Woodcock 2014,
151 | since with the nrt package no new observations can be added during fitting.
--------------------------------------------------------------------------------
/docs/gallery/README.rst:
--------------------------------------------------------------------------------
1 | Examples gallery
2 | ================
3 |
4 |
5 |
--------------------------------------------------------------------------------
/docs/gallery/plot_parallel_computing.py:
--------------------------------------------------------------------------------
1 | r"""
2 | Parallel model fitting
3 | ======================
4 | The most computationally expensive part of a typical nrt workflow is the fitting
5 | of a harmonic model over the stable history period. Starting with version ``0.2.0``,
6 | ``nrt`` uses multithreading to further speed-up the already fast model fitting.
7 | This example illustrates how multithreading can be enabled and adjusted to your use case.
8 | """
9 |
10 | ##############################################################
11 | # Configure multithreading options of linear algebra library
12 | # -----------------------------------------------------------
13 | #
14 | # Most of the low level computation/numerical optimization occurring during model
15 | # fitting with nrt relies on a linear algebra library. These libraries often implement
16 | # low level methods with built-in multi-threading. ``nrt`` implements multi-threading
17 | # thanks to ``numba`` on a different, higher level.
18 | # To prevent nested parallelism that would result in over-subscription and potentially
19 | # reduce performance, it is recommended to disable the built-in multi-threading
20 | # of the linear algebra library being used.
21 | # Depending on how ``numpy`` was installed, it will rely on one of the three linear
22 | # algebra libraries which are OpenBLAS, MKL or BLIS. At the time of writing this
23 | # tutorial, PyPI wheels (obtained when installing ``numpy`` using pip) are shipped
24 | # with OpenBLAS, while a conda installation from the default channel will come with
25 | # MKL. All three libraries use an environmental variable to control threading
26 | # (``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS`` and ``BLIS_NUM_THREADS``); in the
27 | # present example, we set them all to ``'1'`` directly from within python.
28 | # Although knowing which library is used on your system would allow you to remove
29 | # the unnecessary configuration lines, it is not entirely necessary.
30 | import os
31 | # Note that 1 is a string, not an integer
32 | os.environ['MKL_NUM_THREADS'] = '1'
33 | os.environ['OPENBLAS_NUM_THREADS'] = '1'
34 | os.environ['BLIS_NUM_THREADS'] = '1'
35 |
36 | ##############################################################
37 | # Create benchmark data
38 | # ---------------------
39 | #
40 | # Using the synthetic data generation functionalities of the package, we can create
41 | # an xarray DataArray for benchmark. Note that in order to keep the compilation time
42 | # of this tutorial manageable we limit the size of that object to 200 by 200 pixels.
43 | # While this is significantly smaller than e.g. a Sentinel2 MGRS tile, it is sufficient
44 | # to illustrate differences in fitting time among various fitting strategies
45 | import xarray as xr
46 | import numpy as np
47 | from nrt import data
48 |
49 | # Create synthetic ndvi data cube
50 | dates = np.arange('2018-01-01', '2020-12-31', dtype='datetime64[W]')
51 | params_ds = data.make_cube_parameters(shape=(200,200), unstable_proportion=0)
52 | cube = data.make_cube(dates=dates, params_ds=params_ds)
53 | # We also create a very small cube for running each fitting method once before
54 | # the benchmark, ensuring compilation of the jitted functions and fair comparison
55 | cube_sub = cube.isel(indexers={'x': slice(1,5), 'y': slice(1,5)})
56 |
57 |
58 | ##############################################################
59 | # Benchmark fitting time of all methods
60 | # -------------------------------------
61 | #
62 | # Note that we are only interested in fitting time and therefore use a single
63 | # class instance for the benchmark. The time required for any subsequent .monitor()
64 | # call is usually negligible and as a consequence not included in this benchmark.
65 | # We use here ``CuSum`` but any of the monitoring classes could be used and
66 | # would produce the same results.
import time
import itertools
from collections import defaultdict
from nrt.monitor.cusum import CuSum
import matplotlib.pyplot as plt

# Benchmark parameters
benchmark_dict = defaultdict(dict)
monitor = CuSum()
methods = ['OLS', 'RIRLS', 'CCDC-stable', 'ROC']
threads = range(1,3)

# Make sure all numba jitted functions are compiled before timing anything,
# so that compilation time does not pollute the benchmark
monitor_ = CuSum()
for method in methods:
    monitor_.fit(cube_sub, method=method)

# Benchmark loop: time every (method, n_threads) combination.
# time.perf_counter() is preferred over time.time() for interval timing;
# it is monotonic and has the highest available resolution
for method, n_threads in itertools.product(methods, threads):
    t0 = time.perf_counter()
    monitor.fit(cube, n_threads=n_threads, method=method)
    t1 = time.perf_counter()
    benchmark_dict[method][n_threads] = t1 - t0

# Visualize the results as grouped bars (one group per fitting method,
# one bar per thread count)
bar_width = 0.2
index = np.arange(len(methods))
for idx, n in enumerate(threads):
    values = [benchmark_dict[method][n] for method in methods]
    plt.bar(index + idx * bar_width, values, bar_width, label='%d thread(s)' % n)

plt.xlabel('Fitting method')
plt.ylabel('Time (seconds)')
plt.title('Fitting time')
plt.xticks(index + bar_width, methods)
plt.legend()
plt.tight_layout()
plt.show()
103 |
104 | ##############################################################
105 | # From the results above we notice large differences in fitting time among fitting
106 | # methods. Unsurprisingly, OLS is the fastest, which is expected given that all
107 | # other methods use OLS complemented with some additional, sometimes iterative
108 | # refitting, etc... All methods but ``ROC`` for which parallel fitting hasn't been
109 | # implemented, benefit from using multiple threads.
# Note that a multithreading benefit can only be observed as long as the number
# of threads is lower than the computing resources available. The machine used for
112 | # compiling this tutorial is not meant for heavy computation and obviously has limited
113 | # resources as shown by the cpu_count below
import multiprocessing
# Display the number of CPUs available on the machine building this tutorial
print(multiprocessing.cpu_count())
116 |
117 |
118 | ##############################################################
119 | # Further considerations
120 | # ----------------------
121 | #
122 | # A deployment at scale may involve several levels of parallelization. The multi-threaded
123 | # example illustrated above is made possible thanks to the numba parallel accelerator.
124 | # However, it is also very common to handle the earlier steps of data loading and
125 | # data pre-processing with ``dask.distributed``, which facilitates lazy and distributed
126 | # computation. There is no direct integration between the two parallelism mechanisms
127 | # and while calling ``.fit()`` on a lazy distributed dask array is possible, the lazy
128 | # evaluation cannot be preserved and all the input data need to be evaluated and
129 | # loaded in memory
from nrt import data

# Lazy load test data using dask
cube = data.romania_10m(chunks={'x': 20, 'y': 20})
# Compute NDVI from the near-infrared (B08) and red (B04) bands; this
# remains a lazy dask-backed DataArray until evaluation is forced
vi_cube = (cube.B08 - cube.B04) / (cube.B08 + cube.B04)
print(vi_cube)
monitor = CuSum()
# Calling .fit() forces evaluation: the dask array is loaded in memory
monitor.fit(vi_cube, method='OLS', n_threads=2)
# The fitted coefficients are plain in-memory numpy arrays
print(type(monitor.beta))
139 |
--------------------------------------------------------------------------------
/docs/gallery/plot_simulation_data.py:
--------------------------------------------------------------------------------
1 | r"""
2 | Synthetic disturbance data
3 | ==========================
4 | This example illustrates the simulation of a near real time monitoring scenario on synthetic data.
The EWMA approach, imported from ``nrt.monitor.ewma``, is used for monitoring and detection
6 | of the artificially generated breakpoints and the experiment is concluded by a simple accuracy assessment.
7 | """
8 |
9 | #############################################################
10 | # Synthetic data generation
11 | # -------------------------
12 | #
13 | # The simulate module of the nrt-data package contains functionalities to create synthetic
14 | # data with controlled parameters such as position of structural change, phenological
15 | # amplitude, noise level, etc
16 | # One such example can be visualized using the ``make_ts`` function, which
17 | # creates a single time-series.
import random

import numpy as np
from nrt.data import simulate
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Weekly acquisition dates from 2018 to mid 2022
dates = np.arange('2018-01-01', '2022-06-15', dtype='datetime64[W]')
# 3x3 grid of examples: rows vary seasonal amplitude, columns vary noise level
fig, axes = plt.subplots(3,3, constrained_layout=True)
for row, amplitude in zip(axes, [0.1, 0.2, 0.3]):
    for ax, noise in zip(row, [0.02, 0.05, 0.1]):
        # Random breakpoint position within the series
        break_idx = random.randint(30,100)
        ts = simulate.make_ts(dates=dates,
                              break_idx=break_idx,
                              sigma_noise=noise,
                              amplitude=amplitude)
        ax.plot(dates, ts)
        # Mark the simulated breakpoint position
        ax.axvline(x=dates[break_idx], color='magenta')
        ax.set_ylim(-0.1,1.1)
        ax.set_title('Amplitude: %.1f\nsigma noise: %.2f' % (amplitude, noise),
                     fontsize=11)
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter("\n%Y"))
        # Hide the x tick marks and tick labels on every panel
        ax.tick_params( axis='x', which='both', bottom=False, top=False,
                       labelbottom=False)
fig.supxlabel('Time')
fig.supylabel('NDVI')
plt.show()
46 |
47 | #################################################################
48 | # The spatial counterpart of ``make_ts`` is ``make_cube`` and its main argument
49 | # is an ``xarray.Dataset`` of simulation parameters that can be generated
50 | # with the ``make_cube_parameters`` function.
51 | # The data cube generated is a standard univariate ``xarray.DataArray`` with
52 | # ``x``, ``y`` and ``time`` dimensions. Each pixel in the spatial dimensions contains
53 | # a time-series of simulated values with varying levels of noise, seasonality, outliers
54 | # and in some cases a structural break point
55 |
# Generate simulation parameters for a 50x50 pixels cube; breakpoints, when
# present, are constrained to indices 105 to (dates.size - 20)
params_ds = simulate.make_cube_parameters(shape=(50,50),
                                          n_outliers_interval=(0,5),
                                          n_nan_interval=(0,7),
                                          break_idx_interval=(105,dates.size - 20))
# Report the earliest and latest possible breakpoint dates
print('Early breakpoint: %s' % dates[105])
print('Late breakpoint: %s' % dates[dates.size - 20])
cube = simulate.make_cube(dates=dates, params_ds=params_ds)
64 |
65 | #################################################################
# In the ndvi datacube created, 50 percent of the pixels contain a break point
# occurring between 2020-01-02 and 2022-01-20. The ``break_idx`` variable of
68 | # the ``params_ds`` ``Dataset`` informs on the presence or absence of a break point,
69 | # and its position.
70 |
71 | ###################################################################
72 | # Simulation of an NRT monitoring scenario
73 | # --------------------------------------------------
# To simulate a near real-time monitoring scenario, we consider all the pixels
75 | # of the datacube (no mask) and define the 2018-01-01 to 2019-12-31 period as the
76 | # stable history period and all subsequent dates as monitoring. We know from the
77 | # time-series simulation parameters that the stable history period is indeed free of breakpoints.
78 | # In a real life near real time monitoring use case, fitting and monitoring are
# occurring separately; we therefore need to split the datacube created in two.
80 | #
81 | # After that instantiation of the ``EWMA`` class and stable history takes place.
82 | # The harmonic fit parameters for each pixel is stored in the instance
83 | # of the ``EWMA`` class
84 | # Note that in a real life scenario, several days may pass between fitting and the
85 | # next observation, or between consecutive observations. The fit parameters or
86 | # ongoing monitoring variables are then usually stored to disk in a NetCDF file.
87 | # See the ``to_netcdf()`` method for more details.
88 | # During monitoring each new observation needs to be passed to the monitor method
89 | # as a numpy array. Since we currently have these observations in an xarray DataArray
90 | # structure, we need to unpack each temporal slice as an (array, date) tuple
91 |
import datetime

from nrt.monitor.ewma import EWMA

# Split the cube: stable history period for fitting, later dates for monitoring
cube_history = cube.sel(time=slice('2018-01-01','2019-12-31'))
cube_monitor = cube.sel(time=slice('2020-01-01', '2022-12-31'))

# Monitoring class instantiation and fitting
monitor = EWMA(trend=False, harmonic_order=1, lambda_=0.3, sensitivity=4,
               threshold_outlier=10)
monitor.fit(dataarray=cube_history)

# Monitor every date of the ``cube_monitor`` DataArray; time values are
# converted to datetime.datetime as expected by .monitor()
for array, date in zip(cube_monitor.values,
                       cube_monitor.time.values.astype('M8[s]').astype(datetime.datetime)):
    monitor.monitor(array=array, date=date)
108 |
109 |
110 | ############################################################################
111 | # Monitoring performances evaluation
112 | # ----------------------------------
113 | # Assessing the performance of a time-series monitoring algorithm can be a complex
114 | # task that depends on the specific use case and what the user wants to emphasize.
115 | # A user valuing rapid detection will chose an assessment approach that puts extra
116 | # weight on the temporal aspect or penalize late detections, while if timeliness
117 | # is not a requirement, accuracy assessment will resemble standard spatial validation.
118 | # In the present example we work with a temporal threshold for which 6 months is the
119 | # default value. This approach to accuracy assessment implies that any breakpoint
# occurring outside of the 6 month period after the simulated breakpoint (considered ground truth)
# is considered commission error. Absence of detection during that same period would then be
122 | # an omission, detections during the period are true positives, and absence of detection
123 | # on stable time-series are true negatives.
124 | # Note that alternative accuracy assessment approaches exist; see for instance [1]_ who
125 | # chose to use ``PixelYears`` as their sampling units, or [2]_ who introduced the
# concept of a time weighted F1 score, hence considering simultaneously detection
127 | # speed and spatial accuracy in a single index.
128 |
def accuracy(nrtInstance, params_ds, dates, delta=180):
    """Compute accuracy metrics (precision, recall) of a nrt simulation on synthetic data

    Args:
        nrtInstance: Instance of a nrt monitoring class used for monitoring
        params_ds: Time-series generation parameters
        dates: Array of numpy.datetime64 dates used for synthetic time-series generation
        delta (int): Time delta in days after a reference break for a detected break
            to be considered True Positive.

    Returns:
        tuple: (precision, recall)
    """
    detection_date = nrtInstance._report(layers=['detection_date'], dtype=np.uint16)
    # Reference break dates; pixels without a break (break_idx == -1) get NaT
    dates_true = np.where(params_ds.break_idx != -1,
                          dates[params_ds.break_idx.values],
                          np.datetime64('NaT'))
    # Upper bound of the tolerance window. The 'D' unit must be explicit:
    # a unit-less np.timedelta64 silently adopts the unit of ``dates``
    # (weeks in this tutorial), turning the documented 180 days into 180 weeks
    dates_true_bound = dates_true + np.timedelta64(delta, 'D')
    # detection_date is days since epoch; 0 means "no detection" and maps to NaT
    dates_pred = np.datetime64('1970-01-01') + np.timedelta64(1, 'D') * detection_date
    dates_pred[dates_pred == np.datetime64('1970-01-01')] = np.datetime64('NaT')
    # Computes arrays of TP, FP, FN (they should be mutually exclusive);
    # comparisons against NaT evaluate to False, so NaT pixels never count as TP
    TP = np.where(np.logical_and(dates_pred >= dates_true, dates_pred <= dates_true_bound),
                  1, 0)
    FP = np.where(np.logical_and(TP == 0, ~np.isnat(dates_pred)), 1, 0)
    FN = np.where(np.logical_and(np.isnat(dates_pred), ~np.isnat(dates_true)), 1, 0)
    # NOTE: with zero detections or zero reference breaks these ratios are 0/0
    # and evaluate to nan (numpy emits a RuntimeWarning)
    precision = TP.sum() / (TP.sum() + FP.sum())
    recall = TP.sum() / (TP.sum() + FN.sum())
    return precision, recall
154 |
155 | print(accuracy(monitor, params_ds, dates))
156 |
157 | ####################################################################
158 | # White noise sensitivity analysis
159 | # --------------------------------
160 | # To go one step further we can assess and visualize how these accuracy measures
161 | # vary with the amount of noise present in the synthetic data.
162 | # For that we define a new function encompassing all the steps of data generation,
163 | # instantiation, fitting and monitoring
164 | #
165 | # The increase in recall at low noise levels is probably due to the extreme outliers
# filtering feature of the EWMA monitoring process. Outliers that exceed ``threshold_outlier``
167 | # times the standard deviation of the fit residuals are considered extreme
168 | # outliers (often clouds or artifacts) in real images, and do not contribute to the monitoring
169 | # process. With such low noise levels, that threshold is easily reached and breaks missed.
170 |
def make_cube_fit_and_monitor(dates, noise_level):
    """Run one full simulation (data generation, fitting, monitoring) at a given noise level

    Args:
        dates: Array of numpy.datetime64 dates used for synthetic time-series generation
        noise_level (float): Noise level passed as both bounds of
            ``sigma_noise_interval`` so that every pixel gets the same noise

    Returns:
        tuple: Simulation parameters Dataset and the EWMA instance after monitoring
    """
    params_ds = simulate.make_cube_parameters(shape=(20,20),
                                              n_outliers_interval=(4,5),
                                              n_nan_interval=(3,4),
                                              sigma_noise_interval=(noise_level, noise_level),
                                              break_idx_interval=(105,dates.size - 20))
    cube = simulate.make_cube(dates=dates, params_ds=params_ds)
    cube_history = cube.sel(time=slice('2018-01-01','2019-12-31'))
    cube_monitor = cube.sel(time=slice('2020-01-01', '2022-12-31'))
    # Monitoring class instantiation and fitting
    monitor = EWMA(trend=False, harmonic_order=1, lambda_=0.3, sensitivity=4,
                   threshold_outlier=10)
    monitor.fit(dataarray=cube_history)
    # Monitor every date of the ``cube_monitor`` DataArray
    for array, date in zip(cube_monitor.values,
                           cube_monitor.time.values.astype('M8[s]').astype(datetime.datetime)):
        monitor.monitor(array=array, date=date)
    return params_ds, monitor
189 |
# Run the full simulation for a range of noise levels and collect
# (precision, recall) pairs
noises = [0.02, 0.03, 0.05, 0.07, 0.09, 0.12, 0.15, 0.2]
prs = []
for noise in noises:
    params_ds, monitor = make_cube_fit_and_monitor(dates, noise)
    prs.append(accuracy(monitor, params_ds, dates))

# Unzip the pairs and plot both metrics against noise level
precisions, recalls = zip(*prs)
plt.plot(noises, precisions, label='Precision')
plt.plot(noises, recalls, label='Recall')
plt.xlabel('Noise level')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
203 |
204 | ###########################################################################
205 | # References
206 | # ----------
207 | #
208 | # .. [1] Bullock, E.L., Woodcock, C.E. and Holden, C.E., 2020. Improved
209 | # change monitoring using an ensemble of time series algorithms.
210 | # Remote Sensing of Environment, 238, p.111165.
211 | #
212 | # .. [2] Viehweger, J., 2021. Comparative Assessment of Near Real-Time Forest
213 | # Disturbance Detection Algorithms. Master thesis, Philipps Universitat
214 | # Marburg
215 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. nrt documentation master file, created by
2 | sphinx-quickstart on Thu Apr 8 11:56:08 2021.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to nrt's documentation!
7 | ===============================
8 |
9 | `nrt` is a Python package designed for near real-time detection of changes in spatio-temporal datasets, with a particular focus on monitoring forest disturbances from satellite image time-series. It offers a standardized API inspired by scikit-learn, ensuring seamless interoperability and comparison across various state-of-the-art monitoring algorithms. Optimized for rapid computation, `nrt` is suitable for operational deployment at scale. This package is an essential tool for researchers and practitioners aiming for timely and efficient monitoring, contributing to climate change mitigation, biodiversity conservation, and natural heritage preservation. Explore the official documentation to get started with installation, usage examples, and more.
10 |
11 | .. automodule:: nrt
12 |
13 | .. toctree::
14 | :maxdepth: 2
15 | :caption: Contents:
16 |
17 | quickstart
18 | classes
19 | fit
20 | api_reference
21 | auto_examples/index
22 |
23 |
24 | Indices and tables
25 | ==================
26 |
27 | * :ref:`genindex`
28 | * :ref:`modindex`
29 | * :ref:`search`
30 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation
REM Usage: make.bat ^<builder^> (e.g. "make.bat html"); with no argument the
REM Sphinx help listing available builders is printed instead.

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

REM Probe that sphinx-build is available; errorlevel 9009 means the
REM command was not found on PATH
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | Quickstart
2 | **********
3 |
4 | Introduction
5 | ============
6 |
7 | The nrt package provides near real-time disturbance monitoring algorithms for spatio-temporal datasets.
8 |
9 | The following algorithms are implemented:
10 |
11 | - EWMA (Brooks et al., 2013) [1]_
12 | - CUSUM (Verbesselt et al., 2012) [2]_
13 | - MOSUM (Verbesselt et al., 2012) [2]_
14 | - CCDC (Zhu et al., 2012; 2014) [3]_ [4]_
15 | - IQR
16 |
17 | For more information on the basic function of each monitoring method see :ref:`classes`.
18 |
19 | Installation
20 | ============
21 |
22 | Install the package from pypi using:
23 |
24 | .. code-block::
25 |
26 | pip install nrt
27 |
28 | Workflow
29 | ========
30 |
31 | In general a workflow with the `nrt` package can be divided into three parts:
32 | Instantiating, Fitting and Monitoring.
33 |
34 | Instantiating
35 | -------------
36 |
37 | .. code-block:: python
38 |
39 | from nrt.monitor.ewma import EWMA
40 | from nrt import data
41 |
42 | # load example mask
43 | mask = (data.romania_forest_cover_percentage() > 20).astype('int')
44 |
45 | # Instantiate
46 | nrt_class = EWMA(
47 | mask=mask,
48 | trend=True,
49 | harmonic_order=3,
50 | sensitivity=2
51 | )
52 |
53 | Here, the monitoring class ``EWMA`` is imported. Along with that a forest mask
54 | is constructed. This mask is optional but must have the same shape as the data array which is
55 | later supplied for fitting.
56 |
57 | All available monitoring classes accept the parameters ``mask``, ``trend``, ``harmonic_order``
58 | and ``sensitivity``. Some monitoring classes also accept additional parameters.
59 | For more information see :ref:`classes`.
60 |
61 | .. note::
62 | ``sensitivity`` takes different values depending on the monitoring class.
63 | For example larger ``sensitivity`` values mean higher sensitivity for ``EWMA``,
64 | but lower sensitivity for ``MoSum``.
65 |
66 |
67 |
68 | Fitting
69 | -------------
70 |
71 | .. code-block:: python
72 |
73 | # load example xarray
74 | s2_cube = data.romania_20m()
75 | history = s2_cube.B03.sel(time=slice(None, '2019-01-01'))
76 | monitor = s2_cube.B03.sel(time=slice('2019-01-01', None))
77 |
78 | # Fitting
79 | nrt_class.fit(dataarray=history)
80 |
81 | # Dump model
82 | nrt_class.to_netcdf('model.nc')
83 |
84 | In this example some sample data is created first, which is used for fitting and
85 | later for monitoring.
86 | ``.fit()`` has only one non optional parameter ``dataarray`` which expects an
87 | ``xarray.DataArray``. During fitting there are other optional parameters which can be set.
88 | See :ref:`fitting` for more detail.
89 |
90 | If the next observation is not immediately available, the model can be dumped
91 | to a NetCDF file which can then be loaded once the next observation is available.
92 |
93 | Monitoring
94 | -------------
95 |
96 | .. code-block:: python
97 |
98 | # Load dumped model
99 | nrt_class = EWMA.from_netcdf('model.nc')
100 |
101 | # Monitor new observations
102 | for array, date in zip(monitor.values, monitor.time.values.astype('datetime64[s]').tolist()):
103 | nrt_class.monitor(array=array, date=date)
104 |
105 | # Report results
106 | nrt_class.report('report.tif')
107 |
108 | If the model was dumped to a NetCDF it can be read from disk with ``from_netcdf()``.
Monitoring happens with ``.monitor()``. This only takes a numpy array and a date of class
110 | ``datetime.date``.
111 |
112 | At any time during monitoring a report can be generated by calling
113 | ``.report()``. This report returns a GeoTIFF with two bands, one showing the status
114 | of all pixels (e.g. not monitored, disturbed, etc.) and another band showing the date when
115 | a disturbance was detected in days since 1970-01-01.
116 |
117 | References
118 | ==========
119 |
120 | .. [1] Brooks, E.B., Wynne, R.H., Thomas, V.A., Blinn, C.E. and Coulston, J.W., 2013.
121 | On-the-fly massively multitemporal change detection using statistical quality control charts and Landsat data.
122 | IEEE Transactions on Geoscience and Remote Sensing, 52(6), pp.3316-3332.
123 | https://doi.org/10.1109/TGRS.2013.2272545
124 |
125 | .. [2] Verbesselt, J., Zeileis, A. and Herold, M., 2012.
126 | Near real-time disturbance detection using satellite image time series.
127 | Remote Sensing of Environment, 123, pp.98-108.
128 | https://doi.org/10.1016/j.rse.2012.02.022
129 |
130 | .. [3] Zhu, Z., Woodcock, C.E. and Olofsson, P., 2012.
131 | Continuous monitoring of forest disturbance using all available Landsat imagery.
132 | Remote sensing of environment, 122, pp.75-91.
133 | https://doi.org/10.1016/j.rse.2011.10.030
134 |
135 | .. [4] Zhu, Z. and Woodcock, C.E., 2014.
136 | Continuous change detection and classification of land cover using all available Landsat data.
137 | Remote sensing of Environment, 144, pp.152-171.
138 | https://doi.org/10.1016/j.rse.2014.01.011
139 |
140 |
141 |
142 |
143 |
--------------------------------------------------------------------------------
/nrt/__init__.py:
--------------------------------------------------------------------------------
# Declare ``nrt`` as a pkgutil-style namespace package so companion
# distributions (e.g. nrt-data) can provide submodules under the same name
__path__ = __import__('pkgutil').extend_path(__path__, __name__)

from importlib.metadata import version

try:
    # Resolve the version of the installed "nrt" distribution at runtime
    __version__ = version("nrt")
except Exception:
    # Local copy or not installed with setuptools.
    # Disable minimum version checks on downstream libraries.
    __version__ = "9999"
--------------------------------------------------------------------------------
/nrt/fit_methods.py:
--------------------------------------------------------------------------------
1 | """Model fitting
2 |
Functions defined in this module always use a 2D array containing the dependent
4 | variables (y) and return both coefficient (beta) and residuals matrices.
5 | These functions are meant to be called in ``nrt.BaseNrt._fit()``.
6 |
7 | The RIRLS fit is derived from Chris Holden's yatsm package. See the
8 | copyright statement below.
9 | """
10 | # Copyright (C) 2022 European Union (Joint Research Centre)
11 | #
12 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
13 | # the European Commission - subsequent versions of the EUPL (the "Licence");
14 | # You may not use this work except in compliance with the Licence.
15 | # You may obtain a copy of the Licence at:
16 | #
17 | # https://joinup.ec.europa.eu/software/page/eupl
18 | #
19 | # Unless required by applicable law or agreed to in writing, software
20 | # distributed under the Licence is distributed on an "AS IS" basis,
21 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | # See the Licence for the specific language governing permissions and
23 | # limitations under the Licence.
24 |
25 | ###############################################################################
26 | # The MIT License (MIT)
27 | #
28 | # Copyright (c) 2014 Chris Holden
29 | #
30 | # Permission is hereby granted, free of charge, to any person obtaining a copy
31 | # of this software and associated documentation files (the "Software"), to deal
32 | # in the Software without restriction, including without limitation the rights
33 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
34 | # copies of the Software, and to permit persons to whom the Software is
35 | # furnished to do so, subject to the following conditions:
36 | #
37 | # The above copyright notice and this permission notice shall be included in all
38 | # copies or substantial portions of the Software.
39 | #
40 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
43 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
44 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
45 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
46 | # SOFTWARE.
47 | ###############################################################################
48 |
49 | import numpy as np
50 | import numba
51 |
52 | from nrt.log import logger
53 | from nrt import utils
54 | from nrt.utils_efp import history_roc
55 | from nrt.stats import nanlstsq, mad, bisquare
56 |
57 |
def ols(X, y):
    """Fit a simple OLS model

    Delegates the least squares solve to ``nanlstsq`` and derives the
    residuals as fitted values minus observations.

    Args:
        X ((M, N) np.ndarray): Matrix of independent variables
        y ({(M,), (M, K)} np.ndarray): Matrix of dependent variables

    Returns:
        beta (numpy.ndarray): The array of regression estimators
        residuals (numpy.ndarray): The array of residuals
    """
    coefficients = nanlstsq(X, y)
    fitted = np.dot(X, coefficients)
    return coefficients, fitted - y
72 |
73 |
@utils.numba_kwargs
@numba.jit(nopython=True, cache=True, parallel=True)
def rirls(X, y, M=bisquare, tune=4.685,
          scale_est=mad, scale_constant=0.6745,
          update_scale=True, maxiter=50, tol=1e-8):
    """Robust Linear Model using Iterative Reweighted Least Squares (RIRLS)

    Perform robust fitting regression via iteratively reweighted least squares
    according to weight function and tuning parameter.
    Basically a clone from `statsmodels` that should be much faster.

    Note:
        For best performances of the multithreaded implementation, it is
        recommended to limit the number of threads used by MKL or OpenBLAS to 1.
        This avoids over-subscription, and improves performances.
        By default the function will use all cores available; the number of cores
        used can be controled using the ``numba.set_num_threads`` function or
        by modifying the ``NUMBA_NUM_THREADS`` environment variable

    Args:
        X (np.ndarray): 2D (n_obs x n_features) design matrix
        y (np.ndarray): 2D dependent variables, one series per column
        M (callable): function for scaling residuals (default: `bisquare`)
        tune (float): tuning constant for the weight function
        scale_est (callable): estimate used to scale the weights
            (default: `mad` for median absolute deviation)
        scale_constant (float): normalization constant (default: 0.6745)
        update_scale (bool, optional): update scale estimate for weights
            across iterations (default: True)
        maxiter (int, optional): maximum number of iterations (default: 50)
        tol (float, optional): convergence tolerance of estimate
            (default: 1e-8)

    Returns:
        tuple: beta-coefficients and residual vector
    """
    beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    resid = np.full_like(y, np.nan, dtype=np.float64)
    # Each column (time-series) is fit independently and in parallel
    for idx in numba.prange(y.shape[1]):
        y_sub = y[:,idx]
        isna = np.isnan(y_sub)
        X_sub = X[~isna]
        y_sub = y_sub[~isna]
        # Initial fit with uniform weights (plain OLS)
        beta_, resid_ = weighted_ols(X_sub, y_sub, np.ones_like(y_sub))
        scale = scale_est(resid_, c=scale_constant)

        EPS = np.finfo(np.float32).eps
        if scale < EPS:
            # Numerically perfect initial fit; reweighting would divide by ~0
            beta[:,idx] = beta_
            resid[~isna,idx] = resid_
            continue

        iteration = 1
        converged = 0
        while not converged and iteration < maxiter:
            last_beta = beta_.copy()
            weights = M(resid_ / scale, c=tune)
            beta_, resid_ = weighted_ols(X_sub, y_sub, weights)
            if update_scale:
                scale = max(EPS,scale_est(resid_, c=scale_constant))
            iteration += 1
            # Converged when no coefficient changed by more than ``tol``.
            # The absolute value must wrap the difference itself; the previous
            # form ``np.fabs(beta_ - last_beta > tol)`` applied fabs to the
            # boolean comparison and therefore missed large *negative*
            # coefficient changes, declaring convergence too early.
            converged = not np.any(np.fabs(beta_ - last_beta) > tol)
        beta[:,idx] = beta_
        resid[~isna,idx] = resid_

    return beta, resid
141 |
142 |
@numba.jit(nopython=True, cache=True)
def weighted_ols(X, y, w):
    """Apply a weighted OLS fit to 1D data

    Weights are applied by scaling both the design matrix and the response
    by the square root of ``w`` before solving the least squares problem;
    residuals are computed against the original (unweighted) data.

    Args:
        X (np.ndarray): independent variables
        y (np.ndarray): dependent variable
        w (np.ndarray): observation weights

    Returns:
        tuple: coefficients and residual vector
    """
    root_w = np.sqrt(w)
    X_weighted = X * np.expand_dims(root_w, -1)
    y_weighted = y * root_w

    coefs, _, _, _ = np.linalg.lstsq(X_weighted, y_weighted)
    residuals = y - np.dot(X, coefs)
    return coefs, residuals
165 |
@utils.numba_kwargs
@numba.jit(nopython=True, cache=True, parallel=True)
def ccdc_stable_fit(X, y, dates, threshold=3):
    """Fitting stable regressions using an adapted CCDC method

    Models are first fit using OLS regression. Those models are then checked for
    stability. If a model is not stable, the two oldest
    acquisitions are removed, a model is fit using this shorter
    time-series and again checked for stability. This process continues as long
    as all of the following 3 conditions are met:

    1. The timeseries is still unstable
    2. There are enough cloud-free acquisitions left (threshold is 1.5x the
       number of parameters in the design matrix)
    3. The time series includes data of more than half a year

    Stability depends on all these three conditions being true:
    1. slope / RMSE < threshold
    2. first observation / RMSE < threshold
    3. last observation / RMSE < threshold

    Note:
        For best performances of the multithreaded implementation, it is
        recommended to limit the number of threads used by MKL or OpenBLAS to 1.
        This avoids over-subscription, and improves performances.
        By default the function will use all cores available; the number of cores
        used can be controled using the ``numba.set_num_threads`` function or
        by modifying the ``NUMBA_NUM_THREADS`` environment variable

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        dates ((M, ) np.ndarray): Corresponding dates to y in numpy datetime64
            (the half-year comparison against the integer 183 suggests the
            caller passes day-precision values -- TODO confirm)
        threshold (float): Sensitivity of stability checking. Gets passed to
            ``is_stable_ccdc()``
    Returns:
        beta (numpy.ndarray): The array of regression estimators
        residuals (numpy.ndarray): The array of residuals
        is_stable (numpy.ndarray): 1D Boolean array indicating stability
        start (numpy.ndarray): 1D integer array indicating day of fitting start
            as days since UNIX epoch.
    """
    # Minimum number of clear observations: 1.5x the number of parameters
    min_obs = int(X.shape[1] * 1.5)
    beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    residuals = np.full_like(y, np.nan)
    stable = np.empty((y.shape[1]))
    fit_start = np.empty((y.shape[1]))
    # Each column (pixel time-series) is processed independently in parallel
    for idx in numba.prange(y.shape[1]):
        y_sub = y[:, idx]
        isna = np.isnan(y_sub)
        X_sub = X[~isna]
        y_sub = y_sub[~isna]
        _dates = dates[~isna]
        is_stable = False

        # Run until minimum observations
        # or until stability is reached
        # NOTE(review): if len(y_sub) < min_obs this loop body never runs and
        # beta_sub / resid_sub / jdx are used below without being assigned --
        # presumably the caller guarantees enough observations; verify.
        for jdx in range(len(y_sub), min_obs-1, -2):
            # Timeseries gets reduced by two elements
            # each iteration
            y_ = y_sub[-jdx:]
            X_ = X_sub[-jdx:]
            # OLS via the normal equations (X'X) beta = X'y
            beta_sub = np.linalg.solve(np.dot(X_.T, X_), np.dot(X_.T, y_))
            resid_sub = np.dot(X_, beta_sub) - y_

            # Check for stability
            rmse = np.sqrt(np.mean(resid_sub ** 2))
            slope = np.fabs(beta_sub[1]) / rmse < threshold
            first = np.fabs(resid_sub[0]) / rmse < threshold
            last = np.fabs(resid_sub[-1]) / rmse < threshold

            # Break if stability is reached
            is_stable = slope & first & last
            if is_stable:
                break
            # Also break if less than half a year of data remain
            last_date = _dates[-1]
            first_date = _dates[-jdx]
            if last_date - first_date < 183:
                break

        beta[:,idx] = beta_sub
        # NOTE(review): resid_sub aligns with the last jdx *non-NaN*
        # observations, but is written at the last jdx positions of the full
        # (NaN-inclusive) series; with NaNs present in the tail the rows may
        # be misaligned -- confirm against downstream consumers.
        residuals[-jdx:,idx] = resid_sub
        stable[idx] = is_stable
        fit_start[idx] = _dates[-jdx]
    return beta, residuals, stable.astype(np.bool_), fit_start
252 |
253 |
@utils.numba_kwargs
@numba.jit(nopython=True, cache=True, parallel=False)
def roc_stable_fit(X, y, dates, alpha=0.05, crit=0.9478982340418134):
    """Fitting stable regressions using Reverse Ordered Cumulative Sums

    Calculates OLS coefficients, residuals and a stability mask based on
    a stable history period which is provided by ``history_roc()``.

    The pixel will get marked as unstable if:
    1. The stable period is shorter than half a year OR
    2. There are fewer observation than the number of coefficients in X

    The implementation roughly corresponds to the fit of bfastmonitor
    with the history option set to 'ROC'.

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        dates ((M, ) np.ndarray): Corresponding dates to y in days since epoch
            (int)
        alpha (float): Significance level for the boundary
            (probability of type I error)
        crit (float): Critical value corresponding to the chosen alpha. Can be
            calculated with ``_cusum_rec_test_crit``.
            Default is the value for alpha=0.05

    Returns:
        beta (numpy.ndarray): The array of regression estimators
        residuals (numpy.ndarray): The array of residuals
        is_stable (numpy.ndarray): 1D Boolean array indicating stability
        start (numpy.ndarray): 1D integer array indicating day of fitting start
            as days since UNIX epoch.
    """
    is_stable = np.ones(y.shape[1], dtype=np.bool_)
    fit_start = np.zeros_like(is_stable, dtype=np.uint16)
    beta = np.full((X.shape[1], y.shape[1]), np.nan, dtype=np.float64)
    nreg = X.shape[1]
    for idx in numba.prange(y.shape[1]):
        # subset and remove nan
        is_nan = np.isnan(y[:, idx])
        _y = y[~is_nan, idx]
        _X = X[~is_nan, :]

        # get the index where the stable period starts
        stable_idx = history_roc(_X, _y, alpha=alpha, crit=crit)

        # If there are not enough observations available in the stable period
        # set stability to False and continue
        if len(_y) - stable_idx < nreg + 1:
            is_stable[idx] = False
            continue

        # Check if there is more than half a year (183 days) of data available
        # If not, set stability to False and continue
        _dates = dates[~is_nan]
        last_date = _dates[-1]
        first_date = _dates[stable_idx]
        if last_date - first_date < 183:
            is_stable[idx] = False
            continue

        # Subset and fit
        X_stable = _X[stable_idx:]
        y_stable = _y[stable_idx:]
        beta[:, idx] = np.linalg.solve(np.dot(X_stable.T, X_stable),
                                       np.dot(X_stable.T, y_stable))
        fit_start[idx] = _dates[stable_idx]

    # Residuals are computed for all pixels; unstable pixels keep NaN betas
    # and therefore yield NaN residuals
    residuals = np.dot(X, beta) - y
    return beta, residuals, is_stable, fit_start
324 |
--------------------------------------------------------------------------------
/nrt/log.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import logging
17 |
# Log record layout: timestamp, level, line number, module.function, message
_FORMAT = '%(asctime)s:%(levelname)s:%(lineno)s:%(module)s.%(funcName)s:%(message)s'
_formatter = logging.Formatter(_FORMAT, '%H:%M:%S')
# Stream handler (stderr by default) shared by the whole package
_handler = logging.StreamHandler()
_handler.setFormatter(_formatter)

# Package-level logger; sub-modules retrieve children via logging.getLogger
logger = logging.getLogger('nrt')
logger.addHandler(_handler)
logger.setLevel(logging.INFO)
26 |
--------------------------------------------------------------------------------
/nrt/monitor/ccdc.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import numpy as np
17 | import xarray as xr
18 |
19 | from nrt.monitor import BaseNrt
20 |
21 |
class CCDC(BaseNrt):
    """Monitoring with a CCDC-like implementation

    Implementation loosely following the method described in
    Zhu & Woodcock 2014.

    Zhu, Zhe, and Curtis E. Woodcock. 2014. "Continuous Change Detection and
    Classification of Land Cover Using All Available Landsat Data." Remote
    Sensing of Environment 144 (March): 152-71.
    https://doi.org/10.1016/j.rse.2014.01.011.

    Attributes:
        mask (numpy.ndarray): 2D array flagging pixels to monitor (1) or
            skip (0). May be updated after the history stability check and
            after confirmed breaks during monitoring. Values:
            ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history',
            3: 'Confirmed break - no longer monitored'}``
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        beta (np.ndarray): 3D array of model coefficients
        x (numpy.ndarray): Array of x coordinates
        y (numpy.ndarray): Array of y coordinates
        sensitivity (float): Monitoring sensitivity; lower values mean higher
            sensitivity. Must not be zero.
        boundary (int): Number of consecutive outlier observations required
            to signal a disturbance
        rmse (np.ndarray): 2D float array of per-pixel RMSE
        detection_date (numpy.ndarray): 2D array with disturbance detection
            dates in days since 1970-01-01

    Args:
        mask (numpy.ndarray): 2D array with pixels to monitor marked ``1``
            and pixels to exclude marked ``0`` (typically a stable forest
            mask for forest disturbance monitoring). When omitted, every
            pixel is considered and a mask is created by the ``fit()`` call
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        sensitivity (float): Monitoring sensitivity; lower values mean higher
            sensitivity. Must not be zero.
        boundary (int): Number of consecutive outlier observations required
            to signal a disturbance
        **kwargs: Used to set internal attributes when initializing with
            ``.from_netcdf()``
    """
    def __init__(self, trend=True, harmonic_order=2, sensitivity=3,
                 mask=None, boundary=3, **kwargs):
        super().__init__(mask=mask, trend=trend,
                         harmonic_order=harmonic_order,
                         boundary=boundary, **kwargs)
        self.sensitivity = sensitivity
        # Restored from serialized state when built via from_netcdf()
        self.rmse = kwargs.get('rmse')
        self.monitoring_strategy = 'CCDC'

    def fit(self, dataarray, method='CCDC-stable', screen_outliers='CCDC_RIRLS',
            green=None, swir=None, scaling_factor=1, **kwargs):
        """Fit the stable history model

        When outlier screening is requested, the green and swir bands must
        be supplied. The stability check uses the same sensitivity that is
        later applied during change detection (default: 3*RMSE).

        Args:
            dataarray (xr.DataArray): xarray Dataarray with the historic data
                to be fitted
            method (string): Regression to use. See ``_fit()`` for info.
            screen_outliers (string): Outlier screening to use.
                See ``_fit()`` for info.
            green (xr.DataArray): Green reflectance values consumed by
                ``screen_outliers``.
            swir (xr.DataArray): Short wave infrared (SWIR) reflectance
                values consumed by ``screen_outliers``.
            scaling_factor (int): Optional scaling factor applied to
                ``green`` and ``swir``. With ``screen_outliers='CCDC_RIRLS'``
                (the CCDC default) the screening expects reflectances in the
                [0,1] range; EO data are often stored as scaled integers.
                E.g. for values in the [0,10000] range, use ``10000``.
            **kwargs: passed through to ``_fit``

        Examples:
            >>> from nrt.monitor.ccdc import CCDC
            >>> from nrt import data

            >>> # Load and prepare test data
            >>> mask = (data.romania_forest_cover_percentage() > 30).astype('int')
            >>> s2_cube = data.romania_20m()

            >>> s2_cube['ndvi'] = (s2_cube.B8A - s2_cube.B04) / (s2_cube.B8A + s2_cube.B04)
            >>> s2_cube = s2_cube.where(s2_cube.SCL.isin([4,5,7]))
            >>> cube_history = s2_cube.sel(time=slice('2015-01-01', '2018-12-31'))

            >>> # Instantiate monitoring class and fit the model, including outliers screening
            >>> ccdcMonitor = CCDC(trend=True, mask=mask)
            >>> ccdcMonitor.fit(dataarray=cube_history.ndvi,
            ...                 green=cube_history.B03,
            ...                 swir=cube_history.B11,
            ...                 scaling_factor=10000)
        """
        self.set_xy(dataarray)
        design = self.build_design_matrix(dataarray, trend=self.trend,
                                          harmonic_order=self.harmonic_order)
        self.beta, residuals = self._fit(design, dataarray,
                                         method=method,
                                         screen_outliers=screen_outliers,
                                         green=green, swir=swir,
                                         scaling_factor=scaling_factor,
                                         **kwargs)
        self.rmse = np.sqrt(np.nanmean(residuals ** 2, axis=0))

    def _update_process(self, residuals, is_valid):
        # TODO: the multivariate case would require the mean over all bands
        # to exceed the sensitivity
        # rmse may contain zeros; silence the resulting divide warnings
        with np.errstate(divide='ignore'):
            is_outlier = np.abs(residuals) / self.rmse > self.sensitivity
        if self.process is None:
            self.process = np.zeros_like(residuals, dtype=np.uint8)
        # Counter of consecutive outliers; any inlier resets it to zero
        updated = self.process * is_outlier + is_outlier
        self.process = np.where(is_valid, updated, self.process)
150 |
--------------------------------------------------------------------------------
/nrt/monitor/cusum.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import numpy as np
17 | import xarray as xr
18 |
19 | from nrt.monitor import BaseNrt
20 | from nrt.utils_efp import _cusum_ols_test_crit
21 |
22 |
class CuSum(BaseNrt):
    """Monitoring using cumulative sums (CUSUM) of residuals

    Follows the method as implemented in the R package bFast.

    Attributes:
        mask (numpy.ndarray): 2D array flagging pixels to monitor (1) or
            skip (0). May be updated after the history stability check and
            after confirmed breaks during monitoring. Values:
            ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history',
            3: 'Confirmed break - no longer monitored'}``
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        x (numpy.ndarray): Array of x coordinates
        y (numpy.ndarray): Array of y coordinates
        sensitivity (float): Monitoring sensitivity; lower numbers correspond
            to lower sensitivity. Equivalent to the significance level
            'alpha' used to compute the boundary
        boundary (numpy.ndarray): Process boundary for each time series,
            derived from alpha and the time series length
        sigma (numpy.ndarray): Standard deviation of the normalized residuals
            in the history period
        histsize (numpy.ndarray): Number of non-nan observations in the
            history period
        n (numpy.ndarray): Total number of non-nan observations in the
            time-series
        critval (float): Critical test value for the chosen sensitivity
        detection_date (numpy.ndarray): 2D array with disturbance detection
            dates in days since 1970-01-01

    Args:
        mask (numpy.ndarray): 2D array with pixels to monitor marked ``1``
            and pixels to exclude marked ``0`` (typically a stable forest
            mask for forest disturbance monitoring). When omitted, every
            pixel is considered and a mask is created by the ``fit()`` call
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        sensitivity (float): Monitoring sensitivity; lower numbers correspond
            to lower sensitivity. Equivalent to the significance level
            'alpha' used to compute the boundary
        **kwargs: Used to set internal attributes when initializing with
            ``.from_netcdf()``
    """
    def __init__(self, trend=True, harmonic_order=2, sensitivity=0.05,
                 mask=None, **kwargs):
        super().__init__(mask=mask, trend=trend,
                         harmonic_order=harmonic_order, **kwargs)
        self.sensitivity = sensitivity
        # Critical value of the OLS-CUSUM test for the chosen alpha
        self.critval = _cusum_ols_test_crit(sensitivity)
        # Restored from serialized state when built via from_netcdf()
        self.sigma = kwargs.get('sigma')
        self.histsize = kwargs.get('histsize')
        self.n = kwargs.get('n')
        self.monitoring_strategy = 'CUSUM'

    def fit(self, dataarray, method='ROC', alpha=0.05, **kwargs):
        """Fit the stable history model

        When fitting with method ``'ROC'``, the ``alpha`` argument has to
        be passed.

        Args:
            dataarray (xr.DataArray): xarray Dataarray with the historic data
                to be fitted
            method (string): Regression to use. See ``_fit()`` for info.
            alpha (float): Significance level for the ``'ROC'`` stable fit.
            **kwargs: passed through to ``_fit``
        """
        self.set_xy(dataarray)
        design = self.build_design_matrix(dataarray, trend=self.trend,
                                          harmonic_order=self.harmonic_order)
        self.beta, residuals = self._fit(design, dataarray,
                                         method=method, alpha=alpha,
                                         **kwargs)

        # histsize is needed to normalize residuals; n drives the boundary
        valid_count = np.sum(~np.isnan(residuals), axis=0)
        self.histsize = valid_count.astype(np.uint16)
        self.n = self.histsize
        self.boundary = np.full_like(self.histsize, np.nan, dtype=np.float32)
        self.sigma = np.nanstd(residuals, axis=0, ddof=design.shape[1])
        # Initialize the process as the normalized cumulative residual sum
        # over the history period (sigma may be 0/NaN; silence warnings)
        with np.errstate(divide='ignore', invalid='ignore'):
            scaled = residuals / (self.sigma * np.sqrt(self.histsize))
        self.process = np.nancumsum(scaled, axis=0)[-1]

    def _update_process(self, residuals, is_valid):
        with np.errstate(divide='ignore', invalid='ignore'):
            # Grow the observation counter and recompute the boundary
            self.n = self.n + is_valid
            rel = self.n / self.histsize
            new_boundary = np.sqrt(rel * (rel - 1)
                                   * (self.critval ** 2
                                      + np.log(rel / (rel - 1))))
            self.boundary = np.where(is_valid, new_boundary, self.boundary)
            # Normalize the incoming residuals as during fitting
            scaled = residuals / (self.sigma * np.sqrt(self.histsize))
        # Accumulate the process only where the new observation is valid
        self.process = np.where(is_valid,
                                self.process + scaled,
                                self.process)
132 |
--------------------------------------------------------------------------------
/nrt/monitor/ewma.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import numpy as np
17 |
18 | from nrt.monitor import BaseNrt
19 |
20 |
class EWMA(BaseNrt):
    """Monitoring using an EWMA control chart

    Implements the method described in Brooks et al. 2014.

    Args:
        mask (numpy.ndarray): 2D array with pixels to monitor marked ``1``
            and pixels to exclude marked ``0`` (typically a stable forest
            mask for forest disturbance monitoring). When omitted, every
            pixel is considered and a mask is created by the ``fit()`` call
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        lambda_ (float): Weight of the previous observation in the monitoring
            process (memory). Valid range is [0,1]; 1 corresponds to no
            memory and 0 to full memory
        sensitivity (float): Sensitivity parameter used when computing the
            monitoring boundaries. Lower values imply more sensitive
            monitoring
        threshold_outlier (float): Residuals larger than
            threshold_outlier*sigma (extreme outliers) are screened out
            during monitoring and do not contribute to updating the EWMA
            process value
        **kwargs: Used to set internal attributes when initializing with
            ``.from_netcdf()``
    """
    def __init__(self, trend=True, harmonic_order=2, sensitivity=2, mask=None,
                 lambda_=0.3, threshold_outlier=10, **kwargs):
        super().__init__(mask=mask, trend=trend,
                         harmonic_order=harmonic_order, **kwargs)
        self.lambda_ = lambda_
        self.sensitivity = sensitivity
        self.threshold = threshold_outlier
        # Restored from serialized state when built via from_netcdf()
        self.sigma = kwargs.get('sigma')
        self.monitoring_strategy = 'EWMA'

    def fit(self, dataarray, method='OLS',
            screen_outliers='Shewhart', L=5, **kwargs):
        """Fit the stable history model

        The preferred fitting method for this monitoring type is ``'OLS'``
        combined with ``'Shewhart'`` outlier screening, which requires the
        control limit parameter ``L``. See ``nrt.outliers.shewart`` for
        details.
        """
        self.set_xy(dataarray)
        design = self.build_design_matrix(dataarray, trend=self.trend,
                                          harmonic_order=self.harmonic_order)
        self.beta, residuals = self._fit(design, dataarray=dataarray,
                                         method=method,
                                         screen_outliers=screen_outliers,
                                         L=L, **kwargs)
        # Standard deviation of the fitting residuals
        self.sigma = np.nanstd(residuals, axis=0)
        # EWMA control limits converge quickly, so after the training period
        # they are assumed stable and the simplified asymptotic form is used
        self.boundary = self.sensitivity * self.sigma * np.sqrt(
            self.lambda_ / (2 - self.lambda_))
        # EWMA value at the end of the training period
        self.process = self._init_process(residuals)
        # Flag as unstable whatever already crosses the boundary after fitting
        self.mask[self.process > self.boundary] = 2

    def _detect_extreme_outliers(self, residuals, is_valid):
        # Note the ~(> threshold) form: NaN residuals compare False and are
        # therefore NOT flagged as extreme here
        extreme = np.abs(residuals) > self.threshold * self.sigma
        return np.logical_and(~extreme, is_valid)

    def _update_process(self, residuals, is_valid):
        """Update the EWMA process value with a new acquisition

        Args:
            residuals (numpy.ndarray): 2D array of residuals of a new
                acquisition
            is_valid (np.ndarray): Boolean 2D array indicating where process
                values should be updated

        Raises:
            ValueError: If monitoring has not been initialized yet
        """
        if self.process is None:
            raise ValueError('Process has to be initialized before update')
        # Compute the candidate EWMA everywhere, then keep it only where the
        # new observation is valid
        candidate = self._update_ewma(array=residuals, ewma=self.process,
                                      lambda_=self.lambda_)
        self.process = np.where(is_valid, candidate, self.process)

    @staticmethod
    def _update_ewma(array, ewma, lambda_):
        """One recursive EWMA step; NaN observations keep the previous value"""
        blended = (1 - lambda_) * ewma + lambda_ * array
        return np.where(np.isnan(array), ewma, blended)

    def _init_process(self, array):
        """Initialize the EWMA process value from the fitting residuals

        Args:
            array (np.ndarray): 3D array of residuals, usually from the
                model fitting

        Returns:
            numpy.ndarray: 2D array corresponding to the last step of the
            recursive EWMA updating
        """
        process = np.zeros_like(array[0, :, :])
        for layer in array:
            process = self._update_ewma(array=layer, ewma=process,
                                        lambda_=self.lambda_)
        return process
133 |
134 |
135 |
--------------------------------------------------------------------------------
/nrt/monitor/iqr.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import numpy as np
17 |
18 | from nrt.monitor import BaseNrt
19 | from nrt.stats import nan_percentile_axis0
20 |
21 |
class IQR(BaseNrt):
    """Online monitoring of disturbances based on the interquartile range

    Reference:
        https://stats.stackexchange.com/a/1153

    Attributes:
        mask (numpy.ndarray): 2D array flagging pixels to monitor (1) or
            skip (0). May be updated after the history stability check and
            after confirmed breaks during monitoring. Values:
            ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history',
            3: 'Confirmed break - no longer monitored'}``
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        beta (np.ndarray): 3D array of model coefficients
        x (numpy.ndarray): Array of x coordinates
        y (numpy.ndarray): Array of y coordinates
        sensitivity (float): Monitoring sensitivity; lower values mean higher
            sensitivity. Must not be zero.
        boundary (int): Number of consecutive outlier observations required
            to signal a disturbance
        q25 (numpy.ndarray): 25th percentile of the fitting residuals
        q75 (numpy.ndarray): 75th percentile of the fitting residuals
        detection_date (numpy.ndarray): 2D array with disturbance detection
            dates in days since 1970-01-01

    Args:
        mask (numpy.ndarray): 2D array with pixels to monitor marked ``1``
            and pixels to exclude marked ``0`` (typically a stable forest
            mask for forest disturbance monitoring). When omitted, every
            pixel is considered and a mask is created by the ``fit()`` call
        trend (bool): Whether the stable period fit includes a trend term
        harmonic_order (int): Harmonic order of the time-series regression
        sensitivity (float): Monitoring sensitivity; lower values mean higher
            sensitivity. Must not be zero.
        boundary (int): Number of consecutive outlier observations required
            to signal a disturbance
        **kwargs: Used to set internal attributes when initializing with
            ``.from_netcdf()``
    """
    def __init__(self, trend=True, harmonic_order=3, sensitivity=1.5, mask=None,
                 boundary=3, **kwargs):
        super().__init__(mask=mask, trend=trend,
                         harmonic_order=harmonic_order,
                         boundary=boundary, **kwargs)
        self.sensitivity = sensitivity
        # Restored from serialized state when built via from_netcdf()
        self.q25 = kwargs.get('q25')
        self.q75 = kwargs.get('q75')
        self.monitoring_strategy = 'IQR'

    def fit(self, dataarray, method='OLS', **kwargs):
        """Fit the history model and derive the residual quartiles"""
        self.set_xy(dataarray)
        design = self.build_design_matrix(dataarray, trend=self.trend,
                                          harmonic_order=self.harmonic_order)
        self.beta, residuals = self._fit(design, dataarray=dataarray,
                                         method=method, **kwargs)
        q75, q25 = nan_percentile_axis0(residuals, np.array([75, 25]))
        self.q25 = q25
        self.q75 = q75

    def _update_process(self, residuals, is_valid):
        # Upper and lower outlier thresholds derived from the IQR
        spread = self.q75 - self.q25
        lower = self.q25 - self.sensitivity * spread
        upper = self.q75 + self.sensitivity * spread
        # Flag residuals falling outside the [lower, upper] interval
        is_outlier = (residuals > upper) | (residuals < lower)
        # Counter of consecutive outliers; any inlier resets it to zero
        if self.process is None:
            self.process = np.zeros_like(residuals, dtype=np.uint8)
        self.process = np.where(is_valid,
                                self.process * is_outlier + is_outlier,
                                self.process)
103 |
--------------------------------------------------------------------------------
/nrt/monitor/mosum.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import numpy as np
17 | import xarray as xr
18 |
19 | from nrt.monitor import BaseNrt
20 | from nrt.utils_efp import _mosum_ols_test_crit, _mosum_init_window
21 |
22 |
class MoSum(BaseNrt):
    """Monitoring using moving sums (MOSUM) of residuals

    Implementation following method as implemented in R package bFast.

    Attributes:
        mask (numpy.ndarray): A 2D numpy array containing pixels that should
            be monitored (1) and not (0). The mask may be updated following
            history period stability check, and after a call to monitor
            following a confirmed break. Values are as follow.
            ``{0: 'Not monitored', 1: 'monitored', 2: 'Unstable history',
            3: 'Confirmed break - no longer monitored'}``
        trend (bool): Indicate whether stable period fit is performed with
            trend or not
        harmonic_order (int): The harmonic order of the time-series regression
        x (numpy.ndarray): array of x coordinates
        y (numpy.ndarray): array of y coordinates
        sensitivity (float): sensitivity of the monitoring. Lower numbers
            correspond to lower sensitivity. Equivalent to significance level
            'alpha' with which the boundary is computed
        boundary (numpy.ndarray): process boundary for each time series.
            Calculated from alpha and length of time series.
        sigma (numpy.ndarray): Standard deviation for normalized residuals in
            history period
        histsize (numpy.ndarray): Number of non-nan observations in history
            period
        n (numpy.ndarray): Total number of non-nan observations in time-series
        critval (float): Critical test value corresponding to the chosen
            sensitivity
        h (float): Moving window size relative to length of the history period.
            Can be one of 0.25, 0.5 and 1
        winsize (numpy.ndarray): 2D array with absolute window size. Computed as
            h*histsize
        window (numpy.ndarray): 3D array containing the current values in the
            window
        detection_date (numpy.ndarray): 2D array signalling detection date of
            disturbances in days since 1970-01-01

    Args:
        mask (numpy.ndarray): A 2D numpy array containing pixels that should be
            monitored marked as ``1`` and pixels that should be excluded (marked
            as ``0``). Typically a stable forest mask when doing forest disturbance
            monitoring. If no mask is supplied all pixels are considered and
            a mask is created following the ``fit()`` call
        trend (bool): Indicate whether stable period fit is performed with
            trend or not
        harmonic_order (int): The harmonic order of the time-series regression
        sensitivity (float): sensitivity of the monitoring. Lower numbers
            correspond to lower sensitivity. Equivalent to significance level
            'alpha' with which the boundary is computed
        h (float): Moving window size relative to length of the history period.
            Can be one of 0.25, 0.5 and 1
        **kwargs: Used to set internal attributes when initializing with
            ``.from_netcdf()``
    """

    def __init__(self, trend=True, harmonic_order=2, sensitivity=0.05,
                 mask=None, h=0.25, **kwargs):
        super().__init__(mask=mask,
                         trend=trend,
                         harmonic_order=harmonic_order,
                         **kwargs)
        self.sensitivity = sensitivity
        # Critical value looked up once from the pre-computed table for the
        # chosen significance level and relative window size
        self.critval = _mosum_ols_test_crit(sensitivity, h=h,
                                            period=10, functional='max')
        # The following attributes are None on a fresh instance; they are
        # populated by ``fit()`` or restored from kwargs by ``.from_netcdf()``
        self.sigma = kwargs.get('sigma')
        self.histsize = kwargs.get('histsize')
        self.n = kwargs.get('n')
        self.h = h
        self.winsize = kwargs.get('winsize')
        self.window = kwargs.get('window')
        self.monitoring_strategy = 'MOSUM'

    def get_process(self):
        # The MOSUM process is the sum of the normalized residuals currently
        # held in the moving window (NaNs ignored)
        return np.nansum(self.window, axis=0)

    def set_process(self, x):
        # Intentional no-op: the process is always derived from ``window``,
        # so assignments to ``process`` are silently ignored
        pass

    # ``process`` is read as an aggregate of ``window``; the setter exists
    # only so that assignment does not raise
    process = property(get_process, set_process)

    def fit(self, dataarray, method='ROC', alpha=0.05, **kwargs):
        """Stable history model fitting

        If method ``'ROC'`` is used for fitting, the argument ``alpha`` has
        to be passed.

        Args:
            dataarray (xr.DataArray): xarray Dataarray including the historic
                data to be fitted
            method (string): Regression to use. See ``_fit()`` for info.
            alpha (float): Significance level for ``'ROC'`` stable fit.
            **kwargs: to be passed to ``_fit``
        """
        self.set_xy(dataarray)
        X = self.build_design_matrix(dataarray, trend=self.trend,
                                     harmonic_order=self.harmonic_order)
        self.beta, residuals = self._fit(X, dataarray,
                                         method=method,
                                         alpha=alpha,
                                         **kwargs)

        # histsize is necessary for normalization of residuals,
        # n is necessary for boundary calculation
        self.histsize = np.sum(~np.isnan(residuals), axis=0) \
            .astype(np.uint16)
        # Pixels excluded by the mask get an empty history
        self.histsize[self.mask != 1] = 0
        # Absolute window size per pixel
        # NOTE(review): int16 could overflow if histsize * h > 32767 — in
        # practice history periods are far shorter; confirm if this changes
        self.winsize = np.floor(self.histsize * self.h).astype(np.int16)
        self.n = self.histsize
        # Boundary is only defined once monitoring starts; NaN until then
        self.boundary = np.full_like(self.histsize, np.nan, dtype=np.float32)
        # ddof = number of regressors, so sigma is the residual standard
        # error of the fitted model
        self.sigma = np.nanstd(residuals, axis=0, ddof=X.shape[1])
        # calculate normalized residuals
        # (divide-by-zero / NaN warnings suppressed for empty-history pixels)
        with np.errstate(divide='ignore', invalid='ignore'):
            residuals_ = residuals / (self.sigma * np.sqrt(self.histsize))
        # TODO self.window can be converted to property to allow for safe
        # application of scaling factor with getter and setter
        self.window = _mosum_init_window(residuals_, self.winsize)

    def _update_process(self, residuals, is_valid):
        """Update process
        (Isn't actually updating process directly, but is updating the values
        from which the process gets calculated)"""
        # get valid indices
        valid_idx = np.where(is_valid)

        # get indices which need to be changed and write normalized residuals
        # The modulo indexing makes ``window`` behave like a circular buffer:
        # the oldest value in the window is overwritten first
        with np.errstate(divide='ignore', invalid='ignore'):
            change_idx = np.mod(self.n-self.histsize, self.winsize)[valid_idx]
            residuals_norm = residuals / (self.sigma * np.sqrt(self.histsize))
        self.window[change_idx, valid_idx[0], valid_idx[1]] = residuals_norm[valid_idx]

        # calculate boundary
        self.n = self.n + is_valid
        x = self.n / self.histsize
        # Where x <= e the log term is forced to 1 via ``log_out``, so the
        # boundary floors at critval * sqrt(2)
        log_out = np.ones_like(x)
        self.boundary = np.where(is_valid,
                                 self.critval * np.sqrt(
                                     2 * np.log(x, out=log_out,
                                                where=(x > np.exp(1)))),
                                 self.boundary)
163 |
--------------------------------------------------------------------------------
/nrt/outliers.py:
--------------------------------------------------------------------------------
1 | """Removing outliers
2 |
3 | Functions defined in this module always use a 2D array containing the dependant
4 | variables (y) and return y with outliers set to np.nan.
5 | These functions are meant to be called in ``nrt.BaseNrt._fit()``
6 |
7 | Citations:
8 |
9 | - Brooks, E.B., Wynne, R.H., Thomas, V.A., Blinn, C.E. and Coulston, J.W., 2013.
10 | On-the-fly massively multitemporal change detection using statistical quality
11 | control charts and Landsat data. IEEE Transactions on Geoscience and Remote Sensing,
12 | 52(6), pp.3316-3332.
13 |
14 | - Zhu, Zhe, and Curtis E. Woodcock. 2014. “Continuous Change Detection and
15 | Classification of Land Cover Using All Available Landsat Data.” Remote
16 | Sensing of Environment 144 (March): 152–71.
17 | https://doi.org/10.1016/j.rse.2014.01.011.
18 | """
19 | # Copyright (C) 2022 European Union (Joint Research Centre)
20 | #
21 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
22 | # the European Commission - subsequent versions of the EUPL (the "Licence");
23 | # You may not use this work except in compliance with the Licence.
24 | # You may obtain a copy of the Licence at:
25 | #
26 | # https://joinup.ec.europa.eu/software/page/eupl
27 | #
28 | # Unless required by applicable law or agreed to in writing, software
29 | # distributed under the Licence is distributed on an "AS IS" basis,
30 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 | # See the Licence for the specific language governing permissions and
32 | # limitations under the Licence.
33 |
34 | import numpy as np
35 |
36 | from nrt.fit_methods import rirls, ols
37 | from nrt.log import logger
38 |
39 |
def shewhart(X, y, L=5, **kwargs):
    """Remove outliers using a Shewhart control chart

    As described in Brooks et al. 2014, following an initial OLS fit, outliers are
    identified using a shewhart control chart and removed.

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables
        L (float): control limit used for outlier filtering. Must be a positive
            float. Lower values indicate stricter filtering. Residuals larger
            than L*sigma will get screened out
        **kwargs: not used

    Returns:
        y(np.ndarray): Dependant variables with outliers set to np.nan
    """
    # Initial OLS fit on the full, unscreened series
    _, resid = ols(X, y)
    # Per time-series residual standard deviation (NaNs ignored)
    stdev = np.nanstd(resid, axis=0)
    # Observations outside the +/- L*sigma control limits are treated as
    # outliers (clouds etc.) and masked in place
    is_outlier = np.abs(resid) > L * stdev
    y[is_outlier] = np.nan
    return y
63 |
64 |
def ccdc_rirls(X, y, green, swir, scaling_factor=1, **kwargs):
    """Screen for missed clouds and other outliers using green and SWIR band

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        green (np.ndarray): 2D array containing spectral values
        swir (np.ndarray): 2D array containing spectral values (~1.55-1.75um)
        scaling_factor (int): Scaling factor to bring green and swir values
            to reflectance values between 0 and 1
        **kwargs: passed through to ``rirls()``

    Returns:
        np.ndarray: y with outliers set to np.nan
    """
    # 1. estimate time series model using rirls for green and swir
    # TODO could be sped up, since masking is the same for green and swir
    _, g_residuals = rirls(X, green, **kwargs)
    _, s_residuals = rirls(X, swir, **kwargs)
    # Update mask using thresholds: too bright in green (unscreened cloud)
    # or too dark in SWIR (cloud shadow)
    is_outlier = np.logical_or(g_residuals > 0.04*scaling_factor,
                               s_residuals < -0.04*scaling_factor)

    # NOTE(review): raises ZeroDivisionError if green is all-NaN — presumably
    # never the case after the rirls fit above; confirm upstream guarantees
    removed = np.count_nonzero(is_outlier) / np.count_nonzero(~np.isnan(green))
    if removed > 0.5:
        # ``Logger.warn`` is a deprecated alias; use ``warning``
        logger.warning('More than 50% of pixels have been removed as outliers. '
                       'Check if scaling_factor has been set correctly.')
    logger.debug('%.2f%% of (non nan) pixels removed.',
                 removed * 100)

    y[is_outlier] = np.nan
    return y
96 |
--------------------------------------------------------------------------------
/nrt/stats.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import numba
17 | import numpy as np
18 |
19 |
@numba.jit(nopython=True, cache=True, parallel=True)
def nanlstsq(X, y):
    """Return the least-squares solution to a linear matrix equation

    Analog to ``numpy.linalg.lstsq`` for dependant variable containing ``Nan``

    Note:
        For best performances of the multithreaded implementation, it is
        recommended to limit the number of threads used by MKL or OpenBLAS to 1.
        This avoids over-subscription, and improves performances.
        By default the function will use all cores available; the number of cores
        used can be controled using the ``numba.set_num_threads`` function or
        by modifying the ``NUMBA_NUM_THREADS`` environment variable

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables

    Examples:
        >>> import os
        >>> # Adjust linear algebra configuration (only one should be required
        >>> # depending on how numpy was installed/compiled)
        >>> os.environ['OPENBLAS_NUM_THREADS'] = '1'
        >>> os.environ['MKL_NUM_THREADS'] = '1'
        >>> import numpy as np
        >>> from sklearn.datasets import make_regression
        >>> from nrt.stats import nanlstsq
        >>> # Generate random data
        >>> n_targets = 1000
        >>> n_features = 2
        >>> X, y = make_regression(n_samples=200, n_features=n_features,
        ...                        n_targets=n_targets)
        >>> # Add random nan to y array
        >>> y.ravel()[np.random.choice(y.size, 5*n_targets, replace=False)] = np.nan
        >>> # Run the regression
        >>> beta = nanlstsq(X, y)
        >>> assert beta.shape == (n_features, n_targets)

    Returns:
        np.ndarray: Least-squares solution, ignoring ``Nan``
    """
    beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
    # Each column of y is an independent time-series; columns are solved in
    # parallel with prange
    for idx in numba.prange(y.shape[1]):
        # subset y and X
        isna = np.isnan(y[:,idx])
        X_sub = X[~isna]
        y_sub = y[~isna,idx]
        # Solve the normal equations (X'X) beta = X'y for this column
        # NOTE(review): np.linalg.solve fails if X_sub is rank deficient
        # (e.g. too few non-NaN observations) — confirm callers guard this
        beta[:, idx] = np.linalg.solve(np.dot(X_sub.T, X_sub), np.dot(X_sub.T, y_sub))
    return beta
69 |
70 |
@numba.jit(nopython=True, cache=True)
def mad(resid, c=0.6745):
    """Returns Median-Absolute-Deviation (MAD) for residuals

    Args:
        resid (np.ndarray): residuals
        c (float): scale factor to get to ~standard normal (default: 0.6745)
            (i.e. 1 / 0.75iCDF ~= 1.4826 = 1 / 0.6745)
    Returns:
        float: MAD 'robust' variance estimate

    Reference:
        http://en.wikipedia.org/wiki/Median_absolute_deviation
    """
    # Median of absolute deviations from the median, rescaled by c so the
    # estimate is comparable to a standard deviation under normality
    center = np.nanmedian(resid)
    abs_deviation = np.fabs(resid - center)
    return np.nanmedian(abs_deviation) / c
87 |
88 | # Weight scaling methods
@numba.jit(nopython=True, cache=True)
def bisquare(resid, c=4.685):
    """Weight residuals using bisquare weight function

    Args:
        resid (np.ndarray): residuals to be weighted
        c (float): tuning constant for Tukey's Biweight (default: 4.685)

    Returns:
        weight (ndarray): weights for residuals

    Reference:
        http://statsmodels.sourceforge.net/stable/generated/statsmodels.robust.norms.TukeyBiweight.html
    """
    # Smooth biweight inside |resid| < c, exactly zero outside
    inside = np.abs(resid) < c
    scaled = resid / c
    return inside * (1 - scaled ** 2) ** 2
105 |
106 |
@numba.jit(nopython=True, cache=True)
def erfcc(x):
    """Complementary error function.

    Polynomial approximation evaluated in Horner form on the positive
    half-axis; symmetry ``erfc(-x) = 2 - erfc(x)`` handles negative input.
    Constants come from the implementation referenced in ``ncdf``.
    """
    z = np.abs(x)
    t = 1. / (1. + 0.5*z)
    # Nested (Horner) evaluation of the approximation polynomial in t
    r = t * np.exp(-z*z-1.26551223+t*(1.00002368+t*(.37409196+
                   t*(.09678418+t*(-.18628806+t*(.27886807+
                   t*(-1.13520398+t*(1.48851587+t*(-.82215223+
                   t*.17087277)))))))))
    if x >= 0.:
        return r
    else:
        # Reflection for negative arguments
        return 2. - r
120 |
121 |
@numba.jit(nopython=True, cache=True)
def ncdf(x):
    """Normal cumulative distribution function

    Computed from the complementary error function:
    ``Phi(x) = 1 - 0.5 * erfc(x / sqrt(2))``.

    Source: Stackoverflow Unknown,
    https://stackoverflow.com/a/809402/12819237"""
    return 1. - 0.5*erfcc(x/(2**0.5))
128 |
129 |
@numba.jit(nopython=True, cache=True)
def nan_percentile_axis0(arr, percentiles):
    """Faster implementation of np.nanpercentile

    This implementation always takes the percentile along axis 0.
    Uses numba to speed up the calculation by more than 7x.

    Function is equivalent to ``np.nanpercentile(arr, percentiles, axis=0)``

    Args:
        arr (np.ndarray): 2D array to calculate percentiles for
        percentiles (np.ndarray): 1D array of percentiles to calculate

    Returns:
        np.ndarray: Array with first dimension corresponding to values passed
            in percentiles

    """
    # Remember the input shape, flatten all trailing axes into one so the
    # percentile loop runs over columns
    shape = arr.shape
    arr = arr.reshape((arr.shape[0], -1))
    out = np.empty((len(percentiles), arr.shape[1]))
    for i in range(arr.shape[1]):
        out[:,i] = np.nanpercentile(arr[:,i], percentiles)
    # Restore the trailing axes; first axis is now the percentiles
    shape = (out.shape[0], *shape[1:])
    return out.reshape(shape)
155 |
--------------------------------------------------------------------------------
/nrt/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
import datetime
import functools
import inspect
from math import pi

import numpy as np
import pandas as pd
22 |
23 |
def build_regressors(dates, trend=True, harmonic_order=3):
    """Build the design matrix (X) from a list or an array of datetimes

    Trend assumes temporal resolution no finer than daily
    Harmonics assume annual cycles

    Args:
        dates (pandas.DatetimeIndex): The dates to use for building regressors
        trend (bool): Whether to add a trend component
        harmonic_order (int): The order of the harmonic component

    Returns:
        numpy.ndarray: A design matrix
    """
    dates = dates.sort_values()
    shape = (len(dates), 1 + trend + 2*harmonic_order)
    X = np.zeros(shape, dtype=float)
    # Intercept column
    X[:,0] = 1
    if trend:
        # Days elapsed since the Unix epoch. Note: ``pd.Timestamp(1970)``
        # (previously used) is interpreted as 1970 *nanoseconds* after the
        # epoch, which made the day count -1 for 1970-01-01 itself; the
        # explicit date string gives the intended origin
        origin = pd.Timestamp('1970-01-01')
        X[:,1] = (dates - origin).days
    if harmonic_order:
        indices = range(1 + trend, 1 + trend + 2 * harmonic_order)
        # Array of decimal dates (fractional years) so that one harmonic
        # cycle spans exactly one year
        ddates = datetimeIndex_to_decimal_dates(dates)
        # Allocate array
        X_harmon = np.empty((len(dates), harmonic_order))
        for i in range(harmonic_order):
            X_harmon[:,i] = 2 * np.pi * ddates * (i + 1)
        X_harmon = np.concatenate([np.cos(X_harmon), np.sin(X_harmon)], 1)
        X[:, indices] = X_harmon
    return X
57 |
58 |
def dt_to_decimal(dt):
    """Helper to build a decimal date from a datetime object
    """
    # Fraction of the year elapsed, measured against a Jan 1 .. Dec 31 span
    year_start = datetime.datetime(dt.year, 1, 1)
    year_end = datetime.datetime(dt.year, 12, 31)
    fraction = (dt - year_start) / (year_end - year_start)
    return dt.year + fraction
66 |
67 |
def datetimeIndex_to_decimal_dates(dates):
    """Convert a pandas datetime index to decimal dates"""
    yrs = dates.year
    # Per-element year boundaries (Jan 1 and Dec 31 of each date's year)
    year_start = pd.to_datetime({'year': yrs, 'day': 1, 'month': 1})
    year_end = pd.to_datetime({'year': yrs, 'day': 31, 'month': 12})
    # Year plus elapsed fraction of that year
    decimal = yrs + (dates - year_start) / (year_end - year_start)
    return np.array(decimal, dtype=float)
75 |
76 |
def numba_kwargs(func):
    """
    Decorator which enables passing of kwargs to jitted functions by selecting
    only those kwargs that are available in the decorated functions signature

    Args:
        func (callable): Function (typically numba-jitted) to wrap

    Returns:
        callable: Wrapper forwarding only the kwargs ``func`` accepts
    """
    # Inspect the signature once at decoration time instead of on every call
    # (previously it was re-computed for every kwarg of every call)
    params = inspect.signature(func).parameters

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Only pass those kwargs that func takes
        # as positional or keyword arguments
        select_kwargs = {k: v for k, v in kwargs.items() if k in params}
        return func(*args, **select_kwargs)
    return wrapper
92 |
--------------------------------------------------------------------------------
/nrt/utils_efp.py:
--------------------------------------------------------------------------------
1 | """CUSUM utility functions
2 |
3 | Functions defined in this module implement functionality necessary for
4 | CUSUM and MOSUM monitoring as implemented in the R packages strucchange and
5 | bFast.
6 |
7 | Portions of this module are derived from Chris Holden's pybreakpoints package.
8 | See the copyright statement below.
9 | """
10 | # Copyright (C) 2022 European Union (Joint Research Centre)
11 | #
12 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
13 | # the European Commission - subsequent versions of the EUPL (the "Licence");
14 | # You may not use this work except in compliance with the Licence.
15 | # You may obtain a copy of the Licence at:
16 | #
17 | # https://joinup.ec.europa.eu/software/page/eupl
18 | #
19 | # Unless required by applicable law or agreed to in writing, software
20 | # distributed under the Licence is distributed on an "AS IS" basis,
21 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | # See the Licence for the specific language governing permissions and
23 | # limitations under the Licence.
24 |
25 | ###############################################################################
26 | # BSD 3-Clause License
27 | #
28 | # Copyright (c) 2018, Chris Holden
29 | # All rights reserved.
30 | #
31 | # Redistribution and use in source and binary forms, with or without
32 | # modification, are permitted provided that the following conditions are met:
33 | #
34 | # * Redistributions of source code must retain the above copyright notice, this
35 | # list of conditions and the following disclaimer.
36 | #
37 | # * Redistributions in binary form must reproduce the above copyright notice,
38 | # this list of conditions and the following disclaimer in the documentation
39 | # and/or other materials provided with the distribution.
40 | #
41 | # * Neither the name of the copyright holder nor the names of its
42 | # contributors may be used to endorse or promote products derived from this
43 | # software without specific prior written permission.
44 | #
45 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
46 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
47 | # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
49 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
50 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
52 | # OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
53 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
54 | # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
55 | # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 | ###############################################################################
57 |
58 | import numpy as np
59 | import numba
60 | from scipy import optimize
61 | from scipy.stats import norm
62 |
63 | from nrt.stats import ncdf
64 | from nrt import data
65 |
66 |
@numba.jit(nopython=True, cache=True)
def history_roc(X, y, alpha=0.05, crit=0.9478982340418134):
    """Reverse Ordered Rec-CUSUM check for stable periods

    Checks for stable periods by calculating recursive OLS-Residuals
    (see ``_recresid()``) on the reversed X and y matrices. If the cumulative
    sum of the residuals crosses a boundary, the index of y where this
    structural change occured is returned.

    Args:
        X ((M, ) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        alpha (float): Significance level for the boundary
            (probability of type I error)
        crit (float): Critical value corresponding to the chosen alpha. Can be
            calculated with ``_cusum_rec_test_crit``.
            Default is the value for alpha=0.05

    Returns:
        int: Index of structural change in y.
            ``0`` - y completely stable
            ``>0`` - y stable after this index
    """
    # Index, where instability in time-series is detected
    # 0: time-series completely stable
    # >0: stable after this index
    # Rec-CUSUM process computed on the time-reversed series
    process = _cusum_rec_efp(X[::-1], y[::-1])
    stat = _cusum_rec_sctest(process)
    stat_pvalue = _brownian_motion_pvalue(stat, 1)
    if stat_pvalue < alpha:
        # Significant instability: locate the first boundary crossing.
        # Subtract from len(process) because the process runs backwards in time
        boundary = _cusum_rec_boundary(process, crit)
        return len(process) - np.where(np.abs(process) > boundary)[0].min()
    else:
        return 0
101 |
102 |
103 | # REC-CUSUM
# REC-CUSUM
@numba.jit(nopython=True, cache=True)
def _brownian_motion_pvalue(x, k):
    """ Return pvalue for some given test statistic

    Approximates the boundary-crossing probability of a Brownian motion for
    the Rec-CUSUM test statistic ``x``.
    """
    # TODO: Make generic, add "type='Brownian Motion'"?
    if x < 0.3:
        # Linear approximation for small statistics
        p = 1 - 0.1464 * x
    else:
        # Series approximation in terms of the normal CDF
        p = 2 * (1 -
                 ncdf(3 * x) +
                 np.exp(-4 * x ** 2) * (ncdf(x) + ncdf(5 * x) - 1) -
                 np.exp(-16 * x ** 2) * (1 - ncdf(x)))
    # Combine over k processes (identity for k=1, the only use in this module)
    return 1 - (1 - p) ** k
116 |
117 |
@numba.jit(nopython=True, cache=True)
def _cusum_rec_boundary(x, crit=0.9478982340418134):
    """ Equivalent to ``strucchange::boundary.efp``` for Rec-CUSUM

    Args:
        x (np.ndarray): Process values
        crit (float): Critical value as computed by _cusum_rec_test_crit.
            Default is the value for alpha=0.05
    """
    n = x.size
    bound = crit
    # Linear boundary: bound * (1 + 2*t) with t in [0, 1] along the process
    boundary = (bound + (2 * bound * np.arange(0, n) / (n - 1)))

    return boundary
132 |
133 |
def _cusum_rec_test_crit(alpha=0.05, **kwargs):
    """ Return critical test statistic value for some alpha """
    # Root of the p-value curve: the statistic whose Brownian-motion p-value
    # equals alpha, searched within [0, 20]
    def objective(stat):
        return _brownian_motion_pvalue(stat, 1) - alpha
    return optimize.brentq(objective, 0, 20)
137 |
138 |
139 | def _cusum_ols_test_crit(alpha):
140 | """ Return critical test statistic value for some alpha """
141 | return optimize.golden(lambda _x: np.abs(
142 | 2 * (norm.cdf(_x) - _x * norm.pdf(_x)) + alpha - 2), brack=(0, 10))
143 |
144 |
def _mosum_ols_test_crit(alpha, h=0.5, period=10, functional='max'):
    """Returns critical test value

    Args:
        alpha (float): Significance value (0-1)
        h (float): Relative window size. One of (0.25, 0.5, 1)
        period (int): Maximum monitoring period (2, 4, 6, 8, 10)
        functional (str): Functional type (either 'max' or 'range')

    Returns:
        (float) Critical test value for parameters
    """
    # The pre-computed table only covers this range of significance levels
    if not 0.001 <= alpha <= 0.05:
        raise ValueError("'alpha' needs to be between [0.001,0.05]")
    table = data.mre_crit_table()
    try:
        values = table[str(h)][str(period)][functional]
    except KeyError:
        raise ValueError("'h' needs to be in (0.25, 0.5, 1) and "
                         "'period' in (2, 4, 6, 8, 10).")
    levels = table.get('sig_levels')
    # Linear interpolation between the tabulated significance levels
    return np.interp(1 - alpha, levels, values)
167 |
168 |
@numba.jit(nopython=True, cache=True)
def _mosum_init_window(residuals, winsize):
    """Initializes MOSUM moving window

    Args:
        residuals (np.ndarray): 3D array containing normalized residuals
        winsize (np.ndarray): 2D array containing the absolute window size for
            each time-series in residuals
    Returns:
        (np.ndarray) Array with length of winsize.max(). Contains as many of the
        last non nan values in the time series as specified by winsize. Padded
        with 0s where winsize is smaller than winsize.max().
    """
    # Output depth is the largest requested window; smaller windows are
    # zero-padded at the tail
    x = winsize.max()
    res = np.zeros((x, residuals.shape[1], residuals.shape[2]))
    # Only pixels with a non-empty window need filling
    for i, j in zip(*np.where(winsize > 0)):
        residuals_ = residuals[:, i, j]
        winsize_ = winsize[i, j]
        # Drop NaNs so the window only holds valid observations
        residuals_ = residuals_[~np.isnan(residuals_)]
        # Keep the most recent ``winsize_`` valid values
        res[:winsize_, i, j] = residuals_[-winsize_:]
    return res
190 |
191 |
@numba.jit(nopython=True, cache=True)
def _cusum_rec_efp(X, y):
    """ Equivalent to ``strucchange::efp`` for Rec-CUSUM """
    # Run "efp"
    n, k = X.shape
    # Minimum number of observations for the recursive fit: one more than
    # the number of regressors
    k = k+1
    # Recursive residuals, discarding the first k values
    w = _recresid(X, y, k)[k:]
    sigma = np.std(w)
    # Prepend 0 so the cumulative process starts at zero
    w = np.concatenate((np.array([0]), w))
    # Scaled cumulative sum of the standardized recursive residuals
    return np.cumsum(w) / (sigma * (n - k) ** 0.5)
202 |
203 |
@numba.jit(nopython=True, cache=True)
def _cusum_rec_sctest(x):
    """ Equivalent to ``strucchange::sctest`` for Rec-CUSUM """
    # Drop the leading zero of the process
    x = x[1:]
    # Position-dependent scaling matching the linear boundary slope (1 + 2t)
    j = np.linspace(0, 1, x.size + 1)[1:]
    x = x * 1 / (1 + 2 * j)
    # Test statistic: maximum absolute value of the scaled process
    stat = np.abs(x).max()

    return stat
213 |
214 |
@numba.jit(nopython=True, cache=True)
def _recresid(X, y, span):
    """ Return standardized recursive residuals for y ~ X

    Args:
        X ((M, N) np.ndarray): Matrix of independant variables
        y ((M, K) np.ndarray): Matrix of dependant variables
        span (int): Minimum number of observations for initial regression.

    Returns:
        (np.ndarray) containing recursive residuals standardized by
        prediction error variance

    Notes:
        For a matrix :math:`X_t` of :math:`T` total observations of :math:`n`
        variables, the :math:`t` th recursive residual is the forecast prediction
        error for :math:`y_t` using a regression fit on the first :math:`t - 1`
        observations. Recursive residuals are scaled and standardized so they are
        :math:`N(0, 1)` distributed.
        Using notation from Brown, Durbin, and Evans (1975) and Judge, et al
        (1985):
        .. math::
            w_r =
            \\frac{y_r - \\boldsymbol{x}_r^{\prime}\\boldsymbol{b}_{r-1}}
            {\sqrt{(1 + \\boldsymbol{x}_r^{\prime}
            S_{r-1}\\boldsymbol{x}_r)}}
            =
            \\frac
            {y_r - \\boldsymbol{x}_r^{\prime}\\boldsymbol{b}_r}
            {\sqrt{1 - \\boldsymbol{x}_r^{\prime}S_r\\boldsymbol{x}_r}}
            r = k + 1, \ldots, T,
        where :math:`S_{r}` is the residual sum of squares after
        fitting the model on :math:`r` observations.
        A quick way of calculating :math:`\\boldsymbol{b}_r` and
        :math:`S_r` is using an update formula (Equations 4 and 5 in
        Brown, Durbin, and Evans; Equation 5.5.14 and 5.5.15 in Judge et al):
        .. math::
            \\boldsymbol{b}_r
            =
            b_{r-1} +
            \\frac
            {S_{r-1}\\boldsymbol{x}_j
            (y_r - \\boldsymbol{x}_r^{\prime}\\boldsymbol{b}_{r-1})}
            {1 + \\boldsymbol{x}_r^{\prime}S_{r-1}x_r}
        .. math::
            S_r =
            S_{j-1} -
            \\frac{S_{j-1}\\boldsymbol{x}_r\\boldsymbol{x}_r^{\prime}S_{j-1}}
            {1 + \\boldsymbol{x}_r^{\prime}S_{j-1}\\boldsymbol{x}_r}

    See Also:
        statsmodels.stats.diagnostic.recursive_olsresiduals
    """
    nobs, nvars = X.shape

    # Residuals and their prediction error variances, NaN until computed
    recresid_ = np.nan * np.zeros((nobs))
    recvar = np.nan * np.zeros((nobs))

    X0 = X[:span, :]
    y0 = y[:span]

    # Initial fit
    XTX_j = np.linalg.inv(np.dot(X0.T, X0))
    XTY = np.dot(X0.T, y0)
    beta = np.dot(XTX_j, XTY)

    # First residual and prediction error variance from the initial fit
    yhat_j = np.dot(X[span - 1, :], beta)
    recresid_[span - 1] = y[span - 1] - yhat_j
    recvar[span - 1] = 1 + np.dot(X[span - 1, :],
                                  np.dot(XTX_j, X[span - 1, :]))
    # Recursively extend the fit one observation at a time
    for j in range(span, nobs):
        x_j = X[j:j+1, :]
        y_j = y[j]

        # Prediction with previous beta
        resid_j = y_j - np.dot(x_j, beta)

        # Update
        XTXx_j = np.dot(XTX_j, x_j.T)
        f_t = 1 + np.dot(x_j, XTXx_j)
        XTX_j = XTX_j - np.dot(XTXx_j, XTXx_j.T) / f_t  # eqn 5.5.15

        beta = beta + (XTXx_j * resid_j / f_t).ravel()  # eqn 5.5.14
        recresid_[j] = resid_j.item()
        recvar[j] = f_t.item()

    # Standardize by the prediction error standard deviation
    return recresid_ / np.sqrt(recvar)
302 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64",
3 | "wheel",
4 | "setuptools-scm>=8"]
5 | build-backend = "setuptools.build_meta"
6 |
7 | [project]
8 | name = "nrt"
9 | description = "Online monitoring with xarray"
10 | readme = "README.rst"
11 | keywords = ["sentinel2", "xarray", "datacube", "monitoring", "change"]
12 | authors = [
13 | { name = "Loic Dutrieux", email = "loic.dutrieux@ec.europa.eu" },
14 | { name = "Jonas Viehweger" },
15 | { name = "Chris Holden" }
16 | ]
17 | dynamic = ["version"]
18 | license = {text = "EUPL-1.2"}
19 | classifiers = [
20 | "Programming Language :: Python :: 3",
21 | "Programming Language :: Python :: 3.9",
22 | "Programming Language :: Python :: 3.10",
23 | "Programming Language :: Python :: 3.11",
24 | "Programming Language :: Python :: 3.12",
25 | "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)"
26 | ]
27 | requires-python = ">=3.9"
28 | dependencies = [
29 | "numpy",
30 | "scipy",
31 | "xarray",
32 | "rasterio",
33 | "netCDF4",
34 | "numba!=0.59.*",
35 | "pandas",
36 | "affine",
37 | "nrt-data"
38 | ]
39 |
40 | [project.urls]
41 | "Homepage" = "https://github.com/ec-jrc/nrt.git"
42 |
43 | [project.optional-dependencies]
44 | tests = ["pytest"]
45 | docs = [
46 | "sphinx==7.4.7",
47 | "dask",
48 | "sphinx_rtd_theme==2.0.0",
49 | "matplotlib==3.9.1",
50 | "sphinx-gallery==0.17.0"
51 | ]
52 |
53 | [tool.setuptools.packages.find]
54 | where = ["."]
55 |
56 | [tool.setuptools_scm]
57 | fallback_version = "9999"
58 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | # pytest.ini
2 | [pytest]
3 | markers =
    ewma: All tests relating to EWMA monitoring
    iqr: All tests relating to IQR monitoring
    cusum: All tests relating to CUSUM monitoring
    mosum: All tests relating to MOSUM monitoring
    ccdc: All tests relating to CCDC monitoring
9 | testpaths =
10 | tests
11 |
--------------------------------------------------------------------------------
/tests/integration_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ec-jrc/nrt/68c848f9a4fda67ed67d28621e71ae25e4379b49/tests/integration_tests/__init__.py
--------------------------------------------------------------------------------
/tests/integration_tests/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | import pkg_resources
17 | import datetime
18 |
19 | import pytest
20 | import xarray as xr
21 | import rasterio
22 | import numpy as np
23 | import pandas as pd
24 |
25 | from nrt import data
26 |
27 |
@pytest.fixture
def history_dataarray():
    """Historical dataset over a forest site in Romania.

    Covers 1 square km at 20 m resolution. An NDVI variable is derived
    from bands B8A and B04, cloudy observations are masked to np.nan via
    the SCL layer (keeping classes 4, 5 and 7), and the result is
    restricted to the 2015-2016 history period.
    """
    cube = data.romania_20m()
    cube['ndvi'] = (cube.B8A - cube.B04) / (cube.B8A + cube.B04)
    clear = cube.SCL.isin([4, 5, 7])
    cube = cube.where(clear)
    start = datetime.datetime(2015, 1, 1)
    end = datetime.datetime(2016, 12, 31)
    return cube.sel(time=slice(start, end))
42 |
43 |
@pytest.fixture
def ndvi_history(history_dataarray):
    """NDVI history dataarray extracted from the Romania dataset."""
    return history_dataarray['ndvi']
49 |
50 |
@pytest.fixture
def green_swir_history(history_dataarray):
    """Green (B03) and SWIR (B11) band dataarrays of Romania

    Returned as a (green, swir) tuple; consumed by the CCDC fit in the
    integration tests.
    """
    return history_dataarray.B03, history_dataarray.B11
56 |
57 |
@pytest.fixture
def ndvi_monitoring_numpy():
    """Monitoring-period NDVI as a (values, dates) pair.

    Same NDVI derivation and cloud masking as the history fixture;
    returns the raw numpy array together with a list of
    datetime.datetime acquisition dates from 2017 to mid-January 2021.
    """
    cube = data.romania_20m()
    cube['ndvi'] = (cube.B8A - cube.B04) / (cube.B8A + cube.B04)
    cube = cube.where(cube.SCL.isin([4, 5, 7]))
    period = slice(datetime.datetime(2017, 1, 1),
                   datetime.datetime(2021, 1, 15))
    monitoring = cube.ndvi.sel(time=period)
    dates = monitoring.time.values.astype('datetime64[s]').tolist()
    return monitoring.values, dates
68 |
69 |
@pytest.fixture
def forest_mask():
    """Binary int8 forest mask (tree cover density above 30)."""
    density = data.romania_forest_cover_percentage()
    mask = density > 30
    return mask.astype(np.int8)
76 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_ccdc.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 | import numpy as np
16 |
17 | from nrt.monitor.ccdc import CCDC
18 |
# For now, because fixtures can't be parametrized and CCDC needs
# green_swir_history, this is done separately. The package pytest-lazy-fixture
# could be used to also parametrize CCDC.
22 |
23 |
def test_fit_monitor(ndvi_history, green_swir_history, ndvi_monitoring_numpy,
                     forest_mask):
    """Fit CCDC on history, run monitoring over the full period, report."""
    green, swir = green_swir_history
    monitor = CCDC(mask=forest_mask)
    monitor.fit(dataarray=ndvi_history, green=green, swir=swir,
                scaling_factor=10000)
    # 2*2 harmonics + intercept + trend
    assert monitor.beta.shape[0] == 6
    arrays, dates = ndvi_monitoring_numpy
    for array, date in zip(arrays, dates):
        monitor.monitor(array=array, date=date)
    monitor._report(layers=['mask', 'detection_date'], dtype=np.int16)
35 |
36 |
def test_netcdf(ndvi_history, green_swir_history, tmp_path):
    """A fitted CCDC instance survives a netCDF round-trip unchanged."""
    green, swir = green_swir_history
    fitted = CCDC()
    fitted.fit(dataarray=ndvi_history, green=green, swir=swir,
               scaling_factor=10000)
    path = tmp_path / 'ccdc.nc'
    fitted.to_netcdf(path)
    assert fitted == CCDC.from_netcdf(path)
47 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_monitor.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | from pathlib import Path
17 | import pytest
18 | import numpy as np
19 |
20 | from nrt.monitor import iqr, ewma, cusum, mosum
21 |
# Parametrization triples: (monitor class, kwargs passed to both the
# constructor and fit(), expected number of beta coefficients). Each
# entry carries its own pytest marker so a single monitor can be
# selected with `pytest -m <marker>`.
monitor_params = {
    'EWMA': pytest.param(ewma.EWMA, {'trend': False, 'L': 5}, 5,
                         marks=pytest.mark.ewma),
    'IQR': pytest.param(iqr.IQR, {'trend': False, 'harmonic_order': 1}, 3,
                        marks=pytest.mark.iqr),
    'CUSUM': pytest.param(cusum.CuSum, {}, 6,
                          marks=pytest.mark.cusum),
    'MOSUM': pytest.param(mosum.MoSum, {}, 6,
                          marks=pytest.mark.mosum)
}
32 |
@pytest.mark.parametrize('monitor_cls, kwargs, beta', monitor_params.values(),
                         ids=monitor_params.keys())
def test_fit_monitor(monitor_cls, kwargs, beta,
                     ndvi_history, ndvi_monitoring_numpy, forest_mask):
    """Fit, monitor and report for every monitoring class."""
    instance = monitor_cls(**kwargs, mask=forest_mask)
    instance.fit(dataarray=ndvi_history, **kwargs)
    assert instance.beta.shape[0] == beta
    arrays, dates = ndvi_monitoring_numpy
    for array, date in zip(arrays, dates):
        instance.monitor(array=array, date=date)
    instance._report(layers=['mask', 'detection_date'], dtype=np.int16)
44 |
45 |
@pytest.mark.parametrize('monitor_cls, kwargs, beta', monitor_params.values(),
                         ids=monitor_params.keys())
def test_netcdf(monitor_cls, kwargs, beta, ndvi_history, tmp_path):
    """Each fitted monitor survives a netCDF round-trip unchanged."""
    fitted = monitor_cls(**kwargs)
    fitted.fit(dataarray=ndvi_history, **kwargs)
    path = tmp_path / 'monitor.nc'
    fitted.to_netcdf(path)
    reloaded = monitor_cls().from_netcdf(path)
    assert fitted == reloaded
57 |
--------------------------------------------------------------------------------
/tests/unit_tests/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2022 European Union (Joint Research Centre)
2 | #
3 | # Licensed under the EUPL, Version 1.2 or – as soon they will be approved by
4 | # the European Commission - subsequent versions of the EUPL (the "Licence");
5 | # You may not use this work except in compliance with the Licence.
6 | # You may obtain a copy of the Licence at:
7 | #
8 | # https://joinup.ec.europa.eu/software/page/eupl
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the Licence is distributed on an "AS IS" basis,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the Licence for the specific language governing permissions and
14 | # limitations under the Licence.
15 |
16 | from pathlib import Path
17 | import pytest
18 | import numpy as np
19 |
20 | here = Path(__file__).parent
21 |
@pytest.fixture
def X_y_clear(X_y_intercept_slope):
    """Same data as X_y_intercept_slope plus a boolean 'clear' mask.

    The mask is True everywhere except at the two injected outliers
    (y[9, 0] and y[0, 1]), which are flagged False.
    """
    X, y, intercept, slope = X_y_intercept_slope
    clear = np.full_like(y, True, dtype='bool')
    clear[9, 0] = False
    clear[0, 1] = False
    return X, y, clear
30 |
31 |
@pytest.fixture
def X_y_intercept_slope(request):
    """Two identical linear series (y = 5 + 2*t) with one outlier each.

    Returns (X, y, intercept, slope) where X is a (10, 2) design matrix
    [intercept, trend] and y is (10, 2) with outliers at y[9, 0] and
    y[0, 1].
    """
    np.random.seed(0)
    slope, intercept = 2., 5.
    t = np.arange(10)
    X = np.c_[np.ones(10), t]
    line = intercept + slope * t
    y = np.stack([line, line])
    # Inject one outlier per series (X_y_clear flags exactly these two)
    y[0, 9] = 0
    y[1, 0] = 23
    return X, y.T, intercept, slope
43 |
44 |
45 | # fixture of 2D residuals with extreme values start and end
46 | # 1D slope with extreme value and corresponding results in stability
@pytest.fixture
def stability_ccdc(request):
    """Synthetic data for CCDC stability checks.

    Four time series: one with an extreme first residual, one with an
    extreme last residual, one with a steep slope, and one with plain
    noise containing a single np.nan. The expected stability outcome
    per series is returned as the last element.
    """
    np.random.seed(0)
    n = 30
    # Uniform noise in [-1, 1)
    residuals = (np.random.rand(n, 4) - 0.5) * 2
    residuals[0, 0] = 100   # extreme first residual
    residuals[-1, 1] = 100  # extreme last residual

    signal = np.column_stack([
        np.ones(n),
        np.ones(n),
        np.arange(n) * 20 + 5,  # steep slope
        np.ones(n),
    ])
    # One missing observation in the last series
    signal[n // 2, 3] = np.nan

    X = np.column_stack([np.ones(n), np.arange(n)])
    y = signal + residuals
    dates = np.linspace(1, 365, n)
    expected = np.array([True, False, False, True])
    return X, y, dates, expected
71 |
72 |
@pytest.fixture
def X_y_dates_romania(request):
    """Design matrix, observations and dates loaded from CSV test data.

    Loaded as float64 to match the precision of the R reference
    computations.
    """
    data_dir = here / 'data'
    X = np.loadtxt(data_dir / 'X.csv', delimiter=',', dtype=np.float64)
    y = np.loadtxt(data_dir / 'y.csv', delimiter=',', dtype=np.float64)
    dates = np.genfromtxt(data_dir / 'dates.csv',
                          delimiter=',').astype("datetime64[ns]")
    return X, y, dates
82 |
83 |
84 | # results of calculating recursive residuals of X_y_dates_romania by
85 | # strucchange package in R
86 | # Recursive Residuals for entire matrix
87 | # Code:
88 | # res_2d < - apply(y, 2, function(column){
89 | # non_nan < - which(is.finite(column))
90 | # y_clear < - column[non_nan]
91 | # X_clear < - X[non_nan,]
92 | # rresid_na < - rep(NA, length(column))
93 | #
94 | # rresid < - recresid(X_clear, y_clear)
95 | #
96 | # rresid_na[non_nan[ncol(X_clear) + 1:length(rresid)]] < - rresid
97 | # return (rresid_na)
98 | # })
@pytest.fixture
def strcchng_rr(request):
    """Recursive-residuals reference produced with R strucchange."""
    path = here / 'data' / 'rr_result.csv'
    return np.genfromtxt(path, delimiter=',', dtype=np.float64,
                         missing_values='NA')
103 |
104 |
105 | # Results of calculating Rec-CUSUM efp process value with efp() from package
106 | # strucchange
107 | # X_df < - as.data.frame(X)
108 | # X_df$y < - y[,1]
109 | # # Remove nan
110 | # X_df_clear < - X_df[! is.na(X_df$y), ]
111 | #
112 | # level < - 0.05
113 | #
114 | # n < - nrow(X_df_clear)
115 | # data_rev < - X_df_clear[n:1, ]
116 | # y_rcus < - efp(y
117 | # ~ V1 + V2 + V3 + V4 + V5, data = data_rev, type = "Rec-CUSUM")
118 | # return (y_rcus$process)
@pytest.fixture
def strcchng_efp(request):
    """Rec-CUSUM efp process values produced with R strucchange."""
    path = here / 'data' / 'efp_result.csv'
    return np.genfromtxt(path, delimiter=',', dtype=np.float64,
                         missing_values='NA')
123 |
124 |
125 | # Results of monitoring with strucchange
126 | # res_bound_proc < - apply(y, 2, function(column)
127 | # {
128 | # # convert to dataframe
129 | # X_df < - as.data.frame(X)
130 | # X_df$y < - column
131 | # # Split in history and monitor
132 | # history < - X_df[1:100, ]
133 | # # Remove nan
134 | # history_clear < - history[! is.na(history$y), ]
135 | # monitor_clear < - X_df[! is.na(X_df$y), ]
136 | #
137 | # history_efp < - efp(y
138 | # ~ V2 + V3 + V4 + V5, data = history, type = "OLS-CUSUM")
139 | # history_mefp < - mefp(history_efp)
140 | # monitor_data < - monitor(history_mefp, data=monitor_clear)
141 | # plot(monitor_data)
142 | # return (c(monitor_process = as.numeric(tail(monitor_data$process, 1)),
143 | # boundary = history_mefp$border(nrow(monitor_clear)),
144 | # histsize = history_mefp$histsize,
145 | # sigma = history_efp$sigma))
146 | # })
@pytest.fixture
def cusum_result(request):
    """OLS-CUSUM monitoring reference produced with R strucchange."""
    path = here / 'data' / 'cusum_result.csv'
    return np.loadtxt(path, delimiter=',', dtype=np.float64)
151 |
152 |
153 | # Same as cusum_result only with type="OLS-MOSUM"
@pytest.fixture
def mosum_result(request):
    """OLS-MOSUM monitoring reference produced with R strucchange."""
    path = here / 'data' / 'mosum_result.csv'
    return np.loadtxt(path, delimiter=',', dtype=np.float64)
158 |
159 | # Test data for robust fit.
160 | #
161 | # First time-series can become singular if accuracy isn't sufficient
@pytest.fixture
def X_y_RLM(request):
    """Design matrix and observations for robust-fit (RLM) tests.

    The first time series can become singular if numerical accuracy is
    insufficient.
    """
    data_dir = here / 'data'
    X = np.loadtxt(data_dir / 'RLM_X.csv', delimiter=',', dtype=np.float64)
    y = np.loadtxt(data_dir / 'RLM_y.csv', delimiter=',', dtype=np.float64)
    return X, y
169 |
170 | # Result of Robust Fit with statsmodels
171 | #
172 | # With X, y = X_y_RLM()
173 | # import statsmodels as sm
174 | #
175 | # for idx in range(y.shape[1]):
176 | # y_sub = y[:, idx]
177 | # isna = np.isnan(y_sub)
178 | # X_sub = X[~isna]
179 | # endog = y_sub[~isna]
180 | # rlm_model = sm.RLM(endog, X_sub, M=sm.robust.norms.TukeyBiweight())
181 | # rlm_results = rlm_model.fit(update_scale=True)
182 | # beta[:,idx] = rlm_results.params
183 | @pytest.fixture
184 | def sm_RLM_result(request):
185 | return np.array([['2.3757569983999076', '-51.621207292381314'],
186 | ['1.5919053949452396e-05', '-0.00019788972214892546'],
187 | ['4.960483948314601', '-73.95341088849317'],
188 | ['4.0427485592574195', '-17.66452192456504'],
189 | ['1.0676653146683237', '0.579422996703399'],
190 | ['-0.7172424822211365', '-49.52111301879781'],
191 | ['1.2701246101474761', '-38.324020145702654'],
192 | ['1.1329168669944791', '-9.034638787625045']], dtype='
40 | boundary(y_rcus)[-1])) + 1
41 | } else {
42 | 1
43 | }
44 | return(y_start)
45 | })
46 | """
47 | X, y, dates = X_y_dates_romania
48 | result = np.array([1, 8, 49, 62, 1], dtype='float32')
49 | stable_idx = np.zeros(y.shape[1])
50 | for idx in range(y.shape[1]):
51 | # subset and remove nan
52 | is_nan = np.isnan(y[:, idx])
53 | _y = y[~is_nan, idx]
54 | _X = X[~is_nan, :]
55 |
56 | # get the index where the stable period starts
57 | stable_idx[idx] = cs.history_roc(_X, _y)
58 |
59 | # Result from strucchange must be subtracted by 1, because R is 1 indexed
60 | np.testing.assert_allclose(stable_idx, result-1)
61 |
62 |
def test_efp(X_y_dates_romania, strcchng_efp):
    """Compare efp process values against strucchange::efp (Rec-CUSUM)."""
    X, y, dates = X_y_dates_romania

    # First pixel only, with nan observations removed
    valid = ~np.isnan(y[:, 0])
    y_clear = y[valid, 0]
    X_clear = X[valid, :]

    # The process is computed on the reversed series
    process = cs._cusum_rec_efp(X_clear[::-1], y_clear[::-1])

    # Skip the first few values and use a relatively high tolerance,
    # due to floating point precision
    skip = X.shape[1] + 2
    np.testing.assert_allclose(process[skip:], strcchng_efp[skip:],
                               rtol=1e-02)
79 |
80 |
@pytest.mark.parametrize("test_input,expected", [(0.01, 3.368214),
                                                 (0.05, 2.795483),
                                                 (0.1, 2.500278)])
def test_cusum_ols_test_crit(test_input, expected):
    """OLS-CUSUM critical values at common significance levels."""
    crit = cs._cusum_ols_test_crit(test_input)
    assert crit == pytest.approx(expected)
86 |
87 |
# Invalid argument combinations for _mosum_ols_test_crit: each entry
# maps the offending parameter name to (expected exception context,
# kwargs that should trigger it).
mosum_crit_params = {
    'h': (pytest.raises(ValueError), {'alpha': 0.05, 'h': 0.24}),
    'alpha': (pytest.raises(ValueError), {'alpha': 0.06}),
    'period': (pytest.raises(ValueError), {'alpha': 0.05, 'period': 11}),
}
93 |
@pytest.mark.parametrize('expected, test_input', mosum_crit_params.values(),
                         ids=mosum_crit_params.keys())
def test_mosum_ols_test_crit(expected, test_input):
    """Out-of-range parameters must raise ValueError."""
    with expected:
        result = cs._mosum_ols_test_crit(**test_input)
        assert result is not None
100 |
101 |
def test_process_boundary_cusum(X_y_dates_romania, cusum_result):
    """Process, boundary, histsize and sigma match strucchange (OLS-CUSUM)."""
    X, y, dates = X_y_dates_romania
    # Duplicate the first pixel so y has 6 columns and reshapes to 2x3
    y = np.insert(y, 5, values=y[:, 0], axis=1)
    cube = xr.DataArray(y.reshape((y.shape[0], 2, -1)),
                        dims=["time", "x", "y"], coords={"time": dates})
    history, monitoring = cube[:100], cube[100:]

    cusum_mon = CuSum(trend=False)
    cusum_mon.fit(dataarray=history, method='OLS')
    date_list = monitoring.time.values.astype('datetime64[s]').tolist()
    for array, date in zip(monitoring.values, date_list):
        cusum_mon.monitor(array=array, date=date)

    # The last (duplicated) pixel is excluded from every comparison
    np.testing.assert_allclose(cusum_result[0],
                               cusum_mon.process.ravel()[:-1], rtol=1e-4)
    np.testing.assert_allclose(cusum_result[1],
                               cusum_mon.boundary.ravel()[:-1])
    np.testing.assert_allclose(cusum_result[2],
                               cusum_mon.histsize.ravel()[:-1])
    np.testing.assert_allclose(cusum_result[3],
                               cusum_mon.sigma.ravel()[:-1], rtol=1e-6)
128 |
129 |
def test_process_boundary_mosum(X_y_dates_romania, mosum_result):
    """Process, boundary, histsize and sigma match strucchange (OLS-MOSUM)."""
    X, y, dates = X_y_dates_romania
    # Duplicate the first pixel so y has 6 columns and reshapes to 2x3
    y = np.insert(y, 5, values=y[:, 0], axis=1)
    cube = xr.DataArray(y.reshape((y.shape[0], 2, -1)),
                        dims=["time", "x", "y"], coords={"time": dates})
    history, monitoring = cube[:100], cube[100:]

    mosum_mon = MoSum(trend=False)
    mosum_mon.fit(dataarray=history, method='OLS')
    date_list = monitoring.time.values.astype('datetime64[s]').tolist()
    for array, date in zip(monitoring.values, date_list):
        mosum_mon.monitor(array=array, date=date)

    # The third pixel has a break and diverges strongly because
    # monitoring in bFast does not stop when a break occurs; exclude it
    # from the process comparison. The last (duplicated) pixel is
    # excluded from every comparison.
    np.testing.assert_allclose(np.delete(mosum_result[0], 2),
                               np.delete(mosum_mon.process.ravel(), [2, -1]),
                               rtol=1e-4)
    np.testing.assert_allclose(mosum_result[1],
                               mosum_mon.boundary.ravel()[:-1])
    np.testing.assert_allclose(mosum_result[2],
                               mosum_mon.histsize.ravel()[:-1])
    np.testing.assert_allclose(mosum_result[3],
                               mosum_mon.sigma.ravel()[:-1], rtol=1e-6)
158 |
--------------------------------------------------------------------------------