├── tests ├── __init__.py ├── test_version.py ├── test_rai.py ├── test_plots.py ├── conftest.py ├── test_climdex.py ├── test_knmi.py ├── test_validate.py └── test_si.py ├── docs ├── _static │ └── .gitkeep ├── examples │ ├── index.md │ ├── example08_rai.ipynb │ ├── example06_treshold.ipynb │ ├── example05_multiyear_drought.ipynb │ ├── example01_indices.ipynb │ ├── example03_drought_prediction.ipynb │ ├── example04_package_comparison.ipynb │ ├── example07_knmi.ipynb │ └── example09_joss_paper.ipynb ├── index.md └── conf.py ├── src └── spei │ ├── py.typed │ ├── __init__.py │ ├── _typing.py │ ├── _version.py │ ├── rai.py │ ├── climdex.py │ ├── utils.py │ ├── dist.py │ ├── knmi.py │ └── si.py ├── paper ├── figures │ ├── spei1.png │ ├── threshold.png │ ├── spei_density.png │ ├── spei_heatmap.png │ ├── surplus_fit_cdf.png │ └── monthly_precipitation_surplus.png ├── paper.md └── paper.bib ├── .gitignore ├── .github ├── workflows │ ├── auto-author-assign.yml │ ├── draft-pdf.yml │ ├── python-publish.yml │ ├── documentation.yml │ └── tests.yml ├── ISSUE_TEMPLATE │ ├── question.yml │ ├── enhancement.yml │ └── bug.yml └── CONTRIBUTING.md ├── CITATION.cff ├── LICENSE ├── pyproject.toml └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/spei/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. The mypy package uses inline types. 
-------------------------------------------------------------------------------- /paper/figures/spei1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/spei1.png -------------------------------------------------------------------------------- /paper/figures/threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/threshold.png -------------------------------------------------------------------------------- /paper/figures/spei_density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/spei_density.png -------------------------------------------------------------------------------- /paper/figures/spei_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/spei_heatmap.png -------------------------------------------------------------------------------- /paper/figures/surplus_fit_cdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/surplus_fit_cdf.png -------------------------------------------------------------------------------- /paper/figures/monthly_precipitation_surplus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/monthly_precipitation_surplus.png -------------------------------------------------------------------------------- /src/spei/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from . 
import climdex, rai, knmi, dist, plot, si, utils 3 | from ._version import __version__, show_versions 4 | from .si import SI, sgi, spei, spi, ssfi, ssmi 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | 4 | /dist 5 | /docs/references 6 | /docs/_build 7 | /docs/_api 8 | /paper/literature 9 | /paper/data 10 | 11 | /.vscode 12 | *.code-workspace 13 | /.mypy_cache 14 | /.tox 15 | /.pytest_cache 16 | /htmlcov 17 | coverage.xml 18 | *.coverage 19 | uv.lock 20 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | import spei as si 2 | 3 | 4 | def test_version() -> None: 5 | assert isinstance(si.__version__, str) 6 | assert si.__version__.count(".") == 2 7 | 8 | 9 | def test_show_versions(): 10 | msg = si.show_versions() 11 | assert isinstance(msg, str) 12 | -------------------------------------------------------------------------------- /src/spei/_typing.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from typing import Any 3 | 4 | from numpy import float64 5 | from numpy.typing import NDArray 6 | from scipy.stats._continuous_distns import rv_continuous 7 | 8 | ContinuousDist = Any | rv_continuous 9 | NDArrayAxes = NDArray[Any] 10 | NDArrayFloat = NDArray[float64] 11 | -------------------------------------------------------------------------------- /.github/workflows/auto-author-assign.yml: -------------------------------------------------------------------------------- 1 | # .github/workflows/auto-author-assign.yml 2 | name: Auto Author Assign 3 | 4 | on: 5 | pull_request_target: 6 | types: [opened, reopened] 7 | 8 | permissions: 9 | pull-requests: write 10 | 11 | jobs: 12 | assign-author: 13 | runs-on: ubuntu-latest 14 | 
steps: 15 | - uses: toshimaru/auto-author-assign@v1.6.2 -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Below you can find examples of how SPEI can be used. These Jupyter Notebooks can also be run via [examples directory on GitHub](https://github.com/martinvonk/SPEI/tree/master/docs/examples). 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | 8 | example01_indices 9 | example02_distributions 10 | example03_drought_prediction 11 | example04_package_comparison 12 | example05_multiyear_drought 13 | example06_treshold 14 | example07_knmi 15 | example08_rai 16 | example09_joss_paper 17 | ``` 18 | -------------------------------------------------------------------------------- /src/spei/_version.py: -------------------------------------------------------------------------------- 1 | from importlib import metadata 2 | from platform import python_version 3 | 4 | __version__ = "0.8.0" 5 | 6 | 7 | def show_versions() -> str: 8 | msg = f"python: {python_version()}\nspei: {__version__}\n" 9 | 10 | requirements = metadata.requires("spei") 11 | if requirements: 12 | deps = [x for x in requirements if "extra" not in x] 13 | for dep in deps: 14 | msg += f"{dep}: {metadata.version(dep)}" 15 | msg += "\n" if deps.index(dep) < len(deps) - 1 else "" 16 | 17 | return msg 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: Question 2 | description: Form for a question or clarification 3 | labels: ["question"] 4 | body: 5 | - type: checkboxes 6 | attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the question you have. 
9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the question 15 | description: A concise description of what you would like to know. 16 | validations: 17 | required: false -------------------------------------------------------------------------------- /.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | # https://github.com/marketplace/actions/open-journals-pdf-generator 2 | name: JOSS 3 | on: 4 | push: 5 | branches: 6 | - joss 7 | # - dev 8 | 9 | jobs: 10 | paper: 11 | runs-on: ubuntu-latest 12 | name: Build Paper Draft 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: Build draft PDF 18 | uses: openjournals/openjournals-draft-action@master 19 | with: 20 | journal: joss 21 | paper-path: paper/paper.md 22 | 23 | - name: Upload 24 | uses: actions/upload-artifact@v4 25 | with: 26 | name: paper 27 | path: paper/paper.pdf -------------------------------------------------------------------------------- /tests/test_rai.py: -------------------------------------------------------------------------------- 1 | from pandas import Series 2 | 3 | from spei.rai import mrai, rai 4 | 5 | 6 | def test_rai(prec: Series) -> None: 7 | precrs = prec.resample("MS").sum() 8 | rai_result = rai(precrs) 9 | assert isinstance(rai_result, Series), "RAI result is not a pandas Series" 10 | assert len(rai_result) == len(precrs), ( 11 | "RAI result length does not match input length" 12 | ) 13 | 14 | 15 | def test_mrai(prec: Series) -> None: 16 | precrs = prec.resample("MS").sum() 17 | mrai_result = mrai(precrs) 18 | assert isinstance(mrai_result, Series), "MRAI result is not a pandas Series" 19 | assert len(mrai_result) == len(precrs), ( 20 | "MRAI result length does not match input length" 21 | ) 22 | -------------------------------------------------------------------------------- 
/tests/test_plots.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | from pandas import Series 3 | 4 | from spei.plot import heatmap, monthly_density, threshold 5 | from spei.plot import si as plot_si 6 | 7 | mpl.use("Agg") # prevent _tkinter.TclError: Can't find a usable tk.tcl error 8 | 9 | 10 | def test_plot_si(si: Series) -> None: 11 | _ = plot_si(si) 12 | 13 | 14 | def test_plot_si_no_background(si: Series) -> None: 15 | _ = plot_si(si, cmap="roma_r", background=False) 16 | 17 | 18 | def test_plot_monthly_density(si: Series) -> None: 19 | _ = monthly_density(si, years=[2011], months=[1, 2, 3, 4, 5]) 20 | 21 | 22 | def test_plot_heatmap(si: Series) -> None: 23 | _ = heatmap([si], cmap="vik", vmin=-3.0, vmax=3.0) 24 | 25 | 26 | def test_plot_threshold(head: Series) -> None: 27 | th = Series(head.mean(), index=head.index, dtype=float) 28 | _ = threshold(series=head, threshold=th, fill_color="orange") 29 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | authors: 3 | - family-names: Vonk 4 | given-names: M. A. 5 | orcid: "https://orcid.org/0009-0007-3528-2991" 6 | doi: 10.5281/zenodo.16441123 7 | message: If you use this software, please cite our article in the 8 | Journal of Open Source Software. 9 | preferred-citation: 10 | authors: 11 | - family-names: Vonk 12 | given-names: M. A. 
13 | orcid: "https://orcid.org/0009-0007-3528-2991" 14 | date-published: 2025-07-29 15 | doi: 10.21105/joss.08454 16 | issn: 2475-9066 17 | issue: 111 18 | journal: Journal of Open Source Software 19 | publisher: 20 | name: Open Journals 21 | start: 8454 22 | title: "SPEI: A Python package for calculating and visualizing drought 23 | indices" 24 | type: article 25 | url: "https://joss.theoj.org/papers/10.21105/joss.08454" 26 | volume: 10 27 | title: "SPEI: A Python package for calculating and visualizing drought 28 | indices" 29 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install wheel twine build 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python3 -m build 31 | python3 -m twine upload --repository pypi dist/* 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Martin Vonk 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation 
files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement 2 | description: Form for an enhancement, new feature or request 3 | labels: ["enhancement"] 4 | body: 5 | - type: checkboxes 6 | attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the feature you are requesting. 9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the enhancement 15 | description: A concise description of the feature you are requesting. 16 | validations: 17 | required: false 18 | placeholder: | 19 | Description of the feature. 20 | 21 | ```python 22 | # Possible pseudo code to describe the feature 23 | ``` 24 | - type: textarea 25 | attributes: 26 | label: Anything else? 27 | description: | 28 | Links? 
References? Anything that will give us more context! 29 | Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. 30 | validations: 31 | required: false -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from pandas import Series, Timestamp, read_csv 5 | 6 | from spei.si import spi 7 | 8 | 9 | def read_data(column: str) -> Series: 10 | df = read_csv( 11 | Path(__file__).parent / "data/B11C0329_EAGMARYP.csv", 12 | index_col=0, 13 | parse_dates=True, 14 | sep=";", 15 | ) 16 | return df.loc[:, column] 17 | 18 | 19 | @pytest.fixture 20 | def prec() -> Series: 21 | prec = read_data("Prec [m/d] 081_JOURE").dropna() 22 | return prec 23 | 24 | 25 | @pytest.fixture 26 | def precmm(prec) -> Series: 27 | return prec.multiply(1e3).rename("Prec [mm/d] 081_JOURE") 28 | 29 | 30 | @pytest.fixture 31 | def evap() -> Series: 32 | evap = read_data("Evap [m/d] 235_DE-KOOY").dropna() 33 | return evap 34 | 35 | 36 | @pytest.fixture 37 | def head() -> Series: 38 | head = read_data("Head [m] B11C0329_EAGMARYP").dropna() 39 | return head 40 | 41 | 42 | @pytest.fixture 43 | def si(prec: Series) -> Series: 44 | si = spi(prec.rolling("30D", min_periods=30).sum().dropna(), prob_zero=True) 45 | return si 46 | 47 | 48 | @pytest.fixture 49 | def deficit(prec: Series, evap: Series) -> Series: 50 | deficit = ( 51 | (evap - prec) 52 | .loc[Timestamp("1965-01-01") : Timestamp("2020-12-31")] 53 | .rename("deficit") 54 | ) 55 | return deficit 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.yml: -------------------------------------------------------------------------------- 1 | name: Bug 2 | description: Form for an unexpected problem or behavior 3 | labels: ["bug"] 4 | body: 5 | - type: checkboxes 6 | 
attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the bug you encountered. 9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the bug 15 | description: A concise description of what you're experiencing. 16 | validations: 17 | required: false 18 | - type: textarea 19 | attributes: 20 | label: Expected behavior 21 | description: A concise description of what you expected to happen. 22 | validations: 23 | required: false 24 | - type: textarea 25 | attributes: 26 | label: Code to reproduce 27 | description: Steps to reproduce the behavior. 28 | placeholder: | 29 | 1. In this environment `spei.show_versions()` 30 | 2. Run 31 | ```python 32 | # Code to reproduce the behavior 33 | ``` 34 | 3. Gives output or error... 35 | validations: 36 | required: false 37 | - type: textarea 38 | attributes: 39 | label: Anything else? 40 | description: | 41 | Links? References? Anything that will give us more context about the issue you are encountering! 42 | Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. 
43 | validations: 44 | required: false -------------------------------------------------------------------------------- /tests/test_climdex.py: -------------------------------------------------------------------------------- 1 | from pandas import Series 2 | 3 | from spei import climdex 4 | 5 | 6 | def test_climdex_rxnday(precmm: Series) -> None: 7 | climdex.rxnday(series=precmm, interval="10D", period="90D") 8 | 9 | 10 | def test_climdex_rx1day(precmm: Series) -> None: 11 | climdex.rx1day(series=precmm) 12 | 13 | 14 | def test_climdex_rx5day(precmm: Series) -> None: 15 | climdex.rx5day(series=precmm) 16 | 17 | 18 | def test_climdex_sdii(precmm: Series) -> None: 19 | climdex.sdii(series=precmm) 20 | 21 | 22 | def test_climdex_rnmm(precmm: Series) -> None: 23 | climdex.rnmm(series=precmm, threshold=5, period="90D") 24 | 25 | 26 | def test_climdex_r10mm(precmm: Series) -> None: 27 | climdex.r10mm(series=precmm) 28 | 29 | 30 | def test_climdex_r20mm(precmm: Series) -> None: 31 | climdex.r20mm(series=precmm) 32 | 33 | 34 | def test_climdex_cdd(precmm: Series) -> None: 35 | climdex.cdd(series=precmm) 36 | 37 | 38 | def test_climdex_cwd(precmm: Series) -> None: 39 | climdex.cwd(series=precmm) 40 | 41 | 42 | def test_climdex_prcptot(precmm: Series) -> None: 43 | climdex.prcptot(series=precmm) 44 | 45 | 46 | def test_climdex_rnnp(precmm: Series) -> None: 47 | climdex.rnnp(series=precmm, quantile=0.5) 48 | 49 | 50 | def test_climdex_r95p(precmm: Series) -> None: 51 | climdex.r95p(series=precmm) 52 | 53 | 54 | def test_climdex_r99p(precmm: Series) -> None: 55 | climdex.r99p(series=precmm) 56 | 57 | 58 | def test_climdex_r95ptot(precmm: Series) -> None: 59 | climdex.r95ptot(series=precmm) 60 | 61 | 62 | def test_climdex_r99ptot(precmm: Series) -> None: 63 | climdex.r99ptot(series=precmm) 64 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | # Contributing to the SPEI Python Package 2 | 3 | Thank you for your interest in contributing to the **SPEI Python package**! We welcome contributions from everyone — whether you want to report an issue, improve the documentation, or submit code enhancements. This document outlines guidelines to help you get started. 4 | 5 | ## How to Contribute 6 | 7 | ### Creating a Good Issue 8 | 9 | Before creating an issue, check whether it has already been reported. 10 | When opening a new issue, please include: 11 | 12 | - A clear and descriptive title 13 | - A detailed description of the problem or suggestion 14 | - Steps to reproduce (if it's a bug) 15 | - The version of the package and Python you’re using 16 | - If relevant, a minimal reproducible example 17 | 18 | Before creating a new issue, check whether it has already been reported. 19 | 20 | ### Submitting a Pull Request 21 | 22 | To submit a pull request: 23 | 24 | 1. Create an issue first 25 | 2. Fork the repository 26 | 3. Create a new branch with a meaningful name 27 | 4. Fix the issue 28 | 5. Make your changes and include tests / example notebook if applicable. 29 | 6. Run the test suite to ensure everything works (including type hints and formatting & linting with ruff) 30 | 7. Submit a pull request (PR) with a clear description of what was changed and why. 
31 | 32 | ## Useful Links 33 | - SPEI GitHub Repository: [github.com/martinvonk/SPEI](https://github.com/martinvonk/SPEI) 34 | - Documentation and examples: [notebooks folder](https://github.com/martinvonk/SPEI/tree/main/docs/examples) 35 | - Code of Conduct: [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) 36 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # SPEI documentation 2 | 3 | SPEI is a Python package designed for calculating drought indices from meteorological and hydrological time series. Built on widely used libraries like Pandas and SciPy, it offers a flexible and simple approach to drought analysis. 4 | 5 | This website provides full documentation, example notebooks, and a detailed API reference to help you get started. 6 | 7 | The SPEI package is open-source and hosted on [GitHub](https://github.com/martinvonk/SPEI), where you can find more information about the available drought indices and ongoing development. The package is published on [PyPI](https://pypi.org/project/spei/) from which it can be installed using `pip install spei`. 8 | 9 | If you use this package for drought analysis and/or visualization, please cite it by referencing our article in the [Journal of Open Source Software](https://joss.theoj.org/papers/10.21105/joss.08454): 10 | 11 | Vonk, M. A. (2025). SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454). 12 | 13 | ```bibtex 14 | @article{Vonk_SPEI_2025, 15 | author = {Vonk, M. 
A.}, 16 | doi = {10.21105/joss.08454}, 17 | journal = {Journal of Open Source Software}, 18 | number = {111}, 19 | pages = {8454}, 20 | title = {{SPEI: A Python package for calculating and visualizing drought indices}}, 21 | url = {https://joss.theoj.org/papers/10.21105/joss.08454}, 22 | volume = {10}, 23 | year = {2025} 24 | } 25 | ``` 26 | 27 | ```{toctree} 28 | :maxdepth: 2 29 | 30 | examples/index.md 31 | _api/modules.rst 32 | ``` -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | from spei import __version__ 10 | 11 | project = "SPEI" 12 | copyright = "2025, Martin Vonk" 13 | author = "Martin Vonk" 14 | release = __version__ 15 | 16 | # make docs 17 | # sphinx-build -M html docs/source docs/build 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | "myst_parser", # For Markdown support 24 | "nbsphinx", # For Jupyter Notebooks support 25 | "sphinx.ext.autodoc", # For automatic documentation generation from docstrings 26 | "sphinx.ext.apidoc", # For automatic API documentation generation 27 | "sphinx.ext.napoleon", # For Google and NumPy style docstrings 28 | ] 29 | 30 | exclude_patterns = [ 31 | "_build", # Exclude the build directory 32 | "**.ipynb_checkpoints", # ignores WARNING: Pygments lexer name 'ipython3' is not known 33 | ] 34 | nbsphinx_allow_errors = True # Allow errors in 
notebooks 35 | apidoc_modules = [ 36 | { 37 | "path": "../src/spei", 38 | "destination": "_api", 39 | "separate_modules": True, 40 | "max_depth": 2, 41 | } 42 | ] 43 | 44 | # -- Options for HTML output ------------------------------------------------- 45 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 46 | 47 | html_theme = "alabaster" 48 | html_static_path = ["_static"] 49 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy Docs 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: write 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.11' 22 | check-latest: true 23 | 24 | - name: Set up R 25 | uses: r-lib/actions/setup-r@v2 26 | with: 27 | r-version: '4.3' 28 | 29 | - name: Set up tox environment 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install tox 33 | tox -e docu --notest 34 | 35 | - name: Install Pandoc and build documentation 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install -y pandoc 39 | tox -e docu --skip-pkg-install 40 | 41 | - name: Publish HTML output on gh-pages branch 42 | uses: peaceiris/actions-gh-pages@v4 43 | with: 44 | github_token: ${{ secrets.GITHUB_TOKEN }} 45 | publish_dir: ./docs/_build/html 46 | enable_jekyll: false 47 | 48 | deploy: 49 | needs: build 50 | runs-on: ubuntu-latest 51 | permissions: 52 | contents: read 53 | pages: write 54 | id-token: write 55 | concurrency: 56 | group: pages 57 | cancel-in-progress: false 58 | environment: 59 | name: github-pages 60 | url: ${{ steps.deployment.outputs.page_url }} 61 | steps: 62 | - name: Checkout code 63 | 
uses: actions/checkout@v4 64 | with: 65 | ref: gh-pages 66 | 67 | - name: Setup Pages 68 | uses: actions/configure-pages@v3 69 | 70 | - name: Upload artifact 71 | uses: actions/upload-pages-artifact@v3 72 | with: 73 | path: '.' 74 | 75 | - name: Deploy to GitHub Pages 76 | id: deployment 77 | uses: actions/deploy-pages@v4 78 | -------------------------------------------------------------------------------- /tests/test_knmi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | from spei.knmi import ( 6 | deficit_apr1, 7 | deficit_gdd, 8 | deficit_max, 9 | deficit_oct1, 10 | deficit_wet, 11 | get_cumulative_deficit, 12 | get_yearly_temp_date, 13 | ) 14 | from spei.plot import deficit_knmi 15 | 16 | 17 | @pytest.fixture 18 | def temp(deficit: pd.Series) -> pd.Series: 19 | sine_wave = np.sin(2 * np.pi * np.arange(len(deficit)) / 365) * 15 + 15 20 | temp = pd.Series(data=sine_wave, index=deficit.index, dtype=float) 21 | return temp 22 | 23 | 24 | def test_get_yearly_temp_date(temp): 25 | threshold = 440.0 26 | result = get_yearly_temp_date(temp=temp, threshold=threshold) 27 | assert isinstance(result, pd.Series) 28 | 29 | 30 | def test_get_cumulative_deficit(deficit): 31 | startdate = pd.Timestamp("2000-04-01") 32 | enddate = pd.Timestamp("2000-09-30") 33 | result = get_cumulative_deficit( 34 | deficit=deficit, startdate=startdate, enddate=enddate 35 | ) 36 | assert isinstance(result, pd.DataFrame) 37 | assert not result.empty 38 | 39 | 40 | def test_deficit_oct1(deficit): 41 | result = deficit_oct1(deficit=deficit) 42 | assert isinstance(result, pd.Series) 43 | assert result.name == "Doct1" 44 | 45 | 46 | def test_deficit_max(deficit): 47 | result = deficit_max(deficit=deficit) 48 | assert isinstance(result, pd.Series) 49 | assert result.name == "Dmax" 50 | 51 | 52 | def test_deficit_apr1(deficit): 53 | result = deficit_apr1(deficit=deficit) 54 | assert 
isinstance(result, pd.Series) 55 | assert result.name == "DIapr1" 56 | 57 | 58 | def test_deficit_gdd(deficit, temp): 59 | threshold = 440 60 | result = deficit_gdd( 61 | deficit=deficit, 62 | temp=temp, 63 | threshold=threshold, 64 | ) 65 | assert isinstance(result, pd.Series) 66 | assert result.name == "DIgdd" 67 | 68 | 69 | def test_deficit_wet(deficit): 70 | result = deficit_wet(deficit=deficit) 71 | assert isinstance(result, pd.Series) 72 | assert result.name == "DIwet" 73 | 74 | 75 | def test_plot_knmi_deficit(deficit: pd.Series): 76 | """Test the plot function for the deficit.""" 77 | startdate = pd.Timestamp("2000-04-01") 78 | enddate = pd.Timestamp("2000-09-30") 79 | cumdf = get_cumulative_deficit( 80 | deficit=deficit, 81 | startdate=startdate, 82 | enddate=enddate, 83 | allow_below_zero=False, 84 | ) 85 | ax = deficit_knmi(cumdf) 86 | assert ax is not None 87 | assert ax.get_ylabel() == "Precipitation deficit (mm)" 88 | -------------------------------------------------------------------------------- /src/spei/rai.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from .utils import get_data_series, group_yearly_df, validate_series 5 | 6 | 7 | def rai(series: pd.Series) -> pd.Series: 8 | """ 9 | Calculate the Rainfall Anomaly Index (RAI) for a given time 10 | series of precipitation data. 11 | 12 | Parameters 13 | ---------- 14 | series : pd.Series 15 | A pandas Series containing precipitation data. 16 | 17 | Returns 18 | ------- 19 | pd.Series 20 | A pandas Series containing the RAI values. 21 | 22 | References 23 | ---------- 24 | van Rooy, M.P. A Rainfall Anomaly Index Independent of Time and Space. Notos. 1965. 
25 | """ 26 | series = validate_series(series) 27 | pm = series.mean() 28 | pi_above = series > pm 29 | rai = pd.Series(np.nan, index=series.index, dtype=float) 30 | rai[pi_above] = 3.0 * (series[pi_above] - pm) / (series.nlargest(10).mean() - pm) 31 | rai[~pi_above] = ( 32 | -3.0 * (series[~pi_above] - pm) / (series.nsmallest(10).mean() - pm) 33 | ) 34 | return rai 35 | 36 | 37 | def mrai(series: pd.Series, sf: float = 1.7) -> pd.Series: 38 | """Calculate the Modified Rainfall Anomaly Index (MRAI) 39 | for a given time series of precipitation data. 40 | 41 | Parameters 42 | ---------- 43 | series : pd.Series 44 | A pandas Series containing precipitation data. 45 | sf : float 46 | Scaling factor for the MRAI calculation. Default is 1.7. 47 | 48 | Returns 49 | ------- 50 | pd.Series 51 | A pandas Series containing the MRAI values. 52 | 53 | References 54 | ---------- 55 | Hänsel, S., Schucknecht, A. and Matschullat J. The Modified Rainfall 56 | Anomaly Index (mRAI) — is this an alternative to the Standardised 57 | Precipitation Index (SPI) in evaluating future extreme precipitation 58 | characteristics? Theoretical and Applied Climatology. 2015. 
59 | """ 60 | series = validate_series(series) 61 | mrai = pd.Series(np.nan, index=series.index, dtype=float) 62 | group_df = group_yearly_df(series=series) 63 | for _, gr in group_df.groupby(pd.Grouper(freq="MS")): 64 | gr_series = get_data_series(gr) 65 | pm = gr_series.mean() 66 | pi_above = gr_series > pm 67 | e_above = gr_series[gr_series > gr_series.quantile(0.9)].mean() 68 | e_below = gr_series[gr_series < gr_series.quantile(0.1)].mean() 69 | mrai_gr = pd.Series(np.nan, index=gr_series.index, dtype=float) 70 | mrai_gr[pi_above] = sf * (gr_series[pi_above] - pm) / (e_above - pm) 71 | mrai_gr[~pi_above] = -sf * (gr_series[~pi_above] - pm) / (e_below - pm) 72 | mrai.loc[mrai_gr.index] = mrai_gr.values 73 | 74 | return mrai 75 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | runs-on: ${{ matrix.os }} 10 | continue-on-error: ${{ matrix.experimental }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | - name: Test suite with py310-ubuntu 16 | python: "3.10" 17 | os: ubuntu-latest 18 | toxenv: py310 19 | experimental: false 20 | - name: Test suite with py311-ubuntu 21 | python: "3.11" 22 | os: ubuntu-latest 23 | toxenv: py311 24 | experimental: false 25 | - name: Test suite with py312-ubuntu 26 | python: "3.12" 27 | os: ubuntu-latest 28 | toxenv: py312 29 | experimental: false 30 | - name: Test suite with py313-ubuntu 31 | python: "3.13" 32 | os: ubuntu-latest 33 | toxenv: py313 34 | experimental: false 35 | - name: Type check with mypy 36 | python: "3.10" 37 | os: ubuntu-latest 38 | toxenv: type 39 | experimental: false 40 | - name: Formatting and linting with ruff 41 | python: "3.10" 42 | os: ubuntu-latest 43 | toxenv: ruff 44 | experimental: false 45 | - name: Codacy Coverage Report 46 | python: "3.10" 47 | os: 
ubuntu-latest 48 | toxenv: coverage 49 | experimental: false 50 | 51 | name: ${{ matrix.name }} 52 | env: 53 | # Color Output 54 | # Rich (pip) 55 | FORCE_COLOR: 1 56 | # Tox 57 | PY_COLORS: 1 58 | # MyPy 59 | TERM: xterm-color 60 | MYPY_FORCE_COLOR: 1 61 | MYPY_FORCE_TERMINAL_WIDTH: 200 62 | # Pytest 63 | PYTEST_ADDOPTS: "--color=yes" 64 | steps: 65 | - uses: actions/checkout@v4 66 | 67 | - name: Set up Python ${{ matrix.python }} 68 | uses: actions/setup-python@v5 69 | with: 70 | python-version: ${{ matrix.python }} 71 | check-latest: true 72 | 73 | - name: Set up tox environment 74 | run: | 75 | python -m pip install --upgrade pip 76 | pip install tox 77 | tox -e ${{ matrix.toxenv }} --notest 78 | 79 | - name: Test 80 | run: tox -e ${{ matrix.toxenv }} --skip-pkg-install 81 | 82 | - name: Run codacy-coverage-reporter 83 | if: ${{ matrix.toxenv == 'coverage' && github.repository == 'martinvonk/spei' && success() }} 84 | uses: codacy/codacy-coverage-reporter-action@master 85 | with: 86 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 87 | coverage-reports: coverage.xml 88 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "spei" 7 | dynamic = ["version"] 8 | authors = [{ name = "Martin Vonk", email = "vonk.mart@gmail.com" }] 9 | description = "A simple Python package to calculate drought indices for time series such as the SPI, SPEI and SGI." 
10 | readme = "README.md" 11 | license = { file = "LICENSE" } 12 | requires-python = ">=3.10" 13 | dependencies = ["numpy", "scipy", "matplotlib", "pandas"] 14 | classifiers = [ 15 | "Programming Language :: Python :: 3 :: Only", 16 | "Programming Language :: Python :: 3.10", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3.12", 19 | "Programming Language :: Python :: 3.13", 20 | "Topic :: Scientific/Engineering :: Hydrology", 21 | "Intended Audience :: Science/Research", 22 | "License :: OSI Approved :: MIT License", 23 | "Operating System :: OS Independent", 24 | "Typing :: Typed", 25 | ] 26 | 27 | [project.urls] 28 | homepage = "https://github.com/martinvonk/spei" 29 | repository = "https://github.com/martinvonk/spei" 30 | 31 | [project.optional-dependencies] 32 | notebook = ["ipykernel", "pastas", "rpy2", "standard-precip"] 33 | documentation = ["sphinx>=8.2", "myst-parser", "nbsphinx"] 34 | ruffing = ["ruff"] 35 | typing = ["mypy", "pandas-stubs"] 36 | pytesting = ["pytest>=7", "pytest-cov", "pytest-sugar"] 37 | coveraging = ["coverage"] 38 | dev = ["spei[ruffing,typing,pytesting,coveraging]", "tox"] 39 | 40 | [tool.setuptools.dynamic] 41 | version = { attr = "spei._version.__version__" } 42 | 43 | [tool.mypy] 44 | mypy_path = "src" 45 | 46 | [[tool.mypy.overrides]] 47 | module = ["matplotlib.*", "scipy.stats.*"] 48 | ignore_missing_imports = true 49 | 50 | [tool.pytest.ini_options] 51 | pythonpath = ["src"] 52 | 53 | [tool.ruff] 54 | extend-include = ["*.ipynb"] 55 | lint.extend-select = ["I"] 56 | show-fixes = true 57 | fix = true 58 | 59 | [tool.tox] 60 | requires = ["tox>=4"] 61 | env_list = ["py310", "py311", "py312", "py313", "type", "ruff"] 62 | 63 | [tool.tox.env_run_base] 64 | description = "run unit tests" 65 | extras = ["pytesting"] 66 | commands = [["pytest", "tests"]] 67 | 68 | [tool.tox.env.type] 69 | description = "run type checks" 70 | extras = ["typing"] 71 | commands = [["mypy", "src"]] 72 | 73 | 
[tool.tox.env.ruff] 74 | description = "run ruff checks" 75 | extras = ["ruffing"] 76 | commands = [ 77 | [ 78 | "ruff", 79 | "check", 80 | "--extend-select", 81 | "I", 82 | "--preview", 83 | ], 84 | [ 85 | "ruff", 86 | "format", 87 | "--check", 88 | ], 89 | ] 90 | 91 | [tool.tox.env.docu] 92 | description = "build documentation" 93 | extras = ["documentation", "notebook"] 94 | commands = [["sphinx-build", "-M", "html", "docs", "docs/_build"]] 95 | 96 | [tool.tox.env.coverage] 97 | description = "get coverage report xml" 98 | extras = ["coveraging", "pytesting"] 99 | commands = [["coverage", "run", "-m", "pytest", "tests"], ["coverage", "xml"]] 100 | -------------------------------------------------------------------------------- /tests/test_validate.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytest 4 | from pandas import DataFrame, DatetimeIndex, Index, Series, Timestamp, to_datetime 5 | 6 | from spei.utils import infer_frequency, validate_index, validate_series 7 | 8 | 9 | def test_validate_index(caplog) -> None: 10 | caplog.set_level(logging.INFO) 11 | series = Series([1.0, 2.0, 3.0], index=["2018", "2019", "2020"]) 12 | validate_index(series.index) 13 | msg = ( 14 | f"Expected the index to be a DatetimeIndex. Automatically converted " 15 | f"{type(series.index)} using pd.to_datetime(Index)\n" 16 | ) 17 | assert msg in caplog.text 18 | 19 | 20 | def test_validate_index_duplicated(caplog) -> None: 21 | caplog.set_level(logging.ERROR) 22 | series = Series( 23 | [1.0, 1.0], 24 | index=DatetimeIndex([Timestamp("2000-01-01"), Timestamp("2000-01-01")]), 25 | ) 26 | with pytest.raises(ValueError): 27 | validate_index(series.index) 28 | msg = ( 29 | "Duplicated indices found. Please remove them. 
For instance by using" 30 | "`series = series.loc[~series.index.duplicated(keep='first/last')]`" 31 | ) 32 | assert msg in caplog.text 33 | 34 | 35 | def test_validate_series() -> None: 36 | with pytest.raises(TypeError): 37 | validate_series([1, 2, 3]) 38 | 39 | 40 | def test_validate_series_df_1d(caplog) -> None: 41 | df = DataFrame({"s": [1, 2, 3]}, index=to_datetime([1, 2, 3])) 42 | validate_series(df) 43 | msg = ( 44 | "Please convert series of type pandas.DataFrame to a" 45 | "pandas.Series using DataFrame.squeeze(). Now done automatically.\n" 46 | ) 47 | assert msg in caplog.text 48 | 49 | 50 | def test_validate_series_df_2d() -> None: 51 | with pytest.raises(TypeError): 52 | df = DataFrame({"s1": [1, 2, 3], "s2": [1, 2, 3]}, index=to_datetime([1, 2, 3])) 53 | validate_series(df) 54 | 55 | 56 | def test_infer_frequency_monthly_start(): 57 | index = DatetimeIndex(["2020-01-01", "2020-02-01", "2020-03-01"]) 58 | assert infer_frequency(index) == "MS" # Assuming pandas version >= 2.2.0 59 | 60 | 61 | def test_infer_frequency_monthly_end(): 62 | index = DatetimeIndex(["2020-01-31", "2020-02-28", "2020-03-31"]) 63 | assert infer_frequency(index) == "BME" # Assuming pandas version >= 2.2.0 64 | 65 | 66 | def test_infer_frequency_weekly(): 67 | index = DatetimeIndex(["2020-01-01", "2020-01-08", "2020-01-15"]) 68 | assert infer_frequency(index) == "W" 69 | 70 | 71 | def test_infer_frequency_daily(): 72 | index = DatetimeIndex(["2020-01-01", "2020-01-02", "2020-01-03"]) 73 | assert infer_frequency(index) == "D" 74 | 75 | 76 | def test_infer_frequency_no_infer(): 77 | index = DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-07"]) 78 | assert infer_frequency(index) == "MS" # Assuming pandas version >= 2.2.0 79 | 80 | 81 | def test_infer_frequency_non_datetime_index(): 82 | index = Index(["2020-01-01", "2020-02-01", "2020-03-01"]) 83 | assert infer_frequency(index) == "MS" # Assuming pandas version >= 2.2.0 84 | 85 | 86 | def test_infer_frequency_invalid_index(): 87 | 
index = Index(["a", "b", "c"]) 88 | with pytest.raises(ValueError), pytest.warns(UserWarning): 89 | infer_frequency(index) 90 | -------------------------------------------------------------------------------- /src/spei/climdex.py: -------------------------------------------------------------------------------- 1 | # https://www.climdex.org/ 2 | 3 | from pandas import Series 4 | 5 | from .utils import validate_index, validate_series 6 | 7 | 8 | def rxnday(series: Series, interval: str, period: str = "30D") -> Series: 9 | """Maximum consecutive precipitation amount over an interval""" 10 | series = validate_series(series) 11 | _ = validate_index(series.index) 12 | 13 | return series.rolling(interval).sum().rolling(period).max() 14 | 15 | 16 | def rx1day(series: Series, interval: str = "1D", period: str = "30D") -> Series: 17 | """Maximum 1-day precipitation amount""" 18 | return rxnday(series=series, interval=interval, period=period) 19 | 20 | 21 | def rx5day(series: Series, interval: str = "5D", period: str = "30D") -> Series: 22 | """Maximum consecutive precipitation amount over an 5-day interval""" 23 | return rxnday(series=series, interval=interval, period=period) 24 | 25 | 26 | def sdii(series: Series, threshold: float = 1.0, period: str = "30D") -> Series: 27 | """Simple precipitation intensity index""" 28 | series = validate_series(series) 29 | _ = validate_index(series.index) 30 | 31 | w = series >= threshold 32 | 33 | return series.loc[w].resample(period).sum() / w.sum() 34 | 35 | 36 | def rnmm(series: Series, threshold: float, period: str = "1YE") -> Series: 37 | """Annual count of days when precipitation ≥ n mm. 
n is a user-defined threshold""" 38 | series = validate_series(series) 39 | _ = validate_index(series.index) 40 | 41 | w = series >= threshold 42 | 43 | return w.resample(period).sum() 44 | 45 | 46 | def r10mm(series: Series, threshold: float = 10.0, period: str = "1YE") -> Series: 47 | """Annual count of days when precipitation ≥ 10 mm""" 48 | return rnmm(series=series, threshold=threshold, period=period) 49 | 50 | 51 | def r20mm(series: Series, threshold: float = 20.0, period: str = "1YE") -> Series: 52 | """Annual count of days when precipitation ≥ 20 mm""" 53 | return rnmm(series=series, threshold=threshold, period=period) 54 | 55 | 56 | def cdd(series: Series, threshold: float = 1.0, period: str = "365D") -> Series: 57 | """Maximum length of dry spell: maximum number of consecutive days with 58 | precipitation < 1mm""" 59 | series = validate_series(series) 60 | _ = validate_index(series.index) 61 | 62 | w = series < threshold 63 | 64 | return w.diff().rolling(period).sum().dropna().astype(int) 65 | 66 | 67 | def cwd(series: Series, threshold: float = 1.0, period: str = "365D") -> Series: 68 | """Maximum length of wet spell: maximum number of consecutive days with 69 | precipitation ≥ 1mm""" 70 | series = validate_series(series) 71 | _ = validate_index(series.index) 72 | 73 | w = series >= threshold 74 | 75 | return w.diff().rolling(period).sum().dropna().astype(int) 76 | 77 | 78 | def prcptot(series: Series, period: str = "1YE") -> Series: 79 | """Total precipitation on wet days over a certain period""" 80 | series = validate_series(series) 81 | _ = validate_index(series.index) 82 | 83 | return series.resample(period).sum() 84 | 85 | 86 | def rnnp( 87 | series: Series, quantile: float, threshold: float = 1.0, period: str = "1YE" 88 | ) -> Series: 89 | """Total amount of precipitation on wet days above certain quantile""" 90 | series = validate_series(series) 91 | _ = validate_index(series.index) 92 | 93 | series_w = series[series >= threshold] 94 | wq = 
series_w > series_w.quantile(quantile) 95 | 96 | return series_w.loc[wq].resample(period).sum() 97 | 98 | 99 | def r95p( 100 | series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1YE" 101 | ) -> Series: 102 | """Total amount of precipitation on very wet days""" 103 | return rnnp(series=series, quantile=quantile, threshold=threshold, period=period) 104 | 105 | 106 | def r99p( 107 | series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1YE" 108 | ) -> Series: 109 | """Total amount of precipitation on extremely wet days""" 110 | return rnnp(series=series, quantile=quantile, threshold=threshold, period=period) 111 | 112 | 113 | def r95ptot( 114 | series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1YE" 115 | ) -> Series: 116 | """Contribution to total precipitation from very wet days""" 117 | r95 = r95p(series=series, quantile=quantile, threshold=threshold, period=period) 118 | tot = prcptot(series=series, period=period) 119 | return r95 * 100 / tot 120 | 121 | 122 | def r99ptot( 123 | series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1YE" 124 | ) -> Series: 125 | """Contribution to total precipitation from extremely wet days""" 126 | r99 = r99p(series=series, quantile=quantile, threshold=threshold, period=period) 127 | tot = prcptot(series=series, period=period) 128 | return r99 * 100 / tot 129 | -------------------------------------------------------------------------------- /docs/examples/example08_rai.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "607da258", 6 | "metadata": {}, 7 | "source": [ 8 | "# Rainfall Anomaly Index\n", 9 | "\n", 10 | "Based on van Rooy, M.P. (1965). A Rainfall Anomaly Index Independent of Time and Space. Notos." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "fbc30e7f", 16 | "metadata": {}, 17 | "source": [ 18 | "## Packages" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "dcef936f", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import matplotlib as mpl\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "import pandas as pd\n", 31 | "\n", 32 | "from spei.rai import mrai, rai" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "40d04848", 38 | "metadata": {}, 39 | "source": [ 40 | "## Data" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "0c274c53", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 51 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).resample(\"MS\").sum()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "edfc521d", 57 | "metadata": {}, 58 | "source": [ 59 | "## Calculate Index" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "11dcd5f4", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "ra_index = rai(prec)\n", 70 | "mra_index = mrai(prec, sf=1.7)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "a6dc3eeb", 76 | "metadata": {}, 77 | "source": [ 78 | "## Visualize" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "92253a3f", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "f, ax = plt.subplots(2, 1, figsize=(12, 6), sharex=True, sharey=True)\n", 89 | "ax[0].plot(ra_index.index, ra_index.values, color=\"C0\")\n", 90 | "ax[1].plot(mra_index.index, mra_index.values, color=\"C1\")\n", 91 | "ax[0].set_ylabel(\"Rainfall Anomaly Index\")\n", 92 | "ax[1].set_ylabel(\"Modified Rainfall Anomaly Index\")\n", 93 | "ax[0].grid(True)\n", 94 | "ax[1].grid(True)\n", 95 | 
"ax[1].yaxis.set_major_locator(mpl.ticker.MultipleLocator(1))\n", 96 | "ax[1].xaxis.set_major_locator(mpl.dates.YearLocator(1))\n", 97 | "ax[1].set_xlim(pd.Timestamp(\"2010-01-01\"), pd.Timestamp(\"2020-12-31\"))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "fc772d44", 103 | "metadata": {}, 104 | "source": [ 105 | "Interpretation based on Hansel (2015) - [The Modified Rainfall Anomaly Index (mRAI)](https://doi.org/10.1007/s00704-015-1389-y)\n", 106 | "\n", 107 | "| RAI | Description | mRAI | Description |\n", 108 | "| :---------------------------------- | :---------------------------- | :-------------------------------- | :---------------------------- |\n", 109 | "| $\\ge$ 3.00 | Extremely wet | $\\ge$ 2.00 | Extremely wet |\n", 110 | "| 2.00 to 2.99 | Very wet | 1.50 to 1.99 | Very wet |\n", 111 | "| 1.00 to 1.99 | Moderately wet | 1.00 to 1.49 | Moderately wet |\n", 112 | "| 0.50 to 0.99 | Slightly wet | 0.50 to 0.99 | Slightly wet |\n", 113 | "| -0.49 to 0.49 | Near normal | -0.49 to 0.49 | Near normal |\n", 114 | "| -0.99 to -0.50 | Slightly dry | -0.99 to -0.50 | Slightly dry |\n", 115 | "| -1.99 to -1.00 | Moderately dry | -1.49 to -1.00 | Moderately dry |\n", 116 | "| -2.99 to -2.00 | Very dry | -1.99 to -1.50 | Very dry |\n", 117 | "| ≤-3.00 | Extremely dry | ≤-2.00 | Extremely dry |" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "75058050", 123 | "metadata": {}, 124 | "source": [] 125 | } 126 | ], 127 | "metadata": { 128 | "kernelspec": { 129 | "display_name": "SPEI", 130 | "language": "python", 131 | "name": "python3" 132 | }, 133 | "language_info": { 134 | "codemirror_mode": { 135 | "name": "ipython", 136 | "version": 3 137 | }, 138 | "file_extension": ".py", 139 | "mimetype": "text/x-python", 140 | "name": "python", 141 | "nbconvert_exporter": "python", 142 | "pygments_lexer": "ipython3", 143 | "version": "3.13.1" 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 5 148 | } 149 | 
-------------------------------------------------------------------------------- /tests/test_si.py: -------------------------------------------------------------------------------- 1 | from pandas import DataFrame, Series, Timestamp 2 | from scipy.stats import norm 3 | 4 | from spei import SI, sgi, spei, spi, ssfi, ssmi 5 | from spei.dist import Dist 6 | 7 | 8 | def test_spi(prec: Series) -> None: 9 | precr = prec.rolling("30D", min_periods=30).sum().dropna() 10 | spi(precr, fit_freq="MS", prob_zero=True) 11 | 12 | 13 | def test_spei(prec: Series, evap: Series) -> None: 14 | n = (prec - evap).rolling("30D", min_periods=30).sum().dropna() 15 | spei(n, fit_freq="MS") 16 | 17 | 18 | def test_sgi(head: Series) -> None: 19 | sgi(head, fit_freq="MS") 20 | 21 | 22 | def test_ssfi_timescale(prec: Series) -> None: 23 | ssfi(prec, timescale=30) 24 | 25 | 26 | def test_ssmi(prec: Series) -> None: 27 | ssmi(prec, dist=norm, fit_freq="MS") 28 | 29 | 30 | def test_window(prec: Series, evap: Series) -> None: 31 | n = (prec - evap).rolling("30D", min_periods=30).sum().dropna() 32 | spei(n, fit_freq="W", fit_window=3) 33 | 34 | 35 | def test_window_even(prec: Series, evap: Series, caplog) -> None: 36 | n = (prec - evap).rolling("30D", min_periods=30).sum().dropna() 37 | spei(n, fit_freq="W", fit_window=4) 38 | assert "Window should be odd. 
Setting the window value to" in caplog.text 39 | 40 | 41 | def test_SI(prec: Series) -> None: 42 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS") 43 | si.fit_distribution() 44 | si.pdf() 45 | dist = si.get_dist(Timestamp("2010-01-01")) 46 | dist.ks_test() 47 | 48 | 49 | def test_SI_post_init_timescale(prec: Series) -> None: 50 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS") 51 | assert si.series.equals(prec.rolling(30, min_periods=30).sum().dropna()), ( 52 | "Timescale rolling sum not applied correctly" 53 | ) 54 | 55 | 56 | def test_SI_post_init_fit_freq_infer(prec: Series) -> None: 57 | si = SI(prec, dist=norm, timescale=0) 58 | assert si.fit_freq is not None, "Frequency inference failed" 59 | 60 | 61 | def test_SI_post_init_grouped_year(prec: Series) -> None: 62 | si = SI(prec, dist=norm, timescale=0, fit_freq="MS") 63 | assert isinstance(si._grouped_year, DataFrame), "Grouped year DataFrame not created" 64 | 65 | 66 | def test_SI_post_init_fit_window_adjustment(prec: Series) -> None: 67 | si = SI(prec, dist=norm, timescale=0, fit_freq="D", fit_window=2) 68 | assert si.fit_window == 3, "Fit window not adjusted to odd number" 69 | 70 | 71 | def test_SI_post_init_fit_window_minimum(prec: Series) -> None: 72 | si = SI(prec, dist=norm, timescale=0, fit_freq="D", fit_window=1) 73 | assert si.fit_window == 3, "Fit window not adjusted to minimum value" 74 | 75 | 76 | def test_fit_distribution_normal_scores_transform(prec: Series) -> None: 77 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS", normal_scores_transform=True) 78 | si.fit_distribution() 79 | assert not si._dist_dict, ( 80 | "Distribution dictionary should be empty when using normal scores transform" 81 | ) 82 | 83 | 84 | def test_fit_distribution_with_fit_window(prec: Series) -> None: 85 | si = SI(prec, dist=norm, timescale=30, fit_freq="D", fit_window=5) 86 | si.fit_distribution() 87 | assert si._dist_dict, ( 88 | "Distribution dictionary should not be empty when using fit window" 89 | ) 
90 | for dist in si._dist_dict.values(): 91 | assert isinstance(dist, Dist), ( 92 | "Items in distribution dictionary should be of type Dist" 93 | ) 94 | 95 | 96 | def test_fit_distribution_with_fit_freq(prec: Series) -> None: 97 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS") 98 | si.fit_distribution() 99 | assert si._dist_dict, ( 100 | "Distribution dictionary should not be empty when using fit frequency" 101 | ) 102 | for dist in si._dist_dict.values(): 103 | assert isinstance(dist, Dist), ( 104 | "Items in distribution dictionary should be of type Dist" 105 | ) 106 | 107 | 108 | def test_fit_distribution_invalid_fit_freq_with_window(prec: Series) -> None: 109 | si = SI(prec, dist=norm, timescale=30, fit_freq="M", fit_window=5) 110 | try: 111 | si.fit_distribution() 112 | except ValueError as e: 113 | assert ( 114 | str(e) 115 | == "Frequency fit_freq must be 'D' or 'W', not 'M', if a fit_window is provided." 116 | ) 117 | else: 118 | assert False, "ValueError not raised for invalid fit frequency with fit window" 119 | 120 | 121 | def test_ppf(prec: Series) -> None: 122 | si = SI(prec, dist=norm, timescale=1, fit_freq="MS") 123 | si.fit_distribution() 124 | ppf = si.ppf(0.5) 125 | assert isinstance(ppf, Series), "PPF result should be a Pandas Series" 126 | assert len(ppf) == len(si.series), ( 127 | "PPF result length does not match input series length" 128 | ) 129 | 130 | 131 | def test_ppf_nsf(prec: Series) -> None: 132 | si = SI(prec, dist=norm, timescale=1, fit_freq="MS", normal_scores_transform=True) 133 | si.fit_distribution() 134 | ppf = si.ppf(0.5) 135 | assert isinstance(ppf, Series), "PPF result should be a Pandas Series" 136 | assert len(ppf) == len(si.series), ( 137 | "PPF result length does not match input series length" 138 | ) 139 | -------------------------------------------------------------------------------- /src/spei/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from 
calendar import isleap 3 | 4 | from numpy import array, nan 5 | from packaging.version import parse as parse_version 6 | from pandas import ( 7 | DataFrame, 8 | DatetimeIndex, 9 | Grouper, 10 | Index, 11 | Series, 12 | Timedelta, 13 | concat, 14 | infer_freq, 15 | to_datetime, 16 | ) 17 | 18 | pd_version = parse_version(__import__("pandas").__version__) 19 | 20 | 21 | def validate_series(series: Series) -> Series: 22 | series = series.copy() 23 | 24 | if not isinstance(series, Series): 25 | if isinstance(series, DataFrame): 26 | if len(series.columns) == 1: 27 | logging.warning( 28 | "Please convert series of type pandas.DataFrame to a" 29 | "pandas.Series using DataFrame.squeeze(). Now done automatically." 30 | ) 31 | series = series.squeeze() 32 | else: 33 | raise TypeError( 34 | "Please provide a pandas.Series instead of a pandas.DataFrame" 35 | ) 36 | else: 37 | raise TypeError(f"Please provide a Pandas Series instead of {type(series)}") 38 | 39 | index = validate_index(series.index) 40 | 41 | return series.reindex(index, copy=True) 42 | 43 | 44 | def validate_index(index: Index) -> DatetimeIndex: 45 | index = index.copy() 46 | 47 | if not isinstance(index, DatetimeIndex): 48 | logging.info( 49 | f"Expected the index to be a DatetimeIndex. Automatically converted " 50 | f"{type(index)} using pd.to_datetime(Index)" 51 | ) 52 | index = DatetimeIndex(to_datetime(index)) 53 | 54 | if index.has_duplicates: 55 | msg = ( 56 | "Duplicated indices found. Please remove them. 
For instance by" 57 | " using `series = " 58 | "series.loc[~series.index.duplicated(keep='first/last')]`" 59 | ) 60 | logging.error(msg) 61 | raise ValueError(msg) 62 | 63 | return index 64 | 65 | 66 | def infer_frequency(index: Index | DatetimeIndex) -> str: 67 | """Infer frequency""" 68 | 69 | index = validate_index(index) 70 | 71 | inf_freq = infer_freq(index) 72 | 73 | if inf_freq is None: 74 | logging.info( 75 | "Could not infer frequency from index, using monthly frequency instead" 76 | ) 77 | inf_freq = "MS" if pd_version >= parse_version("2.2.0") else "M" 78 | else: 79 | logging.info(f"Inferred frequency '{inf_freq}' from index") 80 | 81 | if "W-" in inf_freq: 82 | logging.info(f"Converted frequncy weekly '{inf_freq}' to 'W'") 83 | inf_freq = "W" 84 | 85 | return inf_freq 86 | 87 | 88 | def group_yearly_df(series: Series) -> DataFrame: 89 | """Group Series per year in a DataFrame. 90 | 91 | This function groups a time series by year, creating a DataFrame where each 92 | column corresponds to a year (as int). The index of the DataFrame is set to 93 | the corresponding dates (in the year 2000). 94 | 95 | Parameters: 96 | ----------- 97 | series : pd.Series 98 | A pandas Series with a DateTime index. 
99 | 100 | Returns: 101 | -------- 102 | pd.DataFrame 103 | """ 104 | strfstr: str = "%m-%d %H:%M:%S" 105 | grs = {} 106 | freq = "YE" if pd_version >= parse_version("2.2.0") else "Y" 107 | for year_timestamp, gry in series.groupby(Grouper(freq=freq)): 108 | index = validate_index(gry.index) 109 | gry.index = to_datetime( 110 | "2000-" + index.strftime(strfstr), format="%Y-" + strfstr 111 | ) 112 | year = getattr(year_timestamp, "year") # type: str 113 | grs[year] = gry 114 | return concat(grs, axis=1) 115 | 116 | 117 | def get_data_series(group_df: DataFrame) -> Series: 118 | """Transform grouped dataframe by yearly values back to time series.""" 119 | strfstr: str = "%m-%d %H:%M:%S" 120 | index = validate_index(group_df.index) 121 | idx = array( 122 | [(f"{col}-" + index.strftime(strfstr)).tolist() for col in group_df.columns] 123 | ).flatten() 124 | # remove illegal 29 february for non leap years created by group_yearly_df 125 | boolidx = ~array( 126 | [ 127 | (x.split(" ")[0].split("-", 1)[1] == "02-29") 128 | and not isleap(int(x.split(" ")[0].split("-")[0])) 129 | for x in idx 130 | ] 131 | ) 132 | 133 | dt_idx = to_datetime(idx[boolidx], format="%Y-" + strfstr) 134 | values = group_df.transpose().values.flatten()[boolidx] 135 | return Series(values, index=dt_idx, dtype=float).dropna() 136 | 137 | 138 | def daily_window_group_yearly_df(dfval: DataFrame, period: int) -> DataFrame: 139 | """Fill a period of daily values in grouped by yearly DataFrame to get 140 | cyclic rolling window. 
141 | """ 142 | dfval_window_index_start = [ 143 | dfval.index[0] + Timedelta(value=-i, unit="D") 144 | for i in reversed(range(1, period + 1)) 145 | ] 146 | dfval_window_index_end = [ 147 | dfval.index[-1] + Timedelta(value=i, unit="D") for i in range(1, period + 1) 148 | ] 149 | dfval_window_index = DatetimeIndex( 150 | dfval_window_index_start + dfval.index.to_list() + dfval_window_index_end 151 | ) 152 | 153 | dfval_window = DataFrame( 154 | nan, index=dfval_window_index, columns=dfval.columns, dtype=float 155 | ) 156 | dfval_window.loc[dfval.index, dfval.columns] = dfval.values 157 | dfval_window.iloc[:period] = dfval.iloc[-period:].values 158 | dfval_window.iloc[-period:] = dfval.iloc[:period].values 159 | return dfval_window 160 | -------------------------------------------------------------------------------- /src/spei/dist.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Literal 3 | 4 | from numpy import std 5 | from pandas import Series 6 | from scipy.stats import kstest 7 | 8 | from ._typing import ContinuousDist 9 | 10 | 11 | @dataclass 12 | class Dist: 13 | data: Series = field(init=True, repr=False) 14 | dist: ContinuousDist 15 | loc: float = field(init=False, repr=True) 16 | scale: float = field(init=False, repr=True) 17 | pars: list[float] | None = field(init=False, repr=False) 18 | prob_zero: bool = field(default=False, init=True, repr=False) 19 | p0: float = field(default=0.0, init=False, repr=False) 20 | data_window: Series | None = field(default=None, init=True, repr=False) 21 | """ 22 | Represents a distribution associated with data. 23 | 24 | Parameters 25 | ---------- 26 | data : Series 27 | The input data for fitting the distribution. 28 | dist : ContinuousDist 29 | The SciPy continuous distribution associated to be fitted. 
30 | prob_zero : bool, default=False 31 | Flag indicating whether the probability of zero values in the series is 32 | calculated by the occurence. 33 | data_window : Optional[Series], default=None 34 | Subset of data for fitting more data (if provided). 35 | loc : float 36 | Location of the distribution 37 | scale : float 38 | Scale of the distribution 39 | pars : Optional[List[float]] 40 | Attribute storing additional distribution parameters (if applicable). 41 | p0 : float 42 | The probability of zero values in the data. Only calculated if prob_zero=True. 43 | 44 | Notes 45 | ----- 46 | The `fit_dist` method uses the `dist.fit` function from Scipy to estimate 47 | distribution parameters. If the fitted distribution requires additional 48 | parameters beyond `loc` and `scale`, they are stored in the `pars` attribute. 49 | """ 50 | 51 | def __post_init__(self): 52 | """ 53 | Post initializes the Dist class by fitting the distribution. 54 | """ 55 | data_fit = self.data_window if self.data_window is not None else self.data 56 | pars, loc, scale = self.fit_dist(data=data_fit, dist=self.dist) 57 | self.loc = loc 58 | self.scale = scale 59 | self.pars = pars 60 | 61 | if self.prob_zero: 62 | self.p0 = (data_fit == 0.0).sum() / len(data_fit) 63 | 64 | @staticmethod 65 | def fit_dist( 66 | data: Series, dist: ContinuousDist 67 | ) -> tuple[list[float] | None, float, float]: 68 | """ 69 | Fits a Scipy continuous distribution to the data. 70 | 71 | Parameters 72 | ---------- 73 | data : Series 74 | The input data for fitting. 75 | dist : ContinuousDist 76 | The continuous distribution to be fitted. 77 | 78 | Returns 79 | ------- 80 | Tuple 81 | Tuple containing distribution parameters (pars, loc, scale). 
82 | """ 83 | fit_tuple = dist.fit(data, scale=std(data)) 84 | if len(fit_tuple) == 2: 85 | loc, scale = fit_tuple 86 | pars = None 87 | else: 88 | *pars, loc, scale = fit_tuple 89 | return pars, loc, scale 90 | 91 | def cdf(self) -> Series: 92 | """Compute cumulative density function of a Scipy Continuous Distribution""" 93 | if self.pars is not None: 94 | cdf = self.dist.cdf( 95 | self.data.values, *self.pars, loc=self.loc, scale=self.scale 96 | ) 97 | else: 98 | cdf = self.dist.cdf(self.data.values, loc=self.loc, scale=self.scale) 99 | 100 | if self.prob_zero: 101 | cdf = self.p0 + (1 - self.p0) * cdf 102 | cdf[self.data == 0.0] = self.p0 103 | 104 | return Series(cdf, index=self.data.index, dtype=float) 105 | 106 | def pdf(self) -> Series: 107 | data_pdf = self.data.sort_values() 108 | if self.pars is not None: 109 | pdf = self.dist.pdf( 110 | data_pdf.values, *self.pars, loc=self.loc, scale=self.scale 111 | ) 112 | else: 113 | pdf = self.dist.pdf(data_pdf.values, loc=self.loc, scale=self.scale) 114 | 115 | if self.prob_zero: 116 | pdf = self.p0 + (1 - self.p0) * pdf 117 | pdf[self.data == 0.0] = self.p0 118 | 119 | return Series(pdf, index=data_pdf.index, dtype=float) 120 | 121 | def ppf(self, q: float) -> Series: 122 | """Compute percent point function (inverse of cdf) at q""" 123 | if self.pars is not None: 124 | ppf = self.dist.ppf(q, *self.pars, loc=self.loc, scale=self.scale) 125 | else: 126 | ppf = self.dist.ppf(q, loc=self.loc, scale=self.scale) 127 | 128 | return Series(ppf, index=self.data.index, dtype=float) 129 | 130 | def ks_test( 131 | self, 132 | method: Literal["auto", "exact", "approx", "asymp"] = "auto", 133 | ) -> float: 134 | """Fit a distribution and perform the two-sided 135 | Kolmogorov-Smirnov test for goodness of fit. The 136 | null hypothesis is that the data and distributions 137 | are identical, the alternative is that they are 138 | not identical. 
139 | 140 | Parameters 141 | ---------- 142 | method : Literal['auto', 'exact', 'approx', 'asymp'], optional 143 | Defines the distribution used for calculating the p-value. The 144 | following options are available (default is 'auto'): 'auto' selects 145 | one of the other options, 'exact' uses the exact distribution of 146 | test statistic, 'approx' approximates the two-sided probability 147 | with twice the one-sided probability, 'asymp' uses asymptotic 148 | distribution of test statistic 149 | 150 | Returns 151 | ------- 152 | float 153 | p-value 154 | 155 | References 156 | ------- 157 | Onnen, H.: Intro to Probability Distributions and Distribution 158 | Fitting with Pythons SciPy, 2021. 159 | """ 160 | args = ( 161 | (self.pars, self.loc, self.scale) 162 | if self.pars is not None 163 | else (self.loc, self.scale) 164 | ) 165 | kstest_result = kstest( 166 | rvs=self.data, cdf=self.dist.name, args=args, method=method 167 | ) 168 | # rej_h0 = kstest_result.pvalue < alpha 169 | return kstest_result.pvalue 170 | -------------------------------------------------------------------------------- /docs/examples/example06_treshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "eaa846c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Threshold Drought\n", 9 | "\n", 10 | "## Load packages" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "247f31ac", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "from scipy import stats as sps\n", 22 | "\n", 23 | "import spei as si" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "7b1fa2a9", 29 | "metadata": {}, 30 | "source": [ 31 | "## Load data" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "34ae712e", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "df = 
pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 42 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 43 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 44 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"head\").dropna()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "5830c471", 50 | "metadata": {}, 51 | "source": [ 52 | "## Calculate precipitation surplus" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "f750f7ae", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "surplusd = prec - evap\n", 63 | "surplus = surplusd.resample(\"MS\").sum()\n", 64 | "surplus.plot()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "f013e9f1", 70 | "metadata": {}, 71 | "source": [ 72 | "## Fit distribution" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "6ae83c3e", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "dist = sps.fisk\n", 83 | "sispei = si.SI(\n", 84 | " series=surplus,\n", 85 | " dist=dist,\n", 86 | " timescale=0,\n", 87 | ")\n", 88 | "sispei.fit_distribution()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "id": "41ab4ace", 94 | "metadata": {}, 95 | "source": [ 96 | "## Get threshold\n", 97 | "\n", 98 | "Choose arbitrary threshold based on quantile of the distribution. Can be any threshold the user wants as well. Only then the threshold time series has to be created manually." 
99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "62530ad6", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "speithr = sispei.ppf(0.3) # 30% quantile threshold" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "efed2416", 114 | "metadata": {}, 115 | "source": [ 116 | "## Plot threshold" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "c7492e76", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "ax = si.plot.threshold(\n", 127 | " series=sispei.series,\n", 128 | " threshold=speithr,\n", 129 | " fill_color=\"red\",\n", 130 | ")\n", 131 | "_ = ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2020\"))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "id": "b54b1d3f", 137 | "metadata": {}, 138 | "source": [ 139 | "## Repeat for head time series" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "id": "1a7f3565", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "timescale = 6\n", 150 | "sisgi = si.SI(\n", 151 | " head,\n", 152 | " dist=sps.norm,\n", 153 | " timescale=timescale,\n", 154 | " fit_freq=\"MS\",\n", 155 | " normal_scores_transform=True,\n", 156 | " agg_func=\"mean\",\n", 157 | ")\n", 158 | "sgithr = sisgi.ppf(0.4) # choose arbitrary threshold" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "id": "6cdf67c7", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "ax = si.plot.threshold(\n", 169 | " series=head.iloc[timescale - 1 :],\n", 170 | " threshold=sgithr,\n", 171 | " fill_color=\"red\",\n", 172 | ")\n", 173 | "_ = ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2020\"))" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "SPEI", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 
| "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.12.3" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 5 198 | } 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SPEI 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/spei?style=flat-square&color=007ec6)](https://pypi.org/project/spei/) 4 | [![PyPi Supported Python Versions](https://img.shields.io/pypi/pyversions/spei?style=flat-square&color=007ec6)](https://pypi.org/project/spei/) 5 | [![Code Size](https://img.shields.io/github/languages/code-size/martinvonk/spei?style=flat-square&color=007ec6)](https://pypi.org/project/spei/) 6 | [![PyPi Downloads](https://img.shields.io/pypi/dm/spei?style=flat-square&color=0a3d62)](https://pypi.org/project/spei/) 7 | [![License](https://img.shields.io/pypi/l/spei?style=flat-square&color=0a3d62&logo=open-source-initiative&logoColor=white)](https://pypi.org/project/spei/) 8 | 9 | [![JOSS](https://img.shields.io/badge/JOSS-10.21105/joss.08454-ff6600.svg?style=flat-square)](https://doi.org/10.21105/joss.08454) 10 | [![DOI](https://img.shields.io/badge/DOI-10.5281/zenodo.10816741-ff6600?style=flat-square)](https://doi.org/10.5281/zenodo.10816741) 11 | 12 | [![Tests](https://img.shields.io/github/actions/workflow/status/martinvonk/spei/tests.yml?style=flat-square&color=green)](https://github.com/martinvonk/SPEI/actions/workflows/tests.yml) 13 | [![CodacyCoverage](https://img.shields.io/codacy/coverage/908b566912314666b84e1add22ea7d66?style=flat-square&color=green)](https://app.codacy.com/gh/martinvonk/SPEI/) 14 | 
[![CodacyGrade](https://img.shields.io/codacy/grade/908b566912314666b84e1add22ea7d66?style=flat-square&color=darkgreen)](https://app.codacy.com/gh/martinvonk/SPEI/) 15 | [![Typed: MyPy](https://img.shields.io/badge/type_checker-mypy-darkgreen?style=flat-square)](https://mypy-lang.org/) 16 | [![Formatter and Linter: ruff](https://img.shields.io/badge/linter-ruff-darkgreen?style=flat-square)](https://github.com/charliermarsh/ruff) 17 | 18 | 19 | SPEI is a simple Python package to calculate drought indices for hydrological time series. This package uses popular Python packages such as Pandas and Scipy to make it easy and versatile for the user to calculate the drought indices. Pandas Series are great for dealing with time series; providing interpolation, rolling average, and other manipulation options. SciPy enables us to use all different kinds of [distributions](https://docs.scipy.org/doc/scipy/reference/stats.html#probability-distributions) to fit the data. Different popular drought indices are supported such as the SPI (Standardized Precipitation Index), SPEI (Standardized Precipitation Evaporation Index), and SGI (Standardized Groundwater Index). 20 | 21 | If you use this software for either the visualization and/or analysis, please cite this package via our article in the [Journal of Open Source Software](https://joss.theoj.org/papers/10.21105/joss.08454): 22 | > Vonk, M. A. (2025). SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454). 23 | 24 | Or cite a specific version in the Zenodo archive: 25 | > Vonk, M. A. (XXXX). SPEI: A simple Python package to calculate and visualize drought indices (vX.X.X). Zenodo. [doi.org/10.5281/zenodo.10816740](https://doi.org/10.5281/zenodo.10816740). 
26 | 27 | ## Available Drought Indices 28 | 29 | | Drought Index | Abbreviation | Literature | 30 | | --------------------------------------------- | ------------ | ---------- | 31 | | Standardized Precipitation Index | SPI | 1 | 32 | | Standardized Precipitation Evaporation Index* | SPEI | 2 | 33 | | Standardized Groundwater Index | SGI | 3,4 | 34 | | Standardized Streamflow Index | SSFI | 5,6 | 35 | | Standardized Soil Moisture Index | SSMI | 7 | 36 | 37 | The package is not limited to only these five drought indices. If any of the distributions in the Scipy library is valid on the observed hydrological series, the drought index can be calculated. 38 | 39 | *For the calculation of potential evaporation, take a look at [pyet](https://github.com/phydrus/pyet). This is another great package that also uses pandas Series to calculate different kinds of potential evaporation time series. 40 | 41 | ## Installation 42 | 43 | To get the latest stable version install using: 44 | 45 | `pip install spei` 46 | 47 | To get the development version download or clone the GitHub repository to your local device. Install using: 48 | 49 | `pip install -e ` 50 | 51 | ## Literature 52 | 53 | This list of scientific literature is helpful as a reference to understand the context and application of drought indices. 54 | 55 | 1. Lloyd-Hughes, B. and M.A. Saunders (2002) - A Drought Climatology for Europe. DOI: 10.1002/joc.846 56 | 2. Vicente-Serrano, S.M., S. Beguería and J.I. López-Moreno (2010) - A Multi-scalar drought index sensitive to global warming: The Standardized Precipitation Evapotranspiration Index. DOI: 10.1175/2009JCLI2909.1 57 | 3. Bloomfield, J.P. and B.P. Marchant (2013) - Analysis of groundwater drought building on the standardised precipitation index approach. DOI: 10.5194/hess-17-4769-2013 58 | 4. Babre, A., A. Kalvāns, Z. Avotniece, I. Retiķe, J. Bikše, K.P.M. Jemeljanova, A. Zelenkevičs and A. 
Dēliņa (2022) - The use of predefined drought indices for the assessment of groundwater drought episodes in the Baltic States over the period 1989–2018. DOI: 10.1016/j.ejrh.2022.101049 59 | 5. Vicente-Serrano, S. M., J. I. López-Moreno, S. Beguería, J. Lorenzo-Lacruz, C. Azorin-Molina, and E. Morán-Tejeda (2012). Accurate Computation of a Streamflow Drought Index. Journal of Hydrologic Engineering. American Society of Civil Engineers. DOI: 10.1061/(asce)he.1943-5584.0000433 60 | 6. Tijdeman, E., K. Stahl and L.M. Tallaksen (2020) - Drought characteristics derived based on the Standardized Streamflow Index: A large sample comparison for parametric and nonparametric methods. DOI: 10.1029/2019WR026315 61 | 7. Carrão, H., Russo, S., Sepulcre-Canto, G., Barbosa, P.: An empirical standardized soil moisture index for agricultural drought assessment from remotely sensed data. DOI: 10.1016/j.jag.2015.06.011 62 | 63 | ### Publications 64 | These are scientific publications that use and cite this Python package: 65 | 66 | van Mourik, J., Ruijsch, D., van der Wiel, K., Hazeleger, W., & Wanders, N. (2025). Regional drivers and characteristics of multi-year droughts. Weather and Climate Extremes, 48, 100748. https://doi.org/10.1016/j.wace.2025.100748 67 | 68 | Segura-Barrero, R., Lauvaux, T., Lian, J., Ciais, P., Badia, A., Ventura, S., Bazzi, H., Abbessi, E., Fu, Z., Xiao, J., Li, X., & Villalba, G. (2025). Heat and Drought Events Alter Biogenic Capacity to Balance CO2 Budget in South-Western Europe. Global biogeochemical cycles, 39(1), e2024GB008163. https://doi.org/10.1029/2024GB008163 69 | 70 | Adla, S., Šaponjić, A., Tyagi, A., Nagi, A., Pastore, P., & Pande, S. (2024). Steering agricultural interventions towards sustained irrigation adoption by farmers: socio-psychological analysis of irrigation practices in Maharashtra, India. Hydrological Sciences Journal, 69(12), 1586–1603. https://doi.org/10.1080/02626667.2024.2376709 71 | 72 | Panigrahi, S., Vidyarthi, V.K. (2025).
Assessing the Suitability of SPI and SPEI in Steppe Hot and Arid Climatic Zones in India. In: Sefelnasr, A., Sherif, M., Singh, V.P. (eds) Water Resources Management and Sustainability. Water Science and Technology Library, vol 114. Springer, Cham. https://doi.org/10.1007/978-3-031-80520-2_12 73 | -------------------------------------------------------------------------------- /docs/examples/example05_multiyear_drought.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Analyzing Multi-Year Droughts\n", 8 | "\n", 9 | "After van Mourik, J. and Ruijsch, D. and van der Wiel, K. and Hazeleger, W. and Wanders, N. (2025) - [Regional drivers and characteristics of multi-year droughts](https://doi.org/10.1016/j.wace.2025.100748)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Load packages" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib as mpl\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import pandas as pd\n", 28 | "\n", 29 | "import spei as si # si for standardized index\n", 30 | "\n", 31 | "print(si.show_versions())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Load meteo data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 48 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 49 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Calculate SPEI over different time scales\n", 57 | "\n", 58 | "1 month to 24 
months" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "pe = (\n", 68 | " (prec - evap).dropna().resample(\"ME\").sum()\n", 69 | ") # calculate monthly precipitation excess\n", 70 | "spei1 = si.spei(pe, timescale=1).rename(\"1\")\n", 71 | "spei3 = si.spei(pe, timescale=3).rename(\"3\")\n", 72 | "spei6 = si.spei(pe, timescale=6).rename(\"6\")\n", 73 | "spei9 = si.spei(pe, timescale=9).rename(\"9\")\n", 74 | "spei12 = si.spei(pe, timescale=12).rename(\"12\")\n", 75 | "spei24 = si.spei(pe, timescale=24).rename(\"24\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Visualization" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Plot as time series" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "f, ax = plt.subplots(6, figsize=(7.0, 10.0), sharex=True)\n", 99 | "si.plot.si(spei1, ax=ax[0], add_category=False)\n", 100 | "si.plot.si(spei3, ax=ax[1], add_category=False)\n", 101 | "si.plot.si(spei6, ax=ax[2], add_category=False)\n", 102 | "si.plot.si(spei9, ax=ax[3], add_category=False)\n", 103 | "si.plot.si(spei12, ax=ax[4], add_category=False)\n", 104 | "si.plot.si(spei24, ax=ax[5], add_category=False)\n", 105 | "ax[0].set_ylabel(\"SPEI-1\")\n", 106 | "ax[1].set_ylabel(\"SPEI-3\")\n", 107 | "ax[2].set_ylabel(\"SPEI-6\")\n", 108 | "ax[3].set_ylabel(\"SPEI-9\")\n", 109 | "ax[4].set_ylabel(\"SPEI-12\")\n", 110 | "ax[5].set_ylabel(\"SPEI-24\")\n", 111 | "ax[5].xaxis.set_major_locator(mpl.dates.YearLocator())\n", 112 | "ax[5].xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 113 | "ax[5].set_xlim(pd.Timestamp(\"2005\"), pd.Timestamp(\"2015\"))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "### Plot as heatmap\n", 121 | "\n", 122 
| "Only droughts" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "ax = si.plot.heatmap([spei1, spei3, spei6, spei9, spei12, spei24])\n", 132 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator())\n", 133 | "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 134 | "ax.get_figure().axes[-1].set_yticks(range(-3, 0))\n", 135 | "ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2015\"))" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Or with wet periods" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "f, ax = plt.subplots(figsize=(8.0, 3.0))\n", 152 | "ax = si.plot.heatmap(\n", 153 | " [spei1, spei3, spei6, spei9, spei12, spei24],\n", 154 | " cmap=\"vik_r\",\n", 155 | " vmax=3,\n", 156 | " add_category=True,\n", 157 | " ax=ax,\n", 158 | ")\n", 159 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator())\n", 160 | "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 161 | "ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2015\"))" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "SPEI", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.13.1" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 2 186 | } 187 | -------------------------------------------------------------------------------- /docs/examples/example01_indices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Standardized Indices\n", 8 | "\n", 9 | "*Martin Vonk - 2022*\n", 10 | "\n", 11 | "This notebooks shows an example calculation of the three drought indices:\n", 12 | "- SPI: Standardized Precipitation Index\n", 13 | "- SPEI: Standardized Precipitation Evaporation Index\n", 14 | "- SGI: Standardized Groundwater Index\n", 15 | "\n", 16 | "## Required packages" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib.pyplot as plt\n", 26 | "import pandas as pd\n", 27 | "import scipy.stats as scs\n", 28 | "\n", 29 | "import spei as si # si for standardized index\n", 30 | "\n", 31 | "print(si.show_versions())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Load time series\n", 39 | "\n", 40 | "We use time series of the precipitation and potential (Makkink) evaporation from the Netherlands and obtain them from the python package [Pastas](https://github.com/pastas/pastas)." 
41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 50 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 51 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 52 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"B32C0572\").dropna()\n", 53 | "\n", 54 | "fig, ax = plt.subplots(3, 1, figsize=(12, 8), sharex=True)\n", 55 | "prec.plot(ax=ax[0], legend=True, grid=True)\n", 56 | "evap.plot(ax=ax[1], color=\"C1\", legend=True, grid=True)\n", 57 | "head.plot(ax=ax[2], color=\"k\", legend=True, grid=True);" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Calculate SPI\n", 65 | "\n", 66 | "The standardized precipitation index (SPI) is calculated using the gamma distribution from the [scipy stats library](https://docs.scipy.org/doc/scipy/reference/stats.html). In fact any continuous distribution of this library can be chosen. However there are sensible choices for the SPI such as gamma, lognorm (lognormal), fisk (log-logistic) or pearson3 distribution. The precipitation time series is summed over a 90D rolling interval, which corresponds to SPI3. \n", 67 | "\n", 68 | "For the literature we refer to: LLoyd-Hughes, B. and Saunders, M.A.: [A drought climatology for Europe](https://doi.org/10.1002/joc.846), 2002." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "f = 90 # days\n", 78 | "series = prec.rolling(f, min_periods=f).sum().dropna()\n", 79 | "series" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "spi3_gamma = si.spi(series, dist=scs.gamma, fit_freq=\"ME\")\n", 89 | "spi3_gamma" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Lets try that with the pearson3 distribution:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "spi3_pearson = si.spi(series, dist=scs.pearson3, fit_freq=\"ME\")\n", 106 | "spi3_pearson" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "tmin, tmax = pd.to_datetime([\"1994\", \"1998\"])\n", 116 | "plt.figure(figsize=(8, 4))\n", 117 | "spi3_gamma.plot(label=\"gamma\")\n", 118 | "spi3_pearson.plot(label=\"pearson3\", linestyle=\"--\")\n", 119 | "plt.xlim(tmin, tmax)\n", 120 | "plt.legend()\n", 121 | "plt.ylabel(\"Z-score\")\n", 122 | "plt.grid()\n", 123 | "plt.title(\"SPI for two distributions\");" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "As can be seen from the figure the distributions do not give significantly different output. This might not be the case for other time series of the precipitation. Example notebook 2 (example2_distribution.ipynb) provides more insight in how to choose the right distribution." 
131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Calculate SPEI\n", 138 | "\n", 139 | "The standardized precipitation evaporation index (SPEI) is calculated by first substracting the evaporation from the precipitation time series. By default the fisk distribution is used to calculate the SPEI, however for other regularly used distributions are lognorm, pearson3 and genextreme. The code internally can also calculate the timescale (30D; SPEI1 in this case)\n", 140 | "\n", 141 | "For the literature we refer to: Vicente-Serrano S.M., Beguería S., López-Moreno J.I.: [A Multi-scalar drought index sensitive to global warming: The Standardized Precipitation Evapotranspiration Index](https://doi.org/10.1175/2009JCLI2909.1), 2010." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "pe = (prec - evap).dropna() # calculate precipitation excess\n", 151 | "spei1 = si.spei(pe, timescale=30, fit_freq=\"ME\")\n", 152 | "spei1" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## Calculate SGI\n", 160 | "\n", 161 | "The standardized groundwater index (SGI) is calculated using the method as described by [Bloomfield, J. P. and Marchant, B. P.: Analysis of groundwater drought building on the standardised precipitation index approach](https://doi.org/10.5194/hess-17-4769-2013), 2013. The way the SGI is calculated is the same as in the groundwater time series analysis package Pastas. A nice example notebook on computing the SGI with Pastas time series models can be found [here](https://pastas.readthedocs.io/en/latest/examples/011_sgi_example.ipynb.html).\n", 162 | "\n", 163 | "For the head time series no distribution has to be selected by default. Since the time series has a 14 day frequency it is not resampled." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "sgi = si.sgi(head, fit_freq=\"ME\")\n", 173 | "sgi.plot(ylabel=\"Z-score\")" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "## Visualize indices\n", 181 | "\n", 182 | "The indices can be interpreted as such:\n", 183 | "\n", 184 | "| **Z-score** | **Category** | **Probability (%)** |\n", 185 | "|-----------------------|----------------------|---------------------|\n", 186 | "| ≥ 2.00 | Extremely wet | 2.3 |\n", 187 | "| 1.50 ≤ Z < 2.00 | Severely wet | 4.4 |\n", 188 | "| 1.00 ≤ Z < 1.50 | Moderately wet | 9.2 |\n", 189 | "| 0.00 ≤ Z < 1.00 | Mildly wet | 34.1 |\n", 190 | "| -1.00 < Z < 0.00 | Mild drought | 34.1 |\n", 191 | "| -1.50 < Z ≤ -1.00 | Moderate drought | 9.2 |\n", 192 | "| -2.00 < Z ≤ -1.50 | Severe drought | 4.4 |\n", 193 | "| ≤ -2.00 | Extreme drought | 2.3 |\n", 194 | "\n", 195 | "The time series for the standardized indices are plotted using a build in method:" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "f, ax = plt.subplots(3, 1, figsize=(12, 8), sharex=True)\n", 205 | "\n", 206 | "# choose a colormap to your liking:\n", 207 | "si.plot.si(spi3_pearson, ax=ax[0], cmap=\"vik_r\")\n", 208 | "si.plot.si(spei1, ax=ax[1], cmap=\"roma\")\n", 209 | "si.plot.si(sgi, ax=ax[2], cmap=\"seismic_r\")\n", 210 | "ax[0].set_xlim(pd.to_datetime([\"1994\", \"1998\"]))\n", 211 | "[x.grid() for x in ax]\n", 212 | "[ax[i].set_ylabel(n, fontsize=14) for i, n in enumerate([\"SPI3\", \"SPEI1\", \"SGI\"])];" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "SPEI", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 
227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.12.3" 233 | }, 234 | "orig_nbformat": 4 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /docs/examples/example03_drought_prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drought Prediction with Time Series Modeling\n", 8 | "\n", 9 | "*Martin Vonk - 2022*\n", 10 | "\n", 11 | "This notebooks shows a quick calculation of the SPI, SPEI and SGI for De Bilt, in the Netherlands. The SGI is calculated using a [Pastas](https://github.com/pastas/pastas) time series model since the original time series is too short. The application of time series models for extrapolating groundwater time series is discussed in [Brakkee et al (2022)](https://hess.copernicus.org/articles/26/551/2022/hess-26-551-2022.html).\n", 12 | "\n", 13 | "## Required packages" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import matplotlib.pyplot as plt\n", 23 | "import pandas as pd\n", 24 | "import pastas as ps\n", 25 | "import scipy.stats as scs\n", 26 | "\n", 27 | "import spei as si # si for standardized index\n", 28 | "\n", 29 | "print(si.show_versions())" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Import time series\n", 37 | "\n", 38 | "Time series are imported using the package hydropandas. Enddate is by default yesterday. The head time series is obtained from a Pastas test dataset." 
39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# import hydropandas as hpd\n", 48 | "\n", 49 | "# today = datetime.date.today()\n", 50 | "# yesterday = (today - datetime.timedelta(days=1)).strftime(\"%Y-%m-%d\")\n", 51 | "# prec = (\n", 52 | "# hpd.PrecipitationObs.from_knmi(\n", 53 | "# meteo_var=\"RH\", stn=260, startdate=\"1959-07-01\", enddate=yesterday\n", 54 | "# )\n", 55 | "# .multiply(1e3)\n", 56 | "# .squeeze()\n", 57 | "# )\n", 58 | "# prec.index = prec.index.normalize()\n", 59 | "# evap = (\n", 60 | "# hpd.EvaporationObs.from_knmi(\n", 61 | "# meteo_var=\"EV24\", stn=260, startdate=\"1959-07-01\", enddate=yesterday\n", 62 | "# )\n", 63 | "# .multiply(1e3)\n", 64 | "# .squeeze()\n", 65 | "# )\n", 66 | "# evap.index = evap.index.normalize()\n", 67 | "\n", 68 | "\n", 69 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 70 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 71 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 72 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"B32C0572\").dropna()\n", 73 | "today = df.index[-1]\n", 74 | "yesterday = df.index[-2]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Calculate SPI and SPEI" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Accumulate time series on monthly basis\n", 91 | "spi1 = si.spi(prec, timescale=30, dist=scs.gamma, fit_freq=\"MS\")\n", 92 | "spei1 = si.spei((prec - evap), timescale=30, dist=scs.fisk, fit_freq=\"MS\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "xlim = pd.to_datetime([\"2018-01-01\", df.index[-1]])\n", 102 | "\n", 103 | "fig, axs = plt.subplots(2, 1, figsize=(7.0, 
5.5), sharex=True)\n", 104 | "si.plot.si(spi1, ybound=3.1, ax=axs[0], background=False, cmap=\"roma\")\n", 105 | "si.plot.si(spei1, ybound=3.1, ax=axs[1], background=False, cmap=\"roma\")\n", 106 | "[(x.grid(), x.set_xlim(xlim), x.set_ylabel(\"Z-Score\")) for x in axs]\n", 107 | "axs[0].set_title(\"Standardized Precipitation Index\")\n", 108 | "axs[1].set_title(\"Standardized Precipitation Evaporation Index\")\n", 109 | "fig.suptitle(\"Meteorological Drought-Indices De Bilt\")\n", 110 | "fig.tight_layout()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Create time series model and simulate head " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "ml = ps.Model(head)\n", 127 | "rm = ps.RechargeModel(\n", 128 | " prec, evap, ps.Exponential(), recharge=ps.rch.FlexModel(gw_uptake=True)\n", 129 | ")\n", 130 | "ml.add_stressmodel(rm)\n", 131 | "ml.solve(tmin=\"1970-07-01\", report=True)\n", 132 | "_ = ml.plots.results(figsize=(10.0, 8.0))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Calculate SGI based on time series model" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "gws = ml.simulate(tmin=\"1990-07-01\", tmax=yesterday)\n", 149 | "sgi = si.sgi(gws, fit_freq=\"MS\")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Compare three drought-indices (SPI, SPEI, SGI) in plot" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "fig, axs = plt.subplot_mosaic(\n", 166 | " [[\"SPI\"], [\"SPEI\"], [\"SGI\"]], figsize=(6.5, 8), sharex=True\n", 167 | ")\n", 168 | "si.plot.si(spi1, ybound=3.5, 
ax=axs[\"SPI\"], add_category=False)\n", 169 | "si.plot.si(spei1, ybound=3.5, ax=axs[\"SPEI\"], add_category=False)\n", 170 | "si.plot.si(sgi, ybound=3.5, ax=axs[\"SGI\"], add_category=False)\n", 171 | "[(axs[x].grid(), axs[x].set(xlim=xlim, ylabel=\"Z-Score\")) for x in axs]\n", 172 | "axs[\"SPI\"].set_title(\"Standardized Precipitation Index 1\")\n", 173 | "axs[\"SPEI\"].set_title(\"Standardized Precipitation Evaporation Index 1\")\n", 174 | "axs[\"SGI\"].set_title(\"Standardized Groundwater Index\")\n", 175 | "fig.suptitle(\"Drought-Indices for De Bilt\", fontsize=14)\n", 176 | "fig.tight_layout()\n", 177 | "# fig.savefig('Drought_Index_Bilt.png', dpi=600, bbox_inches='tight')" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Compare SPEI Kernel Density Estimate for one month" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "ax = si.plot.monthly_density(\n", 194 | " spi1, years=[today.year - 1, today.year], months=[today.month - 1]\n", 195 | ")\n", 196 | "ax.set_xlabel(\"Z-Score\")\n", 197 | "ax.set_title(\"SPEI\");" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "SPEI", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.12.3" 218 | }, 219 | "orig_nbformat": 4 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'SPEI: A Python package for calculating and 
visualizing drought indices' 3 | tags: 4 | - hydrology 5 | - drought 6 | - time series 7 | - Python 8 | authors: 9 | - name: Martin A. Vonk 10 | orcid: 0009-0007-3528-2991 11 | affiliation: "1, 2" 12 | affiliations: 13 | - name: Department of Water Management, Faculty of Civil Engineering and Geosciences, Delft University of Technology, Delft, South Holland, The Netherlands 14 | index: 1 15 | - name: Artesia B.V., Schoonhoven, South Holland, The Netherlands 16 | index: 2 17 | date: 24 February 2025 18 | bibliography: paper.bib 19 | 20 | --- 21 | 22 | # Summary 23 | `SPEI` is a Python package for calculating drought indices from time series. 24 | Popular Python packages such as `Pandas` [@pandas_paper_2010], `SciPy` [@scipy_paper_2020], and `Matplotlib` [@matplotlib_paper_2007] are used for handling the time series, statistics, and visualization respectively. 25 | This makes the calculation and visualization of drought indices straightforward and flexible. 26 | 27 | # Statement of need 28 | Water is a vital natural resource, but freshwater availability is increasingly threatened by droughts linked to climate change and human activities. 29 | Drought refers to a water deficit relative to normal conditions [@sheffield_droughtdefinition_2011]. 30 | Both the definition of drought and the baseline for what constitutes "normal" conditions vary depending on the context and objective of a given analysis [@dracup_droughtdefinition_1980]. 31 | As a result, many drought indices have been developed to quantify drought characteristics. 32 | Each index quantifies a drought's severity, location, timing, and duration, helping to track and predict its impact. 33 | 34 | # Standardized drought indices 35 | The most common drought indices are standardized indices, which fit a time series to a probability distribution and convert it into a Z-score of the standardized normal distribution. 
36 | For meteorological droughts, widely used indices include the Standardized Precipitation Index (SPI) [@mckee_spi_1993; @lloydhughes_spi_2002; @wmo_spi_2012] and the Standardized Precipitation Evaporation Index (SPEI) [@vicenteserrano_spei_2010]; the latter index is also the name of the `SPEI` package. 37 | Hydrological droughts are often measured using the Standardized Groundwater Index (SGI) [@bloomfield_sgi_2013] and the Standardized Streamflow Index (SSFI or SSI) [@vicenteserrano_ssfi_2012]. 38 | For agricultural droughts, the Standardized Soil Moisture Index (SSMI) [@sheffield_ssmi_2004] can be used. 39 | All of these standardized indices are explicitly supported by the `SPEI` package, though any other standardized drought index can also be computed using the same methodology. 40 | 41 | ## Computation 42 | Standardized indices are commonly calculated from a time series of at least 30 years [@mckee_spi_1993]. 43 | Rolling sums or averages are computed over typical time scales (generally 1, 3, 6, 12, 24, or 48 months)[^1], and a continuous probability distribution is fitted to each. 44 | Alternatively, non-parametric methods like normal-scores transforms or kernel density estimates can be used. 45 | The probability of each value is then converted to a Z-score using the inverse normal distribution, yielding a standardized index with a mean of zero and standard deviation of one. 46 | 47 | [^1]: A month is not an unambiguous time unit, varying between 28 and 31 days, which adds complexity to computations. 48 | The package handles this internally using `Pandas` to ensure consistent time aggregation. 49 | 50 | ### Implementation 51 | The `SPEI` package is built on `Pandas` [@pandas_paper_2010; @pandas_software_2020], which in turn relies heavily on `NumPy` [@numpy_article_2020]. 52 | It uses `pandas.Series` with a `DatetimeIndex`, enabling powerful time series methods such as `resample` and `rolling`. 
53 | Probability density functions are provided via the `SciPy` `stats` module [@scipy_paper_2020]. 54 | Literature offers general guidance for what distribution to use for each standardized index; e.g., a gamma distribution for SPI [@thom_gamma_1996] and a fisk (log-logistic) distribution for SPEI [@vicenteserrano_spei_2010]. However, with the `SciPy` package, users are free to experiment with any of the 200+ univariate continuous distributions available. 55 | Each distribution has a `fit` method for maximum likelihood estimation on the data. 56 | 57 | #### Example 58 | As an example, the Standardized Precipitation Evaporation Index is computed using a dataset with daily precipitation and potential evaporation from the Royal Netherlands Meteorological Institute (KNMI), shown in \autoref{fig:meteo_surplus}a. 59 | The SPEI uses the precipitation surplus (precipitation minus potential evaporation), which is aggregated monthly for this example and shown in \autoref{fig:meteo_surplus}b. 60 | 61 | ![Example meteorological time series \label{fig:meteo_surplus}](figures/monthly_precipitation_surplus.png) 62 | 63 | The Python code to compute the SPEI-1 (`-1` indicating a one month time scale) with a fisk distribution is as follows: 64 | 65 | ```python 66 | # load packages 67 | import pandas as pd 68 | import scipy.stats as sps 69 | import spei as si 70 | 71 | # load daily time series 72 | meteo: pd.DataFrame = pd.read_csv( 73 | "meteo.csv", 74 | index_col="datetime", 75 | parse_dates=["datetime"], 76 | ) 77 | prec: pd.Series = meteo["precipitation"] 78 | evap: pd.Series = meteo["pot_evaporation"] 79 | 80 | # compute monthly precipitation surplus 81 | surplus: pd.Series = (prec - evap).resample("MS").sum() # MS: month-start 82 | 83 | # compute SPEI-1 84 | spei1: pd.Series = si.spei( 85 | series=surplus, 86 | dist=sps.fisk, 87 | timescale=1, # unit: frequency of the data (months in this case) 88 | ) 89 | ``` 90 | 91 | The standardization process is illustrated in 
\autoref{fig:surplus_fit}. 92 | The empirical cumulative density function of the surplus in March (red dots, matching \autoref{fig:meteo_surplus}b) with the fitted fisk distribution are shown in \autoref{fig:surplus_fit}a. 93 | The fitted probability for each red dot is plotted in \autoref{fig:surplus_fit}b (blue dots) and converted to a Z-score using a standardized normal distribution (purple line). 94 | The black dashed line traces this procedure for a 31 mm surplus from March 1994, near the 69th percentile, corresponding to a Z-score of around 0.4925. 95 | 96 | ![Example equiprobability transformation for the precipitation surplus in March. Figure adapted from @edwards_transformation_1997. \label{fig:surplus_fit}](figures/surplus_fit_cdf.png) 97 | 98 | Application of this procedure for all data points and months results in the standardized index, SPEI-1, as shown in \autoref{fig:spei1}. 99 | The background filling and categories [based on @mckee_spi_1993] in \autoref{fig:spei1} allow for the interpretation of drought (and wet) periods. 100 | The `SPEI` package has additional options to allow for other time scales, time series frequencies (e.g., daily), and fit window options to ensure valid distribution fit. 101 | 102 | ![Resulting SPEI-1 from the monthly precipitation surplus \label{fig:spei1}](figures/spei1.png) 103 | 104 | ## Threshold 105 | Drought characteristics can also be derived from time series using a threshold level. 106 | This defines at what level a drought starts and quantifies the deficit. 107 | The threshold can be either fixed or variable. 108 | A variable threshold, as shown in \autoref{fig:threshold} for part of the series of \autoref{fig:meteo_surplus}b, is typically derived from percentiles of the time series or from a fitted probability density function [@vanloon_hydrodrought_2015]. 
109 | 110 | ![Visualization of drought based on a variable threshold level \label{fig:threshold}](figures/threshold.png) 111 | 112 | ## Heatmap 113 | When multiple time scales are used, standardized drought indices can be visualized in a single graph to reveal whether a drought persists over time and to identify the build-up to multi-year droughts [@mourik_use_2025]. 114 | For hydrological droughts, this persistence relates to the system’s storage capacity and response time [e.g., @bloomfield_sgi_2013]. 115 | The SPEI heatmap (\autoref{fig:spei_heatmap}) illustrates this across six time scales (1, 3, 6, 9, 12, and 24 months), clearly highlighting the 1995–1998 multi-year drought as a large red zone. 116 | 117 | ![Visualization of the SPEI as a heatmap with different time scales \label{fig:spei_heatmap}](figures/spei_heatmap.png) 118 | 119 | # Other drought indices in the SPEI package 120 | 121 | Several other drought indices from the literature are also supported by the `SPEI` package, briefly outlined below. 122 | 123 | ## Rainfall anomaly index 124 | The Rainfall Anomaly Index (RAI) is a relative drought index that quantifies deviations from historical precipitation without fitting a distribution [@vanrooy_rai_1965]. 125 | The package also includes the Modified RAI (mRAI), which adds a scaling factor for local conditions. [@hansel_mrai_2016]. 126 | 127 | ## Climdex 128 | Climdex is an online platform providing indices for heat, cold, precipitation, and drought changes over time [@alexander_climdex_2025], with several of its precipitation indices available in the `SPEI` package. 129 | 130 | ## Precipitation deficit 131 | The KNMI defines drought during the growing season using the precipitation deficit (potential evaporation minus precipitation). 132 | The package includes five functions [after @witte_knmi_2025] to calculate this absolute drought index, primarily for the Netherlands but adaptable to other regions by adjusting the keyword arguments. 
import numpy as np
import pandas as pd

from .utils import group_yearly_df, validate_series


def get_yearly_temp_date(temp: pd.Series, threshold: float) -> pd.Series:
    """
    Get the first date in each year where the cumulative temperature exceeds
    a given threshold.

    Parameters
    ----------
    temp : pd.Series
        A pandas Series representing the temperature time series, indexed by date.
    threshold : float
        The temperature threshold to identify the first date above it.

    Returns
    -------
    pd.Series
        A pandas Series containing the first date in each year where the
        cumulative temperature exceeds the threshold. The index corresponds
        to the years.
    """
    temp_group_df = group_yearly_df(series=temp).cumsum(axis=0)
    # idxmax on a boolean frame returns the first True per column,
    # i.e. the first date on which the cumulative sum exceeds the threshold
    first_date_above_threshold = temp_group_df.gt(threshold).idxmax()
    return first_date_above_threshold


def cumsum(deficit: pd.Series, allow_below_zero: bool = True) -> pd.Series:
    """
    Calculate the cumulative sum of a deficit series.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values.
    allow_below_zero : bool, optional
        If True, the cumulative sum is calculated as-is, allowing negative
        values. If False, the cumulative sum is constrained to be
        non-negative, resetting to zero whenever the sum would drop below
        zero. Default is True.

    Returns
    -------
    pd.Series
        A pandas Series containing the cumulative sum of the deficit values,
        optionally constrained to be non-negative. The input series is left
        unmodified.
    """
    if allow_below_zero:
        return deficit.cumsum()
    # Accumulate over a copy so the caller's series is never mutated in place
    # (the previous implementation wrote to deficit.iat[0] directly).
    values = deficit.to_numpy(dtype=float, copy=True)
    if values[0] < 0.0:
        values[0] = 0.0
    # Clamped running sum: restart at zero whenever the total would go negative.
    sumlm = np.frompyfunc(lambda a, b: 0.0 if a + b < 0.0 else a + b, nin=2, nout=1)
    return pd.Series(sumlm.accumulate(values), deficit.index, dtype=float)


def get_cumulative_deficit(
    deficit: pd.Series,
    startdate: pd.Timestamp | pd.Series,
    enddate: pd.Timestamp | pd.Series,
    allow_below_zero: bool = True,
) -> pd.DataFrame:
    """
    Calculate the cumulative deficit for a given time period.

    This function computes the cumulative deficit for each column in a
    grouped yearly DataFrame, starting from `startdate` to `enddate`.
    The cumulative sum can optionally allow values below zero.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit time series.
    startdate : pd.Timestamp | pd.Series
        The start date(s) for the cumulative deficit calculation. If a
        single timestamp is provided, it is applied to all columns. If a
        Series is provided, it should align with the columns of the grouped
        DataFrame.
    enddate : pd.Timestamp | pd.Series
        The end date(s) for the cumulative deficit calculation. Similar to
        `startdate`, it can be a single timestamp or a Series aligned with
        the columns.
    allow_below_zero : bool, optional
        If True, allows the cumulative sum to include values below zero.
        Defaults to True.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the cumulative deficit for each column over
        the specified time period. The index represents the date range, and
        the columns correspond to the year.
    """
    deficit = validate_series(deficit)
    group_df = group_yearly_df(series=deficit)
    if isinstance(startdate, pd.Timestamp):
        if startdate.year != 2000:
            # year is replaced since group_yearly_df returns a df with 2000 as a base year
            startdate = startdate.replace(year=2000)
        startdate = pd.Series(startdate, index=group_df.columns)
    if isinstance(enddate, pd.Timestamp):
        if enddate.year != 2000:
            # year is replaced since group_yearly_df returns a df with 2000 as a base year
            enddate = enddate.replace(year=2000)
        enddate = pd.Series(enddate, index=group_df.columns)

    index = pd.date_range(start=startdate.min(), end=enddate.max(), freq="D")
    cumdf = pd.DataFrame(np.nan, index=index, columns=group_df.columns)
    for col in group_df.columns:
        start = startdate[col]
        end = enddate[col]
        cumdf.loc[start:end, col] = cumsum(
            group_df.loc[start:end, col],
            allow_below_zero=allow_below_zero,
        ).values

    return cumdf


def deficit_oct1(deficit: pd.Series) -> pd.Series:
    """
    Calculate the cumulative deficit on October 1st.

    This function computes the cumulative deficit for a given time series
    of deficits, considering only the period between April 1st and
    September 30th. The cumulative deficit is reset to zero if it goes
    below zero during this period.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit time series. The index
        should be datetime-like, and the values should represent the
        deficit amounts.

    Returns
    -------
    pd.Series
        A pandas Series containing the cumulative deficit values on
        October 1st. The index of the returned Series corresponds to the
        columns of the cumulative deficit DataFrame, and the name of the
        Series is "Doct1".
    """
    # Base year 2000 is used because group_yearly_df maps all years onto it.
    startdate = pd.Timestamp("2000-04-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=False,
    )
    doct1 = pd.Series(
        data=cumdf.loc[enddate].values,
        index=cumdf.columns,
        dtype=float,
        name="Doct1",
    )
    return doct1


def deficit_max(deficit: pd.Series) -> pd.Series:
    """
    Calculate the maximum cumulative deficit within a specified period.

    This function computes the maximum cumulative deficit for a given
    deficit time series, starting from April 1st to September 30th. The
    cumulative deficit is calculated using the `get_cumulative_deficit`
    function, ensuring that values below zero are not allowed.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values over time.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum cumulative deficit within
        the specified period, labeled as "Dmax".
    """
    startdate = pd.Timestamp("2000-04-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=False,
    )
    return cumdf.max().rename("Dmax")


def deficit_apr1(deficit: pd.Series) -> pd.Series:
    """
    Calculate the maximum change in cumulative deficit within a specified
    date range.

    This function computes the cumulative deficit for the given deficit
    series between April 1st and September 30th. It then calculates the
    maximum change in the cumulative deficit over this period.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values. The index is
        expected to be datetime-like.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum change in cumulative deficit
        over the specified period, labeled as "DIapr1".
    """
    startdate = pd.Timestamp("2000-04-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=True,
    )
    return (cumdf.max() - cumdf.min()).rename("DIapr1")


def deficit_gdd(
    deficit: pd.Series, temp: pd.Series, threshold: float = 440.0
) -> pd.Series:
    """
    Calculate the maximum change in cumulative deficit starting from the
    first day when the temperature sum (growing degree days; GDD)
    exceeds a specified threshold.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the daily deficit values.
    temp : pd.Series
        A pandas Series representing the daily temperature values.
    threshold : float, optional
        The temperature sum GDD threshold to determine the starting date
        for the calculation. Defaults to 440.0.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum change in cumulative
        deficit, labeled as "DIgdd".
    """
    temp = validate_series(temp)
    # Per-year start dates: first exceedance of the GDD threshold.
    startdate = get_yearly_temp_date(temp=temp, threshold=threshold)
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=True,
    )
    # Single rename; the previous double rename ("DIapr1" then "DIgdd") was redundant.
    return (cumdf.max() - cumdf.min()).rename("DIgdd")


def deficit_wet(deficit: pd.Series) -> pd.Series:
    """
    Calculate the maximum change in cumulative deficit for a specified period.

    This function computes the maximum change in cumulative deficit from
    January 1st to September 30th of a given year. The cumulative deficit
    is calculated using the `get_cumulative_deficit` function, allowing
    values below zero.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values over time.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum change in cumulative deficit
        for the specified period, labeled as "DIwet".
    """
    startdate = pd.Timestamp("2000-01-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=True,
    )
    # Single rename; the previous double rename ("DIapr1" then "DIwet") was redundant.
    return (cumdf.max() - cumdf.min()).rename("DIwet")
and {SciPy 1.0 Contributors}}, 15 | title = {{SciPy} 1.0: Fundamental Algorithms for Scientific Computing in {Python}}, 16 | journal = {Nature Methods}, 17 | year = {2020}, 18 | volume = {17}, 19 | pages = {261--272}, 20 | doi = {10.1038/s41592-019-0686-2} 21 | } 22 | 23 | @incollection{pandas_paper_2010, 24 | author = {McKinney, W.}, 25 | title = {Data Structures for Statistical Computing in {Python}}, 26 | booktitle = {Proceedings of the 9th {Python} in {Science} {Conference}}, 27 | pages = {56--61}, 28 | year = {2010}, 29 | doi = {10.25080/Majora-92bf1922-00a} 30 | } 31 | 32 | @software{pandas_software_2020, 33 | author = {{The pandas development team}}, 34 | title = {pandas-dev/pandas: Pandas}, 35 | year = {2025}, 36 | publisher = {Zenodo}, 37 | version = {latest}, 38 | doi = {10.5281/zenodo.3509134} 39 | } 40 | 41 | @article{matplotlib_paper_2007, 42 | author = {Hunter, J. D.}, 43 | title = {Matplotlib: A 2D graphics environment}, 44 | journal = {Computing in Science \& Engineering}, 45 | volume = {9}, 46 | number = {3}, 47 | pages = {90--95}, 48 | publisher = {{IEEE COMPUTER SOC}}, 49 | doi = {10.1109/MCSE.2007.55}, 50 | year = {2007} 51 | } 52 | 53 | @article{numpy_article_2020, 54 | title = {Array programming with {NumPy}}, 55 | author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J. 56 | van der Walt and Ralf Gommers and Pauli Virtanen and David 57 | Cournapeau and Eric Wieser and Julian Taylor and Sebastian 58 | Berg and Nathaniel J. Smith and Robert Kern and Matti Picus 59 | and Stephan Hoyer and Marten H. van Kerkwijk and Matthew 60 | Brett and Allan Haldane and Jaime Fern{\'{a}}ndez del 61 | R{\'{i}}o and Mark Wiebe and Pearu Peterson and Pierre 62 | G{\'{e}}rard-Marchant and Kevin Sheppard and Tyler Reddy and 63 | Warren Weckesser and Hameer Abbasi and Christoph Gohlke and 64 | Travis E. 
Oliphant}, 65 | year = {2020}, 66 | journal = {Nature}, 67 | volume = {585}, 68 | number = {7825}, 69 | pages = {357--362}, 70 | doi = {10.1038/s41586-020-2649-2}, 71 | publisher = {Springer Science and Business Media {LLC}} 72 | } 73 | 74 | @incollection{mckee_spi_1993, 75 | author = {McKee, Thomas B. and Doesken, N. J. and Kleist, J.}, 76 | title = {The Relationship of Drought Frequency and Duration to Time Scales}, 77 | booktitle = {Proceedings of the {Eighth Conference on Applied Climatology}}, 78 | year = {1993}, 79 | pages = {179--184}, 80 | publisher = {American Meteorological Society} 81 | } 82 | 83 | @article{lloydhughes_spi_2002, 84 | author = {Lloyd-Hughes, Benjamin and Saunders, Mark A.}, 85 | title = {A drought climatology for {Europe}}, 86 | journal = {International Journal of Climatology}, 87 | volume = {22}, 88 | number = {13}, 89 | pages = {1571--1592}, 90 | doi = {10.1002/joc.846}, 91 | year = {2002} 92 | } 93 | 94 | @article{vicenteserrano_spei_2010, 95 | author = {Vicente-Serrano, S. M. and Beguería, S. and López-Moreno, J. I.}, 96 | title = {A Multi-scalar drought index sensitive to global warming: {The Standardized Precipitation Evapotranspiration Index}}, 97 | journal = {Journal of Climate}, 98 | year = {2010}, 99 | publisher = {American Meteorological Society}, 100 | volume = {23}, 101 | number = {7}, 102 | pages = {1696--1718}, 103 | doi = {10.1175/2009JCLI2909.1} 104 | } 105 | 106 | @article{bloomfield_sgi_2013, 107 | author = {Bloomfield, J. P. and Marchant, B. P.}, 108 | title = {Analysis of groundwater drought building on the standardised precipitation index approach}, 109 | journal = {Hydrology and Earth System Sciences}, 110 | year = {2013}, 111 | volume = {17}, 112 | pages = {4769--4787}, 113 | doi = {10.5194/hess-17-4769-2013} 114 | } 115 | 116 | @article{vicenteserrano_ssfi_2012, 117 | author = {Vicente-Serrano, S. M. and López-Moreno, J. I. and Beguería, S. and Lorenzo-Lacruz, J. and Azorin-Molina, C. 
and Morán-Tejeda, E.}, 118 | title = {Accurate Computation of a Streamflow Drought Index}, 119 | journal = {Journal of Hydrologic Engineering}, 120 | volume = {17}, 121 | number = {2}, 122 | pages = {318--332}, 123 | year = {2012}, 124 | doi = {10.1061/(ASCE)HE.1943-5584.0000433} 125 | } 126 | 127 | @article{sheffield_ssmi_2004, 128 | author = {Sheffield, J. and Goteti, G. and Wen, F. and Wood, E. F.}, 129 | title = {A simulated soil moisture based drought analysis for the {United States}}, 130 | journal = {Journal of Geophysical Research: Atmospheres}, 131 | volume = {109}, 132 | number = {D24}, 133 | doi = {10.1029/2004JD005182}, 134 | year = {2004} 135 | } 136 | 137 | @article{segura_use_2025, 138 | author = {Segura-Barrero, R. and Lauvaux, T. and Lian, J. and Ciais, P. and Badia, A. and Ventura, S. and Bazzi, H. and Abbessi, E. and Fu, Z. and Xiao, J. and Li, X. and Villalba, G.}, 139 | title = {Heat and Drought Events Alter Biogenic Capacity to Balance {CO2} Budget in South-Western {Europe}}, 140 | journal = {Global Biogeochemical Cycles}, 141 | volume = {39}, 142 | number = {1}, 143 | pages = {e2024GB008163}, 144 | doi = {10.1029/2024GB008163}, 145 | year = {2025} 146 | } 147 | 148 | @article{adla_use_2024, 149 | author = {Adla, S. and Šaponjić, A. and Tyagi, A. and Nagi, A. and Pastore, P. and Pande, S.}, 150 | title = {Steering agricultural interventions towards sustained irrigation adoption by farmers: socio-psychological analysis of irrigation practices in {Maharashtra}, {India}}, 151 | journal = {Hydrological Sciences Journal}, 152 | volume = {69}, 153 | number = {12}, 154 | pages = {1586--1603}, 155 | year = {2024}, 156 | publisher = {Taylor \& Francis}, 157 | doi = {10.1080/02626667.2024.2376709} 158 | } 159 | 160 | @article{mourik_use_2025, 161 | author = {{van Mourik}, J. and Ruijsch, D. and {van der Wiel}, K. and Hazeleger, W. 
and Wanders, N.}, 162 | title = {Regional drivers and characteristics of multi-year droughts}, 163 | journal = {Weather and Climate Extremes}, 164 | volume = {48}, 165 | pages = {100748}, 166 | year = {2025}, 167 | issn = {2212-0947}, 168 | doi = {10.1016/j.wace.2025.100748} 169 | } 170 | 171 | @inbook{panigrahi_use_2025, 172 | author = {Panigrahi, S. and Vidyarthi, V. K.}, 173 | title = {Assessing the Suitability of {SPI} and {SPEI} in Steppe Hot and Arid Climatic Zones in {India}}, 174 | editor = {Sefelnasr, A. and Sherif, M. and Singh, V. P.}, 175 | booktitle = {Water Resources Management and Sustainability: Solutions for Arid Regions}, 176 | year = {2025}, 177 | publisher = {Springer Nature Switzerland}, 178 | pages = {201--216}, 179 | doi = {10.1007/978-3-031-80520-2_12} 180 | } 181 | 182 | @software{vonk_spei_zenodo, 183 | author = {Vonk, M. A.}, 184 | title = {SPEI: A simple Python package to calculate and visualize drought indices}, 185 | year = {2025}, 186 | publisher = {Zenodo}, 187 | version = {v0.8.0}, 188 | doi = {10.5281/zenodo.10816740} 189 | } 190 | 191 | @online{alexander_climdex_2025, 192 | title = {Climdex: climate extremes indices}, 193 | author = {L. Alexander and M. Donat and M. Bador and N. Herold and J. L. Vazquez-Aguirre and R. Dunn and P. L. Nguyen and R. Isphording and Y. Singh}, 194 | url = {https://www.climdex.org}, 195 | urldate = {2025-04-24}, 196 | year = {2025} 197 | } 198 | 199 | @online{vonk_spei_github, 200 | author = {Vonk, M. A.}, 201 | title = {SPEI}, 202 | url = {https://github.com/martinvonk/spei}, 203 | urldate = {2025-04-24}, 204 | year = {2025} 205 | } 206 | 207 | @techreport{thom_gamma_1996, 208 | author = {Thom, H. C. 
S.}, 209 | title = {Some Methods of Climatological Analysis}, 210 | year = {1966}, 211 | type = {WMO Technical Note}, 212 | number = {81}, 213 | institution = {World Meteorological Organization}, 214 | address = {Geneva}, 215 | url = {https://library.wmo.int/idurl/4/59838} 216 | } 217 | 218 | @techreport{wmo_spi_2012, 219 | author = {Svoboda, M. and Hayes, M. and Wood, D. A.}, 220 | title = {{Standardized Precipitation Index} {User Guide}}, 221 | year = {2012}, 222 | type = {WMO Technical Document}, 223 | number = {1090}, 224 | institution = {World Meteorological Organization}, 225 | address = {Geneva}, 226 | url = {https://library.wmo.int/idurl/4/39629} 227 | } 228 | 229 | @book{sheffield_droughtdefinition_2011, 230 | author = {Sheffield, J. and Wood, E. F.}, 231 | title = {Drought: Past Problems and Future Scenarios}, 232 | publisher = {Taylor \& Francis Group}, 233 | doi = {10.4324/9781849775250}, 234 | year = {2011} 235 | } 236 | 237 | @article{dracup_droughtdefinition_1980, 238 | author = {Dracup, J. A. and Lee, K. S. and {Paulson Jr.}, E. G.}, 239 | title = {On the definition of droughts}, 240 | journal = {Water Resources Research}, 241 | volume = {16}, 242 | number = {2}, 243 | pages = {297-302}, 244 | doi = {10.1029/WR016i002p00297}, 245 | year = {1980} 246 | } 247 | 248 | @article{vanloon_hydrodrought_2015, 249 | author = {{van Loon}, A. F.}, 250 | title = {Hydrological drought explained}, 251 | journal = {WIREs Water}, 252 | volume = {2}, 253 | number = {4}, 254 | pages = {359-392}, 255 | doi = {10.1002/wat2.1085}, 256 | year = {2015} 257 | } 258 | 259 | @techreport{edwards_transformation_1997, 260 | author = {Edwards, D. C. and McKee, T. 
B.}, 261 | title = {Characteristics of 20th Century Drought in the {United States} at Multiple Time Scales}, 262 | institution = {Colorado State University, Department of Atmospheric Science}, 263 | type = {Climatology Report}, 264 | number = {97-2}, 265 | year = {1997}, 266 | address = {Fort Collins, CO}, 267 | note = {Atmospheric Science Paper No. 634} 268 | } 269 | 270 | @article{vanrooy_rai_1965, 271 | author = {{van Rooy}, M. P.}, 272 | title = {A Rainfall Anomaly Index Independent of Time and Space}, 273 | journal = {Notos}, 274 | year = {1965}, 275 | volume = {14}, 276 | pages = {43--48} 277 | } 278 | 279 | @article{hansel_mrai_2016, 280 | author = {Hänsel, S. and Schucknecht, A. and Matschullat, J.}, 281 | title = {The Modified Rainfall Anomaly Index ({mRAI})—is this an alternative to the {Standardised Precipitation Index} ({SPI}) in evaluating future extreme precipitation characteristics?}, 282 | journal = {Theoretical and Applied Climatology}, 283 | year = {2016}, 284 | volume = {123}, 285 | number = {3}, 286 | pages = {827-844}, 287 | doi = {10.1007/s00704-015-1389-y} 288 | } 289 | 290 | @article{witte_knmi_2025, 291 | author = {Witte, J. P. M. and {van den Eertwegh}, G. A. P. H. and Torfs, P. J. J. 
F.}, 292 | title = {Absolute Meteorological Drought Indices Validated Against Irrigation Amounts}, 293 | journal = {Water}, 294 | volume = {17}, 295 | year = {2025}, 296 | number = {7}, 297 | doi = {10.3390/w17071056} 298 | } 299 | -------------------------------------------------------------------------------- /docs/examples/example04_package_comparison.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Package Comparison\n", 9 | "\n", 10 | "*Martin Vonk - 2023*\n", 11 | "\n", 12 | "This notebook compares the calculated drought indices to other (Python) packages or time series retrieved from other locations.\n", 13 | "Current comparisons include: \n", 14 | "* standard_precip (Python)\n", 15 | "* climate_indices (Python)\n", 16 | "* pastas (Python)\n", 17 | "* SPEI (R)\n", 18 | "\n", 19 | "Please note that it can be difficult to install these packages. SPEI (R) requires the R library. Pastas depends on Numba which has strict requirements for NumPy. Climate Indices only supports Python 3.11 and lower. 
Therefore running this notebook can be cumbersome.\n", 20 | "\n", 21 | "Future comparisons:\n", 22 | "* [KNMI](https://gitlab.com/KNMI-OSS/climexp/climexp_numerical/-/blob/be0f081a9d62856e4c52a370e70fec2ddfc45cfa/src/calcSPI3.f)\n", 23 | "\n", 24 | "## Required packages" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import matplotlib.pyplot as plt\n", 34 | "import pandas as pd\n", 35 | "import scipy.stats as scs\n", 36 | "\n", 37 | "import spei as si\n", 38 | "\n", 39 | "print(si.show_versions())" 40 | ] 41 | }, 42 | { 43 | "attachments": {}, 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Read Precipitation Data" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 57 | "df.index.name = \"date\"\n", 58 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"rain\")\n", 59 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"B32C0572\").dropna()\n", 60 | "\n", 61 | "_ = prec.plot(grid=True, linewidth=0.5, title=\"Precipitation\", figsize=(6.5, 4))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# get rolling sum\n", 71 | "prec_rsum = prec.resample(\"ME\").sum()\n", 72 | "_ = prec_rsum.plot(\n", 73 | " grid=True, linewidth=0.5, title=\"Precipitation, monthly sum\", figsize=(6.5, 4)\n", 74 | ")" 75 | ] 76 | }, 77 | { 78 | "attachments": {}, 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Compute Standardized Precipitation Index\n", 83 | "\n", 84 | "### Using SPEI package" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "spi = si.spi(prec_rsum, dist=scs.gamma, 
prob_zero=True, timescale=3, fit_freq=\"ME\")\n", 94 | "spi # pandas Series" 95 | ] 96 | }, 97 | { 98 | "attachments": {}, 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Using standard_precip package" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "from standard_precip import spi as sp_spi\n", 112 | "\n", 113 | "# standard_precip also needs rolling sum dataframe, even though you provide freq=\"M\" and scale = 1\n", 114 | "precdf = prec_rsum.to_frame().reset_index().copy()\n", 115 | "\n", 116 | "# initialize spi\n", 117 | "standardp_spi_inst = sp_spi.SPI()\n", 118 | "\n", 119 | "# calculate index with many parameters\n", 120 | "standardp_spi = standardp_spi_inst.calculate(\n", 121 | "    precdf,\n", 122 | "    date_col=\"date\",\n", 123 | "    precip_cols=\"rain\",\n", 124 | "    freq=\"M\",\n", 125 | "    scale=3,  # note that scale is not the same for the standard deviation in SciPy\n", 126 | "    fit_type=\"mle\",\n", 127 | "    dist_type=\"gam\",\n", 128 | ")\n", 129 | "standardp_spi.index = standardp_spi.loc[\n", 130 | "    :, \"date\"\n", 131 | "].values  # create datetimeindex because date had to be a column\n", 132 | "\n", 133 | "standardp_spi  # pandas DataFrame" 134 | ] 135 | }, 136 | { 137 | "attachments": {}, 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### Using climate_indices package\n", 142 | "\n", 143 | "Previously there was a significant difference between the SPEI and climate_indices package, not sure why. I thought it had something to do with the fitting method used for the gamma distribution. In issue [#61](https://github.com/martinvonk/SPEI/issues/61) it was mentioned that the same outcome could be achieved. However, I found it difficult to install `climate_indices` due to lack of support (for newer python versions)." 
144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# from climate_indices.compute import scale_values, Periodicity\n", 153 | "# from climate_indices import compute, indices, utils" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# initial_year = prec_rsum.index[0].year\n", 163 | "# calibration_year_initial = prec_rsum.index[0].year\n", 164 | "# calibration_year_final = prec_rsum.index[-1].year\n", 165 | "# period_times = 366\n", 166 | "# scale = 1\n", 167 | "# periodicity = compute.Periodicity.daily\n", 168 | "\n", 169 | "# values = prec_rsum.values\n", 170 | "\n", 171 | "# scaled_values = compute.scale_values(\n", 172 | "# values,\n", 173 | "# scale=scale,\n", 174 | "# periodicity=periodicity,\n", 175 | "# )\n", 176 | "\n", 177 | "# alphas, betas = compute.gamma_parameters(\n", 178 | "# scaled_values,\n", 179 | "# data_start_year=initial_year,\n", 180 | "# calibration_start_year=calibration_year_initial,\n", 181 | "# calibration_end_year=calibration_year_final,\n", 182 | "# periodicity=periodicity,\n", 183 | "# )\n", 184 | "\n", 185 | "# gamma_params = {\"alpha\": alphas, \"beta\": betas}\n", 186 | "\n", 187 | "# spival = indices.spi(\n", 188 | "# values,\n", 189 | "# scale=scale,\n", 190 | "# distribution=indices.Distribution.gamma,\n", 191 | "# data_start_year=initial_year,\n", 192 | "# calibration_year_initial=calibration_year_initial,\n", 193 | "# calibration_year_final=calibration_year_final,\n", 194 | "# periodicity=compute.Periodicity.daily,\n", 195 | "# fitting_params=gamma_params,\n", 196 | "# )\n", 197 | "\n", 198 | "# climateind_spi = pd.Series(spival, index=prec_rsum.index, name=\"Climate Index SPI\")\n", 199 | "# climateind_spi" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "### Using SPEI R 
package" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from rpy2.robjects import pandas2ri\n", 216 | "from rpy2.robjects.packages import importr\n", 217 | "\n", 218 | "sr = importr(\"SPEI\")\n", 219 | "\n", 220 | "with pandas2ri.converter.context(): # pandas2ri.activate()\n", 221 | " spir_res = sr.spi(prec_rsum.values, scale=3)\n", 222 | "\n", 223 | "r_spi = pd.Series(spir_res[2].ravel(), index=prec_rsum.index, name=\"SPI\")\n", 224 | "r_spi" 225 | ] 226 | }, 227 | { 228 | "attachments": {}, 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### Plot and compare" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "f, ax = plt.subplot_mosaic(\n", 242 | " [[\"SPI\"], [\"DIFF\"]],\n", 243 | " figsize=(8, 4),\n", 244 | " sharex=True,\n", 245 | " height_ratios=[2, 1],\n", 246 | ")\n", 247 | "spi.plot(ax=ax[\"SPI\"], grid=True, linestyle=\"-\", label=\"SPI\")\n", 248 | "standardp_spi.iloc[:, -1].plot(\n", 249 | " ax=ax[\"SPI\"],\n", 250 | " color=\"C1\",\n", 251 | " grid=True,\n", 252 | " linestyle=\"--\",\n", 253 | " label=\"standard_precip\",\n", 254 | ")\n", 255 | "# climateind_spi.plot(\n", 256 | "# ax=ax[\"SPI\"], color=\"C2\", grid=True, linestyle=\":\", label=\"climate_indices\"\n", 257 | "# )\n", 258 | "# r_spi.plot(ax=ax[\"SPI\"], color=\"C2\", grid=True, linestyle=\":\", label=\"R package\")\n", 259 | "\n", 260 | "(ax[\"SPI\"].set_ylim(-3.5, 3.5),)\n", 261 | "(ax[\"SPI\"].set_title(\"Comparison\"),)\n", 262 | "(ax[\"SPI\"].set_ylabel(\"SPI\"),)\n", 263 | "ax[\"SPI\"].legend(ncol=3)\n", 264 | "\n", 265 | "(spi - standardp_spi.iloc[:, -1]).plot(\n", 266 | " ax=ax[\"DIFF\"], color=\"C4\", label=\"SPEI - standard_precip\", grid=True\n", 267 | ")\n", 268 | "# (spi - r_spi).plot(ax=ax[\"DIFF\"], color=\"C3\", label=\"SPEI - R Package\")\n", 
269 | "\n", 270 | "# ax[\"DIFF1\"].set_ylim(-0.05, 0.05)\n", 271 | "ax[\"DIFF\"].legend(ncol=2)\n", 272 | "ax[\"DIFF\"].set_title(\"SPEI minus other package\")\n", 273 | "ax[\"DIFF\"].set_ylabel(\"Difference\")\n", 274 | "ax[\"DIFF\"].set_xlim(\"1996\", \"1999\")\n", 275 | "f.tight_layout()" 276 | ] 277 | }, 278 | { 279 | "attachments": {}, 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "Difference is very small between SPEI and the standard_precip package.\n", 284 | "\n", 285 | "The standard_precip package does not explicitly support the Standardized Precipitation Evaporation Index, as far as I can see. However, the SPI class in standard_precip could probably be used, even though the naming of `precip_cols` is not universal. In general, the standard_precip package needs many more keyword arguments while the SPEI package makes more use of all the nice logic already available in SciPy and Pandas.\n", 286 | "\n", 287 | "The climate_indices package needs even more code.\n", 288 | "\n", 289 | "The SPEI R package also has a similar result but seems to vary a bit more. More research is needed to understand why that is the case. Most likely this is due to differences in fitting the gamma distribution." 
290 | ] 291 | }, 292 | { 293 | "attachments": {}, 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "## Compute Standardized Groundwater Index" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "import pastas as ps\n", 307 | "\n", 308 | "sgi = si.sgi(head, fit_freq=\"ME\")\n", 309 | "sgi_pastas = ps.stats.sgi(head)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "pd.concat([sgi, sgi_pastas], axis=1).rename(columns={0: \"SGI\", \"head\": \"Pastas\"})" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "f, ax = plt.subplot_mosaic(\n", 328 | " [[\"SGI\"], [\"DIFF\"]],\n", 329 | " figsize=(8, 4),\n", 330 | " sharex=True,\n", 331 | " height_ratios=[2, 1],\n", 332 | ")\n", 333 | "sgi.plot(ax=ax[\"SGI\"], grid=True, linestyle=\"-\", label=\"SGI\")\n", 334 | "sgi_pastas.plot(ax=ax[\"SGI\"], color=\"C1\", grid=True, linestyle=\"--\", label=\"pastas\")\n", 335 | "(ax[\"SGI\"].set_ylim(-3.5, 3.5),)\n", 336 | "(ax[\"SGI\"].set_title(\"Comparison\"),)\n", 337 | "(ax[\"SGI\"].set_ylabel(\"SGI\"),)\n", 338 | "ax[\"SGI\"].legend(ncol=3)\n", 339 | "\n", 340 | "(sgi - sgi_pastas).plot(ax=ax[\"DIFF\"], color=\"C3\", label=\"SGI - pastas\")\n", 341 | "\n", 342 | "ax[\"DIFF\"].legend(ncol=2)\n", 343 | "ax[\"DIFF\"].set_title(\"SPEI minus other package\")\n", 344 | "ax[\"DIFF\"].set_ylabel(\"Difference\")\n", 345 | "ax[\"DIFF\"].set_xlim(\"1996\", \"1999\")\n", 346 | "f.tight_layout()" 347 | ] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "SPEI", 353 | "language": "python", 354 | "name": "python3" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | 
"file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.12.3" 367 | }, 368 | "orig_nbformat": 4 369 | }, 370 | "nbformat": 4, 371 | "nbformat_minor": 2 372 | } 373 | -------------------------------------------------------------------------------- /docs/examples/example07_knmi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "87fd561f", 6 | "metadata": {}, 7 | "source": [ 8 | "# KNMI Drought Indices\n", 9 | "\n", 10 | "J.P.M. Witte, G.A.P.H. van den Eertwegh and P.J.J.F. Torfs (2025) - [Absolute Meteorological Drought Indices Validated Against Irrigation Amounts](https://doi.org/10.3390/w17071056)." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "5f4ec1ab", 16 | "metadata": {}, 17 | "source": [ 18 | "## Load packages" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "1a439ca7", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import matplotlib as mpl\n", 29 | "import pandas as pd\n", 30 | "\n", 31 | "from spei import knmi\n", 32 | "from spei.plot import deficit_knmi\n", 33 | "from spei.utils import group_yearly_df" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "ae00001a", 39 | "metadata": {}, 40 | "source": [ 41 | "## Get data\n", 42 | "Data from De Bilt (260) or P13 stations from 1960 till today" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "fba32ea8", 48 | "metadata": {}, 49 | "source": [ 50 | "### Most recent De Bilt data" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "131815ed", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# import hydropandas as hpd\n", 61 | "# prec = hpd.PrecipitationObs.from_knmi(\n", 62 | "# meteo_var=\"RH\",\n", 63 | "# stn=260,\n", 64 | "# 
startdate=pd.Timestamp(\"1960-01-01\"),\n", 65 | "# enddate=pd.Timestamp.today(),\n", 66 | "# )[\"RH\"].multiply(1e3)\n", 67 | "# prec.index = prec.index.normalize()\n", 68 | "# evap = hpd.EvaporationObs.from_knmi(\n", 69 | "# meteo_var=\"EV24\",\n", 70 | "# stn=260,\n", 71 | "# startdate=pd.Timestamp(\"1960-01-01\"),\n", 72 | "# enddate=pd.Timestamp.today(),\n", 73 | "# )[\"EV24\"].multiply(1e3)\n", 74 | "# evap.index = evap.index.normalize()\n", 75 | "# temp = hpd.MeteoObs.from_knmi(\n", 76 | "# meteo_var=\"TG\",\n", 77 | "# stn=260,\n", 78 | "# startdate=pd.Timestamp(\"1960-01-01\"),\n", 79 | "# enddate=pd.Timestamp.today(),\n", 80 | "# )[\"TG\"]\n", 81 | "# temp.index = temp.index.normalize()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "id": "72136d23", 87 | "metadata": {}, 88 | "source": [ 89 | "### KNMI stations data\n", 90 | "https://www.knmi.nl/kennis-en-datacentrum/achtergrond/achtergrondinformatie-klimaatdashboard\n", 91 | "\n", 92 | "De neerslagtekort klimaatdashboardgrafiek is alleen voor het landelijk gemiddelde beschikbaar, gebaseerd op:\n", 93 | "\n", 94 | "Voor 1906 t/m 2000: officiële reeks voor Nederland voor neerslagtekort: Dagelijks neerslagtekort NL (1 apr t/m 30 sep) op basis van Makkink verdamping De Bilt geschat uit zonneschijnduur minus 13 neerslagstations (P13) (c) KNMI, mei 2020, Jules Beersma: Climate Explorer \n", 95 | "\n", 96 | "Vanaf 2001: verdamping (gemiddelde van 13 automatische weerstations nabij 13 neerslagstations) minus de gemiddelde neerslag van 13 neerslagstations:\n", 97 | "De P13: het gemiddelde van de hoeveelheid neerslag op de volgende 13 KNMI-neerslagstations: De Bilt (550_N), De Kooy (25_N), Groningen (139_N), Heerde (328_N), Hoofddorp (438_N), Hoorn (222_N), Kerkwerve (737_N), Oudenbosch (828_N), Roermond (961_N), Ter Apel (144_N), West-Terschelling (11_N), Westdorpe (770_N) en Winterswijk (666_N).\n", 98 | "\n", 99 | "Het gemiddelde van de hoeveelheid verdamping (EV24) op 13 automatische weerstations 
van het KNMI nabij de 13 neerslagstations: De Bilt (260_H), De Kooy (235_H), Eelde (280_H), Heino (278_H), Schiphol (240_H), Berkhout (249_H), Vlissingen (310_H), Eindhoven (370_H), Ell (377_H), Nieuw Beerta (286_H), Hoorn Terschelling (251_H), Westdorpe (319_H) en Hupsel (283_H).\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "7401ab30", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# import hydropandas as hpd\n", 110 | "\n", 111 | "# P13 stations\n", 112 | "# p_stns = [\n", 113 | "# 550, # De Bilt\n", 114 | "# 25, # De Kooy\n", 115 | "# 139, # Groningen\n", 116 | "# 328, # Heerde\n", 117 | "# 438, # Hoofddorp\n", 118 | "# 222, # Hoorn\n", 119 | "# 737, # Kerkwerve\n", 120 | "# 828, # Oudenbosch\n", 121 | "# 961, # Roermond\n", 122 | "# 144, # Ter Apel\n", 123 | "# 11, # West-Terschelling\n", 124 | "# 770, # Westdorpe\n", 125 | "# 666, # Winterswijk\n", 126 | "# ]\n", 127 | "\n", 128 | "# # EV24-13 stations\n", 129 | "# ev_stns = [\n", 130 | "# 260, # De Bilt\n", 131 | "# 235, # De Kooy\n", 132 | "# 280, # Eelde\n", 133 | "# 278, # Heino\n", 134 | "# 240, # Schiphol\n", 135 | "# 249, # Berkhout\n", 136 | "# 310, # Vlissingen\n", 137 | "# 370, # Eindhoven\n", 138 | "# 377, # Ell\n", 139 | "# 286, # Nieuw Beerta\n", 140 | "# 251, # Hoorn Terschelling\n", 141 | "# 319, # Westdorpe\n", 142 | "# 283, # Hupsel\n", 143 | "# ]\n", 144 | "# oc_p = hpd.ObsCollection.from_knmi(\n", 145 | "# stns=p_stns,\n", 146 | "# starts=pd.Timestamp(\"1960-01-01\"),\n", 147 | "# ends=pd.Timestamp.today(),\n", 148 | "# meteo_vars=[\"RD\"],\n", 149 | "# )\n", 150 | "# oc_ev = hpd.ObsCollection.from_knmi(\n", 151 | "# stns=ev_stns,\n", 152 | "# starts=pd.Timestamp(\"1960-01-01\"),\n", 153 | "# ends=pd.Timestamp.today(),\n", 154 | "# meteo_vars=[\"EV24\", \"TG\"],\n", 155 | "# )\n", 156 | "\n", 157 | "# prec = pd.concat([o[\"RD\"] for o in oc_p[\"obs\"]], axis=1).mean(axis=1).multiply(1e3).rename(\"prec\")\n", 158 | "# 
prec.index = prec.index.normalize()\n", 159 | "# ev_data = pd.DataFrame({mv: pd.concat([o[mv] for o in gr[\"obs\"]], axis=1).mean(axis=1) for mv, gr in oc_ev.groupby(\"meteo_var\")})\n", 160 | "# ev_data.index = ev_data.index.normalize()\n", 161 | "# ev_data = ev_data.loc[prec.index] # align indices because prec stations less frequently reported\n", 162 | "# evap = ev_data[\"EV24\"].multiply(1e3).rename(\"evap\")\n", 163 | "# temp = ev_data[\"TG\"].rename(\"temp\")" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "id": "24a7519c", 169 | "metadata": {}, 170 | "source": [ 171 | "### From a CSV file" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "528daaa6", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 182 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 183 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 184 | "temp = df[\"Temp [C] 260_DEBILT\"].rename(\"temp\")" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "b268e457", 190 | "metadata": {}, 191 | "source": [ 192 | "## Calculate precipitation deficit" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "id": "433a3701", 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "deficit = evap - prec\n", 203 | "## deficit period\n", 204 | "startdate = pd.Timestamp(\"2000-04-01\")\n", 205 | "enddate = pd.Timestamp(\"2000-09-30\")\n", 206 | "# calculate cumulative deficit\n", 207 | "cumdf = knmi.get_cumulative_deficit(\n", 208 | " deficit=deficit,\n", 209 | " startdate=startdate,\n", 210 | " enddate=enddate,\n", 211 | " allow_below_zero=False,\n", 212 | ")\n", 213 | "# plot deficit\n", 214 | "ax = cumdf.plot(figsize=(7.0, 6.0), cmap=\"cividis\")\n", 215 | "ax.legend(ncol=5, loc=(0, 1))\n", 216 | 
"ax.xaxis.set_major_locator(mpl.dates.MonthLocator())\n", 217 | "ax.xaxis.set_major_formatter(mpl.dates.DateFormatter(\"%B\"))\n", 218 | "ax.xaxis.set_ticks([], minor=True)\n", 219 | "ax.set_ylim(0.0)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "b9a26454", 225 | "metadata": {}, 226 | "source": [ 227 | "## Precipitation deficit indices" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "id": "a0a18964", 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "doct1 = knmi.deficit_oct1(deficit)\n", 238 | "doct1.to_frame().transpose()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "ca3b85aa", 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "dmax = knmi.deficit_max(deficit)\n", 249 | "dmax.to_frame().transpose()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "id": "88fa3017", 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "diapr1 = knmi.deficit_apr1(deficit)\n", 260 | "diapr1.to_frame().transpose()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "id": "7fa91d05", 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "digdd = knmi.deficit_gdd(deficit, temp, threshold=440.0)\n", 271 | "digdd.to_frame().transpose()" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "id": "66972df3", 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "diwet = knmi.deficit_wet(deficit)\n", 282 | "diwet.to_frame().transpose()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "07085e76", 288 | "metadata": {}, 289 | "source": [ 290 | "## Compare to original KNMI data\n", 291 | "\n", 292 | "File obtained from https://climexp.knmi.nl/getindices.cgi?NPERYEAR=366&STATION=precipitationdeficit&TYPE=i&WMO=KNMIData/nt_nl&id=someone@somewhere" 293 | ] 294 | }, 295 | { 296 | 
"cell_type": "code", 297 | "execution_count": null, 298 | "id": "38d6f9af", 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "knmi_cumdf = group_yearly_df(\n", 303 | "    pd.read_csv(\n", 304 | "        \"data/neerslagtekort.txt\",\n", 305 | "        skiprows=11,\n", 306 | "        sep=\"\\t\",\n", 307 | "        header=None,\n", 308 | "        index_col=0,\n", 309 | "        parse_dates=True,\n", 310 | "        date_format=\"%Y%m%d\",\n", 311 | "    )\n", 312 | "    .dropna(how=\"all\", axis=1)\n", 313 | "    .squeeze()\n", 314 | "    .rename(\"KNMI\")\n", 315 | ")\n", 316 | "knmi_cumdf.index.name = \"\"\n", 317 | "ax = knmi_cumdf.plot(figsize=(7.0, 6.0), cmap=\"viridis\")\n", 318 | "ax.legend(ncol=5, loc=(0, 1))\n", 319 | "ax.xaxis.set_major_locator(mpl.dates.MonthLocator())\n", 320 | "ax.xaxis.set_major_formatter(mpl.dates.DateFormatter(\"%B\"))\n", 321 | "ax.xaxis.set_ticks([], minor=True)\n", 322 | "ax.set_ylim(0.0)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "a5d7d7da", 328 | "metadata": {}, 329 | "source": [ 330 | "### KNMI plot\n", 331 | "\n", 332 | "From KNMI website the drought deficit is plotted as below:" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "id": "a798d38d", 338 | "metadata": {}, 339 | "source": [ 340 | "![neerslagtekort](https://cdn.knmi.nl/knmi/map/page/klimatologie/grafieken/neerslagtekort/neerslagtekort.png)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "id": "22245717", 346 | "metadata": {}, 347 | "source": [ 348 | "#### With KNMI deficit data\n", 349 | "This plot can be reproduced (almost perfectly) as seen from the figure below.\n", 350 | "\n", 351 | "The calculation uses the average precipitation from 13 reference stations in the Netherlands (the so-called P13/EV24-13 stations) and the reference evaporation calculated based on sunshine duration in De Bilt (until 2001) or the global radiation near the P13 stations (from 2001 onwards). For the median and 5% driest years a rolling window is applied. 
However, the size of this window is not documented anywhere." 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "id": "b06fa427", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "ax = deficit_knmi(knmi_cumdf, window=28)\n", 362 | "ax.set_title(\"KNMI computed preciptiation deficit\")" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "id": "e804afea", 368 | "metadata": {}, 369 | "source": [ 370 | "#### With own computed deficit (with downloaded knmi data)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "eb4ea498", 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "ax = deficit_knmi(cumdf, window=0)\n", 381 | "ax.set_title(\"Downloaded measurements\")" 382 | ] 383 | } 384 | ], 385 | "metadata": { 386 | "kernelspec": { 387 | "display_name": "SPEI", 388 | "language": "python", 389 | "name": "python3" 390 | }, 391 | "language_info": { 392 | "codemirror_mode": { 393 | "name": "ipython", 394 | "version": 3 395 | }, 396 | "file_extension": ".py", 397 | "mimetype": "text/x-python", 398 | "name": "python", 399 | "nbconvert_exporter": "python", 400 | "pygments_lexer": "ipython3", 401 | "version": "3.13.1" 402 | } 403 | }, 404 | "nbformat": 4, 405 | "nbformat_minor": 5 406 | } 407 | -------------------------------------------------------------------------------- /docs/examples/example09_joss_paper.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Article for Journal of Open Source Software\n", 8 | "\n", 9 | "*Martin Vonk (2025)*\n", 10 | "\n", 11 | "This notebook replicates the results presented in the article submitted to the Journal of Open Source Software ([JOSS](https://joss.theoj.org/)). The article can be found here: Vonk, M. A. (2025). 
SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454)\n", 12 | "\n", 13 | "\n", 14 | "JOSS is a developer-friendly, open-access academic journal (ISSN 2475-9066) dedicated to research software packages and features a formal peer-review process. The pre-review and review of the SPEI package are publicly available in issues [openjournals/joss-reviews#8430](https://github.com/openjournals/joss-reviews/issues/8430) and [openjournals/joss-reviews#8454](https://github.com/openjournals/joss-reviews/issues/8454), respectively." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Setup" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# dependencies\n", 31 | "from typing import Literal\n", 32 | "\n", 33 | "import matplotlib as mpl\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "import numpy as np\n", 36 | "import pandas as pd\n", 37 | "import scipy.stats as sps\n", 38 | "from cycler import cycler\n", 39 | "from matplotlib import patheffects\n", 40 | "from scipy.stats._survival import EmpiricalDistributionFunction\n", 41 | "\n", 42 | "import spei as si\n", 43 | "\n", 44 | "# matplotlib settings\n", 45 | "plt.rcParams.update(\n", 46 | " {\n", 47 | " \"axes.prop_cycle\": cycler(\n", 48 | " color=[\n", 49 | " \"#3f90da\",\n", 50 | " \"#ffa90e\",\n", 51 | " \"#bd1f01\",\n", 52 | " \"#94a4a2\",\n", 53 | " \"#832db6\",\n", 54 | " \"#a96b59\",\n", 55 | " \"#e76300\",\n", 56 | " \"#b9ac70\",\n", 57 | " \"#717581\",\n", 58 | " \"#92dadd\",\n", 59 | " ]\n", 60 | " ),\n", 61 | " \"axes.titlesize\": 7.0,\n", 62 | " \"axes.labelsize\": 7.0,\n", 63 | " \"xtick.labelsize\": 6.0,\n", 64 | " \"ytick.labelsize\": 6.0,\n", 65 | " \"legend.fontsize\": 7.0,\n", 66 | " \"legend.framealpha\": 1.0,\n", 67 | " }\n", 68 | ")\n", 
69 | "\n", 70 | "\n", 71 | "# helper functions\n", 72 | "def axes_indicator(\n", 73 | " ax: plt.Axes,\n", 74 | " letter: str,\n", 75 | " x: float,\n", 76 | " y: float,\n", 77 | " ha: Literal[\"left\", \"right\"],\n", 78 | " va: Literal[\"top\", \"bottom\"],\n", 79 | "):\n", 80 | " \"\"\"Add an indicator to the axes.\"\"\"\n", 81 | " ax.annotate(\n", 82 | " f\"({letter})\",\n", 83 | " xy=(x, y),\n", 84 | " xycoords=\"axes fraction\",\n", 85 | " fontsize=mpl.rcParams[\"axes.titlesize\"],\n", 86 | " horizontalalignment=ha,\n", 87 | " verticalalignment=va,\n", 88 | " path_effects=[\n", 89 | " patheffects.Stroke(linewidth=1, foreground=\"white\"),\n", 90 | " patheffects.Normal(),\n", 91 | " ],\n", 92 | " )\n", 93 | "\n", 94 | "\n", 95 | "def plot_ecdf(\n", 96 | " ax: plt.Axes,\n", 97 | " data: pd.Series,\n", 98 | " ecdf: EmpiricalDistributionFunction,\n", 99 | " s: float,\n", 100 | " color: str,\n", 101 | " label: str,\n", 102 | " cdf: pd.Series | None = None,\n", 103 | " **kwargs,\n", 104 | ") -> None:\n", 105 | " data = data.drop_duplicates()\n", 106 | " ax.scatter(\n", 107 | " data,\n", 108 | " ecdf.probabilities,\n", 109 | " s=s,\n", 110 | " facecolor=color,\n", 111 | " label=label,\n", 112 | " **kwargs,\n", 113 | " )\n", 114 | " if cdf is not None:\n", 115 | " for idata, icdf, iecdf in zip(data, cdf, ecdf.probabilities):\n", 116 | " ax.plot(\n", 117 | " [idata, idata],\n", 118 | " [iecdf, icdf],\n", 119 | " color=color,\n", 120 | " linewidth=0.5,\n", 121 | " **kwargs,\n", 122 | " )\n", 123 | " return ecdf" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Data" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Load" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "df = pd.read_csv(\"data/CABAUW.csv\", index_col=0, parse_dates=True)\n", 147 | "prec = 
df[\"prec\"]\n", 148 | "evap = df[\"evap\"]\n", 149 | "surplusd = prec - evap\n", 150 | "surplus = surplusd.resample(\"MS\").sum()\n", 151 | "head = df[\"head\"]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Plot" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# highlight specific month\n", 168 | "month = 3\n", 169 | "ts = pd.Timestamp(\"2000-{:02d}-01\".format(month))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "fig, axd = plt.subplot_mosaic(\n", 179 | " [[\"meteo\"], [\"sp\"]], figsize=(7.0, 3.2), sharex=True, layout=\"constrained\"\n", 180 | ")\n", 181 | "\n", 182 | "axd[\"meteo\"].plot(prec.index, prec, linewidth=0.8, color=\"C0\")\n", 183 | "axd[\"meteo\"].plot(evap.index, evap, linewidth=0.8, color=\"C6\")\n", 184 | "axd[\"meteo\"].plot([], [], color=\"C0\", label=\"Precipitation\")\n", 185 | "axd[\"meteo\"].plot([], [], color=\"C6\", label=\"Potential Evaporation\")\n", 186 | "\n", 187 | "axd[\"meteo\"].legend(loc=(0, 1), ncol=2, frameon=False, columnspacing=1.0)\n", 188 | "axd[\"meteo\"].set_ylabel(\"Flux (mm/day)\")\n", 189 | "\n", 190 | "axd[\"meteo\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(10))\n", 191 | "axd[\"meteo\"].yaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))\n", 192 | "axd[\"meteo\"].set_ylim(bottom=0.0)\n", 193 | "axes_indicator(axd[\"meteo\"], letter=\"a\", x=0.005, y=0.97, ha=\"left\", va=\"top\")\n", 194 | "\n", 195 | "axd[\"sp\"].plot(\n", 196 | " surplus.index,\n", 197 | " surplus.values,\n", 198 | " color=\"C3\",\n", 199 | " linewidth=1.0,\n", 200 | " marker=\".\",\n", 201 | " markersize=2.0,\n", 202 | " label=\"Monthly Surplus (Precipitation minus Evaporation)\",\n", 203 | ")\n", 204 | "mid = surplus.index.month == ts.month\n", 205 | 
"axd[\"sp\"].scatter(\n", 206 | " surplus.index[mid], # + pd.Timedelta(days=15),\n", 207 | " surplus.values[mid],\n", 208 | " color=\"C2\",\n", 209 | " s=5.0,\n", 210 | " zorder=2,\n", 211 | " label=f\"Data points {ts.strftime('%B')}\",\n", 212 | ")\n", 213 | "axd[\"sp\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(50))\n", 214 | "axd[\"sp\"].yaxis.set_minor_locator(mpl.ticker.MultipleLocator(25))\n", 215 | "axd[\"sp\"].xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", 216 | "axd[\"sp\"].xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", 217 | "axd[\"sp\"].set_xlim(surplus.index[0], surplus.index[-1])\n", 218 | "axd[\"sp\"].set_ylabel(\"Precipitation\\nsurplus (mm)\")\n", 219 | "axd[\"sp\"].legend(loc=(0, 1), frameon=False, ncol=2)\n", 220 | "axes_indicator(axd[\"sp\"], letter=\"b\", x=0.005, y=0.97, ha=\"left\", va=\"top\")\n", 221 | "\n", 222 | "axd[\"sp\"].set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", 223 | "\n", 224 | "# fig.savefig(\"../../paper/figures/monthly_precipitation_surplus.png\", dpi=300, bbox_inches=\"tight\")" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "## Standardized Index Procedure" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "### Fit Distribution" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "dist = sps.fisk\n", 248 | "sispei = si.SI(\n", 249 | " series=surplus,\n", 250 | " dist=dist,\n", 251 | " timescale=1,\n", 252 | " # fit_freq=\"MS\",\n", 253 | ")\n", 254 | "sispei.fit_distribution()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### Equiprobability Transform" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "fit_dist = 
sispei._dist_dict[ts]\n", 271 | "data = fit_dist.data.sort_values()\n", 272 | "cdf = fit_dist.cdf().loc[data.index]\n", 273 | "ecdf = sps.ecdf(data).cdf\n", 274 | "\n", 275 | "zscores = np.arange(-3.0, 3.1, 0.1)\n", 276 | "norm_cdf = sps.norm.cdf(zscores, loc=0.0, scale=1.0)\n", 277 | "norm_cdf_transformed = sps.norm.ppf(cdf.values, loc=0.0, scale=1.0)\n", 278 | "\n", 279 | "fig, axd = plt.subplot_mosaic(\n", 280 | " [[\"cdf\", \"norm\"]],\n", 281 | " figsize=(7.0, 3),\n", 282 | " width_ratios=[1.5, 1.0],\n", 283 | " sharey=True,\n", 284 | " layout=\"tight\",\n", 285 | ")\n", 286 | "plot_ecdf(\n", 287 | " ax=axd[\"cdf\"],\n", 288 | " data=data,\n", 289 | " cdf=cdf,\n", 290 | " ecdf=ecdf,\n", 291 | " s=10.0,\n", 292 | " color=\"C2\",\n", 293 | " label=f\"Data points {ts.strftime('%B')}\",\n", 294 | " zorder=3,\n", 295 | ")\n", 296 | "\n", 297 | "bin = 5.0\n", 298 | "bins = np.arange(data.min() // bin * bin, data.max() + bin, bin)\n", 299 | "axd[\"cdf\"].plot(\n", 300 | " bins,\n", 301 | " fit_dist.dist.cdf(bins, *fit_dist.pars, loc=fit_dist.loc, scale=fit_dist.scale),\n", 302 | " label=f\"Fitted {dist.name} distribution\",\n", 303 | " color=\"C0\",\n", 304 | ")\n", 305 | "\n", 306 | "axd[\"cdf\"].legend(loc=\"upper left\")\n", 307 | "axd[\"cdf\"].set_xlim(np.min(bins), np.max(bins))\n", 308 | "axd[\"cdf\"].xaxis.set_minor_locator(mpl.ticker.MultipleLocator(bin))\n", 309 | "axd[\"cdf\"].xaxis.set_major_locator(mpl.ticker.MultipleLocator(bin * 2))\n", 310 | "axd[\"cdf\"].set_ylim(0.0, 1.0)\n", 311 | "axd[\"cdf\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.1))\n", 312 | "axd[\"cdf\"].yaxis.set_major_formatter(mpl.ticker.PercentFormatter(1.0))\n", 313 | "axd[\"cdf\"].set_xlabel(\"Precipitation surplus (mm)\")\n", 314 | "axd[\"cdf\"].set_ylabel(\"Cumulative probability\")\n", 315 | "axes_indicator(axd[\"cdf\"], \"a\", 0.99, 0.02, ha=\"right\", va=\"bottom\")\n", 316 | "\n", 317 | "axd[\"norm\"].plot(\n", 318 | " zscores, norm_cdf, label=\"Standardized\\nnormal 
distribution\", color=\"C4\", zorder=3\n", 319 | ")\n", 320 | "axd[\"norm\"].scatter(\n", 321 | " norm_cdf_transformed,\n", 322 | " cdf.values,\n", 323 | " s=10.0,\n", 324 | " label=f\"Projected points\\n{dist.name} distribution\",\n", 325 | " color=\"C0\",\n", 326 | " zorder=2,\n", 327 | ")\n", 328 | "axd[\"norm\"].legend(loc=\"upper left\")\n", 329 | "axd[\"norm\"].set_xlim(np.min(zscores), np.max(zscores))\n", 330 | "axd[\"norm\"].set_xlabel(\"Z-score / SPEI value\")\n", 331 | "\n", 332 | "# visualize specific data point\n", 333 | "idx = data.index[20]\n", 334 | "cdf_idx = cdf.at[idx]\n", 335 | "ppf_idx = sps.norm.ppf(cdf_idx)\n", 336 | "print(\n", 337 | " f\"Data index: {idx.strftime('%Y')}, Data value: {data.loc[idx]:0.2f} CDF: {cdf_idx:0.1%}, PPF: {ppf_idx:0.4f}\"\n", 338 | ")\n", 339 | "axd[\"cdf\"].plot(\n", 340 | " [data.loc[idx], data.loc[idx], np.max(data)],\n", 341 | " [0.0, cdf_idx, cdf_idx],\n", 342 | " color=\"k\",\n", 343 | " linestyle=\"--\",\n", 344 | " linewidth=1.0,\n", 345 | " zorder=0,\n", 346 | ")\n", 347 | "axd[\"norm\"].plot(\n", 348 | " [np.min(zscores), ppf_idx, ppf_idx],\n", 349 | " [\n", 350 | " cdf_idx,\n", 351 | " cdf_idx,\n", 352 | " 0.0,\n", 353 | " ],\n", 354 | " color=\"k\",\n", 355 | " linestyle=\"--\",\n", 356 | " linewidth=1.0,\n", 357 | " zorder=0,\n", 358 | ")\n", 359 | "axes_indicator(axd[\"norm\"], \"b\", 0.99, 0.02, ha=\"right\", va=\"bottom\")\n", 360 | "\n", 361 | "# fig.savefig(\"../../paper/figures/surplus_fit_cdf.png\", dpi=300, bbox_inches=\"tight\")" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "### Results\n", 369 | "\n", 370 | "#### Time Series" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "spei1 = sispei.norm_ppf()\n", 380 | "\n", 381 | "ax = si.plot.si(spei1, figsize=(7.0, 2.0), layout=\"tight\")\n", 382 | "# 
ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 383 | "ax.xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", 384 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", 385 | "ax.legend(labels=[\"SPEI-1\"], loc=(0, 1), frameon=False)\n", 386 | "ax.set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", 387 | "ax.set_ylabel(\"Z-score\")\n", 388 | "\n", 389 | "# ax.get_figure().savefig(\"../../paper/figures/spei1.png\", dpi=300, bbox_inches=\"tight\")" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "#### Heatmap" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "speis = [\n", 406 | " spei1.rename(\"1\"),\n", 407 | " si.spei(surplus, timescale=3).rename(\"3\"),\n", 408 | " si.spei(surplus, timescale=6).rename(\"6\"),\n", 409 | " si.spei(surplus, timescale=9).rename(\"9\"),\n", 410 | " si.spei(surplus, timescale=12).rename(\"12\"),\n", 411 | " si.spei(surplus, timescale=24).rename(\"24\"),\n", 412 | "]\n", 413 | "f, ax = plt.subplots(figsize=(7.0, 2.0))\n", 414 | "si.plot.heatmap(speis, cmap=\"vik_r\", vmin=-3, vmax=3, add_category=False, ax=ax)\n", 415 | "ax.set_ylabel(\"Time scale (months)\")\n", 416 | "f.axes[-1].set_ylabel(\"Z-score\")\n", 417 | "ax.xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", 418 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", 419 | "ax.set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", 420 | "\n", 421 | "# ax.get_figure().savefig(\"../../paper/figures/spei_heatmap.png\", dpi=300, bbox_inches=\"tight\")" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "#### Threshold" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "perc = sps.norm.cdf(-1.0) # same as zscore -1.0\n", 438 | "thres = 
sispei.ppf(perc).rename(f\"Threshold {perc:0.0%} percentile\")\n", 439 | "fig, ax = plt.subplots(figsize=(7.0, 2.0), layout=\"tight\")\n", 440 | "ax = si.plot.threshold(\n", 441 | " surplus,\n", 442 | " thres,\n", 443 | " ax=ax,\n", 444 | " **dict(\n", 445 | " color=\"C3\",\n", 446 | " linewidth=1.0,\n", 447 | " marker=\".\",\n", 448 | " markersize=2.0,\n", 449 | " label=\"Monthly Surplus (Precipitation minus Evaporation)\",\n", 450 | " ),\n", 451 | ")\n", 452 | "ax.set_xlim(pd.Timestamp(\"2003\"), pd.Timestamp(\"2019\"))\n", 453 | "ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(50))\n", 454 | "ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(25))\n", 455 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator(1))\n", 456 | "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator([4, 7, 10]))\n", 457 | "ax.set_ylabel(\"Precipitation\\nsurplus (mm)\")\n", 458 | "ax.legend(ncol=3, loc=(0, 1), frameon=False)\n", 459 | "\n", 460 | "# fig.savefig(\"../../paper/figures/threshold.png\", dpi=300, bbox_inches=\"tight\")" 461 | ] 462 | } 463 | ], 464 | "metadata": { 465 | "kernelspec": { 466 | "display_name": "SPEI", 467 | "language": "python", 468 | "name": "python3" 469 | }, 470 | "language_info": { 471 | "codemirror_mode": { 472 | "name": "ipython", 473 | "version": 3 474 | }, 475 | "file_extension": ".py", 476 | "mimetype": "text/x-python", 477 | "name": "python", 478 | "nbconvert_exporter": "python", 479 | "pygments_lexer": "ipython3", 480 | "version": "3.12.3" 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 2 485 | } 486 | -------------------------------------------------------------------------------- /src/spei/si.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dataclasses import dataclass, field 3 | from typing import Literal 4 | 5 | from numpy import ceil, interp, linspace, nan 6 | from pandas import DataFrame, Grouper, Series, Timedelta, Timestamp 7 | from scipy.stats import beta, 
fisk, gamma, genextreme, norm

from ._typing import ContinuousDist
from .dist import Dist
from .utils import (
    daily_window_group_yearly_df,
    get_data_series,
    group_yearly_df,
    infer_frequency,
    validate_series,
)


def sgi(
    series: Series,
    timescale: int = 0,
    fit_freq: str | None = None,
) -> Series:
    """Method to compute the Standardized Groundwater Index. Uses
    the normal scores transform to calculate the cumulative density function.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the groundwater levels. Time series index
        should be a pandas DatetimeIndex.
    timescale : int, optional, default=0
        Size of the moving window over which the series is averaged (the
        mean is used as aggregation function). If zero, no aggregation is
        performed over the time series.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.

    Returns
    -------
    pandas.Series

    References
    ----------
    Bloomfield, J. P. and Marchant, B. P.: Analysis of
    groundwater drought building on the standardised precipitation index
    approach. Hydrol. Earth Syst. Sci., 17, 4769-4787, 2013.
    """

    # The SGI is non-parametric: a distribution instance is still required by
    # SI, but with normal_scores_transform=True it is never fitted.
    mock_dist = norm
    sgi = SI(
        series=series,
        dist=mock_dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=0,
        prob_zero=False,
        normal_scores_transform=True,
        agg_func="mean",
    )
    return sgi.norm_ppf()


def spi(
    series: Series,
    dist: ContinuousDist = gamma,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = True,
) -> Series:
    """Method to compute the Standardized Precipitation Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the precipitation. Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SPI generally the Gamma probability density
        function is recommended. Other appropriate choices could be the
        lognormal, log-logistic (fisk) or PearsonIII distribution.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=True
        Option to correct the distribution if x=0 is not in probability density
        function. E.g. the case with the Gamma distribution. If True, the
        probability of zero values in the series is calculated by the
        occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Lloyd-Hughes, B. and Saunders, M.A.: A drought climatology for Europe.
    International Journal of Climatology, 22, 1571-1592, 2002.
    """

    spi = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="sum",
    )
    spi.fit_distribution()
    return spi.norm_ppf()


def spei(
    series: Series,
    dist: ContinuousDist = fisk,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = False,
) -> Series:
    """Method to compute the Standardized Precipitation Evaporation Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the precipitation surplus (precipitation
        minus potential evaporation). Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SPEI generally the log-logistic (fisk) probability
        density function is recommended. Other appropriate choices could be
        the lognormal or PearsonIII distribution.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=False
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Vicente-Serrano S.M., Beguería S., López-Moreno J.I.:
    A Multi-scalar drought index sensitive to global warming:
    The Standardized Precipitation Evapotranspiration Index.
    Journal of Climate, 23, 1696-1718, 2010.
    """

    spei = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="sum",
    )
    spei.fit_distribution()
    return spei.norm_ppf()


def ssfi(
    series: Series,
    dist: ContinuousDist = genextreme,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = True,
) -> Series:
    """Method to compute the Standardized StreamFlow Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the streamflow. Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SSFI generally the gamma probability density function
        is recommended. Other choices could be the normal, lognormal,
        pearsonIII, GEV or Gen-Logistic distribution or any distribution deemed
        appropriate.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=True
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Vicente-Serrano, S. M., J. I. López-Moreno, S. Beguería, J. Lorenzo-Lacruz,
    C. Azorin-Molina, and E. Morán-Tejeda. Accurate Computation of a Streamflow
    Drought Index. Journal of Hydrologic Engineering 17 (2): 318-332. 2012.
    """
    ssfi = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="mean",
    )
    ssfi.fit_distribution()
    return ssfi.norm_ppf()


def ssmi(
    series: Series,
    dist: ContinuousDist = beta,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = True,
) -> Series:
    """Method to compute the Standardized Soil Moisture Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the soil moisture. Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SSMI generally the beta probability density function
        is recommended. Other choices could be the normal or ECDF distribution
        or any distribution deemed appropriate.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=True
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Carrão, H., Russo, S., Sepulcre-Canto, G., Barbosa, P.: An empirical standardized
    soil moisture index for agricultural drought assessment from remotely sensed data.
    International Journal of Applied Earth Observation and Geoinformation, 48, 2016.
    """

    ssmi = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="mean",
    )
    ssmi.fit_distribution()
    return ssmi.norm_ppf()


@dataclass
class SI:
    """
    Standardized Index Class.

    Parameters
    ----------
    series : Series
        The input time series data.
    dist : ContinuousDist
        The SciPy continuous distribution associated with the data.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=False
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.
    normal_scores_transform : bool, default=False
        Flag to use the normal scores transformation for calculating the
        cumulative density function.
    agg_func: Literal['sum', 'mean'], default='sum'
        String of the function to use for aggregating the time series if the
        timescale is larger than 0. Can either be 'sum' or 'mean'.

    Attributes
    ----------
    _grouped_year : DataFrame
        Dataframe with all data grouped in a one-year (2000) DataFrame with the
        original years as columns
    _dist_dict : Dict[Timestamp, Dist]
        Dictionary of distributions used to fit the data, keyed by the
        (dummy-year 2000) date of each fit group.
    """

    series: Series = field(repr=False)
    dist: ContinuousDist
    timescale: int = 0
    fit_freq: str | None = field(default=None)
    fit_window: int = field(default=0)
    prob_zero: bool = field(default=False)
    normal_scores_transform: bool = field(default=False)
    agg_func: Literal["sum", "mean"] = "sum"
    # Populated in __post_init__ / fit_distribution; excluded from init/repr.
    _grouped_year: DataFrame = field(init=False, repr=False, compare=False)
    _dist_dict: dict[Timestamp, Dist] = field(
        default_factory=dict, init=False, repr=False, compare=False
    )

    def __post_init__(self) -> None:
        """
        Post initializes the SI class and performs necessary data
        preprocessing and validation.
        """
        self.series = validate_series(self.series)

        # Aggregate over the moving window (sum or mean per agg_func); only
        # full windows are kept (min_periods equals the window size).
        if self.timescale > 0:
            self.series = (
                self.series.rolling(self.timescale, min_periods=self.timescale)
                .agg(self.agg_func)
                .dropna()
                .copy()
            )

        if self.fit_freq is None:
            self.fit_freq = infer_frequency(self.series.index)

        self._grouped_year = group_yearly_df(series=self.series)

        # Coerce an invalid fit_window to the nearest valid value (odd, >= 3)
        # rather than raising, but log an error so the user is informed.
        if self.fit_window > 0:
            if self.fit_window < 3:
                logging.error(
                    "Window should be larger than 2. Setting the window value to 3."
                )
                self.fit_window = 3  # make sure window is at least three
            elif self.fit_window % 2 == 0:
                logging.error(
                    "Window should be odd. Setting the window value to"
                    f"{self.fit_window + 1}"
                )
                self.fit_window += 1  # make sure window is odd

    def fit_distribution(self) -> None:
        """
        Fit distribution on the time series per fit_frequency and/or fit_window
        """

        if self.normal_scores_transform:
            logging.info("Using normal-scores-transform. No distribution is fitted.")

        elif self.fit_window > 0:
            # Window-based fitting only supports daily/weekly frequencies.
            if self.fit_freq not in (
                "d",
                "w",
                "D",
                "W",
            ):  # TODO: ideally 14D should also work.
                raise ValueError(
                    "Frequency fit_freq must be 'D' or 'W', not "
                    f"'{self.fit_freq}', if a fit_window is provided."
                )

            logging.info("Using rolling window method")
            window = self.fit_window
            period = int(ceil(window / 2))
            # A weekly window is converted to an equivalent daily window.
            if self.fit_freq in ("W", "w"):
                period = Timedelta(value=period, unit="W").days
                window = period * 2 + 1

            dfval_window = daily_window_group_yearly_df(
                dfval=self._grouped_year, period=period
            )
            # For each full rolling window, fit the distribution for the
            # window's center date using all data within the window.
            for dfval_rwindow in dfval_window.rolling(
                window=window, min_periods=window, closed="right"
            ):
                if len(dfval_rwindow) < window:
                    continue  # min_periods ignored by Rolling.__iter__
                date = dfval_rwindow.index[period]
                data = get_data_series(dfval_rwindow.loc[[date]])
                data_window = get_data_series(dfval_rwindow)
                fd = Dist(
                    data=data,
                    dist=self.dist,
                    prob_zero=self.prob_zero,
                    data_window=data_window,
                )
                self._dist_dict[date] = fd
        else:
            logging.info("Using groupby fit by frequency method")
            # One distribution per fit_freq group (e.g. per calendar month).
            for date, grval in self._grouped_year.groupby(
                Grouper(freq=str(self.fit_freq))
            ):
                data = get_data_series(grval)
                fd = Dist(
                    data=data,
                    dist=self.dist,
                    prob_zero=self.prob_zero,
                    data_window=None,
                )
                self._dist_dict[date] = fd  # type: ignore

    def cdf(self) -> Series:
        """Compute the cumulative density function"""
        if self.normal_scores_transform:
            cdf = self.cdf_nsf()
        else:
            # Combine the per-group fitted CDFs back into one series.
            cdf = Series(nan, index=self.series.index, dtype=float)
            for k in self._dist_dict:
                cdf_k = self._dist_dict[k].cdf()
                cdf.loc[cdf_k.index] = cdf_k.values

        return cdf

    def pdf(self) -> Series:
        """Compute the probability density function"""
        if self.normal_scores_transform:
            # NOTE(review): with the normal scores transform the density is
            # approximated by differencing the empirical CDF.
            pdf = self.cdf().diff()
        else:
            pdf = Series(nan, index=self.series.index, dtype=float)
            for k in self._dist_dict:
                pdf_k = self._dist_dict[k].pdf()
                pdf.loc[pdf_k.index] = pdf_k.values
        return pdf

    def cdf_nsf(self) -> Series:
        """
        Compute the cumulative density function using the Normal Scores
        Transform

        Returns
        -------
        Series
        """
        logging.info("Using the normal scores transform")
        cdf = Series(nan, index=self.series.index, dtype=float)
        for _, grval in self._grouped_year.groupby(Grouper(freq=str(self.fit_freq))):
            data = get_data_series(grval).sort_values()
            n = len(data)
            # Evenly spaced plotting positions in (0, 1): (2i - 1) / (2n),
            # assigned in rank order of the data.
            cdf.loc[data.index] = linspace(1 / (2 * n), 1 - 1 / (2 * n), n)
        return cdf

    def ppf(self, q: float) -> Series:
        """
        Method to calculate the percentile point function
        (inverse of cdf — percentiles) of a fitted
        distribution.

        Parameters
        ----------
        q : float
            The quantile value (between 0 and 1) for which to calculate the
            percentile point function.

        Returns
        -------
        Series
        """
        ppf = Series(nan, index=self.series.index, dtype=float)
        if self.normal_scores_transform:
            # Invert the empirical CDF per fit group by linear interpolation.
            cdf = self.cdf_nsf()
            for _, grval in self._grouped_year.groupby(
                Grouper(freq=str(self.fit_freq))
            ):
                data = get_data_series(grval).sort_values()
                cdf_i = cdf.loc[data.index]
                ppf.loc[data.index] = interp(
                    x=q,
                    xp=cdf_i.values.astype(float),
                    fp=data.values.astype(float),
                )
        else:
            for k in self._dist_dict:
                ppf_k = self._dist_dict[k].ppf(q=q)
                ppf.loc[ppf_k.index] = ppf_k.values
        return ppf

    def norm_ppf(self) -> Series:
        """
        Method to calculate probability point function of the standard normal
        distribution based on a cumulative density function of a fitted
        distribution. This yields the standardized index (Z-score).

        Returns
        -------
        Series
        """

        cdf = self.cdf()
        ppf = Series(
            norm.ppf(cdf.values, loc=0, scale=1), index=self.series.index, dtype=float
        )
        return ppf

    def get_dist(self, date: Timestamp) -> Dist:
        """Return the fitted distribution whose data contains `date`.

        Parameters
        ----------
        date : Timestamp
            Date to look up in the data of each fitted distribution.

        Raises
        ------
        KeyError
            If `date` is not present in any fitted distribution's data.
        """
        for k in self._dist_dict:
            dist = self._dist_dict[k]
            if date in dist.data.index:
                return dist

        raise KeyError("Date not found in distributions")