├── tests ├── __init__.py ├── test_version.py ├── test_rai.py ├── test_plots.py ├── conftest.py ├── test_climdex.py ├── test_knmi.py ├── test_validate.py └── test_si.py ├── docs ├── _static │ └── .gitkeep ├── examples │ ├── index.md │ ├── example08_rai.ipynb │ ├── example06_treshold.ipynb │ ├── example05_multiyear_drought.ipynb │ ├── example01_indices.ipynb │ ├── example03_drought_prediction.ipynb │ ├── example04_package_comparison.ipynb │ ├── example07_knmi.ipynb │ └── example09_joss_paper.ipynb ├── index.md └── conf.py ├── src └── spei │ ├── py.typed │ ├── __init__.py │ ├── _typing.py │ ├── _version.py │ ├── rai.py │ ├── climdex.py │ ├── utils.py │ ├── dist.py │ ├── knmi.py │ └── si.py ├── paper ├── figures │ ├── spei1.png │ ├── threshold.png │ ├── spei_density.png │ ├── spei_heatmap.png │ ├── surplus_fit_cdf.png │ └── monthly_precipitation_surplus.png ├── paper.md └── paper.bib ├── .gitignore ├── .github ├── workflows │ ├── auto-author-assign.yml │ ├── draft-pdf.yml │ ├── python-publish.yml │ ├── documentation.yml │ └── tests.yml ├── ISSUE_TEMPLATE │ ├── question.yml │ ├── enhancement.yml │ └── bug.yml └── CONTRIBUTING.md ├── CITATION.cff ├── LICENSE ├── pyproject.toml └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/spei/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. The mypy package uses inline types. 
-------------------------------------------------------------------------------- /paper/figures/spei1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/spei1.png -------------------------------------------------------------------------------- /paper/figures/threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/threshold.png -------------------------------------------------------------------------------- /paper/figures/spei_density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/spei_density.png -------------------------------------------------------------------------------- /paper/figures/spei_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/spei_heatmap.png -------------------------------------------------------------------------------- /paper/figures/surplus_fit_cdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/surplus_fit_cdf.png -------------------------------------------------------------------------------- /paper/figures/monthly_precipitation_surplus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martinvonk/SPEI/HEAD/paper/figures/monthly_precipitation_surplus.png -------------------------------------------------------------------------------- /src/spei/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from . 
import climdex, rai, knmi, dist, plot, si, utils 3 | from ._version import __version__, show_versions 4 | from .si import SI, sgi, spei, spi, ssfi, ssmi 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | 4 | /dist 5 | /docs/references 6 | /docs/_build 7 | /docs/_api 8 | /paper/literature 9 | /paper/data 10 | 11 | /.vscode 12 | *.code-workspace 13 | /.mypy_cache 14 | /.tox 15 | /.pytest_cache 16 | /htmlcov 17 | coverage.xml 18 | *.coverage 19 | uv.lock 20 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | import spei as si 2 | 3 | 4 | def test_version() -> None: 5 | assert isinstance(si.__version__, str) 6 | assert si.__version__.count(".") == 2 7 | 8 | 9 | def test_show_versions(): 10 | msg = si.show_versions() 11 | assert isinstance(msg, str) 12 | -------------------------------------------------------------------------------- /src/spei/_typing.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from typing import Any 3 | 4 | from numpy import float64 5 | from numpy.typing import NDArray 6 | from scipy.stats._continuous_distns import rv_continuous 7 | 8 | ContinuousDist = Any | rv_continuous 9 | NDArrayAxes = NDArray[Any] 10 | NDArrayFloat = NDArray[float64] 11 | -------------------------------------------------------------------------------- /.github/workflows/auto-author-assign.yml: -------------------------------------------------------------------------------- 1 | # .github/workflows/auto-author-assign.yml 2 | name: Auto Author Assign 3 | 4 | on: 5 | pull_request_target: 6 | types: [opened, reopened] 7 | 8 | permissions: 9 | pull-requests: write 10 | 11 | jobs: 12 | assign-author: 13 | runs-on: ubuntu-latest 14 | 
steps: 15 | - uses: toshimaru/auto-author-assign@v1.6.2 -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Below you can find examples of how SPEI can be used. These Jupyter Notebooks can also be run via [examples directory on GitHub](https://github.com/martinvonk/SPEI/tree/master/docs/examples). 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | 8 | example01_indices 9 | example02_distributions 10 | example03_drought_prediction 11 | example04_package_comparison 12 | example05_multiyear_drought 13 | example06_treshold 14 | example07_knmi 15 | example08_rai 16 | example09_joss_paper 17 | ``` 18 | -------------------------------------------------------------------------------- /src/spei/_version.py: -------------------------------------------------------------------------------- 1 | from importlib import metadata 2 | from platform import python_version 3 | 4 | __version__ = "0.8.0" 5 | 6 | 7 | def show_versions() -> str: 8 | msg = f"python: {python_version()}\nspei: {__version__}\n" 9 | 10 | requirements = metadata.requires("spei") 11 | if requirements: 12 | deps = [x for x in requirements if "extra" not in x] 13 | for dep in deps: 14 | msg += f"{dep}: {metadata.version(dep)}" 15 | msg += "\n" if deps.index(dep) < len(deps) - 1 else "" 16 | 17 | return msg 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: Question 2 | description: Form for a question or clarification 3 | labels: ["question"] 4 | body: 5 | - type: checkboxes 6 | attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the question you have. 
9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the question 15 | description: A concise description of what you would like to know. 16 | validations: 17 | required: false -------------------------------------------------------------------------------- /.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | # https://github.com/marketplace/actions/open-journals-pdf-generator 2 | name: JOSS 3 | on: 4 | push: 5 | branches: 6 | - joss 7 | # - dev 8 | 9 | jobs: 10 | paper: 11 | runs-on: ubuntu-latest 12 | name: Build Paper Draft 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: Build draft PDF 18 | uses: openjournals/openjournals-draft-action@master 19 | with: 20 | journal: joss 21 | paper-path: paper/paper.md 22 | 23 | - name: Upload 24 | uses: actions/upload-artifact@v4 25 | with: 26 | name: paper 27 | path: paper/paper.pdf -------------------------------------------------------------------------------- /tests/test_rai.py: -------------------------------------------------------------------------------- 1 | from pandas import Series 2 | 3 | from spei.rai import mrai, rai 4 | 5 | 6 | def test_rai(prec: Series) -> None: 7 | precrs = prec.resample("MS").sum() 8 | rai_result = rai(precrs) 9 | assert isinstance(rai_result, Series), "RAI result is not a pandas Series" 10 | assert len(rai_result) == len(precrs), ( 11 | "RAI result length does not match input length" 12 | ) 13 | 14 | 15 | def test_mrai(prec: Series) -> None: 16 | precrs = prec.resample("MS").sum() 17 | mrai_result = mrai(precrs) 18 | assert isinstance(mrai_result, Series), "MRAI result is not a pandas Series" 19 | assert len(mrai_result) == len(precrs), ( 20 | "MRAI result length does not match input length" 21 | ) 22 | -------------------------------------------------------------------------------- 
/tests/test_plots.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | from pandas import Series 3 | 4 | from spei.plot import heatmap, monthly_density, threshold 5 | from spei.plot import si as plot_si 6 | 7 | mpl.use("Agg") # prevent _tkinter.TclError: Can't find a usable tk.tcl error 8 | 9 | 10 | def test_plot_si(si: Series) -> None: 11 | _ = plot_si(si) 12 | 13 | 14 | def test_plot_si_no_background(si: Series) -> None: 15 | _ = plot_si(si, cmap="roma_r", background=False) 16 | 17 | 18 | def test_plot_monthly_density(si: Series) -> None: 19 | _ = monthly_density(si, years=[2011], months=[1, 2, 3, 4, 5]) 20 | 21 | 22 | def test_plot_heatmap(si: Series) -> None: 23 | _ = heatmap([si], cmap="vik", vmin=-3.0, vmax=3.0) 24 | 25 | 26 | def test_plot_threshold(head: Series) -> None: 27 | th = Series(head.mean(), index=head.index, dtype=float) 28 | _ = threshold(series=head, threshold=th, fill_color="orange") 29 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.2.0" 2 | authors: 3 | - family-names: Vonk 4 | given-names: M. A. 5 | orcid: "https://orcid.org/0009-0007-3528-2991" 6 | doi: 10.5281/zenodo.16441123 7 | message: If you use this software, please cite our article in the 8 | Journal of Open Source Software. 9 | preferred-citation: 10 | authors: 11 | - family-names: Vonk 12 | given-names: M. A. 
13 | orcid: "https://orcid.org/0009-0007-3528-2991" 14 | date-published: 2025-07-29 15 | doi: 10.21105/joss.08454 16 | issn: 2475-9066 17 | issue: 111 18 | journal: Journal of Open Source Software 19 | publisher: 20 | name: Open Journals 21 | start: 8454 22 | title: "SPEI: A Python package for calculating and visualizing drought 23 | indices" 24 | type: article 25 | url: "https://joss.theoj.org/papers/10.21105/joss.08454" 26 | volume: 10 27 | title: "SPEI: A Python package for calculating and visualizing drought 28 | indices" 29 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install wheel twine build 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python3 -m build 31 | python3 -m twine upload --repository pypi dist/* 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Martin Vonk 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation 
files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yml: -------------------------------------------------------------------------------- 1 | name: Enhancement 2 | description: Form for an enhancement, new feature or request 3 | labels: ["enhancement"] 4 | body: 5 | - type: checkboxes 6 | attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the feature you are requesting. 9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the enhancement 15 | description: A concise description of the feature you are requesting. 16 | validations: 17 | required: false 18 | placeholder: | 19 | Description of the feature. 20 | 21 | ```python 22 | # Possible pseudo code to describe the feature 23 | ``` 24 | - type: textarea 25 | attributes: 26 | label: Anything else? 27 | description: | 28 | Links? 
References? Anything that will give us more context! 29 | Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. 30 | validations: 31 | required: false -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from pandas import Series, Timestamp, read_csv 5 | 6 | from spei.si import spi 7 | 8 | 9 | def read_data(column: str) -> Series: 10 | df = read_csv( 11 | Path(__file__).parent / "data/B11C0329_EAGMARYP.csv", 12 | index_col=0, 13 | parse_dates=True, 14 | sep=";", 15 | ) 16 | return df.loc[:, column] 17 | 18 | 19 | @pytest.fixture 20 | def prec() -> Series: 21 | prec = read_data("Prec [m/d] 081_JOURE").dropna() 22 | return prec 23 | 24 | 25 | @pytest.fixture 26 | def precmm(prec) -> Series: 27 | return prec.multiply(1e3).rename("Prec [mm/d] 081_JOURE") 28 | 29 | 30 | @pytest.fixture 31 | def evap() -> Series: 32 | evap = read_data("Evap [m/d] 235_DE-KOOY").dropna() 33 | return evap 34 | 35 | 36 | @pytest.fixture 37 | def head() -> Series: 38 | head = read_data("Head [m] B11C0329_EAGMARYP").dropna() 39 | return head 40 | 41 | 42 | @pytest.fixture 43 | def si(prec: Series) -> Series: 44 | si = spi(prec.rolling("30D", min_periods=30).sum().dropna(), prob_zero=True) 45 | return si 46 | 47 | 48 | @pytest.fixture 49 | def deficit(prec: Series, evap: Series) -> Series: 50 | deficit = ( 51 | (evap - prec) 52 | .loc[Timestamp("1965-01-01") : Timestamp("2020-12-31")] 53 | .rename("deficit") 54 | ) 55 | return deficit 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.yml: -------------------------------------------------------------------------------- 1 | name: Bug 2 | description: Form for an unexpected problem or behavior 3 | labels: ["bug"] 4 | body: 5 | - type: checkboxes 6 | 
attributes: 7 | label: Is there an existing issue for this? 8 | description: Please search to see if an issue already exists for the bug you encountered. 9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the bug 15 | description: A concise description of what you're experiencing. 16 | validations: 17 | required: false 18 | - type: textarea 19 | attributes: 20 | label: Expected behavior 21 | description: A concise description of what you expected to happen. 22 | validations: 23 | required: false 24 | - type: textarea 25 | attributes: 26 | label: Code to reproduce 27 | description: Steps to reproduce the behavior. 28 | placeholder: | 29 | 1. In this environment `spei.show_versions()` 30 | 2. Run 31 | ```python 32 | # Code to reproduce the behavior 33 | ``` 34 | 3. Gives output or error... 35 | validations: 36 | required: false 37 | - type: textarea 38 | attributes: 39 | label: Anything else? 40 | description: | 41 | Links? References? Anything that will give us more context about the issue you are encountering! 42 | Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. 
43 | validations: 44 | required: false -------------------------------------------------------------------------------- /tests/test_climdex.py: -------------------------------------------------------------------------------- 1 | from pandas import Series 2 | 3 | from spei import climdex 4 | 5 | 6 | def test_climdex_rxnday(precmm: Series) -> None: 7 | climdex.rxnday(series=precmm, interval="10D", period="90D") 8 | 9 | 10 | def test_climdex_rx1day(precmm: Series) -> None: 11 | climdex.rx1day(series=precmm) 12 | 13 | 14 | def test_climdex_rx5day(precmm: Series) -> None: 15 | climdex.rx5day(series=precmm) 16 | 17 | 18 | def test_climdex_sdii(precmm: Series) -> None: 19 | climdex.sdii(series=precmm) 20 | 21 | 22 | def test_climdex_rnmm(precmm: Series) -> None: 23 | climdex.rnmm(series=precmm, threshold=5, period="90D") 24 | 25 | 26 | def test_climdex_r10mm(precmm: Series) -> None: 27 | climdex.r10mm(series=precmm) 28 | 29 | 30 | def test_climdex_r20mm(precmm: Series) -> None: 31 | climdex.r20mm(series=precmm) 32 | 33 | 34 | def test_climdex_cdd(precmm: Series) -> None: 35 | climdex.cdd(series=precmm) 36 | 37 | 38 | def test_climdex_cwd(precmm: Series) -> None: 39 | climdex.cwd(series=precmm) 40 | 41 | 42 | def test_climdex_prcptot(precmm: Series) -> None: 43 | climdex.prcptot(series=precmm) 44 | 45 | 46 | def test_climdex_rnnp(precmm: Series) -> None: 47 | climdex.rnnp(series=precmm, quantile=0.5) 48 | 49 | 50 | def test_climdex_r95p(precmm: Series) -> None: 51 | climdex.r95p(series=precmm) 52 | 53 | 54 | def test_climdex_r99p(precmm: Series) -> None: 55 | climdex.r99p(series=precmm) 56 | 57 | 58 | def test_climdex_r95ptot(precmm: Series) -> None: 59 | climdex.r95ptot(series=precmm) 60 | 61 | 62 | def test_climdex_r99ptot(precmm: Series) -> None: 63 | climdex.r99ptot(series=precmm) 64 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | # Contributing to the SPEI Python Package 2 | 3 | Thank you for your interest in contributing to the **SPEI Python package**! We welcome contributions from everyone — whether you want to report an issue, improve the documentation, or submit code enhancements. This document outlines guidelines to help you get started. 4 | 5 | ## How to Contribute 6 | 7 | ### Creating a Good Issue 8 | 9 | Before creating an issue, check whether it has already been reported. 10 | When opening a new issue, please include: 11 | 12 | - A clear and descriptive title 13 | - A detailed description of the problem or suggestion 14 | - Steps to reproduce (if it's a bug) 15 | - The version of the package and Python you’re using 16 | - If relevant, a minimal reproducible example 17 | 18 | Before creating a new issue, check whether it has already been reported. 19 | 20 | ### Submitting a Pull Request 21 | 22 | To submit a pull request: 23 | 24 | 1. Create an issue first 25 | 2. Fork the repository 26 | 3. Create a new branch with a meaningful name 27 | 4. Fix the issue 28 | 5. Make your changes and include tests / example notebook if applicable. 29 | 6. Run the test suite to ensure everything works (including type hints and formatting & linting with ruff) 30 | 7. Submit a pull request (PR) with a clear description of what was changed and why. 
31 | 32 | ## Useful Links 33 | - SPEI GitHub Repository: [github.com/martinvonk/SPEI](https://github.com/martinvonk/SPEI) 34 | - Documentation and examples: [notebooks folder](https://github.com/martinvonk/SPEI/tree/main/docs/examples) 35 | - Code of Conduct: [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) 36 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # SPEI documentation 2 | 3 | SPEI is a Python package designed for calculating drought indices from meteorological and hydrological time series. Built on widely used libraries like Pandas and SciPy, it offers a flexible and simple approach to drought analysis. 4 | 5 | This website provides full documentation, example notebooks, and a detailed API reference to help you get started. 6 | 7 | The SPEI package is open-source and hosted on [GitHub](https://github.com/martinvonk/SPEI), where you can find more information about the available drought indices and ongoing development. The package is published on [PyPI](https://pypi.org/project/spei/) from which it can be installed using `pip install spei`. 8 | 9 | If you use this package for drought analysis and/or visualization, please cite it by referencing our article in the [Journal of Open Source Software](https://joss.theoj.org/papers/10.21105/joss.08454): 10 | 11 | Vonk, M. A. (2025). SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454). 12 | 13 | ```bibtex 14 | @article{Vonk_SPEI_2025, 15 | author = {Vonk, M. 
A.}, 16 | doi = {10.21105/joss.08454}, 17 | journal = {Journal of Open Source Software}, 18 | number = {111}, 19 | pages = {8454}, 20 | title = {{SPEI: A Python package for calculating and visualizing drought indices}}, 21 | url = {https://joss.theoj.org/papers/10.21105/joss.08454}, 22 | volume = {10}, 23 | year = {2025} 24 | } 25 | ``` 26 | 27 | ```{toctree} 28 | :maxdepth: 2 29 | 30 | examples/index.md 31 | _api/modules.rst 32 | ``` -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | from spei import __version__ 10 | 11 | project = "SPEI" 12 | copyright = "2025, Martin Vonk" 13 | author = "Martin Vonk" 14 | release = __version__ 15 | 16 | # make docs 17 | # sphinx-build -M html docs/source docs/build 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | "myst_parser", # For Markdown support 24 | "nbsphinx", # For Jupyter Notebooks support 25 | "sphinx.ext.autodoc", # For automatic documentation generation from docstrings 26 | "sphinx.ext.apidoc", # For automatic API documentation generation 27 | "sphinx.ext.napoleon", # For Google and NumPy style docstrings 28 | ] 29 | 30 | exclude_patterns = [ 31 | "_build", # Exclude the build directory 32 | "**.ipynb_checkpoints", # ignores WARNING: Pygments lexer name 'ipython3' is not known 33 | ] 34 | nbsphinx_allow_errors = True # Allow errors in 
notebooks 35 | apidoc_modules = [ 36 | { 37 | "path": "../src/spei", 38 | "destination": "_api", 39 | "separate_modules": True, 40 | "max_depth": 2, 41 | } 42 | ] 43 | 44 | # -- Options for HTML output ------------------------------------------------- 45 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 46 | 47 | html_theme = "alabaster" 48 | html_static_path = ["_static"] 49 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy Docs 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | permissions: 13 | contents: write 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.11' 22 | check-latest: true 23 | 24 | - name: Set up R 25 | uses: r-lib/actions/setup-r@v2 26 | with: 27 | r-version: '4.3' 28 | 29 | - name: Set up tox environment 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install tox 33 | tox -e docu --notest 34 | 35 | - name: Install Pandoc and build documentation 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install -y pandoc 39 | tox -e docu --skip-pkg-install 40 | 41 | - name: Publish HTML output on gh-pages branch 42 | uses: peaceiris/actions-gh-pages@v4 43 | with: 44 | github_token: ${{ secrets.GITHUB_TOKEN }} 45 | publish_dir: ./docs/_build/html 46 | enable_jekyll: false 47 | 48 | deploy: 49 | needs: build 50 | runs-on: ubuntu-latest 51 | permissions: 52 | contents: read 53 | pages: write 54 | id-token: write 55 | concurrency: 56 | group: pages 57 | cancel-in-progress: false 58 | environment: 59 | name: github-pages 60 | url: ${{ steps.deployment.outputs.page_url }} 61 | steps: 62 | - name: Checkout code 63 | 
uses: actions/checkout@v4 64 | with: 65 | ref: gh-pages 66 | 67 | - name: Setup Pages 68 | uses: actions/configure-pages@v3 69 | 70 | - name: Upload artifact 71 | uses: actions/upload-pages-artifact@v3 72 | with: 73 | path: '.' 74 | 75 | - name: Deploy to GitHub Pages 76 | id: deployment 77 | uses: actions/deploy-pages@v4 78 | -------------------------------------------------------------------------------- /tests/test_knmi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | from spei.knmi import ( 6 | deficit_apr1, 7 | deficit_gdd, 8 | deficit_max, 9 | deficit_oct1, 10 | deficit_wet, 11 | get_cumulative_deficit, 12 | get_yearly_temp_date, 13 | ) 14 | from spei.plot import deficit_knmi 15 | 16 | 17 | @pytest.fixture 18 | def temp(deficit: pd.Series) -> pd.Series: 19 | sine_wave = np.sin(2 * np.pi * np.arange(len(deficit)) / 365) * 15 + 15 20 | temp = pd.Series(data=sine_wave, index=deficit.index, dtype=float) 21 | return temp 22 | 23 | 24 | def test_get_yearly_temp_date(temp): 25 | threshold = 440.0 26 | result = get_yearly_temp_date(temp=temp, threshold=threshold) 27 | assert isinstance(result, pd.Series) 28 | 29 | 30 | def test_get_cumulative_deficit(deficit): 31 | startdate = pd.Timestamp("2000-04-01") 32 | enddate = pd.Timestamp("2000-09-30") 33 | result = get_cumulative_deficit( 34 | deficit=deficit, startdate=startdate, enddate=enddate 35 | ) 36 | assert isinstance(result, pd.DataFrame) 37 | assert not result.empty 38 | 39 | 40 | def test_deficit_oct1(deficit): 41 | result = deficit_oct1(deficit=deficit) 42 | assert isinstance(result, pd.Series) 43 | assert result.name == "Doct1" 44 | 45 | 46 | def test_deficit_max(deficit): 47 | result = deficit_max(deficit=deficit) 48 | assert isinstance(result, pd.Series) 49 | assert result.name == "Dmax" 50 | 51 | 52 | def test_deficit_apr1(deficit): 53 | result = deficit_apr1(deficit=deficit) 54 | assert 
isinstance(result, pd.Series) 55 | assert result.name == "DIapr1" 56 | 57 | 58 | def test_deficit_gdd(deficit, temp): 59 | threshold = 440 60 | result = deficit_gdd( 61 | deficit=deficit, 62 | temp=temp, 63 | threshold=threshold, 64 | ) 65 | assert isinstance(result, pd.Series) 66 | assert result.name == "DIgdd" 67 | 68 | 69 | def test_deficit_wet(deficit): 70 | result = deficit_wet(deficit=deficit) 71 | assert isinstance(result, pd.Series) 72 | assert result.name == "DIwet" 73 | 74 | 75 | def test_plot_knmi_deficit(deficit: pd.Series): 76 | """Test the plot function for the deficit.""" 77 | startdate = pd.Timestamp("2000-04-01") 78 | enddate = pd.Timestamp("2000-09-30") 79 | cumdf = get_cumulative_deficit( 80 | deficit=deficit, 81 | startdate=startdate, 82 | enddate=enddate, 83 | allow_below_zero=False, 84 | ) 85 | ax = deficit_knmi(cumdf) 86 | assert ax is not None 87 | assert ax.get_ylabel() == "Precipitation deficit (mm)" 88 | -------------------------------------------------------------------------------- /src/spei/rai.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from .utils import get_data_series, group_yearly_df, validate_series 5 | 6 | 7 | def rai(series: pd.Series) -> pd.Series: 8 | """ 9 | Calculate the Rainfall Anomaly Index (RAI) for a given time 10 | series of precipitation data. 11 | 12 | Parameters 13 | ---------- 14 | series : pd.Series 15 | A pandas Series containing precipitation data. 16 | 17 | Returns 18 | ------- 19 | pd.Series 20 | A pandas Series containing the RAI values. 21 | 22 | References 23 | ---------- 24 | van Rooy, M.P. A Rainfall Anomaly Index Independent of Time and Space. Notos. 1965. 
25 | """ 26 | series = validate_series(series) 27 | pm = series.mean() 28 | pi_above = series > pm 29 | rai = pd.Series(np.nan, index=series.index, dtype=float) 30 | rai[pi_above] = 3.0 * (series[pi_above] - pm) / (series.nlargest(10).mean() - pm) 31 | rai[~pi_above] = ( 32 | -3.0 * (series[~pi_above] - pm) / (series.nsmallest(10).mean() - pm) 33 | ) 34 | return rai 35 | 36 | 37 | def mrai(series: pd.Series, sf: float = 1.7) -> pd.Series: 38 | """Calculate the Modified Rainfall Anomaly Index (MRAI) 39 | for a given time series of precipitation data. 40 | 41 | Parameters 42 | ---------- 43 | series : pd.Series 44 | A pandas Series containing precipitation data. 45 | sf : float 46 | Scaling factor for the MRAI calculation. Default is 1.7. 47 | 48 | Returns 49 | ------- 50 | pd.Series 51 | A pandas Series containing the MRAI values. 52 | 53 | References 54 | ---------- 55 | Hänsel, S., Schucknecht, A. and Matschullat J. The Modified Rainfall 56 | Anomaly Index (mRAI) — is this an alternative to the Standardised 57 | Precipitation Index (SPI) in evaluating future extreme precipitation 58 | characteristics? Theoretical and Applied Climatology. 2015. 
59 | """ 60 | series = validate_series(series) 61 | mrai = pd.Series(np.nan, index=series.index, dtype=float) 62 | group_df = group_yearly_df(series=series) 63 | for _, gr in group_df.groupby(pd.Grouper(freq="MS")): 64 | gr_series = get_data_series(gr) 65 | pm = gr_series.mean() 66 | pi_above = gr_series > pm 67 | e_above = gr_series[gr_series > gr_series.quantile(0.9)].mean() 68 | e_below = gr_series[gr_series < gr_series.quantile(0.1)].mean() 69 | mrai_gr = pd.Series(np.nan, index=gr_series.index, dtype=float) 70 | mrai_gr[pi_above] = sf * (gr_series[pi_above] - pm) / (e_above - pm) 71 | mrai_gr[~pi_above] = -sf * (gr_series[~pi_above] - pm) / (e_below - pm) 72 | mrai.loc[mrai_gr.index] = mrai_gr.values 73 | 74 | return mrai 75 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | runs-on: ${{ matrix.os }} 10 | continue-on-error: ${{ matrix.experimental }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | - name: Test suite with py310-ubuntu 16 | python: "3.10" 17 | os: ubuntu-latest 18 | toxenv: py310 19 | experimental: false 20 | - name: Test suite with py311-ubuntu 21 | python: "3.11" 22 | os: ubuntu-latest 23 | toxenv: py311 24 | experimental: false 25 | - name: Test suite with py312-ubuntu 26 | python: "3.12" 27 | os: ubuntu-latest 28 | toxenv: py312 29 | experimental: false 30 | - name: Test suite with py313-ubuntu 31 | python: "3.13" 32 | os: ubuntu-latest 33 | toxenv: py313 34 | experimental: false 35 | - name: Type check with mypy 36 | python: "3.10" 37 | os: ubuntu-latest 38 | toxenv: type 39 | experimental: false 40 | - name: Formatting and linting with ruff 41 | python: "3.10" 42 | os: ubuntu-latest 43 | toxenv: ruff 44 | experimental: false 45 | - name: Codacy Coverage Report 46 | python: "3.10" 47 | os: 
ubuntu-latest 48 | toxenv: coverage 49 | experimental: false 50 | 51 | name: ${{ matrix.name }} 52 | env: 53 | # Color Output 54 | # Rich (pip) 55 | FORCE_COLOR: 1 56 | # Tox 57 | PY_COLORS: 1 58 | # MyPy 59 | TERM: xterm-color 60 | MYPY_FORCE_COLOR: 1 61 | MYPY_FORCE_TERMINAL_WIDTH: 200 62 | # Pytest 63 | PYTEST_ADDOPTS: "--color=yes" 64 | steps: 65 | - uses: actions/checkout@v4 66 | 67 | - name: Set up Python ${{ matrix.python }} 68 | uses: actions/setup-python@v5 69 | with: 70 | python-version: ${{ matrix.python }} 71 | check-latest: true 72 | 73 | - name: Set up tox environment 74 | run: | 75 | python -m pip install --upgrade pip 76 | pip install tox 77 | tox -e ${{ matrix.toxenv }} --notest 78 | 79 | - name: Test 80 | run: tox -e ${{ matrix.toxenv }} --skip-pkg-install 81 | 82 | - name: Run codacy-coverage-reporter 83 | if: ${{ matrix.toxenv == 'coverage' && github.repository == 'martinvonk/spei' && success() }} 84 | uses: codacy/codacy-coverage-reporter-action@master 85 | with: 86 | project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} 87 | coverage-reports: coverage.xml 88 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "spei" 7 | dynamic = ["version"] 8 | authors = [{ name = "Martin Vonk", email = "vonk.mart@gmail.com" }] 9 | description = "A simple Python package to calculate drought indices for time series such as the SPI, SPEI and SGI." 
10 | readme = "README.md" 11 | license = { file = "LICENSE" } 12 | requires-python = ">=3.10" 13 | dependencies = ["numpy", "scipy", "matplotlib", "pandas"] 14 | classifiers = [ 15 | "Programming Language :: Python :: 3 :: Only", 16 | "Programming Language :: Python :: 3.10", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3.12", 19 | "Programming Language :: Python :: 3.13", 20 | "Topic :: Scientific/Engineering :: Hydrology", 21 | "Intended Audience :: Science/Research", 22 | "License :: OSI Approved :: MIT License", 23 | "Operating System :: OS Independent", 24 | "Typing :: Typed", 25 | ] 26 | 27 | [project.urls] 28 | homepage = "https://github.com/martinvonk/spei" 29 | repository = "https://github.com/martinvonk/spei" 30 | 31 | [project.optional-dependencies] 32 | notebook = ["ipykernel", "pastas", "rpy2", "standard-precip"] 33 | documentation = ["sphinx>=8.2", "myst-parser", "nbsphinx"] 34 | ruffing = ["ruff"] 35 | typing = ["mypy", "pandas-stubs"] 36 | pytesting = ["pytest>=7", "pytest-cov", "pytest-sugar"] 37 | coveraging = ["coverage"] 38 | dev = ["spei[ruffing,typing,pytesting,coveraging]", "tox"] 39 | 40 | [tool.setuptools.dynamic] 41 | version = { attr = "spei._version.__version__" } 42 | 43 | [tool.mypy] 44 | mypy_path = "src" 45 | 46 | [[tool.mypy.overrides]] 47 | module = ["matplotlib.*", "scipy.stats.*"] 48 | ignore_missing_imports = true 49 | 50 | [tool.pytest.ini_options] 51 | pythonpath = ["src"] 52 | 53 | [tool.ruff] 54 | extend-include = ["*.ipynb"] 55 | lint.extend-select = ["I"] 56 | show-fixes = true 57 | fix = true 58 | 59 | [tool.tox] 60 | requires = ["tox>=4"] 61 | env_list = ["py310", "py311", "py312", "py313", "type", "ruff"] 62 | 63 | [tool.tox.env_run_base] 64 | description = "run unit tests" 65 | extras = ["pytesting"] 66 | commands = [["pytest", "tests"]] 67 | 68 | [tool.tox.env.type] 69 | description = "run type checks" 70 | extras = ["typing"] 71 | commands = [["mypy", "src"]] 72 | 73 | 
[tool.tox.env.ruff] 74 | description = "run ruff checks" 75 | extras = ["ruffing"] 76 | commands = [ 77 | [ 78 | "ruff", 79 | "check", 80 | "--extend-select", 81 | "I", 82 | "--preview", 83 | ], 84 | [ 85 | "ruff", 86 | "format", 87 | "--check", 88 | ], 89 | ] 90 | 91 | [tool.tox.env.docu] 92 | description = "build documentation" 93 | extras = ["documentation", "notebook"] 94 | commands = [["sphinx-build", "-M", "html", "docs", "docs/_build"]] 95 | 96 | [tool.tox.env.coverage] 97 | description = "get coverage report xml" 98 | extras = ["coveraging", "pytesting"] 99 | commands = [["coverage", "run", "-m", "pytest", "tests"], ["coverage", "xml"]] 100 | -------------------------------------------------------------------------------- /tests/test_validate.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytest 4 | from pandas import DataFrame, DatetimeIndex, Index, Series, Timestamp, to_datetime 5 | 6 | from spei.utils import infer_frequency, validate_index, validate_series 7 | 8 | 9 | def test_validate_index(caplog) -> None: 10 | caplog.set_level(logging.INFO) 11 | series = Series([1.0, 2.0, 3.0], index=["2018", "2019", "2020"]) 12 | validate_index(series.index) 13 | msg = ( 14 | f"Expected the index to be a DatetimeIndex. Automatically converted " 15 | f"{type(series.index)} using pd.to_datetime(Index)\n" 16 | ) 17 | assert msg in caplog.text 18 | 19 | 20 | def test_validate_index_duplicated(caplog) -> None: 21 | caplog.set_level(logging.ERROR) 22 | series = Series( 23 | [1.0, 1.0], 24 | index=DatetimeIndex([Timestamp("2000-01-01"), Timestamp("2000-01-01")]), 25 | ) 26 | with pytest.raises(ValueError): 27 | validate_index(series.index) 28 | msg = ( 29 | "Duplicated indices found. Please remove them. 
For instance by using" 30 | "`series = series.loc[~series.index.duplicated(keep='first/last')]`" 31 | ) 32 | assert msg in caplog.text 33 | 34 | 35 | def test_validate_series() -> None: 36 | with pytest.raises(TypeError): 37 | validate_series([1, 2, 3]) 38 | 39 | 40 | def test_validate_series_df_1d(caplog) -> None: 41 | df = DataFrame({"s": [1, 2, 3]}, index=to_datetime([1, 2, 3])) 42 | validate_series(df) 43 | msg = ( 44 | "Please convert series of type pandas.DataFrame to a" 45 | "pandas.Series using DataFrame.squeeze(). Now done automatically.\n" 46 | ) 47 | assert msg in caplog.text 48 | 49 | 50 | def test_validate_series_df_2d() -> None: 51 | with pytest.raises(TypeError): 52 | df = DataFrame({"s1": [1, 2, 3], "s2": [1, 2, 3]}, index=to_datetime([1, 2, 3])) 53 | validate_series(df) 54 | 55 | 56 | def test_infer_frequency_monthly_start(): 57 | index = DatetimeIndex(["2020-01-01", "2020-02-01", "2020-03-01"]) 58 | assert infer_frequency(index) == "MS" # Assuming pandas version >= 2.2.0 59 | 60 | 61 | def test_infer_frequency_monthly_end(): 62 | index = DatetimeIndex(["2020-01-31", "2020-02-28", "2020-03-31"]) 63 | assert infer_frequency(index) == "BME" # Assuming pandas version >= 2.2.0 64 | 65 | 66 | def test_infer_frequency_weekly(): 67 | index = DatetimeIndex(["2020-01-01", "2020-01-08", "2020-01-15"]) 68 | assert infer_frequency(index) == "W" 69 | 70 | 71 | def test_infer_frequency_daily(): 72 | index = DatetimeIndex(["2020-01-01", "2020-01-02", "2020-01-03"]) 73 | assert infer_frequency(index) == "D" 74 | 75 | 76 | def test_infer_frequency_no_infer(): 77 | index = DatetimeIndex(["2020-01-01", "2020-01-03", "2020-01-07"]) 78 | assert infer_frequency(index) == "MS" # Assuming pandas version >= 2.2.0 79 | 80 | 81 | def test_infer_frequency_non_datetime_index(): 82 | index = Index(["2020-01-01", "2020-02-01", "2020-03-01"]) 83 | assert infer_frequency(index) == "MS" # Assuming pandas version >= 2.2.0 84 | 85 | 86 | def test_infer_frequency_invalid_index(): 87 | 
index = Index(["a", "b", "c"]) 88 | with pytest.raises(ValueError), pytest.warns(UserWarning): 89 | infer_frequency(index) 90 | -------------------------------------------------------------------------------- /src/spei/climdex.py: -------------------------------------------------------------------------------- 1 | # https://www.climdex.org/ 2 | 3 | from pandas import Series 4 | 5 | from .utils import validate_index, validate_series 6 | 7 | 8 | def rxnday(series: Series, interval: str, period: str = "30D") -> Series: 9 | """Maximum consecutive precipitation amount over an interval""" 10 | series = validate_series(series) 11 | _ = validate_index(series.index) 12 | 13 | return series.rolling(interval).sum().rolling(period).max() 14 | 15 | 16 | def rx1day(series: Series, interval: str = "1D", period: str = "30D") -> Series: 17 | """Maximum 1-day precipitation amount""" 18 | return rxnday(series=series, interval=interval, period=period) 19 | 20 | 21 | def rx5day(series: Series, interval: str = "5D", period: str = "30D") -> Series: 22 | """Maximum consecutive precipitation amount over an 5-day interval""" 23 | return rxnday(series=series, interval=interval, period=period) 24 | 25 | 26 | def sdii(series: Series, threshold: float = 1.0, period: str = "30D") -> Series: 27 | """Simple precipitation intensity index""" 28 | series = validate_series(series) 29 | _ = validate_index(series.index) 30 | 31 | w = series >= threshold 32 | 33 | return series.loc[w].resample(period).sum() / w.sum() 34 | 35 | 36 | def rnmm(series: Series, threshold: float, period: str = "1YE") -> Series: 37 | """Annual count of days when precipitation ≥ n mm. 
n is a user-defined threshold""" 38 | series = validate_series(series) 39 | _ = validate_index(series.index) 40 | 41 | w = series >= threshold 42 | 43 | return w.resample(period).sum() 44 | 45 | 46 | def r10mm(series: Series, threshold: float = 10.0, period: str = "1YE") -> Series: 47 | """Annual count of days when precipitation ≥ 10 mm""" 48 | return rnmm(series=series, threshold=threshold, period=period) 49 | 50 | 51 | def r20mm(series: Series, threshold: float = 20.0, period: str = "1YE") -> Series: 52 | """Annual count of days when precipitation ≥ 20 mm""" 53 | return rnmm(series=series, threshold=threshold, period=period) 54 | 55 | 56 | def cdd(series: Series, threshold: float = 1.0, period: str = "365D") -> Series: 57 | """Maximum length of dry spell: maximum number of consecutive days with 58 | precipitation < 1mm""" 59 | series = validate_series(series) 60 | _ = validate_index(series.index) 61 | 62 | w = series < threshold 63 | 64 | return w.diff().rolling(period).sum().dropna().astype(int) 65 | 66 | 67 | def cwd(series: Series, threshold: float = 1.0, period: str = "365D") -> Series: 68 | """Maximum length of wet spell: maximum number of consecutive days with 69 | precipitation ≥ 1mm""" 70 | series = validate_series(series) 71 | _ = validate_index(series.index) 72 | 73 | w = series >= threshold 74 | 75 | return w.diff().rolling(period).sum().dropna().astype(int) 76 | 77 | 78 | def prcptot(series: Series, period: str = "1YE") -> Series: 79 | """Total precipitation on wet days over a certain period""" 80 | series = validate_series(series) 81 | _ = validate_index(series.index) 82 | 83 | return series.resample(period).sum() 84 | 85 | 86 | def rnnp( 87 | series: Series, quantile: float, threshold: float = 1.0, period: str = "1YE" 88 | ) -> Series: 89 | """Total amount of precipitation on wet days above certain quantile""" 90 | series = validate_series(series) 91 | _ = validate_index(series.index) 92 | 93 | series_w = series[series >= threshold] 94 | wq = 
series_w > series_w.quantile(quantile) 95 | 96 | return series_w.loc[wq].resample(period).sum() 97 | 98 | 99 | def r95p( 100 | series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1YE" 101 | ) -> Series: 102 | """Total amount of precipitation on very wet days""" 103 | return rnnp(series=series, quantile=quantile, threshold=threshold, period=period) 104 | 105 | 106 | def r99p( 107 | series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1YE" 108 | ) -> Series: 109 | """Total amount of precipitation on extremely wet days""" 110 | return rnnp(series=series, quantile=quantile, threshold=threshold, period=period) 111 | 112 | 113 | def r95ptot( 114 | series: Series, quantile: float = 0.95, threshold: float = 1.0, period: str = "1YE" 115 | ) -> Series: 116 | """Contribution to total precipitation from very wet days""" 117 | r95 = r95p(series=series, quantile=quantile, threshold=threshold, period=period) 118 | tot = prcptot(series=series, period=period) 119 | return r95 * 100 / tot 120 | 121 | 122 | def r99ptot( 123 | series: Series, quantile: float = 0.99, threshold: float = 1.0, period: str = "1YE" 124 | ) -> Series: 125 | """Contribution to total precipitation from extremely wet days""" 126 | r99 = r99p(series=series, quantile=quantile, threshold=threshold, period=period) 127 | tot = prcptot(series=series, period=period) 128 | return r99 * 100 / tot 129 | -------------------------------------------------------------------------------- /docs/examples/example08_rai.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "607da258", 6 | "metadata": {}, 7 | "source": [ 8 | "# Rainfall Anomaly Index\n", 9 | "\n", 10 | "Based on van Rooy, M.P. (1965). A Rainfall Anomaly Index Independent of Time and Space. Notos." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "fbc30e7f", 16 | "metadata": {}, 17 | "source": [ 18 | "## Packages" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "dcef936f", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import matplotlib as mpl\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "import pandas as pd\n", 31 | "\n", 32 | "from spei.rai import mrai, rai" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "40d04848", 38 | "metadata": {}, 39 | "source": [ 40 | "## Data" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "0c274c53", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 51 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).resample(\"MS\").sum()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "edfc521d", 57 | "metadata": {}, 58 | "source": [ 59 | "## Calculate Index" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "11dcd5f4", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "ra_index = rai(prec)\n", 70 | "mra_index = mrai(prec, sf=1.7)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "a6dc3eeb", 76 | "metadata": {}, 77 | "source": [ 78 | "## Visualize" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "92253a3f", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "f, ax = plt.subplots(2, 1, figsize=(12, 6), sharex=True, sharey=True)\n", 89 | "ax[0].plot(ra_index.index, ra_index.values, color=\"C0\")\n", 90 | "ax[1].plot(mra_index.index, mra_index.values, color=\"C1\")\n", 91 | "ax[0].set_ylabel(\"Rainfall Anomaly Index\")\n", 92 | "ax[1].set_ylabel(\"Modified Rainfall Anomaly Index\")\n", 93 | "ax[0].grid(True)\n", 94 | "ax[1].grid(True)\n", 95 | 
"ax[1].yaxis.set_major_locator(mpl.ticker.MultipleLocator(1))\n", 96 | "ax[1].xaxis.set_major_locator(mpl.dates.YearLocator(1))\n", 97 | "ax[1].set_xlim(pd.Timestamp(\"2010-01-01\"), pd.Timestamp(\"2020-12-31\"))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "fc772d44", 103 | "metadata": {}, 104 | "source": [ 105 | "Interpretation based on Hansel (2015) - [The Modified Rainfall Anomaly Index (mRAI)](https://doi.org/10.1007/s00704-015-1389-y)\n", 106 | "\n", 107 | "| RAI | Description | mRAI | Description |\n", 108 | "| :---------------------------------- | :---------------------------- | :-------------------------------- | :---------------------------- |\n", 109 | "| $\\ge$ 3.00 | Extremely wet | $\\ge$ 2.00 | Extremely wet |\n", 110 | "| 2.00 to 2.99 | Very wet | 1.50 to 1.99 | Very wet |\n", 111 | "| 1.00 to 1.99 | Moderately wet | 1.00 to 1.49 | Moderately wet |\n", 112 | "| 0.50 to 0.99 | Slightly wet | 0.50 to 0.99 | Slightly wet |\n", 113 | "| -0.49 to 0.49 | Near normal | -0.49 to 0.49 | Near normal |\n", 114 | "| -0.99 to -0.50 | Slightly dry | -0.99 to -0.50 | Slightly dry |\n", 115 | "| -1.99 to -1.00 | Moderately dry | -1.49 to -1.00 | Moderately dry |\n", 116 | "| -2.99 to -2.00 | Very dry | -1.99 to -1.50 | Very dry |\n", 117 | "| ≤-3.00 | Extremely dry | ≤-2.00 | Extremely dry |" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "75058050", 123 | "metadata": {}, 124 | "source": [] 125 | } 126 | ], 127 | "metadata": { 128 | "kernelspec": { 129 | "display_name": "SPEI", 130 | "language": "python", 131 | "name": "python3" 132 | }, 133 | "language_info": { 134 | "codemirror_mode": { 135 | "name": "ipython", 136 | "version": 3 137 | }, 138 | "file_extension": ".py", 139 | "mimetype": "text/x-python", 140 | "name": "python", 141 | "nbconvert_exporter": "python", 142 | "pygments_lexer": "ipython3", 143 | "version": "3.13.1" 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 5 148 | } 149 | 
-------------------------------------------------------------------------------- /tests/test_si.py: -------------------------------------------------------------------------------- 1 | from pandas import DataFrame, Series, Timestamp 2 | from scipy.stats import norm 3 | 4 | from spei import SI, sgi, spei, spi, ssfi, ssmi 5 | from spei.dist import Dist 6 | 7 | 8 | def test_spi(prec: Series) -> None: 9 | precr = prec.rolling("30D", min_periods=30).sum().dropna() 10 | spi(precr, fit_freq="MS", prob_zero=True) 11 | 12 | 13 | def test_spei(prec: Series, evap: Series) -> None: 14 | n = (prec - evap).rolling("30D", min_periods=30).sum().dropna() 15 | spei(n, fit_freq="MS") 16 | 17 | 18 | def test_sgi(head: Series) -> None: 19 | sgi(head, fit_freq="MS") 20 | 21 | 22 | def test_ssfi_timescale(prec: Series) -> None: 23 | ssfi(prec, timescale=30) 24 | 25 | 26 | def test_ssmi(prec: Series) -> None: 27 | ssmi(prec, dist=norm, fit_freq="MS") 28 | 29 | 30 | def test_window(prec: Series, evap: Series) -> None: 31 | n = (prec - evap).rolling("30D", min_periods=30).sum().dropna() 32 | spei(n, fit_freq="W", fit_window=3) 33 | 34 | 35 | def test_window_even(prec: Series, evap: Series, caplog) -> None: 36 | n = (prec - evap).rolling("30D", min_periods=30).sum().dropna() 37 | spei(n, fit_freq="W", fit_window=4) 38 | assert "Window should be odd. 
Setting the window value to" in caplog.text 39 | 40 | 41 | def test_SI(prec: Series) -> None: 42 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS") 43 | si.fit_distribution() 44 | si.pdf() 45 | dist = si.get_dist(Timestamp("2010-01-01")) 46 | dist.ks_test() 47 | 48 | 49 | def test_SI_post_init_timescale(prec: Series) -> None: 50 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS") 51 | assert si.series.equals(prec.rolling(30, min_periods=30).sum().dropna()), ( 52 | "Timescale rolling sum not applied correctly" 53 | ) 54 | 55 | 56 | def test_SI_post_init_fit_freq_infer(prec: Series) -> None: 57 | si = SI(prec, dist=norm, timescale=0) 58 | assert si.fit_freq is not None, "Frequency inference failed" 59 | 60 | 61 | def test_SI_post_init_grouped_year(prec: Series) -> None: 62 | si = SI(prec, dist=norm, timescale=0, fit_freq="MS") 63 | assert isinstance(si._grouped_year, DataFrame), "Grouped year DataFrame not created" 64 | 65 | 66 | def test_SI_post_init_fit_window_adjustment(prec: Series) -> None: 67 | si = SI(prec, dist=norm, timescale=0, fit_freq="D", fit_window=2) 68 | assert si.fit_window == 3, "Fit window not adjusted to odd number" 69 | 70 | 71 | def test_SI_post_init_fit_window_minimum(prec: Series) -> None: 72 | si = SI(prec, dist=norm, timescale=0, fit_freq="D", fit_window=1) 73 | assert si.fit_window == 3, "Fit window not adjusted to minimum value" 74 | 75 | 76 | def test_fit_distribution_normal_scores_transform(prec: Series) -> None: 77 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS", normal_scores_transform=True) 78 | si.fit_distribution() 79 | assert not si._dist_dict, ( 80 | "Distribution dictionary should be empty when using normal scores transform" 81 | ) 82 | 83 | 84 | def test_fit_distribution_with_fit_window(prec: Series) -> None: 85 | si = SI(prec, dist=norm, timescale=30, fit_freq="D", fit_window=5) 86 | si.fit_distribution() 87 | assert si._dist_dict, ( 88 | "Distribution dictionary should not be empty when using fit window" 89 | ) 
90 | for dist in si._dist_dict.values(): 91 | assert isinstance(dist, Dist), ( 92 | "Items in distribution dictionary should be of type Dist" 93 | ) 94 | 95 | 96 | def test_fit_distribution_with_fit_freq(prec: Series) -> None: 97 | si = SI(prec, dist=norm, timescale=30, fit_freq="MS") 98 | si.fit_distribution() 99 | assert si._dist_dict, ( 100 | "Distribution dictionary should not be empty when using fit frequency" 101 | ) 102 | for dist in si._dist_dict.values(): 103 | assert isinstance(dist, Dist), ( 104 | "Items in distribution dictionary should be of type Dist" 105 | ) 106 | 107 | 108 | def test_fit_distribution_invalid_fit_freq_with_window(prec: Series) -> None: 109 | si = SI(prec, dist=norm, timescale=30, fit_freq="M", fit_window=5) 110 | try: 111 | si.fit_distribution() 112 | except ValueError as e: 113 | assert ( 114 | str(e) 115 | == "Frequency fit_freq must be 'D' or 'W', not 'M', if a fit_window is provided." 116 | ) 117 | else: 118 | assert False, "ValueError not raised for invalid fit frequency with fit window" 119 | 120 | 121 | def test_ppf(prec: Series) -> None: 122 | si = SI(prec, dist=norm, timescale=1, fit_freq="MS") 123 | si.fit_distribution() 124 | ppf = si.ppf(0.5) 125 | assert isinstance(ppf, Series), "PPF result should be a Pandas Series" 126 | assert len(ppf) == len(si.series), ( 127 | "PPF result length does not match input series length" 128 | ) 129 | 130 | 131 | def test_ppf_nsf(prec: Series) -> None: 132 | si = SI(prec, dist=norm, timescale=1, fit_freq="MS", normal_scores_transform=True) 133 | si.fit_distribution() 134 | ppf = si.ppf(0.5) 135 | assert isinstance(ppf, Series), "PPF result should be a Pandas Series" 136 | assert len(ppf) == len(si.series), ( 137 | "PPF result length does not match input series length" 138 | ) 139 | -------------------------------------------------------------------------------- /src/spei/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from 
calendar import isleap 3 | 4 | from numpy import array, nan 5 | from packaging.version import parse as parse_version 6 | from pandas import ( 7 | DataFrame, 8 | DatetimeIndex, 9 | Grouper, 10 | Index, 11 | Series, 12 | Timedelta, 13 | concat, 14 | infer_freq, 15 | to_datetime, 16 | ) 17 | 18 | pd_version = parse_version(__import__("pandas").__version__) 19 | 20 | 21 | def validate_series(series: Series) -> Series: 22 | series = series.copy() 23 | 24 | if not isinstance(series, Series): 25 | if isinstance(series, DataFrame): 26 | if len(series.columns) == 1: 27 | logging.warning( 28 | "Please convert series of type pandas.DataFrame to a" 29 | "pandas.Series using DataFrame.squeeze(). Now done automatically." 30 | ) 31 | series = series.squeeze() 32 | else: 33 | raise TypeError( 34 | "Please provide a pandas.Series instead of a pandas.DataFrame" 35 | ) 36 | else: 37 | raise TypeError(f"Please provide a Pandas Series instead of {type(series)}") 38 | 39 | index = validate_index(series.index) 40 | 41 | return series.reindex(index, copy=True) 42 | 43 | 44 | def validate_index(index: Index) -> DatetimeIndex: 45 | index = index.copy() 46 | 47 | if not isinstance(index, DatetimeIndex): 48 | logging.info( 49 | f"Expected the index to be a DatetimeIndex. Automatically converted " 50 | f"{type(index)} using pd.to_datetime(Index)" 51 | ) 52 | index = DatetimeIndex(to_datetime(index)) 53 | 54 | if index.has_duplicates: 55 | msg = ( 56 | "Duplicated indices found. Please remove them. 
For instance by" 57 | " using `series = " 58 | "series.loc[~series.index.duplicated(keep='first/last')]`" 59 | ) 60 | logging.error(msg) 61 | raise ValueError(msg) 62 | 63 | return index 64 | 65 | 66 | def infer_frequency(index: Index | DatetimeIndex) -> str: 67 | """Infer frequency""" 68 | 69 | index = validate_index(index) 70 | 71 | inf_freq = infer_freq(index) 72 | 73 | if inf_freq is None: 74 | logging.info( 75 | "Could not infer frequency from index, using monthly frequency instead" 76 | ) 77 | inf_freq = "MS" if pd_version >= parse_version("2.2.0") else "M" 78 | else: 79 | logging.info(f"Inferred frequency '{inf_freq}' from index") 80 | 81 | if "W-" in inf_freq: 82 | logging.info(f"Converted frequncy weekly '{inf_freq}' to 'W'") 83 | inf_freq = "W" 84 | 85 | return inf_freq 86 | 87 | 88 | def group_yearly_df(series: Series) -> DataFrame: 89 | """Group Series per year in a DataFrame. 90 | 91 | This function groups a time series by year, creating a DataFrame where each 92 | column corresponds to a year (as int). The index of the DataFrame is set to 93 | the corresponding dates (in the year 2000). 94 | 95 | Parameters: 96 | ----------- 97 | series : pd.Series 98 | A pandas Series with a DateTime index. 
99 | 100 | Returns: 101 | -------- 102 | pd.DataFrame 103 | """ 104 | strfstr: str = "%m-%d %H:%M:%S" 105 | grs = {} 106 | freq = "YE" if pd_version >= parse_version("2.2.0") else "Y" 107 | for year_timestamp, gry in series.groupby(Grouper(freq=freq)): 108 | index = validate_index(gry.index) 109 | gry.index = to_datetime( 110 | "2000-" + index.strftime(strfstr), format="%Y-" + strfstr 111 | ) 112 | year = getattr(year_timestamp, "year") # type: str 113 | grs[year] = gry 114 | return concat(grs, axis=1) 115 | 116 | 117 | def get_data_series(group_df: DataFrame) -> Series: 118 | """Transform grouped dataframe by yearly values back to time series.""" 119 | strfstr: str = "%m-%d %H:%M:%S" 120 | index = validate_index(group_df.index) 121 | idx = array( 122 | [(f"{col}-" + index.strftime(strfstr)).tolist() for col in group_df.columns] 123 | ).flatten() 124 | # remove illegal 29 february for non leap years created by group_yearly_df 125 | boolidx = ~array( 126 | [ 127 | (x.split(" ")[0].split("-", 1)[1] == "02-29") 128 | and not isleap(int(x.split(" ")[0].split("-")[0])) 129 | for x in idx 130 | ] 131 | ) 132 | 133 | dt_idx = to_datetime(idx[boolidx], format="%Y-" + strfstr) 134 | values = group_df.transpose().values.flatten()[boolidx] 135 | return Series(values, index=dt_idx, dtype=float).dropna() 136 | 137 | 138 | def daily_window_group_yearly_df(dfval: DataFrame, period: int) -> DataFrame: 139 | """Fill a period of daily values in grouped by yearly DataFrame to get 140 | cyclic rolling window. 
141 | """ 142 | dfval_window_index_start = [ 143 | dfval.index[0] + Timedelta(value=-i, unit="D") 144 | for i in reversed(range(1, period + 1)) 145 | ] 146 | dfval_window_index_end = [ 147 | dfval.index[-1] + Timedelta(value=i, unit="D") for i in range(1, period + 1) 148 | ] 149 | dfval_window_index = DatetimeIndex( 150 | dfval_window_index_start + dfval.index.to_list() + dfval_window_index_end 151 | ) 152 | 153 | dfval_window = DataFrame( 154 | nan, index=dfval_window_index, columns=dfval.columns, dtype=float 155 | ) 156 | dfval_window.loc[dfval.index, dfval.columns] = dfval.values 157 | dfval_window.iloc[:period] = dfval.iloc[-period:].values 158 | dfval_window.iloc[-period:] = dfval.iloc[:period].values 159 | return dfval_window 160 | -------------------------------------------------------------------------------- /src/spei/dist.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Literal 3 | 4 | from numpy import std 5 | from pandas import Series 6 | from scipy.stats import kstest 7 | 8 | from ._typing import ContinuousDist 9 | 10 | 11 | @dataclass 12 | class Dist: 13 | data: Series = field(init=True, repr=False) 14 | dist: ContinuousDist 15 | loc: float = field(init=False, repr=True) 16 | scale: float = field(init=False, repr=True) 17 | pars: list[float] | None = field(init=False, repr=False) 18 | prob_zero: bool = field(default=False, init=True, repr=False) 19 | p0: float = field(default=0.0, init=False, repr=False) 20 | data_window: Series | None = field(default=None, init=True, repr=False) 21 | """ 22 | Represents a distribution associated with data. 23 | 24 | Parameters 25 | ---------- 26 | data : Series 27 | The input data for fitting the distribution. 28 | dist : ContinuousDist 29 | The SciPy continuous distribution associated to be fitted. 
30 | prob_zero : bool, default=False 31 | Flag indicating whether the probability of zero values in the series is 32 | calculated by the occurence. 33 | data_window : Optional[Series], default=None 34 | Subset of data for fitting more data (if provided). 35 | loc : float 36 | Location of the distribution 37 | scale : float 38 | Scale of the distribution 39 | pars : Optional[List[float]] 40 | Attribute storing additional distribution parameters (if applicable). 41 | p0 : float 42 | The probability of zero values in the data. Only calculated if prob_zero=True. 43 | 44 | Notes 45 | ----- 46 | The `fit_dist` method uses the `dist.fit` function from Scipy to estimate 47 | distribution parameters. If the fitted distribution requires additional 48 | parameters beyond `loc` and `scale`, they are stored in the `pars` attribute. 49 | """ 50 | 51 | def __post_init__(self): 52 | """ 53 | Post initializes the Dist class by fitting the distribution. 54 | """ 55 | data_fit = self.data_window if self.data_window is not None else self.data 56 | pars, loc, scale = self.fit_dist(data=data_fit, dist=self.dist) 57 | self.loc = loc 58 | self.scale = scale 59 | self.pars = pars 60 | 61 | if self.prob_zero: 62 | self.p0 = (data_fit == 0.0).sum() / len(data_fit) 63 | 64 | @staticmethod 65 | def fit_dist( 66 | data: Series, dist: ContinuousDist 67 | ) -> tuple[list[float] | None, float, float]: 68 | """ 69 | Fits a Scipy continuous distribution to the data. 70 | 71 | Parameters 72 | ---------- 73 | data : Series 74 | The input data for fitting. 75 | dist : ContinuousDist 76 | The continuous distribution to be fitted. 77 | 78 | Returns 79 | ------- 80 | Tuple 81 | Tuple containing distribution parameters (pars, loc, scale). 
82 | """ 83 | fit_tuple = dist.fit(data, scale=std(data)) 84 | if len(fit_tuple) == 2: 85 | loc, scale = fit_tuple 86 | pars = None 87 | else: 88 | *pars, loc, scale = fit_tuple 89 | return pars, loc, scale 90 | 91 | def cdf(self) -> Series: 92 | """Compute cumulative density function of a Scipy Continuous Distribution""" 93 | if self.pars is not None: 94 | cdf = self.dist.cdf( 95 | self.data.values, *self.pars, loc=self.loc, scale=self.scale 96 | ) 97 | else: 98 | cdf = self.dist.cdf(self.data.values, loc=self.loc, scale=self.scale) 99 | 100 | if self.prob_zero: 101 | cdf = self.p0 + (1 - self.p0) * cdf 102 | cdf[self.data == 0.0] = self.p0 103 | 104 | return Series(cdf, index=self.data.index, dtype=float) 105 | 106 | def pdf(self) -> Series: 107 | data_pdf = self.data.sort_values() 108 | if self.pars is not None: 109 | pdf = self.dist.pdf( 110 | data_pdf.values, *self.pars, loc=self.loc, scale=self.scale 111 | ) 112 | else: 113 | pdf = self.dist.pdf(data_pdf.values, loc=self.loc, scale=self.scale) 114 | 115 | if self.prob_zero: 116 | pdf = self.p0 + (1 - self.p0) * pdf 117 | pdf[self.data == 0.0] = self.p0 118 | 119 | return Series(pdf, index=data_pdf.index, dtype=float) 120 | 121 | def ppf(self, q: float) -> Series: 122 | """Compute percent point function (inverse of cdf) at q""" 123 | if self.pars is not None: 124 | ppf = self.dist.ppf(q, *self.pars, loc=self.loc, scale=self.scale) 125 | else: 126 | ppf = self.dist.ppf(q, loc=self.loc, scale=self.scale) 127 | 128 | return Series(ppf, index=self.data.index, dtype=float) 129 | 130 | def ks_test( 131 | self, 132 | method: Literal["auto", "exact", "approx", "asymp"] = "auto", 133 | ) -> float: 134 | """Fit a distribution and perform the two-sided 135 | Kolmogorov-Smirnov test for goodness of fit. The 136 | null hypothesis is that the data and distributions 137 | are identical, the alternative is that they are 138 | not identical. 
139 | 140 | Parameters 141 | ---------- 142 | method : Literal['auto', 'exact', 'approx', 'asymp'], optional 143 | Defines the distribution used for calculating the p-value. The 144 | following options are available (default is 'auto'): 'auto' selects 145 | one of the other options, 'exact' uses the exact distribution of 146 | test statistic, 'approx' approximates the two-sided probability 147 | with twice the one-sided probability, 'asymp' uses asymptotic 148 | distribution of test statistic 149 | 150 | Returns 151 | ------- 152 | float 153 | p-value 154 | 155 | References 156 | ------- 157 | Onnen, H.: Intro to Probability Distributions and Distribution 158 | Fitting with Pythons SciPy, 2021. 159 | """ 160 | args = ( 161 | (self.pars, self.loc, self.scale) 162 | if self.pars is not None 163 | else (self.loc, self.scale) 164 | ) 165 | kstest_result = kstest( 166 | rvs=self.data, cdf=self.dist.name, args=args, method=method 167 | ) 168 | # rej_h0 = kstest_result.pvalue < alpha 169 | return kstest_result.pvalue 170 | -------------------------------------------------------------------------------- /docs/examples/example06_treshold.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "eaa846c5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Threshold Drought\n", 9 | "\n", 10 | "## Load packages" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "247f31ac", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "from scipy import stats as sps\n", 22 | "\n", 23 | "import spei as si" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "7b1fa2a9", 29 | "metadata": {}, 30 | "source": [ 31 | "## Load data" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "34ae712e", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "df = 
pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 42 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 43 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 44 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"head\").dropna()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "5830c471", 50 | "metadata": {}, 51 | "source": [ 52 | "## Calculate precipitation surplus" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "f750f7ae", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "surplusd = prec - evap\n", 63 | "surplus = surplusd.resample(\"MS\").sum()\n", 64 | "surplus.plot()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "f013e9f1", 70 | "metadata": {}, 71 | "source": [ 72 | "## Fit distribution" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "6ae83c3e", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "dist = sps.fisk\n", 83 | "sispei = si.SI(\n", 84 | " series=surplus,\n", 85 | " dist=dist,\n", 86 | " timescale=0,\n", 87 | ")\n", 88 | "sispei.fit_distribution()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "id": "41ab4ace", 94 | "metadata": {}, 95 | "source": [ 96 | "## Get threshold\n", 97 | "\n", 98 | "Choose arbitrary threshold based on quantile of the distribution. Can be any threshold the user wants as well. Only then the threshold time series has to be created manually." 
99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "62530ad6", 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "speithr = sispei.ppf(0.3) # 30% quantile threshold" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "efed2416", 114 | "metadata": {}, 115 | "source": [ 116 | "## Plot threshold" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "c7492e76", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "ax = si.plot.threshold(\n", 127 | " series=sispei.series,\n", 128 | " threshold=speithr,\n", 129 | " fill_color=\"red\",\n", 130 | ")\n", 131 | "_ = ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2020\"))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "id": "b54b1d3f", 137 | "metadata": {}, 138 | "source": [ 139 | "## Repeat for head time series" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "id": "1a7f3565", 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "timescale = 6\n", 150 | "sisgi = si.SI(\n", 151 | " head,\n", 152 | " dist=sps.norm,\n", 153 | " timescale=timescale,\n", 154 | " fit_freq=\"MS\",\n", 155 | " normal_scores_transform=True,\n", 156 | " agg_func=\"mean\",\n", 157 | ")\n", 158 | "sgithr = sisgi.ppf(0.4) # choose arbitrary threshold" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "id": "6cdf67c7", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "ax = si.plot.threshold(\n", 169 | " series=head.iloc[timescale - 1 :],\n", 170 | " threshold=sgithr,\n", 171 | " fill_color=\"red\",\n", 172 | ")\n", 173 | "_ = ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2020\"))" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "SPEI", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 
| "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.12.3" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 5 198 | } 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SPEI 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/spei?style=flat-square&color=007ec6)](https://pypi.org/project/spei/) 4 | [![PyPi Supported Python Versions](https://img.shields.io/pypi/pyversions/spei?style=flat-square&color=007ec6)](https://pypi.org/project/spei/) 5 | [![Code Size](https://img.shields.io/github/languages/code-size/martinvonk/spei?style=flat-square&color=007ec6)](https://pypi.org/project/spei/) 6 | [![PyPi Downloads](https://img.shields.io/pypi/dm/spei?style=flat-square&color=0a3d62)](https://pypi.org/project/spei/) 7 | [![License](https://img.shields.io/pypi/l/spei?style=flat-square&color=0a3d62&logo=open-source-initiative&logoColor=white)](https://pypi.org/project/spei/) 8 | 9 | [![JOSS](https://img.shields.io/badge/JOSS-10.21105/joss.08454-ff6600.svg?style=flat-square)](https://doi.org/10.21105/joss.08454) 10 | [![DOI](https://img.shields.io/badge/DOI-10.5281/zenodo.10816741-ff6600?style=flat-square)](https://doi.org/10.5281/zenodo.10816741) 11 | 12 | [![Tests](https://img.shields.io/github/actions/workflow/status/martinvonk/spei/tests.yml?style=flat-square&color=green)](https://github.com/martinvonk/SPEI/actions/workflows/tests.yml) 13 | [![CodacyCoverage](https://img.shields.io/codacy/coverage/908b566912314666b84e1add22ea7d66?style=flat-square&color=green)](https://app.codacy.com/gh/martinvonk/SPEI/) 14 | 
[![CodacyGrade](https://img.shields.io/codacy/grade/908b566912314666b84e1add22ea7d66?style=flat-square&color=darkgreen)](https://app.codacy.com/gh/martinvonk/SPEI/) 15 | [![Typed: MyPy](https://img.shields.io/badge/type_checker-mypy-darkgreen?style=flat-square)](https://mypy-lang.org/) 16 | [![Formatter and Linter: ruff](https://img.shields.io/badge/linter-ruff-darkgreen?style=flat-square)](https://github.com/charliermarsh/ruff) 17 | 18 | 19 | SPEI is a simple Python package to calculate drought indices for hydrological time series. This package uses popular Python packages such as Pandas and Scipy to make it easy and versatile for the user to calculate the drought indices. Pandas Series are great for dealing with time series; providing interpolation, rolling average, and other manipulation options. SciPy enables us to use all different kinds of [distributions](https://docs.scipy.org/doc/scipy/reference/stats.html#probability-distributions) to fit the data. Different popular drought indices are supported such as the SPI (Standardized Precipitation Index), SPEI (Standardized Precipitation Evaporation Index), and SGI (Standardized Groundwater Index). 20 | 21 | If you use this software for either the visualization and/or analysis, please cite this package via our article in the [Journal of Open Source Software](https://joss.theoj.org/papers/10.21105/joss.08454): 22 | > Vonk, M. A. (2025). SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454). 23 | 24 | Or cite a specific version in the Zenodo archive: 25 | > Vonk, M. A. (XXXX). SPEI: A simple Python package to calculate and visualize drought indices (vX.X.X). Zenodo. [doi.org/10.5281/zenodo.10816740](https://doi.org/10.5281/zenodo.10816740). 
26 | 27 | ## Available Drought Indices 28 | 29 | | Drought Index | Abbreviation | Literature | 30 | | --------------------------------------------- | ------------ | ---------- | 31 | | Standardized Precipitation Index | SPI | 1 | 32 | | Standardized Precipitation Evaporation Index* | SPEI | 2 | 33 | | Standardized Groundwater Index | SGI | 3,4 | 34 | | Standardized Streamflow Index | SSFI | 5,6 | 35 | | Standardized Soil Moisture Index | SSMI | 7 | 36 | 37 | The package is not limited to only these five drought indices. If any of the distributions in the Scipy library is valid on the observed hydrological series, the drought index can be calculated. 38 | 39 | *For the calculation of potential evaporation, take a look at [pyet](https://github.com/phydrus/pyet). This is another great package that also uses pandas Series to calculate different kinds of potential evaporation time series. 40 | 41 | ## Installation 42 | 43 | To get the latest stable version install using: 44 | 45 | `pip install spei` 46 | 47 | To get the development version download or clone the GitHub repository to your local device. Install using: 48 | 49 | `pip install -e ` 50 | 51 | ## Literature 52 | 53 | This list of scientific literature is helpful as a reference to understand the context and application of drought indices. 54 | 55 | 1. Lloyd-Hughes, B. and M.A. Saunders (2002) - A Drought Climatology for Europe. DOI: 10.1002/joc.846 56 | 2. Vicente-Serrano, S.M., S. Beguería and J.I. López-Moreno (2010) - A Multi-scalar drought index sensitive to global warming: The Standardized Precipitation Evapotranspiration Index. DOI: 10.1175/2009JCLI2909.1 57 | 3. Bloomfield, J.P. and B.P. Marchant (2013) - Analysis of groundwater drought building on the standardised precipitation index approach. DOI: 10.5194/hess-17-4769-2013 58 | 4. Babre, A., A. Kalvāns, Z. Avotniece, I. Retiķe, J. Bikše, K.P.M. Jemeljanova, A. Zelenkevičs and A. 
Dēliņa (2022) - The use of predefined drought indices for the assessment of groundwater drought episodes in the Baltic States over the period 1989–2018. DOI: 10.1016/j.ejrh.2022.101049 59 | 5. Vicente-Serrano, S. M., J. I. López-Moreno, S. Beguería, J. Lorenzo-Lacruz, C. Azorin-Molina, and E. Morán-Tejeda (2012). Accurate Computation of a Streamflow Drought Index. Journal of Hydrologic Engineering. American Society of Civil Engineers. DOI: 10.1061/(asce)he.1943-5584.0000433 60 | 6. Tijdeman, E., K. Stahl and L.M. Tallaksen (2020) - Drought characteristics derived based on the Standardized Streamflow Index: A large sample comparison for parametric and nonparametric methods. DOI: 10.1029/2019WR026315 61 | 7. Carrão, H., Russo, S., Sepulcre-Canto, G., Barbosa, P.: An empirical standardized soil moisture index for agricultural drought assessment from remotely sensed data. DOI: 10.1016/j.jag.2015.06.011 62 | 63 | ### Publications 64 | These are scientific publications that use and cite this Python package: 65 | 66 | van Mourik, J., Ruijsch, D., van der Wiel, K., Hazeleger, W., & Wanders, N. (2025). Regional drivers and characteristics of multi-year droughts. Weather and Climate Extremes, 48, 100748. https://doi.org/10.1016/j.wace.2025.100748 67 | 68 | Segura-Barrero, R., Lauvaux, T., Lian, J., Ciais, P., Badia, A., Ventura, S., Bazzi, H., Abbessi, E., Fu, Z., Xiao, J., Li, X., & Villalba, G. (2025). Heat and Drought Events Alter Biogenic Capacity to Balance CO2 Budget in South-Western Europe. Global biogeochemical cycles, 39(1), e2024GB008163. https://doi.org/10.1029/2024GB008163 69 | 70 | Adla, S., Šaponjić, A., Tyagi, A., Nagi, A., Pastore, P., & Pande, S. (2024). Steering agricultural interventions towards sustained irrigation adoption by farmers: socio-psychological analysis of irrigation practices in Maharashtra, India. Hydrological Sciences Journal, 69(12), 1586–1603. https://doi.org/10.1080/02626667.2024.2376709 71 | 72 | Panigrahi, S., Vidyarthi, V.K. (2025).
Assessing the Suitability of SPI and SPEI in Steppe Hot and Arid Climatic Zones in India. In: Sefelnasr, A., Sherif, M., Singh, V.P. (eds) Water Resources Management and Sustainability. Water Science and Technology Library, vol 114. Springer, Cham. https://doi.org/10.1007/978-3-031-80520-2_12 73 | -------------------------------------------------------------------------------- /docs/examples/example05_multiyear_drought.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Analyzing Multi-Year Droughts\n", 8 | "\n", 9 | "After van Mourik, J. and Ruijsch, D. and van der Wiel, K. and Hazeleger, W. and Wanders, N. (2025) - [Regional drivers and characteristics of multi-year droughts](https://doi.org/10.1016/j.wace.2025.100748)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Load packages" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib as mpl\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import pandas as pd\n", 28 | "\n", 29 | "import spei as si # si for standardized index\n", 30 | "\n", 31 | "print(si.show_versions())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Load meteo data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 48 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 49 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Calculate SPEI over different time scales\n", 57 | "\n", 58 | "1 month to 24 
months" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "pe = (\n", 68 | " (prec - evap).dropna().resample(\"ME\").sum()\n", 69 | ") # calculate monthly precipitation excess\n", 70 | "spei1 = si.spei(pe, timescale=1).rename(\"1\")\n", 71 | "spei3 = si.spei(pe, timescale=3).rename(\"3\")\n", 72 | "spei6 = si.spei(pe, timescale=6).rename(\"6\")\n", 73 | "spei9 = si.spei(pe, timescale=9).rename(\"9\")\n", 74 | "spei12 = si.spei(pe, timescale=12).rename(\"12\")\n", 75 | "spei24 = si.spei(pe, timescale=24).rename(\"24\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Visualization" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Plot as time series" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "f, ax = plt.subplots(6, figsize=(7.0, 10.0), sharex=True)\n", 99 | "si.plot.si(spei1, ax=ax[0], add_category=False)\n", 100 | "si.plot.si(spei3, ax=ax[1], add_category=False)\n", 101 | "si.plot.si(spei6, ax=ax[2], add_category=False)\n", 102 | "si.plot.si(spei9, ax=ax[3], add_category=False)\n", 103 | "si.plot.si(spei12, ax=ax[4], add_category=False)\n", 104 | "si.plot.si(spei24, ax=ax[5], add_category=False)\n", 105 | "ax[0].set_ylabel(\"SPEI-1\")\n", 106 | "ax[1].set_ylabel(\"SPEI-3\")\n", 107 | "ax[2].set_ylabel(\"SPEI-6\")\n", 108 | "ax[3].set_ylabel(\"SPEI-9\")\n", 109 | "ax[4].set_ylabel(\"SPEI-12\")\n", 110 | "ax[5].set_ylabel(\"SPEI-24\")\n", 111 | "ax[5].xaxis.set_major_locator(mpl.dates.YearLocator())\n", 112 | "ax[5].xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 113 | "ax[5].set_xlim(pd.Timestamp(\"2005\"), pd.Timestamp(\"2015\"))" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "### Plot as heatmap\n", 121 | "\n", 122 
| "Only droughts" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "ax = si.plot.heatmap([spei1, spei3, spei6, spei9, spei12, spei24])\n", 132 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator())\n", 133 | "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 134 | "ax.get_figure().axes[-1].set_yticks(range(-3, 0))\n", 135 | "ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2015\"))" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Or with wet periods" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "f, ax = plt.subplots(figsize=(8.0, 3.0))\n", 152 | "ax = si.plot.heatmap(\n", 153 | " [spei1, spei3, spei6, spei9, spei12, spei24],\n", 154 | " cmap=\"vik_r\",\n", 155 | " vmax=3,\n", 156 | " add_category=True,\n", 157 | " ax=ax,\n", 158 | ")\n", 159 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator())\n", 160 | "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 161 | "ax.set_xlim(pd.Timestamp(\"2010\"), pd.Timestamp(\"2015\"))" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "SPEI", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.13.1" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 2 186 | } 187 | -------------------------------------------------------------------------------- /docs/examples/example01_indices.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Standardized Indices\n", 8 | "\n", 9 | "*Martin Vonk - 2022*\n", 10 | "\n", 11 | "This notebooks shows an example calculation of the three drought indices:\n", 12 | "- SPI: Standardized Precipitation Index\n", 13 | "- SPEI: Standardized Precipitation Evaporation Index\n", 14 | "- SGI: Standardized Groundwater Index\n", 15 | "\n", 16 | "## Required packages" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib.pyplot as plt\n", 26 | "import pandas as pd\n", 27 | "import scipy.stats as scs\n", 28 | "\n", 29 | "import spei as si # si for standardized index\n", 30 | "\n", 31 | "print(si.show_versions())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Load time series\n", 39 | "\n", 40 | "We use time series of the precipitation and potential (Makkink) evaporation from the Netherlands and obtain them from the python package [Pastas](https://github.com/pastas/pastas)." 
41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 50 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 51 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 52 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"B32C0572\").dropna()\n", 53 | "\n", 54 | "fig, ax = plt.subplots(3, 1, figsize=(12, 8), sharex=True)\n", 55 | "prec.plot(ax=ax[0], legend=True, grid=True)\n", 56 | "evap.plot(ax=ax[1], color=\"C1\", legend=True, grid=True)\n", 57 | "head.plot(ax=ax[2], color=\"k\", legend=True, grid=True);" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Calculate SPI\n", 65 | "\n", 66 | "The standardized precipitation index (SPI) is calculated using the gamma distribution from the [scipy stats library](https://docs.scipy.org/doc/scipy/reference/stats.html). In fact any continuous distribution of this library can be chosen. However there are sensible choices for the SPI such as gamma, lognorm (lognormal), fisk (log-logistic) or pearson3 distribution. The precipitation time series is summed over a 90D rolling interval, which corresponds to SPI3. \n", 67 | "\n", 68 | "For the literature we refer to: LLoyd-Hughes, B. and Saunders, M.A.: [A drought climatology for Europe](https://doi.org/10.1002/joc.846), 2002." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "f = 90 # days\n", 78 | "series = prec.rolling(f, min_periods=f).sum().dropna()\n", 79 | "series" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "spi3_gamma = si.spi(series, dist=scs.gamma, fit_freq=\"ME\")\n", 89 | "spi3_gamma" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Lets try that with the pearson3 distribution:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "spi3_pearson = si.spi(series, dist=scs.pearson3, fit_freq=\"ME\")\n", 106 | "spi3_pearson" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "tmin, tmax = pd.to_datetime([\"1994\", \"1998\"])\n", 116 | "plt.figure(figsize=(8, 4))\n", 117 | "spi3_gamma.plot(label=\"gamma\")\n", 118 | "spi3_pearson.plot(label=\"pearson3\", linestyle=\"--\")\n", 119 | "plt.xlim(tmin, tmax)\n", 120 | "plt.legend()\n", 121 | "plt.ylabel(\"Z-score\")\n", 122 | "plt.grid()\n", 123 | "plt.title(\"SPI for two distributions\");" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "As can be seen from the figure the distributions do not give significantly different output. This might not be the case for other time series of the precipitation. Example notebook 2 (example2_distribution.ipynb) provides more insight in how to choose the right distribution." 
131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Calculate SPEI\n", 138 | "\n", 139 | "The standardized precipitation evaporation index (SPEI) is calculated by first substracting the evaporation from the precipitation time series. By default the fisk distribution is used to calculate the SPEI, however for other regularly used distributions are lognorm, pearson3 and genextreme. The code internally can also calculate the timescale (30D; SPEI1 in this case)\n", 140 | "\n", 141 | "For the literature we refer to: Vicente-Serrano S.M., Beguería S., López-Moreno J.I.: [A Multi-scalar drought index sensitive to global warming: The Standardized Precipitation Evapotranspiration Index](https://doi.org/10.1175/2009JCLI2909.1), 2010." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "pe = (prec - evap).dropna() # calculate precipitation excess\n", 151 | "spei1 = si.spei(pe, timescale=30, fit_freq=\"ME\")\n", 152 | "spei1" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## Calculate SGI\n", 160 | "\n", 161 | "The standardized groundwater index (SGI) is calculated using the method as described by [Bloomfield, J. P. and Marchant, B. P.: Analysis of groundwater drought building on the standardised precipitation index approach](https://doi.org/10.5194/hess-17-4769-2013), 2013. The way the SGI is calculated is the same as in the groundwater time series analysis package Pastas. A nice example notebook on computing the SGI with Pastas time series models can be found [here](https://pastas.readthedocs.io/en/latest/examples/011_sgi_example.ipynb.html).\n", 162 | "\n", 163 | "For the head time series no distribution has to be selected by default. Since the time series has a 14 day frequency it is not resampled." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "sgi = si.sgi(head, fit_freq=\"ME\")\n", 173 | "sgi.plot(ylabel=\"Z-score\")" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "## Visualize indices\n", 181 | "\n", 182 | "The indices can be interpreted as such:\n", 183 | "\n", 184 | "| **Z-score** | **Category** | **Probability (%)** |\n", 185 | "|-----------------------|----------------------|---------------------|\n", 186 | "| ≥ 2.00 | Extremely wet | 2.3 |\n", 187 | "| 1.50 ≤ Z < 2.00 | Severely wet | 4.4 |\n", 188 | "| 1.00 ≤ Z < 1.50 | Moderately wet | 9.2 |\n", 189 | "| 0.00 ≤ Z < 1.00 | Mildly wet | 34.1 |\n", 190 | "| -1.00 < Z < 0.00 | Mild drought | 34.1 |\n", 191 | "| -1.50 < Z ≤ -1.00 | Moderate drought | 9.2 |\n", 192 | "| -2.00 < Z ≤ -1.50 | Severe drought | 4.4 |\n", 193 | "| ≤ -2.00 | Extreme drought | 2.3 |\n", 194 | "\n", 195 | "The time series for the standardized indices are plotted using a build in method:" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "f, ax = plt.subplots(3, 1, figsize=(12, 8), sharex=True)\n", 205 | "\n", 206 | "# choose a colormap to your liking:\n", 207 | "si.plot.si(spi3_pearson, ax=ax[0], cmap=\"vik_r\")\n", 208 | "si.plot.si(spei1, ax=ax[1], cmap=\"roma\")\n", 209 | "si.plot.si(sgi, ax=ax[2], cmap=\"seismic_r\")\n", 210 | "ax[0].set_xlim(pd.to_datetime([\"1994\", \"1998\"]))\n", 211 | "[x.grid() for x in ax]\n", 212 | "[ax[i].set_ylabel(n, fontsize=14) for i, n in enumerate([\"SPI3\", \"SPEI1\", \"SGI\"])];" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "SPEI", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 
227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.12.3" 233 | }, 234 | "orig_nbformat": 4 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /docs/examples/example03_drought_prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drought Prediction with Time Series Modeling\n", 8 | "\n", 9 | "*Martin Vonk - 2022*\n", 10 | "\n", 11 | "This notebooks shows a quick calculation of the SPI, SPEI and SGI for De Bilt, in the Netherlands. The SGI is calculated using a [Pastas](https://github.com/pastas/pastas) time series model since the original time series is too short. The application of time series models for extrapolating groundwater time series is discussed in [Brakkee et al (2022)](https://hess.copernicus.org/articles/26/551/2022/hess-26-551-2022.html).\n", 12 | "\n", 13 | "## Required packages" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import matplotlib.pyplot as plt\n", 23 | "import pandas as pd\n", 24 | "import pastas as ps\n", 25 | "import scipy.stats as scs\n", 26 | "\n", 27 | "import spei as si # si for standardized index\n", 28 | "\n", 29 | "print(si.show_versions())" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## Import time series\n", 37 | "\n", 38 | "Time series are imported using the package hydropandas. Enddate is by default yesterday. The head time series is obtained from a Pastas test dataset." 
39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# import hydropandas as hpd\n", 48 | "\n", 49 | "# today = datetime.date.today()\n", 50 | "# yesterday = (today - datetime.timedelta(days=1)).strftime(\"%Y-%m-%d\")\n", 51 | "# prec = (\n", 52 | "# hpd.PrecipitationObs.from_knmi(\n", 53 | "# meteo_var=\"RH\", stn=260, startdate=\"1959-07-01\", enddate=yesterday\n", 54 | "# )\n", 55 | "# .multiply(1e3)\n", 56 | "# .squeeze()\n", 57 | "# )\n", 58 | "# prec.index = prec.index.normalize()\n", 59 | "# evap = (\n", 60 | "# hpd.EvaporationObs.from_knmi(\n", 61 | "# meteo_var=\"EV24\", stn=260, startdate=\"1959-07-01\", enddate=yesterday\n", 62 | "# )\n", 63 | "# .multiply(1e3)\n", 64 | "# .squeeze()\n", 65 | "# )\n", 66 | "# evap.index = evap.index.normalize()\n", 67 | "\n", 68 | "\n", 69 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 70 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 71 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 72 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"B32C0572\").dropna()\n", 73 | "today = df.index[-1]\n", 74 | "yesterday = df.index[-2]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Calculate SPI and SPEI" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Accumulate time series on monthly basis\n", 91 | "spi1 = si.spi(prec, timescale=30, dist=scs.gamma, fit_freq=\"MS\")\n", 92 | "spei1 = si.spei((prec - evap), timescale=30, dist=scs.fisk, fit_freq=\"MS\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "xlim = pd.to_datetime([\"2018-01-01\", df.index[-1]])\n", 102 | "\n", 103 | "fig, axs = plt.subplots(2, 1, figsize=(7.0, 
5.5), sharex=True)\n", 104 | "si.plot.si(spi1, ybound=3.1, ax=axs[0], background=False, cmap=\"roma\")\n", 105 | "si.plot.si(spei1, ybound=3.1, ax=axs[1], background=False, cmap=\"roma\")\n", 106 | "[(x.grid(), x.set_xlim(xlim), x.set_ylabel(\"Z-Score\")) for x in axs]\n", 107 | "axs[0].set_title(\"Standardized Precipitation Index\")\n", 108 | "axs[1].set_title(\"Standardized Precipitation Evaporation Index\")\n", 109 | "fig.suptitle(\"Meteorological Drought-Indices De Bilt\")\n", 110 | "fig.tight_layout()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Create time series model and simulate head " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "ml = ps.Model(head)\n", 127 | "rm = ps.RechargeModel(\n", 128 | " prec, evap, ps.Exponential(), recharge=ps.rch.FlexModel(gw_uptake=True)\n", 129 | ")\n", 130 | "ml.add_stressmodel(rm)\n", 131 | "ml.solve(tmin=\"1970-07-01\", report=True)\n", 132 | "_ = ml.plots.results(figsize=(10.0, 8.0))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "## Calculate SGI based on time series model" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "gws = ml.simulate(tmin=\"1990-07-01\", tmax=yesterday)\n", 149 | "sgi = si.sgi(gws, fit_freq=\"MS\")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Compare three drought-indices (SPI, SPEI, SGI) in plot" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "fig, axs = plt.subplot_mosaic(\n", 166 | " [[\"SPI\"], [\"SPEI\"], [\"SGI\"]], figsize=(6.5, 8), sharex=True\n", 167 | ")\n", 168 | "si.plot.si(spi1, ybound=3.5, 
ax=axs[\"SPI\"], add_category=False)\n", 169 | "si.plot.si(spei1, ybound=3.5, ax=axs[\"SPEI\"], add_category=False)\n", 170 | "si.plot.si(sgi, ybound=3.5, ax=axs[\"SGI\"], add_category=False)\n", 171 | "[(axs[x].grid(), axs[x].set(xlim=xlim, ylabel=\"Z-Score\")) for x in axs]\n", 172 | "axs[\"SPI\"].set_title(\"Standardized Precipitation Index 1\")\n", 173 | "axs[\"SPEI\"].set_title(\"Standardized Precipitation Evaporation Index 1\")\n", 174 | "axs[\"SGI\"].set_title(\"Standardized Groundwater Index\")\n", 175 | "fig.suptitle(\"Drought-Indices for De Bilt\", fontsize=14)\n", 176 | "fig.tight_layout()\n", 177 | "# fig.savefig('Drought_Index_Bilt.png', dpi=600, bbox_inches='tight')" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Compare SPEI Kernel Density Estimate for one month" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "ax = si.plot.monthly_density(\n", 194 | " spi1, years=[today.year - 1, today.year], months=[today.month - 1]\n", 195 | ")\n", 196 | "ax.set_xlabel(\"Z-Score\")\n", 197 | "ax.set_title(\"SPEI\");" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "SPEI", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.12.3" 218 | }, 219 | "orig_nbformat": 4 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 2 223 | } 224 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'SPEI: A Python package for calculating and 
visualizing drought indices' 3 | tags: 4 | - hydrology 5 | - drought 6 | - time series 7 | - Python 8 | authors: 9 | - name: Martin A. Vonk 10 | orcid: 0009-0007-3528-2991 11 | affiliation: "1, 2" 12 | affiliations: 13 | - name: Department of Water Management, Faculty of Civil Engineering and Geosciences, Delft University of Technology, Delft, South Holland, The Netherlands 14 | index: 1 15 | - name: Artesia B.V., Schoonhoven, South Holland, The Netherlands 16 | index: 2 17 | date: 24 February 2025 18 | bibliography: paper.bib 19 | 20 | --- 21 | 22 | # Summary 23 | `SPEI` is a Python package for calculating drought indices from time series. 24 | Popular Python packages such as `Pandas` [@pandas_paper_2010], `SciPy` [@scipy_paper_2020], and `Matplotlib` [@matplotlib_paper_2007] are used for handling the time series, statistics, and visualization respectively. 25 | This makes the calculation and visualization of drought indices straightforward and flexible. 26 | 27 | # Statement of need 28 | Water is a vital natural resource, but freshwater availability is increasingly threatened by droughts linked to climate change and human activities. 29 | Drought refers to a water deficit relative to normal conditions [@sheffield_droughtdefinition_2011]. 30 | Both the definition of drought and the baseline for what constitutes "normal" conditions vary depending on the context and objective of a given analysis [@dracup_droughtdefinition_1980]. 31 | As a result, many drought indices have been developed to quantify drought characteristics. 32 | Each index quantifies a drought's severity, location, timing, and duration, helping to track and predict its impact. 33 | 34 | # Standardized drought indices 35 | The most common drought indices are standardized indices, which fit a time series to a probability distribution and convert it into a Z-score of the standardized normal distribution. 
36 | For meteorological droughts, widely used indices include the Standardized Precipitation Index (SPI) [@mckee_spi_1993; @lloydhughes_spi_2002; @wmo_spi_2012] and the Standardized Precipitation Evaporation Index (SPEI) [@vicenteserrano_spei_2010]; the latter index is also the name of the `SPEI` package. 37 | Hydrological droughts are often measured using the Standardized Groundwater Index (SGI) [@bloomfield_sgi_2013] and the Standardized Streamflow Index (SSFI or SSI) [@vicenteserrano_ssfi_2012]. 38 | For agricultural droughts, the Standardized Soil Moisture Index (SSMI) [@sheffield_ssmi_2004] can be used. 39 | All of these standardized indices are explicitly supported by the `SPEI` package, though any other standardized drought index can also be computed using the same methodology. 40 | 41 | ## Computation 42 | Standardized indices are commonly calculated from a time series of at least 30 years [@mckee_spi_1993]. 43 | Rolling sums or averages are computed over typical time scales (generally 1, 3, 6, 12, 24, or 48 months)[^1], and a continuous probability distribution is fitted to each. 44 | Alternatively, non-parametric methods like normal-scores transforms or kernel density estimates can be used. 45 | The probability of each value is then converted to a Z-score using the inverse normal distribution, yielding a standardized index with a mean of zero and standard deviation of one. 46 | 47 | [^1]: A month is not an unambiguous time unit, varying between 28 and 31 days, which adds complexity to computations. 48 | The package handles this internally using `Pandas` to ensure consistent time aggregation. 49 | 50 | ### Implementation 51 | The `SPEI` package is built on `Pandas` [@pandas_paper_2010; @pandas_software_2020], which in turn relies heavily on `NumPy` [@numpy_article_2020]. 52 | It uses `pandas.Series` with a `DatetimeIndex`, enabling powerful time series methods such as `resample` and `rolling`. 
53 | Probability density functions are provided via the `SciPy` `stats` module [@scipy_paper_2020]. 54 | Literature offers general guidance for what distribution to use for each standardized index; e.g., a gamma distribution for SPI [@thom_gamma_1996] and a fisk (log-logistic) distribution for SPEI [@vicenteserrano_spei_2010]. However, with the `SciPy` package, users are free to experiment with any of the 200+ univariate continuous distributions available. 55 | Each distribution has a `fit` method for maximum likelihood estimation on the data. 56 | 57 | #### Example 58 | As an example, the Standardized Precipitation Evaporation Index is computed using a dataset with daily precipitation and potential evaporation from the Royal Netherlands Meteorological Institute (KNMI), shown in \autoref{fig:meteo_surplus}a. 59 | The SPEI uses the precipitation surplus (precipitation minus potential evaporation), which is aggregated monthly for this example and shown in \autoref{fig:meteo_surplus}b. 60 | 61 | ![Example meteorological time series \label{fig:meteo_surplus}](figures/monthly_precipitation_surplus.png) 62 | 63 | The Python code to compute the SPEI-1 (`-1` indicating a one month time scale) with a fisk distribution is as follows: 64 | 65 | ```python 66 | # load packages 67 | import pandas as pd 68 | import scipy.stats as sps 69 | import spei as si 70 | 71 | # load daily time series 72 | meteo: pd.DataFrame = pd.read_csv( 73 | "meteo.csv", 74 | index_col="datetime", 75 | parse_dates=["datetime"], 76 | ) 77 | prec: pd.Series = meteo["precipitation"] 78 | evap: pd.Series = meteo["pot_evaporation"] 79 | 80 | # compute monthly precipitation surplus 81 | surplus: pd.Series = (prec - evap).resample("MS").sum() # MS: month-start 82 | 83 | # compute SPEI-1 84 | spei1: pd.Series = si.spei( 85 | series=surplus, 86 | dist=sps.fisk, 87 | timescale=1, # unit: frequency of the data (months in this case) 88 | ) 89 | ``` 90 | 91 | The standardization process is illustrated in 
\autoref{fig:surplus_fit}. 92 | The empirical cumulative density function of the surplus in March (red dots, matching \autoref{fig:meteo_surplus}b) with the fitted fisk distribution are shown in \autoref{fig:surplus_fit}a. 93 | The fitted probability for each red dot is plotted in \autoref{fig:surplus_fit}b (blue dots) and converted to a Z-score using a standardized normal distribution (purple line). 94 | The black dashed line traces this procedure for a 31 mm surplus from March 1994, near the 69th percentile, corresponding to a Z-score of around 0.4925. 95 | 96 | ![Example equiprobability transformation for the precipitation surplus in March. Figure adapted from @edwards_transformation_1997. \label{fig:surplus_fit}](figures/surplus_fit_cdf.png) 97 | 98 | Application of this procedure for all data points and months results in the standardized index, SPEI-1, as shown in \autoref{fig:spei1}. 99 | The background filling and categories [based on @mckee_spi_1993] in \autoref{fig:spei1} allow for the interpretation of drought (and wet) periods. 100 | The `SPEI` package has additional options to allow for other time scales, time series frequencies (e.g., daily), and fit window options to ensure valid distribution fit. 101 | 102 | ![Resulting SPEI-1 from the monthly precipitation surplus \label{fig:spei1}](figures/spei1.png) 103 | 104 | ## Threshold 105 | Drought characteristics can also be derived from time series using a threshold level. 106 | This defines at what level a drought starts and quantifies the deficit. 107 | The threshold can be either fixed or variable. 108 | A variable threshold, as shown in \autoref{fig:threshold} for part of the series of \autoref{fig:meteo_surplus}b, is typically derived from percentiles of the time series or from a fitted probability density function [@vanloon_hydrodrought_2015]. 
109 | 110 | ![Visualization of drought based on a variable threshold level \label{fig:threshold}](figures/threshold.png) 111 | 112 | ## Heatmap 113 | When multiple time scales are used, standardized drought indices can be visualized in a single graph to reveal whether a drought persists over time and to identify the build-up to multi-year droughts [@mourik_use_2025]. 114 | For hydrological droughts, this persistence relates to the system’s storage capacity and response time [e.g., @bloomfield_sgi_2013]. 115 | The SPEI heatmap (\autoref{fig:spei_heatmap}) illustrates this across six time scales (1, 3, 6, 9, 12, and 24 months), clearly highlighting the 1995–1998 multi-year drought as a large red zone. 116 | 117 | ![Visualization of the SPEI as a heatmap with different time scales \label{fig:spei_heatmap}](figures/spei_heatmap.png) 118 | 119 | # Other drought indices in the SPEI package 120 | 121 | Several other drought indices from the literature are also supported by the `SPEI` package, briefly outlined below. 122 | 123 | ## Rainfall anomaly index 124 | The Rainfall Anomaly Index (RAI) is a relative drought index that quantifies deviations from historical precipitation without fitting a distribution [@vanrooy_rai_1965]. 125 | The package also includes the Modified RAI (mRAI), which adds a scaling factor for local conditions. [@hansel_mrai_2016]. 126 | 127 | ## Climdex 128 | Climdex is an online platform providing indices for heat, cold, precipitation, and drought changes over time [@alexander_climdex_2025], with several of its precipitation indices available in the `SPEI` package. 129 | 130 | ## Precipitation deficit 131 | The KNMI defines drought during the growing season using the precipitation deficit (potential evaporation minus precipitation). 132 | The package includes five functions [after @witte_knmi_2025] to calculate this absolute drought index, primarily for the Netherlands but adaptable to other regions by adjusting the keyword arguments. 
import numpy as np
import pandas as pd

from .utils import group_yearly_df, validate_series


def get_yearly_temp_date(temp: pd.Series, threshold: float) -> pd.Series:
    """
    Get the first date in each year where the cumulative temperature exceeds
    a given threshold.

    Parameters
    ----------
    temp : pd.Series
        A pandas Series representing the temperature time series, indexed by date.
    threshold : float
        The temperature threshold to identify the first date above it.

    Returns
    -------
    pd.Series
        A pandas Series containing the first date in each year where the
        cumulative temperature exceeds the threshold. The index corresponds
        to the years.
    """
    temp_group_df = group_yearly_df(series=temp).cumsum(axis=0)
    # idxmax on a boolean frame returns the first True per column,
    # i.e. the first date on which the cumulative sum exceeds the threshold
    first_date_above_threshold = temp_group_df.gt(threshold).idxmax()
    return first_date_above_threshold


def cumsum(deficit: pd.Series, allow_below_zero: bool = True) -> pd.Series:
    """
    Calculate the cumulative sum of a deficit series.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values.
    allow_below_zero : bool, optional
        If True, the cumulative sum is calculated as-is, allowing negative
        values. If False, the cumulative sum is constrained to be
        non-negative, resetting to zero whenever the sum would drop below
        zero. Default is True.

    Returns
    -------
    pd.Series
        A pandas Series containing the cumulative sum of the deficit values,
        optionally constrained to be non-negative. The input series is left
        unmodified.
    """
    if allow_below_zero:
        return deficit.cumsum()
    # Accumulate over a copy so the caller's series is never mutated in place
    # (the previous implementation wrote to deficit.iat[0] directly).
    values = deficit.to_numpy(dtype=float, copy=True)
    if values[0] < 0.0:
        values[0] = 0.0
    # Clamped running sum: restart at zero whenever the total would go negative.
    sumlm = np.frompyfunc(lambda a, b: 0.0 if a + b < 0.0 else a + b, nin=2, nout=1)
    return pd.Series(sumlm.accumulate(values), deficit.index, dtype=float)


def get_cumulative_deficit(
    deficit: pd.Series,
    startdate: pd.Timestamp | pd.Series,
    enddate: pd.Timestamp | pd.Series,
    allow_below_zero: bool = True,
) -> pd.DataFrame:
    """
    Calculate the cumulative deficit for a given time period.

    This function computes the cumulative deficit for each column in a
    grouped yearly DataFrame, starting from `startdate` to `enddate`.
    The cumulative sum can optionally allow values below zero.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit time series.
    startdate : pd.Timestamp | pd.Series
        The start date(s) for the cumulative deficit calculation. If a
        single timestamp is provided, it is applied to all columns. If a
        Series is provided, it should align with the columns of the grouped
        DataFrame.
    enddate : pd.Timestamp | pd.Series
        The end date(s) for the cumulative deficit calculation. Similar to
        `startdate`, it can be a single timestamp or a Series aligned with
        the columns.
    allow_below_zero : bool, optional
        If True, allows the cumulative sum to include values below zero.
        Defaults to True.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the cumulative deficit for each column over
        the specified time period. The index represents the date range, and
        the columns correspond to the year.
    """
    deficit = validate_series(deficit)
    group_df = group_yearly_df(series=deficit)
    if isinstance(startdate, pd.Timestamp):
        if startdate.year != 2000:
            # year is replaced since group_yearly_df returns a df with 2000 as a base year
            startdate = startdate.replace(year=2000)
        startdate = pd.Series(startdate, index=group_df.columns)
    if isinstance(enddate, pd.Timestamp):
        if enddate.year != 2000:
            # year is replaced since group_yearly_df returns a df with 2000 as a base year
            enddate = enddate.replace(year=2000)
        enddate = pd.Series(enddate, index=group_df.columns)

    index = pd.date_range(start=startdate.min(), end=enddate.max(), freq="D")
    cumdf = pd.DataFrame(np.nan, index=index, columns=group_df.columns)
    for col in group_df.columns:
        start = startdate[col]
        end = enddate[col]
        cumdf.loc[start:end, col] = cumsum(
            group_df.loc[start:end, col],
            allow_below_zero=allow_below_zero,
        ).values

    return cumdf


def deficit_oct1(deficit: pd.Series) -> pd.Series:
    """
    Calculate the cumulative deficit on October 1st.

    This function computes the cumulative deficit for a given time series
    of deficits, considering only the period between April 1st and
    September 30th. The cumulative deficit is reset to zero if it goes
    below zero during this period.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit time series. The index
        should be datetime-like, and the values should represent the
        deficit amounts.

    Returns
    -------
    pd.Series
        A pandas Series containing the cumulative deficit values on
        October 1st. The index of the returned Series corresponds to the
        columns of the cumulative deficit DataFrame, and the name of the
        Series is "Doct1".
    """
    # Base year 2000 is used because group_yearly_df maps all years onto it.
    startdate = pd.Timestamp("2000-04-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=False,
    )
    doct1 = pd.Series(
        data=cumdf.loc[enddate].values,
        index=cumdf.columns,
        dtype=float,
        name="Doct1",
    )
    return doct1


def deficit_max(deficit: pd.Series) -> pd.Series:
    """
    Calculate the maximum cumulative deficit within a specified period.

    This function computes the maximum cumulative deficit for a given
    deficit time series, starting from April 1st to September 30th. The
    cumulative deficit is calculated using the `get_cumulative_deficit`
    function, ensuring that values below zero are not allowed.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values over time.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum cumulative deficit within
        the specified period, labeled as "Dmax".
    """
    startdate = pd.Timestamp("2000-04-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=False,
    )
    return cumdf.max().rename("Dmax")


def deficit_apr1(deficit: pd.Series) -> pd.Series:
    """
    Calculate the maximum change in cumulative deficit within a specified
    date range.

    This function computes the cumulative deficit for the given deficit
    series between April 1st and September 30th. It then calculates the
    maximum change in the cumulative deficit over this period.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values. The index is
        expected to be datetime-like.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum change in cumulative deficit
        over the specified period, labeled as "DIapr1".
    """
    startdate = pd.Timestamp("2000-04-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=True,
    )
    return (cumdf.max() - cumdf.min()).rename("DIapr1")


def deficit_gdd(
    deficit: pd.Series, temp: pd.Series, threshold: float = 440.0
) -> pd.Series:
    """
    Calculate the maximum change in cumulative deficit starting from the
    first day when the temperature sum (growing degree days; GDD)
    exceeds a specified threshold.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the daily deficit values.
    temp : pd.Series
        A pandas Series representing the daily temperature values.
    threshold : float, optional
        The temperature sum GDD threshold to determine the starting date
        for the calculation. Defaults to 440.0.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum change in cumulative
        deficit, labeled as "DIgdd".
    """
    temp = validate_series(temp)
    # Per-year start dates: first exceedance of the GDD threshold.
    startdate = get_yearly_temp_date(temp=temp, threshold=threshold)
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=True,
    )
    # Single rename; the previous double rename ("DIapr1" then "DIgdd") was redundant.
    return (cumdf.max() - cumdf.min()).rename("DIgdd")


def deficit_wet(deficit: pd.Series) -> pd.Series:
    """
    Calculate the maximum change in cumulative deficit for a specified period.

    This function computes the maximum change in cumulative deficit from
    January 1st to September 30th of a given year. The cumulative deficit
    is calculated using the `get_cumulative_deficit` function, allowing
    values below zero.

    Parameters
    ----------
    deficit : pd.Series
        A pandas Series representing the deficit values over time.

    Returns
    -------
    pd.Series
        A pandas Series containing the maximum change in cumulative deficit
        for the specified period, labeled as "DIwet".
    """
    startdate = pd.Timestamp("2000-01-01")
    enddate = pd.Timestamp("2000-09-30")
    cumdf = get_cumulative_deficit(
        deficit=deficit,
        startdate=startdate,
        enddate=enddate,
        allow_below_zero=True,
    )
    # Single rename; the previous double rename ("DIapr1" then "DIwet") was redundant.
    return (cumdf.max() - cumdf.min()).rename("DIwet")
and {SciPy 1.0 Contributors}}, 15 | title = {{SciPy} 1.0: Fundamental Algorithms for Scientific Computing in {Python}}, 16 | journal = {Nature Methods}, 17 | year = {2020}, 18 | volume = {17}, 19 | pages = {261--272}, 20 | doi = {10.1038/s41592-019-0686-2} 21 | } 22 | 23 | @incollection{pandas_paper_2010, 24 | author = {McKinney, W.}, 25 | title = {Data Structures for Statistical Computing in {Python}}, 26 | booktitle = {Proceedings of the 9th {Python} in {Science} {Conference}}, 27 | pages = {56--61}, 28 | year = {2010}, 29 | doi = {10.25080/Majora-92bf1922-00a} 30 | } 31 | 32 | @software{pandas_software_2020, 33 | author = {{The pandas development team}}, 34 | title = {pandas-dev/pandas: Pandas}, 35 | year = {2025}, 36 | publisher = {Zenodo}, 37 | version = {latest}, 38 | doi = {10.5281/zenodo.3509134} 39 | } 40 | 41 | @article{matplotlib_paper_2007, 42 | author = {Hunter, J. D.}, 43 | title = {Matplotlib: A 2D graphics environment}, 44 | journal = {Computing in Science \& Engineering}, 45 | volume = {9}, 46 | number = {3}, 47 | pages = {90--95}, 48 | publisher = {{IEEE COMPUTER SOC}}, 49 | doi = {10.1109/MCSE.2007.55}, 50 | year = {2007} 51 | } 52 | 53 | @article{numpy_article_2020, 54 | title = {Array programming with {NumPy}}, 55 | author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J. 56 | van der Walt and Ralf Gommers and Pauli Virtanen and David 57 | Cournapeau and Eric Wieser and Julian Taylor and Sebastian 58 | Berg and Nathaniel J. Smith and Robert Kern and Matti Picus 59 | and Stephan Hoyer and Marten H. van Kerkwijk and Matthew 60 | Brett and Allan Haldane and Jaime Fern{\'{a}}ndez del 61 | R{\'{i}}o and Mark Wiebe and Pearu Peterson and Pierre 62 | G{\'{e}}rard-Marchant and Kevin Sheppard and Tyler Reddy and 63 | Warren Weckesser and Hameer Abbasi and Christoph Gohlke and 64 | Travis E. 
Oliphant}, 65 | year = {2020}, 66 | journal = {Nature}, 67 | volume = {585}, 68 | number = {7825}, 69 | pages = {357--362}, 70 | doi = {10.1038/s41586-020-2649-2}, 71 | publisher = {Springer Science and Business Media {LLC}} 72 | } 73 | 74 | @incollection{mckee_spi_1993, 75 | author = {McKee, Thomas B. and Doesken, N. J. and Kleist, J.}, 76 | title = {The Relationship of Drought Frequency and Duration to Time Scales}, 77 | booktitle = {Proceedings of the {Eighth Conference on Applied Climatology}}, 78 | year = {1993}, 79 | pages = {179--184}, 80 | publisher = {American Meteorological Society} 81 | } 82 | 83 | @article{lloydhughes_spi_2002, 84 | author = {Lloyd-Hughes, Benjamin and Saunders, Mark A.}, 85 | title = {A drought climatology for {Europe}}, 86 | journal = {International Journal of Climatology}, 87 | volume = {22}, 88 | number = {13}, 89 | pages = {1571--1592}, 90 | doi = {10.1002/joc.846}, 91 | year = {2002} 92 | } 93 | 94 | @article{vicenteserrano_spei_2010, 95 | author = {Vicente-Serrano, S. M. and Beguería, S. and López-Moreno, J. I.}, 96 | title = {A Multi-scalar drought index sensitive to global warming: {The Standardized Precipitation Evapotranspiration Index}}, 97 | journal = {Journal of Climate}, 98 | year = {2010}, 99 | publisher = {American Meteorological Society}, 100 | volume = {23}, 101 | number = {7}, 102 | pages = {1696--1718}, 103 | doi = {10.1175/2009JCLI2909.1} 104 | } 105 | 106 | @article{bloomfield_sgi_2013, 107 | author = {Bloomfield, J. P. and Marchant, B. P.}, 108 | title = {Analysis of groundwater drought building on the standardised precipitation index approach}, 109 | journal = {Hydrology and Earth System Sciences}, 110 | year = {2013}, 111 | volume = {17}, 112 | pages = {4769--4787}, 113 | doi = {10.5194/hess-17-4769-2013} 114 | } 115 | 116 | @article{vicenteserrano_ssfi_2012, 117 | author = {Vicente-Serrano, S. M. and López-Moreno, J. I. and Beguería, S. and Lorenzo-Lacruz, J. and Azorin-Molina, C. 
and Morán-Tejeda, E.}, 118 | title = {Accurate Computation of a Streamflow Drought Index}, 119 | journal = {Journal of Hydrologic Engineering}, 120 | volume = {17}, 121 | number = {2}, 122 | pages = {318--332}, 123 | year = {2012}, 124 | doi = {10.1061/(ASCE)HE.1943-5584.0000433} 125 | } 126 | 127 | @article{sheffield_ssmi_2004, 128 | author = {Sheffield, J. and Goteti, G. and Wen, F. and Wood, E. F.}, 129 | title = {A simulated soil moisture based drought analysis for the {United States}}, 130 | journal = {Journal of Geophysical Research: Atmospheres}, 131 | volume = {109}, 132 | number = {D24}, 133 | doi = {10.1029/2004JD005182}, 134 | year = {2004} 135 | } 136 | 137 | @article{segura_use_2025, 138 | author = {Segura-Barrero, R. and Lauvaux, T. and Lian, J. and Ciais, P. and Badia, A. and Ventura, S. and Bazzi, H. and Abbessi, E. and Fu, Z. and Xiao, J. and Li, X. and Villalba, G.}, 139 | title = {Heat and Drought Events Alter Biogenic Capacity to Balance {CO2} Budget in South-Western {Europe}}, 140 | journal = {Global Biogeochemical Cycles}, 141 | volume = {39}, 142 | number = {1}, 143 | pages = {e2024GB008163}, 144 | doi = {10.1029/2024GB008163}, 145 | year = {2025} 146 | } 147 | 148 | @article{adla_use_2024, 149 | author = {Adla, S. and Šaponjić, A. and Tyagi, A. and Nagi, A. and Pastore, P. and Pande, S.}, 150 | title = {Steering agricultural interventions towards sustained irrigation adoption by farmers: socio-psychological analysis of irrigation practices in {Maharashtra}, {India}}, 151 | journal = {Hydrological Sciences Journal}, 152 | volume = {69}, 153 | number = {12}, 154 | pages = {1586--1603}, 155 | year = {2024}, 156 | publisher = {Taylor \& Francis}, 157 | doi = {10.1080/02626667.2024.2376709} 158 | } 159 | 160 | @article{mourik_use_2025, 161 | author = {{van Mourik}, J. and Ruijsch, D. and {van der Wiel}, K. and Hazeleger, W. 
and Wanders, N.}, 162 | title = {Regional drivers and characteristics of multi-year droughts}, 163 | journal = {Weather and Climate Extremes}, 164 | volume = {48}, 165 | pages = {100748}, 166 | year = {2025}, 167 | issn = {2212-0947}, 168 | doi = {10.1016/j.wace.2025.100748} 169 | } 170 | 171 | @inbook{panigrahi_use_2025, 172 | author = {Panigrahi, S. and Vidyarthi, V. K.}, 173 | title = {Assessing the Suitability of {SPI} and {SPEI} in Steppe Hot and Arid Climatic Zones in {India}}, 174 | editor = {Sefelnasr, A. and Sherif, M. and Singh, V. P.}, 175 | booktitle = {Water Resources Management and Sustainability: Solutions for Arid Regions}, 176 | year = {2025}, 177 | publisher = {Springer Nature Switzerland}, 178 | pages = {201--216}, 179 | doi = {10.1007/978-3-031-80520-2_12} 180 | } 181 | 182 | @software{vonk_spei_zenodo, 183 | author = {Vonk, M. A.}, 184 | title = {SPEI: A simple Python package to calculate and visualize drought indices}, 185 | year = {2025}, 186 | publisher = {Zenodo}, 187 | version = {v0.8.0}, 188 | doi = {10.5281/zenodo.10816740} 189 | } 190 | 191 | @online{alexander_climdex_2025, 192 | title = {Climdex: climate extremes indices}, 193 | author = {L. Alexander and M. Donat and M. Bador and N. Herold and J. L. Vazquez-Aguirre and R. Dunn and P. L. Nguyen and R. Isphording and Y. Singh}, 194 | url = {https://www.climdex.org}, 195 | urldate = {2025-04-24}, 196 | year = {2025} 197 | } 198 | 199 | @online{vonk_spei_github, 200 | author = {Vonk, M. A.}, 201 | title = {SPEI}, 202 | url = {https://github.com/martinvonk/spei}, 203 | urldate = {2025-04-24}, 204 | year = {2025} 205 | } 206 | 207 | @techreport{thom_gamma_1996, 208 | author = {Thom, H. C. 
S.}, 209 | title = {Some Methods of Climatological Analysis}, 210 | year = {1966}, 211 | type = {WMO Technical Note}, 212 | number = {81}, 213 | institution = {World Meteorological Organization}, 214 | address = {Geneva}, 215 | url = {https://library.wmo.int/idurl/4/59838} 216 | } 217 | 218 | @techreport{wmo_spi_2012, 219 | author = {Svoboda, M. and Hayes, M. and Wood, D. A.}, 220 | title = {{Standardized Precipitation Index} {User Guide}}, 221 | year = {2012}, 222 | type = {WMO Technical Document}, 223 | number = {1090}, 224 | institution = {World Meteorological Organization}, 225 | address = {Geneva}, 226 | url = {https://library.wmo.int/idurl/4/39629} 227 | } 228 | 229 | @book{sheffield_droughtdefinition_2011, 230 | author = {Sheffield, J. and Wood, E. F.}, 231 | title = {Drought: Past Problems and Future Scenarios}, 232 | publisher = {Taylor \& Francis Group}, 233 | doi = {10.4324/9781849775250}, 234 | year = {2011} 235 | } 236 | 237 | @article{dracup_droughtdefinition_1980, 238 | author = {Dracup, J. A. and Lee, K. S. and {Paulson Jr.}, E. G.}, 239 | title = {On the definition of droughts}, 240 | journal = {Water Resources Research}, 241 | volume = {16}, 242 | number = {2}, 243 | pages = {297-302}, 244 | doi = {10.1029/WR016i002p00297}, 245 | year = {1980} 246 | } 247 | 248 | @article{vanloon_hydrodrought_2015, 249 | author = {{van Loon}, A. F.}, 250 | title = {Hydrological drought explained}, 251 | journal = {WIREs Water}, 252 | volume = {2}, 253 | number = {4}, 254 | pages = {359-392}, 255 | doi = {10.1002/wat2.1085}, 256 | year = {2015} 257 | } 258 | 259 | @techreport{edwards_transformation_1997, 260 | author = {Edwards, D. C. and McKee, T. 
B.}, 261 | title = {Characteristics of 20th Century Drought in the {United States} at Multiple Time Scales}, 262 | institution = {Colorado State University, Department of Atmospheric Science}, 263 | type = {Climatology Report}, 264 | number = {97-2}, 265 | year = {1997}, 266 | address = {Fort Collins, CO}, 267 | note = {Atmospheric Science Paper No. 634} 268 | } 269 | 270 | @article{vanrooy_rai_1965, 271 | author = {{van Rooy}, M. P.}, 272 | title = {A Rainfall Anomaly Index Independent of Time and Space}, 273 | journal = {Notos}, 274 | year = {1965}, 275 | volume = {14}, 276 | pages = {43--48} 277 | } 278 | 279 | @article{hansel_mrai_2016, 280 | author = {Hänsel, S. and Schucknecht, A. and Matschullat, J.}, 281 | title = {The Modified Rainfall Anomaly Index ({mRAI})—is this an alternative to the {Standardised Precipitation Index} ({SPI}) in evaluating future extreme precipitation characteristics?}, 282 | journal = {Theoretical and Applied Climatology}, 283 | year = {2016}, 284 | volume = {123}, 285 | number = {3}, 286 | pages = {827-844}, 287 | doi = {10.1007/s00704-015-1389-y} 288 | } 289 | 290 | @article{witte_knmi_2025, 291 | author = {Witte, J. P. M. and {van den Eertwegh}, G. A. P. H. and Torfs, P. J. J. 
F.}, 292 | title = {Absolute Meteorological Drought Indices Validated Against Irrigation Amounts}, 293 | journal = {Water}, 294 | volume = {17}, 295 | year = {2025}, 296 | number = {7}, 297 | doi = {10.3390/w17071056} 298 | } 299 | -------------------------------------------------------------------------------- /docs/examples/example04_package_comparison.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Package Comparison\n", 9 | "\n", 10 | "*Martin Vonk - 2023*\n", 11 | "\n", 12 | "This notebook compares the calculated drought indices to other (Python) packages or time series retrieved from other locations.\n", 13 | "Current comparisons include: \n", 14 | "* standard_precip (Python)\n", 15 | "* climate_indices (Python)\n", 16 | "* pastas (Python)\n", 17 | "* SPEI (R)\n", 18 | "\n", 19 | "Please note that it can be difficult to install these packages. SPEI (R) requires the R library. Pastas depends on Numba which has strict requirements for NumPy. Climate Indices only supports Python 3.11 and lower. 
Therefore running this notebook can be cumbersome.\n", 20 | "\n", 21 | "Future comparisons:\n", 22 | "* [KNMI](https://gitlab.com/KNMI-OSS/climexp/climexp_numerical/-/blob/be0f081a9d62856e4c52a370e70fec2ddfc45cfa/src/calcSPI3.f)\n", 23 | "\n", 24 | "## Required packages" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import matplotlib.pyplot as plt\n", 34 | "import pandas as pd\n", 35 | "import scipy.stats as scs\n", 36 | "\n", 37 | "import spei as si\n", 38 | "\n", 39 | "print(si.show_versions())" 40 | ] 41 | }, 42 | { 43 | "attachments": {}, 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Read Precipitation Data" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 57 | "df.index.name = \"date\"\n", 58 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"rain\")\n", 59 | "head = df[\"Head [m] B32C0572_DEBILT\"].rename(\"B32C0572\").dropna()\n", 60 | "\n", 61 | "_ = prec.plot(grid=True, linewidth=0.5, title=\"Precipitation\", figsize=(6.5, 4))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# get rolling sum\n", 71 | "prec_rsum = prec.resample(\"ME\").sum()\n", 72 | "_ = prec_rsum.plot(\n", 73 | " grid=True, linewidth=0.5, title=\"Precipitation, monthly sum\", figsize=(6.5, 4)\n", 74 | ")" 75 | ] 76 | }, 77 | { 78 | "attachments": {}, 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Compute Standardized Precipitation Index\n", 83 | "\n", 84 | "### Using SPEI package" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "spi = si.spi(prec_rsum, dist=scs.gamma, 
prob_zero=True, timescale=3, fit_freq=\"ME\")\n", 94 | "spi # pandas Series" 95 | ] 96 | }, 97 | { 98 | "attachments": {}, 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Using standard_precip package" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "from standard_precip import spi as sp_spi\n", 112 | "\n", 113 | "# standard_precip also needs rolling sum dataframe, even though you provide freq=\"M\" and scale = 1\n", 114 | "precdf = prec_rsum.to_frame().reset_index().copy()\n", 115 | "\n", 116 | "# initialize spi\n", 117 | "standardp_spi_inst = sp_spi.SPI()\n", 118 | "\n", 119 | "# calculate index with many parameters\n", 120 | "standardp_spi = standardp_spi_inst.calculate(\n", 121 | "    precdf,\n", 122 | "    date_col=\"date\",\n", 123 | "    precip_cols=\"rain\",\n", 124 | "    freq=\"M\",\n", 125 | "    scale=3,  # note that scale is not the same for the standard deviation in SciPy\n", 126 | "    fit_type=\"mle\",\n", 127 | "    dist_type=\"gam\",\n", 128 | ")\n", 129 | "standardp_spi.index = standardp_spi.loc[\n", 130 | "    :, \"date\"\n", 131 | "].values  # create datetimeindex because date had to be a column\n", 132 | "\n", 133 | "standardp_spi  # pandas DataFrame" 134 | ] 135 | }, 136 | { 137 | "attachments": {}, 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### Using climate_indices package\n", 142 | "\n", 143 | "Previously there was a significant difference between the SPEI and climate_indices package, not sure why. I thought it had something to do with the fitting method used for the gamma distribution. In issue [#61](https://github.com/martinvonk/SPEI/issues/61) it was mentioned that the same outcome could be achieved. However, I found it difficult to install `climate_indices` due to lack of support (for newer python versions)." 
144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# from climate_indices.compute import scale_values, Periodicity\n", 153 | "# from climate_indices import compute, indices, utils" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# initial_year = prec_rsum.index[0].year\n", 163 | "# calibration_year_initial = prec_rsum.index[0].year\n", 164 | "# calibration_year_final = prec_rsum.index[-1].year\n", 165 | "# period_times = 366\n", 166 | "# scale = 1\n", 167 | "# periodicity = compute.Periodicity.daily\n", 168 | "\n", 169 | "# values = prec_rsum.values\n", 170 | "\n", 171 | "# scaled_values = compute.scale_values(\n", 172 | "# values,\n", 173 | "# scale=scale,\n", 174 | "# periodicity=periodicity,\n", 175 | "# )\n", 176 | "\n", 177 | "# alphas, betas = compute.gamma_parameters(\n", 178 | "# scaled_values,\n", 179 | "# data_start_year=initial_year,\n", 180 | "# calibration_start_year=calibration_year_initial,\n", 181 | "# calibration_end_year=calibration_year_final,\n", 182 | "# periodicity=periodicity,\n", 183 | "# )\n", 184 | "\n", 185 | "# gamma_params = {\"alpha\": alphas, \"beta\": betas}\n", 186 | "\n", 187 | "# spival = indices.spi(\n", 188 | "# values,\n", 189 | "# scale=scale,\n", 190 | "# distribution=indices.Distribution.gamma,\n", 191 | "# data_start_year=initial_year,\n", 192 | "# calibration_year_initial=calibration_year_initial,\n", 193 | "# calibration_year_final=calibration_year_final,\n", 194 | "# periodicity=compute.Periodicity.daily,\n", 195 | "# fitting_params=gamma_params,\n", 196 | "# )\n", 197 | "\n", 198 | "# climateind_spi = pd.Series(spival, index=prec_rsum.index, name=\"Climate Index SPI\")\n", 199 | "# climateind_spi" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "### Using SPEI R 
package" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from rpy2.robjects import pandas2ri\n", 216 | "from rpy2.robjects.packages import importr\n", 217 | "\n", 218 | "sr = importr(\"SPEI\")\n", 219 | "\n", 220 | "with pandas2ri.converter.context(): # pandas2ri.activate()\n", 221 | " spir_res = sr.spi(prec_rsum.values, scale=3)\n", 222 | "\n", 223 | "r_spi = pd.Series(spir_res[2].ravel(), index=prec_rsum.index, name=\"SPI\")\n", 224 | "r_spi" 225 | ] 226 | }, 227 | { 228 | "attachments": {}, 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### Plot and compare" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "f, ax = plt.subplot_mosaic(\n", 242 | " [[\"SPI\"], [\"DIFF\"]],\n", 243 | " figsize=(8, 4),\n", 244 | " sharex=True,\n", 245 | " height_ratios=[2, 1],\n", 246 | ")\n", 247 | "spi.plot(ax=ax[\"SPI\"], grid=True, linestyle=\"-\", label=\"SPI\")\n", 248 | "standardp_spi.iloc[:, -1].plot(\n", 249 | " ax=ax[\"SPI\"],\n", 250 | " color=\"C1\",\n", 251 | " grid=True,\n", 252 | " linestyle=\"--\",\n", 253 | " label=\"standard_precip\",\n", 254 | ")\n", 255 | "# climateind_spi.plot(\n", 256 | "# ax=ax[\"SPI\"], color=\"C2\", grid=True, linestyle=\":\", label=\"climate_indices\"\n", 257 | "# )\n", 258 | "# r_spi.plot(ax=ax[\"SPI\"], color=\"C2\", grid=True, linestyle=\":\", label=\"R package\")\n", 259 | "\n", 260 | "(ax[\"SPI\"].set_ylim(-3.5, 3.5),)\n", 261 | "(ax[\"SPI\"].set_title(\"Comparison\"),)\n", 262 | "(ax[\"SPI\"].set_ylabel(\"SPI\"),)\n", 263 | "ax[\"SPI\"].legend(ncol=3)\n", 264 | "\n", 265 | "(spi - standardp_spi.iloc[:, -1]).plot(\n", 266 | " ax=ax[\"DIFF\"], color=\"C4\", label=\"SPEI - standard_precip\", grid=True\n", 267 | ")\n", 268 | "# (spi - r_spi).plot(ax=ax[\"DIFF\"], color=\"C3\", label=\"SPEI - R Package\")\n", 
269 | "\n", 270 | "# ax[\"DIFF1\"].set_ylim(-0.05, 0.05)\n", 271 | "ax[\"DIFF\"].legend(ncol=2)\n", 272 | "ax[\"DIFF\"].set_title(\"SPEI minus other package\")\n", 273 | "ax[\"DIFF\"].set_ylabel(\"Difference\")\n", 274 | "ax[\"DIFF\"].set_xlim(\"1996\", \"1999\")\n", 275 | "f.tight_layout()" 276 | ] 277 | }, 278 | { 279 | "attachments": {}, 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "Difference is very small between SPEI and the standard_precip package.\n", 284 | "\n", 285 | "The standard_precip package does not explicitly support the Standardized Precipitation Evaporation Index, as far as I can see. However, the SPI class in standard_precip could probably be used, even though the naming of `precip_cols` is not universal. In general, the standard_precip package needs many more keyword arguments while the SPEI package makes more use of all the nice logic already available in SciPy and Pandas.\n", 286 | "\n", 287 | "The climate_indices package needs even more code.\n", 288 | "\n", 289 | "The SPEI R package also has a similar result but seems to vary a bit more. More research is needed to understand why that is the case. Most likely this is due to differences in fitting the gamma distribution." 
290 | ] 291 | }, 292 | { 293 | "attachments": {}, 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "## Compute Standardized Groundwater Index" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "import pastas as ps\n", 307 | "\n", 308 | "sgi = si.sgi(head, fit_freq=\"ME\")\n", 309 | "sgi_pastas = ps.stats.sgi(head)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "pd.concat([sgi, sgi_pastas], axis=1).rename(columns={0: \"SGI\", \"head\": \"Pastas\"})" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "f, ax = plt.subplot_mosaic(\n", 328 | " [[\"SGI\"], [\"DIFF\"]],\n", 329 | " figsize=(8, 4),\n", 330 | " sharex=True,\n", 331 | " height_ratios=[2, 1],\n", 332 | ")\n", 333 | "sgi.plot(ax=ax[\"SGI\"], grid=True, linestyle=\"-\", label=\"SGI\")\n", 334 | "sgi_pastas.plot(ax=ax[\"SGI\"], color=\"C1\", grid=True, linestyle=\"--\", label=\"pastas\")\n", 335 | "(ax[\"SGI\"].set_ylim(-3.5, 3.5),)\n", 336 | "(ax[\"SGI\"].set_title(\"Comparison\"),)\n", 337 | "(ax[\"SGI\"].set_ylabel(\"SGI\"),)\n", 338 | "ax[\"SGI\"].legend(ncol=3)\n", 339 | "\n", 340 | "(sgi - sgi_pastas).plot(ax=ax[\"DIFF\"], color=\"C3\", label=\"SGI - pastas\")\n", 341 | "\n", 342 | "ax[\"DIFF\"].legend(ncol=2)\n", 343 | "ax[\"DIFF\"].set_title(\"SPEI minus other package\")\n", 344 | "ax[\"DIFF\"].set_ylabel(\"Difference\")\n", 345 | "ax[\"DIFF\"].set_xlim(\"1996\", \"1999\")\n", 346 | "f.tight_layout()" 347 | ] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "SPEI", 353 | "language": "python", 354 | "name": "python3" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | 
"file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.12.3" 367 | }, 368 | "orig_nbformat": 4 369 | }, 370 | "nbformat": 4, 371 | "nbformat_minor": 2 372 | } 373 | -------------------------------------------------------------------------------- /docs/examples/example07_knmi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "87fd561f", 6 | "metadata": {}, 7 | "source": [ 8 | "# KNMI Drought Indices\n", 9 | "\n", 10 | "J.P.M. Witte, G.A.P.H. van den Eertwegh and P.J.J.F. Torfs (2025) - [Absolute Meteorological Drought Indices Validated Against Irrigation Amounts](https://doi.org/10.3390/w17071056)." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "5f4ec1ab", 16 | "metadata": {}, 17 | "source": [ 18 | "## Load packages" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "1a439ca7", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import matplotlib as mpl\n", 29 | "import pandas as pd\n", 30 | "\n", 31 | "from spei import knmi\n", 32 | "from spei.plot import deficit_knmi\n", 33 | "from spei.utils import group_yearly_df" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "ae00001a", 39 | "metadata": {}, 40 | "source": [ 41 | "## Get data\n", 42 | "Data from De Bilt (260) or P13 stations from 1960 till today" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "fba32ea8", 48 | "metadata": {}, 49 | "source": [ 50 | "### Most recent De Bilt data" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "131815ed", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# import hydropandas as hpd\n", 61 | "# prec = hpd.PrecipitationObs.from_knmi(\n", 62 | "# meteo_var=\"RH\",\n", 63 | "# stn=260,\n", 64 | "# 
startdate=pd.Timestamp(\"1960-01-01\"),\n", 65 | "# enddate=pd.Timestamp.today(),\n", 66 | "# )[\"RH\"].multiply(1e3)\n", 67 | "# prec.index = prec.index.normalize()\n", 68 | "# evap = hpd.EvaporationObs.from_knmi(\n", 69 | "# meteo_var=\"EV24\",\n", 70 | "# stn=260,\n", 71 | "# startdate=pd.Timestamp(\"1960-01-01\"),\n", 72 | "# enddate=pd.Timestamp.today(),\n", 73 | "# )[\"EV24\"].multiply(1e3)\n", 74 | "# evap.index = evap.index.normalize()\n", 75 | "# temp = hpd.MeteoObs.from_knmi(\n", 76 | "# meteo_var=\"TG\",\n", 77 | "# stn=260,\n", 78 | "# startdate=pd.Timestamp(\"1960-01-01\"),\n", 79 | "# enddate=pd.Timestamp.today(),\n", 80 | "# )[\"TG\"]\n", 81 | "# temp.index = temp.index.normalize()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "id": "72136d23", 87 | "metadata": {}, 88 | "source": [ 89 | "### KNMI stations data\n", 90 | "https://www.knmi.nl/kennis-en-datacentrum/achtergrond/achtergrondinformatie-klimaatdashboard\n", 91 | "\n", 92 | "De neerslagtekort klimaatdashboardgrafiek is alleen voor het landelijk gemiddelde beschikbaar, gebaseerd op:\n", 93 | "\n", 94 | "Voor 1906 t/m 2000: officiële reeks voor Nederland voor neerslagtekort: Dagelijks neerslagtekort NL (1 apr t/m 30 sep) op basis van Makkink verdamping De Bilt geschat uit zonneschijnduur minus 13 neerslagstations (P13) (c) KNMI, mei 2020, Jules Beersma: Climate Explorer \n", 95 | "\n", 96 | "Vanaf 2001: verdamping (gemiddelde van 13 automatische weerstations nabij 13 neerslagstations) minus de gemiddelde neerslag van 13 neerslagstations:\n", 97 | "De P13: het gemiddelde van de hoeveelheid neerslag op de volgende 13 KNMI-neerslagstations: De Bilt (550_N), De Kooy (25_N), Groningen (139_N), Heerde (328_N), Hoofddorp (438_N), Hoorn (222_N), Kerkwerve (737_N), Oudenbosch (828_N), Roermond (961_N), Ter Apel (144_N), West-Terschelling (11_N), Westdorpe (770_N) en Winterswijk (666_N).\n", 98 | "\n", 99 | "Het gemiddelde van de hoeveelheid verdamping (EV24) op 13 automatische weerstations 
van het KNMI nabij de 13 neerslagstations: De Bilt (260_H), De Kooy (235_H), Eelde (280_H), Heino (278_H), Schiphol (240_H), Berkhout (249_H), Vlissingen (310_H), Eindhoven (370_H), Ell (377_H), Nieuw Beerta (286_H), Hoorn Terschelling (251_H), Westdorpe (319_H) en Hupsel (283_H).\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "7401ab30", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# import hydropandas as hpd\n", 110 | "\n", 111 | "# P13 stations\n", 112 | "# p_stns = [\n", 113 | "# 550, # De Bilt\n", 114 | "# 25, # De Kooy\n", 115 | "# 139, # Groningen\n", 116 | "# 328, # Heerde\n", 117 | "# 438, # Hoofddorp\n", 118 | "# 222, # Hoorn\n", 119 | "# 737, # Kerkwerve\n", 120 | "# 828, # Oudenbosch\n", 121 | "# 961, # Roermond\n", 122 | "# 144, # Ter Apel\n", 123 | "# 11, # West-Terschelling\n", 124 | "# 770, # Westdorpe\n", 125 | "# 666, # Winterswijk\n", 126 | "# ]\n", 127 | "\n", 128 | "# # EV24-13 stations\n", 129 | "# ev_stns = [\n", 130 | "# 260, # De Bilt\n", 131 | "# 235, # De Kooy\n", 132 | "# 280, # Eelde\n", 133 | "# 278, # Heino\n", 134 | "# 240, # Schiphol\n", 135 | "# 249, # Berkhout\n", 136 | "# 310, # Vlissingen\n", 137 | "# 370, # Eindhoven\n", 138 | "# 377, # Ell\n", 139 | "# 286, # Nieuw Beerta\n", 140 | "# 251, # Hoorn Terschelling\n", 141 | "# 319, # Westdorpe\n", 142 | "# 283, # Hupsel\n", 143 | "# ]\n", 144 | "# oc_p = hpd.ObsCollection.from_knmi(\n", 145 | "# stns=p_stns,\n", 146 | "# starts=pd.Timestamp(\"1960-01-01\"),\n", 147 | "# ends=pd.Timestamp.today(),\n", 148 | "# meteo_vars=[\"RD\"],\n", 149 | "# )\n", 150 | "# oc_ev = hpd.ObsCollection.from_knmi(\n", 151 | "# stns=ev_stns,\n", 152 | "# starts=pd.Timestamp(\"1960-01-01\"),\n", 153 | "# ends=pd.Timestamp.today(),\n", 154 | "# meteo_vars=[\"EV24\", \"TG\"],\n", 155 | "# )\n", 156 | "\n", 157 | "# prec = pd.concat([o[\"RD\"] for o in oc_p[\"obs\"]], axis=1).mean(axis=1).multiply(1e3).rename(\"prec\")\n", 158 | "# 
prec.index = prec.index.normalize()\n", 159 | "# ev_data = pd.DataFrame({mv: pd.concat([o[mv] for o in gr[\"obs\"]], axis=1).mean(axis=1) for mv, gr in oc_ev.groupby(\"meteo_var\")})\n", 160 | "# ev_data.index = ev_data.index.normalize()\n", 161 | "# ev_data = ev_data.loc[prec.index] # align indices because prec stations less frequently reported\n", 162 | "# evap = ev_data[\"EV24\"].multiply(1e3).rename(\"evap\")\n", 163 | "# temp = ev_data[\"TG\"].rename(\"temp\")" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "id": "24a7519c", 169 | "metadata": {}, 170 | "source": [ 171 | "### From a CSV file" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "528daaa6", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "df = pd.read_csv(\"data/DEBILT.csv\", index_col=0, parse_dates=True)\n", 182 | "prec = df[\"Prec [m/d] 260_DEBILT\"].multiply(1e3).rename(\"prec\")\n", 183 | "evap = df[\"Evap [m/d] 260_DEBILT\"].multiply(1e3).rename(\"evap\")\n", 184 | "temp = df[\"Temp [C] 260_DEBILT\"].rename(\"temp\")" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "b268e457", 190 | "metadata": {}, 191 | "source": [ 192 | "## Calculate precipitation deficit" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "id": "433a3701", 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "deficit = evap - prec\n", 203 | "## deficit period\n", 204 | "startdate = pd.Timestamp(\"2000-04-01\")\n", 205 | "enddate = pd.Timestamp(\"2000-09-30\")\n", 206 | "# calculate cumulative deficit\n", 207 | "cumdf = knmi.get_cumulative_deficit(\n", 208 | " deficit=deficit,\n", 209 | " startdate=startdate,\n", 210 | " enddate=enddate,\n", 211 | " allow_below_zero=False,\n", 212 | ")\n", 213 | "# plot deficit\n", 214 | "ax = cumdf.plot(figsize=(7.0, 6.0), cmap=\"cividis\")\n", 215 | "ax.legend(ncol=5, loc=(0, 1))\n", 216 | 
"ax.xaxis.set_major_locator(mpl.dates.MonthLocator())\n", 217 | "ax.xaxis.set_major_formatter(mpl.dates.DateFormatter(\"%B\"))\n", 218 | "ax.xaxis.set_ticks([], minor=True)\n", 219 | "ax.set_ylim(0.0)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "b9a26454", 225 | "metadata": {}, 226 | "source": [ 227 | "## Precipitation deficit indices" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "id": "a0a18964", 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "doct1 = knmi.deficit_oct1(deficit)\n", 238 | "doct1.to_frame().transpose()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "ca3b85aa", 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "dmax = knmi.deficit_max(deficit)\n", 249 | "dmax.to_frame().transpose()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "id": "88fa3017", 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "diapr1 = knmi.deficit_apr1(deficit)\n", 260 | "diapr1.to_frame().transpose()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "id": "7fa91d05", 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "digdd = knmi.deficit_gdd(deficit, temp, threshold=440.0)\n", 271 | "digdd.to_frame().transpose()" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "id": "66972df3", 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "diwet = knmi.deficit_wet(deficit)\n", 282 | "diwet.to_frame().transpose()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "07085e76", 288 | "metadata": {}, 289 | "source": [ 290 | "## Compare to original KNMI data\n", 291 | "\n", 292 | "File obtained from https://climexp.knmi.nl/getindices.cgi?NPERYEAR=366&STATION=precipitationdeficit&TYPE=i&WMO=KNMIData/nt_nl&id=someone@somewhere" 293 | ] 294 | }, 295 | { 296 | 
"cell_type": "code", 297 | "execution_count": null, 298 | "id": "38d6f9af", 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "knmi_cumdf = group_yearly_df(\n", 303 | "    pd.read_csv(\n", 304 | "        \"data/neerslagtekort.txt\",\n", 305 | "        skiprows=11,\n", 306 | "        sep=\"\\t\",\n", 307 | "        header=None,\n", 308 | "        index_col=0,\n", 309 | "        parse_dates=True,\n", 310 | "        date_format=\"%Y%m%d\",\n", 311 | "    )\n", 312 | "    .dropna(how=\"all\", axis=1)\n", 313 | "    .squeeze()\n", 314 | "    .rename(\"KNMI\")\n", 315 | ")\n", 316 | "knmi_cumdf.index.name = \"\"\n", 317 | "ax = knmi_cumdf.plot(figsize=(7.0, 6.0), cmap=\"viridis\")\n", 318 | "ax.legend(ncol=5, loc=(0, 1))\n", 319 | "ax.xaxis.set_major_locator(mpl.dates.MonthLocator())\n", 320 | "ax.xaxis.set_major_formatter(mpl.dates.DateFormatter(\"%B\"))\n", 321 | "ax.xaxis.set_ticks([], minor=True)\n", 322 | "ax.set_ylim(0.0)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "a5d7d7da", 328 | "metadata": {}, 329 | "source": [ 330 | "### KNMI plot\n", 331 | "\n", 332 | "From KNMI website the drought deficit is plotted as below:" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "id": "a798d38d", 338 | "metadata": {}, 339 | "source": [ 340 | "![neerslagtekort](https://cdn.knmi.nl/knmi/map/page/klimatologie/grafieken/neerslagtekort/neerslagtekort.png)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "id": "22245717", 346 | "metadata": {}, 347 | "source": [ 348 | "#### With KNMI deficit data\n", 349 | "This plot can be reproduced (almost perfectly) as seen from the figure below.\n", 350 | "\n", 351 | "The calculation uses the average precipitation from 13 reference stations in the Netherlands (the so-called P13/EV24-13 stations) and the reference evaporation calculated based on sunshine duration in De Bilt (until 2001) or the global radiation near the P13 stations (from 2001 onwards). For the median and 5% driest years a rolling window is applied. 
However, the size of this window is not documented anywhere." 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "id": "b06fa427", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "ax = deficit_knmi(knmi_cumdf, window=28)\n", 362 | "ax.set_title(\"KNMI computed preciptiation deficit\")" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "id": "e804afea", 368 | "metadata": {}, 369 | "source": [ 370 | "#### With own computed deficit (with downloaded knmi data)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "id": "eb4ea498", 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "ax = deficit_knmi(cumdf, window=0)\n", 381 | "ax.set_title(\"Downloaded measurements\")" 382 | ] 383 | } 384 | ], 385 | "metadata": { 386 | "kernelspec": { 387 | "display_name": "SPEI", 388 | "language": "python", 389 | "name": "python3" 390 | }, 391 | "language_info": { 392 | "codemirror_mode": { 393 | "name": "ipython", 394 | "version": 3 395 | }, 396 | "file_extension": ".py", 397 | "mimetype": "text/x-python", 398 | "name": "python", 399 | "nbconvert_exporter": "python", 400 | "pygments_lexer": "ipython3", 401 | "version": "3.13.1" 402 | } 403 | }, 404 | "nbformat": 4, 405 | "nbformat_minor": 5 406 | } 407 | -------------------------------------------------------------------------------- /docs/examples/example09_joss_paper.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Article for Journal of Open Source Software\n", 8 | "\n", 9 | "*Martin Vonk (2025)*\n", 10 | "\n", 11 | "This notebook replicates the results presented in the article submitted to the Journal of Open Source Software ([JOSS](https://joss.theoj.org/)). The article can be found here: Vonk, M. A. (2025). 
SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454)\n", 12 | "\n", 13 | "\n", 14 | "JOSS is a developer-friendly, open-access academic journal (ISSN 2475-9066) dedicated to research software packages and features a formal peer-review process. The pre-review and review of the SPEI package are publicly available in issues [openjournals/joss-reviews#8430](https://github.com/openjournals/joss-reviews/issues/8430) and [openjournals/joss-reviews#8454](https://github.com/openjournals/joss-reviews/issues/8454), respectively." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Setup" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# dependencies\n", 31 | "from typing import Literal\n", 32 | "\n", 33 | "import matplotlib as mpl\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "import numpy as np\n", 36 | "import pandas as pd\n", 37 | "import scipy.stats as sps\n", 38 | "from cycler import cycler\n", 39 | "from matplotlib import patheffects\n", 40 | "from scipy.stats._survival import EmpiricalDistributionFunction\n", 41 | "\n", 42 | "import spei as si\n", 43 | "\n", 44 | "# matplotlib settings\n", 45 | "plt.rcParams.update(\n", 46 | " {\n", 47 | " \"axes.prop_cycle\": cycler(\n", 48 | " color=[\n", 49 | " \"#3f90da\",\n", 50 | " \"#ffa90e\",\n", 51 | " \"#bd1f01\",\n", 52 | " \"#94a4a2\",\n", 53 | " \"#832db6\",\n", 54 | " \"#a96b59\",\n", 55 | " \"#e76300\",\n", 56 | " \"#b9ac70\",\n", 57 | " \"#717581\",\n", 58 | " \"#92dadd\",\n", 59 | " ]\n", 60 | " ),\n", 61 | " \"axes.titlesize\": 7.0,\n", 62 | " \"axes.labelsize\": 7.0,\n", 63 | " \"xtick.labelsize\": 6.0,\n", 64 | " \"ytick.labelsize\": 6.0,\n", 65 | " \"legend.fontsize\": 7.0,\n", 66 | " \"legend.framealpha\": 1.0,\n", 67 | " }\n", 68 | ")\n", 
69 | "\n", 70 | "\n", 71 | "# helper functions\n", 72 | "def axes_indicator(\n", 73 | " ax: plt.Axes,\n", 74 | " letter: str,\n", 75 | " x: float,\n", 76 | " y: float,\n", 77 | " ha: Literal[\"left\", \"right\"],\n", 78 | " va: Literal[\"top\", \"bottom\"],\n", 79 | "):\n", 80 | " \"\"\"Add an indicator to the axes.\"\"\"\n", 81 | " ax.annotate(\n", 82 | " f\"({letter})\",\n", 83 | " xy=(x, y),\n", 84 | " xycoords=\"axes fraction\",\n", 85 | " fontsize=mpl.rcParams[\"axes.titlesize\"],\n", 86 | " horizontalalignment=ha,\n", 87 | " verticalalignment=va,\n", 88 | " path_effects=[\n", 89 | " patheffects.Stroke(linewidth=1, foreground=\"white\"),\n", 90 | " patheffects.Normal(),\n", 91 | " ],\n", 92 | " )\n", 93 | "\n", 94 | "\n", 95 | "def plot_ecdf(\n", 96 | " ax: plt.Axes,\n", 97 | " data: pd.Series,\n", 98 | " ecdf: EmpiricalDistributionFunction,\n", 99 | " s: float,\n", 100 | " color: str,\n", 101 | " label: str,\n", 102 | " cdf: pd.Series | None = None,\n", 103 | " **kwargs,\n", 104 | ") -> None:\n", 105 | " data = data.drop_duplicates()\n", 106 | " ax.scatter(\n", 107 | " data,\n", 108 | " ecdf.probabilities,\n", 109 | " s=s,\n", 110 | " facecolor=color,\n", 111 | " label=label,\n", 112 | " **kwargs,\n", 113 | " )\n", 114 | " if cdf is not None:\n", 115 | " for idata, icdf, iecdf in zip(data, cdf, ecdf.probabilities):\n", 116 | " ax.plot(\n", 117 | " [idata, idata],\n", 118 | " [iecdf, icdf],\n", 119 | " color=color,\n", 120 | " linewidth=0.5,\n", 121 | " **kwargs,\n", 122 | " )\n", 123 | " return ecdf" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Data" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Load" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "df = pd.read_csv(\"data/CABAUW.csv\", index_col=0, parse_dates=True)\n", 147 | "prec = 
df[\"prec\"]\n", 148 | "evap = df[\"evap\"]\n", 149 | "surplusd = prec - evap\n", 150 | "surplus = surplusd.resample(\"MS\").sum()\n", 151 | "head = df[\"head\"]" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Plot" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "# highlight specific month\n", 168 | "month = 3\n", 169 | "ts = pd.Timestamp(\"2000-{:02d}-01\".format(month))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "fig, axd = plt.subplot_mosaic(\n", 179 | " [[\"meteo\"], [\"sp\"]], figsize=(7.0, 3.2), sharex=True, layout=\"constrained\"\n", 180 | ")\n", 181 | "\n", 182 | "axd[\"meteo\"].plot(prec.index, prec, linewidth=0.8, color=\"C0\")\n", 183 | "axd[\"meteo\"].plot(evap.index, evap, linewidth=0.8, color=\"C6\")\n", 184 | "axd[\"meteo\"].plot([], [], color=\"C0\", label=\"Precipitation\")\n", 185 | "axd[\"meteo\"].plot([], [], color=\"C6\", label=\"Potential Evaporation\")\n", 186 | "\n", 187 | "axd[\"meteo\"].legend(loc=(0, 1), ncol=2, frameon=False, columnspacing=1.0)\n", 188 | "axd[\"meteo\"].set_ylabel(\"Flux (mm/day)\")\n", 189 | "\n", 190 | "axd[\"meteo\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(10))\n", 191 | "axd[\"meteo\"].yaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))\n", 192 | "axd[\"meteo\"].set_ylim(bottom=0.0)\n", 193 | "axes_indicator(axd[\"meteo\"], letter=\"a\", x=0.005, y=0.97, ha=\"left\", va=\"top\")\n", 194 | "\n", 195 | "axd[\"sp\"].plot(\n", 196 | " surplus.index,\n", 197 | " surplus.values,\n", 198 | " color=\"C3\",\n", 199 | " linewidth=1.0,\n", 200 | " marker=\".\",\n", 201 | " markersize=2.0,\n", 202 | " label=\"Monthly Surplus (Precipitation minus Evaporation)\",\n", 203 | ")\n", 204 | "mid = surplus.index.month == ts.month\n", 205 | 
"axd[\"sp\"].scatter(\n", 206 | " surplus.index[mid], # + pd.Timedelta(days=15),\n", 207 | " surplus.values[mid],\n", 208 | " color=\"C2\",\n", 209 | " s=5.0,\n", 210 | " zorder=2,\n", 211 | " label=f\"Data points {ts.strftime('%B')}\",\n", 212 | ")\n", 213 | "axd[\"sp\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(50))\n", 214 | "axd[\"sp\"].yaxis.set_minor_locator(mpl.ticker.MultipleLocator(25))\n", 215 | "axd[\"sp\"].xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", 216 | "axd[\"sp\"].xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", 217 | "axd[\"sp\"].set_xlim(surplus.index[0], surplus.index[-1])\n", 218 | "axd[\"sp\"].set_ylabel(\"Precipitation\\nsurplus (mm)\")\n", 219 | "axd[\"sp\"].legend(loc=(0, 1), frameon=False, ncol=2)\n", 220 | "axes_indicator(axd[\"sp\"], letter=\"b\", x=0.005, y=0.97, ha=\"left\", va=\"top\")\n", 221 | "\n", 222 | "axd[\"sp\"].set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", 223 | "\n", 224 | "# fig.savefig(\"../../paper/figures/monthly_precipitation_surplus.png\", dpi=300, bbox_inches=\"tight\")" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "## Standardized Index Procedure" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "### Fit Distribution" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "dist = sps.fisk\n", 248 | "sispei = si.SI(\n", 249 | " series=surplus,\n", 250 | " dist=dist,\n", 251 | " timescale=1,\n", 252 | " # fit_freq=\"MS\",\n", 253 | ")\n", 254 | "sispei.fit_distribution()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "### Equiprobability Transform" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "fit_dist = 
sispei._dist_dict[ts]\n", 271 | "data = fit_dist.data.sort_values()\n", 272 | "cdf = fit_dist.cdf().loc[data.index]\n", 273 | "ecdf = sps.ecdf(data).cdf\n", 274 | "\n", 275 | "zscores = np.arange(-3.0, 3.1, 0.1)\n", 276 | "norm_cdf = sps.norm.cdf(zscores, loc=0.0, scale=1.0)\n", 277 | "norm_cdf_transformed = sps.norm.ppf(cdf.values, loc=0.0, scale=1.0)\n", 278 | "\n", 279 | "fig, axd = plt.subplot_mosaic(\n", 280 | " [[\"cdf\", \"norm\"]],\n", 281 | " figsize=(7.0, 3),\n", 282 | " width_ratios=[1.5, 1.0],\n", 283 | " sharey=True,\n", 284 | " layout=\"tight\",\n", 285 | ")\n", 286 | "plot_ecdf(\n", 287 | " ax=axd[\"cdf\"],\n", 288 | " data=data,\n", 289 | " cdf=cdf,\n", 290 | " ecdf=ecdf,\n", 291 | " s=10.0,\n", 292 | " color=\"C2\",\n", 293 | " label=f\"Data points {ts.strftime('%B')}\",\n", 294 | " zorder=3,\n", 295 | ")\n", 296 | "\n", 297 | "bin = 5.0\n", 298 | "bins = np.arange(data.min() // bin * bin, data.max() + bin, bin)\n", 299 | "axd[\"cdf\"].plot(\n", 300 | " bins,\n", 301 | " fit_dist.dist.cdf(bins, *fit_dist.pars, loc=fit_dist.loc, scale=fit_dist.scale),\n", 302 | " label=f\"Fitted {dist.name} distribution\",\n", 303 | " color=\"C0\",\n", 304 | ")\n", 305 | "\n", 306 | "axd[\"cdf\"].legend(loc=\"upper left\")\n", 307 | "axd[\"cdf\"].set_xlim(np.min(bins), np.max(bins))\n", 308 | "axd[\"cdf\"].xaxis.set_minor_locator(mpl.ticker.MultipleLocator(bin))\n", 309 | "axd[\"cdf\"].xaxis.set_major_locator(mpl.ticker.MultipleLocator(bin * 2))\n", 310 | "axd[\"cdf\"].set_ylim(0.0, 1.0)\n", 311 | "axd[\"cdf\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.1))\n", 312 | "axd[\"cdf\"].yaxis.set_major_formatter(mpl.ticker.PercentFormatter(1.0))\n", 313 | "axd[\"cdf\"].set_xlabel(\"Precipitation surplus (mm)\")\n", 314 | "axd[\"cdf\"].set_ylabel(\"Cumulative probability\")\n", 315 | "axes_indicator(axd[\"cdf\"], \"a\", 0.99, 0.02, ha=\"right\", va=\"bottom\")\n", 316 | "\n", 317 | "axd[\"norm\"].plot(\n", 318 | " zscores, norm_cdf, label=\"Standardized\\nnormal 
distribution\", color=\"C4\", zorder=3\n", 319 | ")\n", 320 | "axd[\"norm\"].scatter(\n", 321 | " norm_cdf_transformed,\n", 322 | " cdf.values,\n", 323 | " s=10.0,\n", 324 | " label=f\"Projected points\\n{dist.name} distribution\",\n", 325 | " color=\"C0\",\n", 326 | " zorder=2,\n", 327 | ")\n", 328 | "axd[\"norm\"].legend(loc=\"upper left\")\n", 329 | "axd[\"norm\"].set_xlim(np.min(zscores), np.max(zscores))\n", 330 | "axd[\"norm\"].set_xlabel(\"Z-score / SPEI value\")\n", 331 | "\n", 332 | "# visualize specific data point\n", 333 | "idx = data.index[20]\n", 334 | "cdf_idx = cdf.at[idx]\n", 335 | "ppf_idx = sps.norm.ppf(cdf_idx)\n", 336 | "print(\n", 337 | " f\"Data index: {idx.strftime('%Y')}, Data value: {data.loc[idx]:0.2f} CDF: {cdf_idx:0.1%}, PPF: {ppf_idx:0.4f}\"\n", 338 | ")\n", 339 | "axd[\"cdf\"].plot(\n", 340 | " [data.loc[idx], data.loc[idx], np.max(data)],\n", 341 | " [0.0, cdf_idx, cdf_idx],\n", 342 | " color=\"k\",\n", 343 | " linestyle=\"--\",\n", 344 | " linewidth=1.0,\n", 345 | " zorder=0,\n", 346 | ")\n", 347 | "axd[\"norm\"].plot(\n", 348 | " [np.min(zscores), ppf_idx, ppf_idx],\n", 349 | " [\n", 350 | " cdf_idx,\n", 351 | " cdf_idx,\n", 352 | " 0.0,\n", 353 | " ],\n", 354 | " color=\"k\",\n", 355 | " linestyle=\"--\",\n", 356 | " linewidth=1.0,\n", 357 | " zorder=0,\n", 358 | ")\n", 359 | "axes_indicator(axd[\"norm\"], \"b\", 0.99, 0.02, ha=\"right\", va=\"bottom\")\n", 360 | "\n", 361 | "# fig.savefig(\"../../paper/figures/surplus_fit_cdf.png\", dpi=300, bbox_inches=\"tight\")" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "### Results\n", 369 | "\n", 370 | "#### Time Series" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "spei1 = sispei.norm_ppf()\n", 380 | "\n", 381 | "ax = si.plot.si(spei1, figsize=(7.0, 2.0), layout=\"tight\")\n", 382 | "# 
ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", 383 | "ax.xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", 384 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", 385 | "ax.legend(labels=[\"SPEI-1\"], loc=(0, 1), frameon=False)\n", 386 | "ax.set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", 387 | "ax.set_ylabel(\"Z-score\")\n", 388 | "\n", 389 | "# ax.get_figure().savefig(\"../../paper/figures/spei1.png\", dpi=300, bbox_inches=\"tight\")" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "#### Heatmap" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": {}, 403 | "outputs": [], 404 | "source": [ 405 | "speis = [\n", 406 | " spei1.rename(\"1\"),\n", 407 | " si.spei(surplus, timescale=3).rename(\"3\"),\n", 408 | " si.spei(surplus, timescale=6).rename(\"6\"),\n", 409 | " si.spei(surplus, timescale=9).rename(\"9\"),\n", 410 | " si.spei(surplus, timescale=12).rename(\"12\"),\n", 411 | " si.spei(surplus, timescale=24).rename(\"24\"),\n", 412 | "]\n", 413 | "f, ax = plt.subplots(figsize=(7.0, 2.0))\n", 414 | "si.plot.heatmap(speis, cmap=\"vik_r\", vmin=-3, vmax=3, add_category=False, ax=ax)\n", 415 | "ax.set_ylabel(\"Time scale (months)\")\n", 416 | "f.axes[-1].set_ylabel(\"Z-score\")\n", 417 | "ax.xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", 418 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", 419 | "ax.set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", 420 | "\n", 421 | "# ax.get_figure().savefig(\"../../paper/figures/spei_heatmap.png\", dpi=300, bbox_inches=\"tight\")" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "#### Threshold" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "perc = sps.norm.cdf(-1.0) # same as zscore -1.0\n", 438 | "thres = 
sispei.ppf(perc).rename(f\"Threshold {perc:0.0%} percentile\")\n", 439 | "fig, ax = plt.subplots(figsize=(7.0, 2.0), layout=\"tight\")\n", 440 | "ax = si.plot.threshold(\n", 441 | " surplus,\n", 442 | " thres,\n", 443 | " ax=ax,\n", 444 | " **dict(\n", 445 | " color=\"C3\",\n", 446 | " linewidth=1.0,\n", 447 | " marker=\".\",\n", 448 | " markersize=2.0,\n", 449 | " label=\"Monthly Surplus (Precipitation minus Evaporation)\",\n", 450 | " ),\n", 451 | ")\n", 452 | "ax.set_xlim(pd.Timestamp(\"2003\"), pd.Timestamp(\"2019\"))\n", 453 | "ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(50))\n", 454 | "ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(25))\n", 455 | "ax.xaxis.set_major_locator(mpl.dates.YearLocator(1))\n", 456 | "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator([4, 7, 10]))\n", 457 | "ax.set_ylabel(\"Precipitation\\nsurplus (mm)\")\n", 458 | "ax.legend(ncol=3, loc=(0, 1), frameon=False)\n", 459 | "\n", 460 | "# fig.savefig(\"../../paper/figures/threshold.png\", dpi=300, bbox_inches=\"tight\")" 461 | ] 462 | } 463 | ], 464 | "metadata": { 465 | "kernelspec": { 466 | "display_name": "SPEI", 467 | "language": "python", 468 | "name": "python3" 469 | }, 470 | "language_info": { 471 | "codemirror_mode": { 472 | "name": "ipython", 473 | "version": 3 474 | }, 475 | "file_extension": ".py", 476 | "mimetype": "text/x-python", 477 | "name": "python", 478 | "nbconvert_exporter": "python", 479 | "pygments_lexer": "ipython3", 480 | "version": "3.12.3" 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 2 485 | } 486 | -------------------------------------------------------------------------------- /src/spei/si.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dataclasses import dataclass, field 3 | from typing import Literal 4 | 5 | from numpy import ceil, interp, linspace, nan 6 | from pandas import DataFrame, Grouper, Series, Timedelta, Timestamp 7 | from scipy.stats import beta, 
fisk, gamma, genextreme, norm

from ._typing import ContinuousDist
from .dist import Dist
from .utils import (
    daily_window_group_yearly_df,
    get_data_series,
    group_yearly_df,
    infer_frequency,
    validate_series,
)


def sgi(
    series: Series,
    timescale: int = 0,
    fit_freq: str | None = None,
) -> Series:
    """Method to compute the Standardized Groundwater Index. Uses
    the normal scores transform to calculate the cumulative density function.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the groundwater levels. Time series index
        should be a pandas DatetimeIndex.
    timescale : int, optional, default=0
        Size of the moving window over which the series is averaged (the
        mean is used as aggregation function). If zero, no aggregation is
        performed over the time series.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.

    Returns
    -------
    pandas.Series

    References
    ----------
    Bloomfield, J. P. and Marchant, B. P.: Analysis of
    groundwater drought building on the standardised precipitation index
    approach. Hydrol. Earth Syst. Sci., 17, 4769-4787, 2013.
    """

    # The SGI is non-parametric: a distribution instance is still required by
    # SI, but with normal_scores_transform=True it is never fitted.
    mock_dist = norm
    sgi = SI(
        series=series,
        dist=mock_dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=0,
        prob_zero=False,
        normal_scores_transform=True,
        agg_func="mean",
    )
    return sgi.norm_ppf()


def spi(
    series: Series,
    dist: ContinuousDist = gamma,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = True,
) -> Series:
    """Method to compute the Standardized Precipitation Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the precipitation. Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SPI generally the Gamma probability density
        function is recommended. Other appropriate choices could be the
        lognormal, log-logistic (fisk) or PearsonIII distribution.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=True
        Option to correct the distribution if x=0 is not in probability density
        function. E.g. the case with the Gamma distribution. If True, the
        probability of zero values in the series is calculated by the
        occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Lloyd-Hughes, B. and Saunders, M.A.: A drought climatology for Europe.
    International Journal of Climatology, 22, 1571-1592, 2002.
    """

    spi = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="sum",
    )
    spi.fit_distribution()
    return spi.norm_ppf()


def spei(
    series: Series,
    dist: ContinuousDist = fisk,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = False,
) -> Series:
    """Method to compute the Standardized Precipitation Evaporation Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the precipitation surplus (precipitation
        minus potential evaporation). Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SPEI generally the log-logistic (fisk) probability
        density function is recommended. Other appropriate choices could be
        the lognormal or PearsonIII distribution.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=False
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Vicente-Serrano S.M., Beguería S., López-Moreno J.I.:
    A Multi-scalar drought index sensitive to global warming:
    The Standardized Precipitation Evapotranspiration Index.
    Journal of Climate, 23, 1696-1718, 2010.
    """

    spei = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="sum",
    )
    spei.fit_distribution()
    return spei.norm_ppf()


def ssfi(
    series: Series,
    dist: ContinuousDist = genextreme,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = True,
) -> Series:
    """Method to compute the Standardized StreamFlow Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the streamflow. Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SSFI generally the gamma probability density function
        is recommended. Other choices could be the normal, lognormal,
        pearsonIII, GEV or Gen-Logistic distribution or any distribution deemed
        appropriate.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=True
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Vicente-Serrano, S. M., J. I. López-Moreno, S. Beguería, J. Lorenzo-Lacruz,
    C. Azorin-Molina, and E. Morán-Tejeda. Accurate Computation of a Streamflow
    Drought Index. Journal of Hydrologic Engineering 17 (2): 318-332. 2012.
    """
    ssfi = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="mean",
    )
    ssfi.fit_distribution()
    return ssfi.norm_ppf()


def ssmi(
    series: Series,
    dist: ContinuousDist = beta,
    timescale: int = 0,
    fit_freq: str | None = None,
    fit_window: int = 0,
    prob_zero: bool = True,
) -> Series:
    """Method to compute the Standardized Soil Moisture Index.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series of the soil moisture. Time series index
        should be a pandas DatetimeIndex.
    dist: scipy.stats.rv_continuous
        Can be any continuous distribution from the scipy.stats library.
        However, for the SSMI generally the beta probability density function
        is recommended. Other choices could be the normal or ECDF distribution
        or any distribution deemed appropriate.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=True
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.

    Returns
    -------
    pandas.Series

    References
    ----------
    Carrão, H., Russo, S., Sepulcre-Canto, G., Barbosa, P.: An empirical standardized
    soil moisture index for agricultural drought assessment from remotely sensed data.
    International Journal of Applied Earth Observation and Geoinformation, 48, 2016.
    """

    ssmi = SI(
        series=series,
        dist=dist,
        timescale=timescale,
        fit_freq=fit_freq,
        fit_window=fit_window,
        prob_zero=prob_zero,
        normal_scores_transform=False,
        agg_func="mean",
    )
    ssmi.fit_distribution()
    return ssmi.norm_ppf()


@dataclass
class SI:
    """
    Standardized Index Class.

    Parameters
    ----------
    series : Series
        The input time series data.
    dist : ContinuousDist
        The SciPy continuous distribution associated with the data.
    timescale : int, optional, default=0
        Size of the moving window over which the series is summed. If zero, no
        summation is performed over the time series. If the time series
        frequency is daily, then one would provide timescale=30 for SI1,
        timescale=90 for SI3, timescale=180 for SI6 etc.
    fit_freq : str, optional, default=None
        Frequency for fitting the distribution. Default is None in which case
        the frequency of the series is inferred. If this fails a monthly
        frequency is used.
    fit_window : int, optional, default=0
        Window size for fitting data in fit_freq frequency's unit. Default is
        zero in which case only data within the fit_freq is considered. If
        larger than zero, data within the window is used to fit the
        distribution for the series. fit_window must be an odd number of at
        least 3 when used.
    prob_zero : bool, default=False
        Flag indicating whether the probability of zero values in the series is
        calculated by the occurrence.
    normal_scores_transform : bool, default=False
        Flag to use the normal scores transformation for calculating the
        cumulative density function.
    agg_func: Literal['sum', 'mean'], default='sum'
        String of the function to use for aggregating the time series if the
        timescale is larger than 0. Can either be 'sum' or 'mean'.

    Attributes
    ----------
    _grouped_year : DataFrame
        Dataframe with all data grouped in a one-year (2000) DataFrame with the
        original years as columns
    _dist_dict : Dict[Timestamp, Dist]
        Dictionary of distributions used to fit the data, keyed by the
        (dummy-year 2000) date of each fit group.
    """

    series: Series = field(repr=False)
    dist: ContinuousDist
    timescale: int = 0
    fit_freq: str | None = field(default=None)
    fit_window: int = field(default=0)
    prob_zero: bool = field(default=False)
    normal_scores_transform: bool = field(default=False)
    agg_func: Literal["sum", "mean"] = "sum"
    # Populated in __post_init__ / fit_distribution; excluded from init/repr.
    _grouped_year: DataFrame = field(init=False, repr=False, compare=False)
    _dist_dict: dict[Timestamp, Dist] = field(
        default_factory=dict, init=False, repr=False, compare=False
    )

    def __post_init__(self) -> None:
        """
        Post initializes the SI class and performs necessary data
        preprocessing and validation.
        """
        self.series = validate_series(self.series)

        # Aggregate over the moving window (sum or mean per agg_func); only
        # full windows are kept (min_periods equals the window size).
        if self.timescale > 0:
            self.series = (
                self.series.rolling(self.timescale, min_periods=self.timescale)
                .agg(self.agg_func)
                .dropna()
                .copy()
            )

        if self.fit_freq is None:
            self.fit_freq = infer_frequency(self.series.index)

        self._grouped_year = group_yearly_df(series=self.series)

        # Coerce an invalid fit_window to the nearest valid value (odd, >= 3)
        # rather than raising, but log an error so the user is informed.
        if self.fit_window > 0:
            if self.fit_window < 3:
                logging.error(
                    "Window should be larger than 2. Setting the window value to 3."
                )
                self.fit_window = 3  # make sure window is at least three
            elif self.fit_window % 2 == 0:
                logging.error(
                    "Window should be odd. Setting the window value to"
                    f"{self.fit_window + 1}"
                )
                self.fit_window += 1  # make sure window is odd

    def fit_distribution(self) -> None:
        """
        Fit distribution on the time series per fit_frequency and/or fit_window
        """

        if self.normal_scores_transform:
            logging.info("Using normal-scores-transform. No distribution is fitted.")

        elif self.fit_window > 0:
            # Window-based fitting only supports daily/weekly frequencies.
            if self.fit_freq not in (
                "d",
                "w",
                "D",
                "W",
            ):  # TODO: ideally 14D should also work.
                raise ValueError(
                    "Frequency fit_freq must be 'D' or 'W', not "
                    f"'{self.fit_freq}', if a fit_window is provided."
                )

            logging.info("Using rolling window method")
            window = self.fit_window
            period = int(ceil(window / 2))
            # A weekly window is converted to an equivalent daily window.
            if self.fit_freq in ("W", "w"):
                period = Timedelta(value=period, unit="W").days
                window = period * 2 + 1

            dfval_window = daily_window_group_yearly_df(
                dfval=self._grouped_year, period=period
            )
            # For each full rolling window, fit the distribution for the
            # window's center date using all data within the window.
            for dfval_rwindow in dfval_window.rolling(
                window=window, min_periods=window, closed="right"
            ):
                if len(dfval_rwindow) < window:
                    continue  # min_periods ignored by Rolling.__iter__
                date = dfval_rwindow.index[period]
                data = get_data_series(dfval_rwindow.loc[[date]])
                data_window = get_data_series(dfval_rwindow)
                fd = Dist(
                    data=data,
                    dist=self.dist,
                    prob_zero=self.prob_zero,
                    data_window=data_window,
                )
                self._dist_dict[date] = fd
        else:
            logging.info("Using groupby fit by frequency method")
            # One distribution per fit_freq group (e.g. per calendar month).
            for date, grval in self._grouped_year.groupby(
                Grouper(freq=str(self.fit_freq))
            ):
                data = get_data_series(grval)
                fd = Dist(
                    data=data,
                    dist=self.dist,
                    prob_zero=self.prob_zero,
                    data_window=None,
                )
                self._dist_dict[date] = fd  # type: ignore

    def cdf(self) -> Series:
        """Compute the cumulative density function"""
        if self.normal_scores_transform:
            cdf = self.cdf_nsf()
        else:
            # Combine the per-group fitted CDFs back into one series.
            cdf = Series(nan, index=self.series.index, dtype=float)
            for k in self._dist_dict:
                cdf_k = self._dist_dict[k].cdf()
                cdf.loc[cdf_k.index] = cdf_k.values

        return cdf

    def pdf(self) -> Series:
        """Compute the probability density function"""
        if self.normal_scores_transform:
            # NOTE(review): with the normal scores transform the density is
            # approximated by differencing the empirical CDF.
            pdf = self.cdf().diff()
        else:
            pdf = Series(nan, index=self.series.index, dtype=float)
            for k in self._dist_dict:
                pdf_k = self._dist_dict[k].pdf()
                pdf.loc[pdf_k.index] = pdf_k.values
        return pdf

    def cdf_nsf(self) -> Series:
        """
        Compute the cumulative density function using the Normal Scores
        Transform

        Returns
        -------
        Series
        """
        logging.info("Using the normal scores transform")
        cdf = Series(nan, index=self.series.index, dtype=float)
        for _, grval in self._grouped_year.groupby(Grouper(freq=str(self.fit_freq))):
            data = get_data_series(grval).sort_values()
            n = len(data)
            # Evenly spaced plotting positions in (0, 1): (2i - 1) / (2n),
            # assigned in rank order of the data.
            cdf.loc[data.index] = linspace(1 / (2 * n), 1 - 1 / (2 * n), n)
        return cdf

    def ppf(self, q: float) -> Series:
        """
        Method to calculate the percentile point function
        (inverse of cdf — percentiles) of a fitted
        distribution.

        Parameters
        ----------
        q : float
            The quantile value (between 0 and 1) for which to calculate the
            percentile point function.

        Returns
        -------
        Series
        """
        ppf = Series(nan, index=self.series.index, dtype=float)
        if self.normal_scores_transform:
            # Invert the empirical CDF per fit group by linear interpolation.
            cdf = self.cdf_nsf()
            for _, grval in self._grouped_year.groupby(
                Grouper(freq=str(self.fit_freq))
            ):
                data = get_data_series(grval).sort_values()
                cdf_i = cdf.loc[data.index]
                ppf.loc[data.index] = interp(
                    x=q,
                    xp=cdf_i.values.astype(float),
                    fp=data.values.astype(float),
                )
        else:
            for k in self._dist_dict:
                ppf_k = self._dist_dict[k].ppf(q=q)
                ppf.loc[ppf_k.index] = ppf_k.values
        return ppf

    def norm_ppf(self) -> Series:
        """
        Method to calculate probability point function of the standard normal
        distribution based on a cumulative density function of a fitted
        distribution. This yields the standardized index (Z-score).

        Returns
        -------
        Series
        """

        cdf = self.cdf()
        ppf = Series(
            norm.ppf(cdf.values, loc=0, scale=1), index=self.series.index, dtype=float
        )
        return ppf

    def get_dist(self, date: Timestamp) -> Dist:
        """Return the fitted distribution whose data contains `date`.

        Parameters
        ----------
        date : Timestamp
            Date to look up in the data of each fitted distribution.

        Raises
        ------
        KeyError
            If `date` is not present in any fitted distribution's data.
        """
        for k in self._dist_dict:
            dist = self._dist_dict[k]
            if date in dist.data.index:
                return dist

        raise KeyError("Date not found in distributions")